/* On Solaris, compile with:

cc -mt -fast bts_hand.c -I$CODEDIR/runtime/sparc $CODEDIR/runtime/sparc/fsynch.o $CODEDIR/runtime/sparc/fthread.o -o bts_hand -lm

*/


#include <stdio.h>
#include <fthread.h>
#include <fsynch.h>

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <sys/times.h>
#include <time.h>

/* CPU-time bookkeeping shared by StartClock, StopClock and StopAll.
 * StartClock stores the system/user CPU seconds consumed so far; StopClock
 * then overwrites each with the amount consumed since StartClock. */
static float c2_SysSec;
static float c2_UserSec;
/* Values returned by times() in StartClock and StopClock respectively;
 * StopAll prints their difference, scaled by CLK_TCK, as wall-clock time. */
static clock_t start_time;
static clock_t end_time;


/*
 * StartClock - open the timed region.
 *
 * Records the value returned by times() in start_time and converts the
 * process's system and user CPU tick counts at this instant to seconds,
 * leaving them in c2_SysSec and c2_UserSec for StopClock to subtract from.
 */
void StartClock() {
  struct tms cpu_at_start;
  float ticks_per_sec;

  start_time = times(&cpu_at_start);
  ticks_per_sec = (float)CLK_TCK;

  c2_UserSec = (float)cpu_at_start.tms_utime/ticks_per_sec;
  c2_SysSec  = (float)cpu_at_start.tms_stime/ticks_per_sec;
}

/*
 * StopClock - close the timed region.
 *
 * Records the value returned by times() in end_time, then replaces the
 * starting CPU figures held in c2_SysSec / c2_UserSec with the system and
 * user CPU seconds consumed since StartClock was called.
 */
void StopClock() {
  struct tms cpu_at_stop;
  float ticks_per_sec;

  end_time = times(&cpu_at_stop);
  ticks_per_sec = (float)CLK_TCK;

  c2_UserSec = (float)cpu_at_stop.tms_utime/ticks_per_sec - c2_UserSec;
  c2_SysSec  = (float)cpu_at_stop.tms_stime/ticks_per_sec - c2_SysSec;
}


/* Print the timings recorded by StartClock/StopClock and flush the output streams */

/*
 * StopAll - report the timings gathered by StartClock and StopClock.
 *
 * Prints the wall-clock interval (end_time - start_time, in seconds) and
 * the system and user CPU seconds to stdout, then flushes stdout and
 * stderr.  Nothing is computed beyond the wall-clock conversion.
 */
void StopAll()
{
  float wall_seconds = (float)(end_time - start_time)/(float)CLK_TCK;

  fputs("Time between completion of node and firing of node:\n", stdout);
  printf("  Wall clock time = %f seconds.\n", wall_seconds);
  printf("  System time: %f seconds.\n", c2_SysSec);
  printf("  User time  : %f seconds.\n", c2_UserSec);

  fflush(stdout);
  fflush(stderr);
}


/* Scalar type used for all matrix and vector entries. */
typedef double real;
/* A Vector is a heap array of doubles; a Matrix is an array of row Vectors. */
typedef double *Vector;
typedef Vector *Matrix;

/* Right-hand side; solveblock and blkmult update it in place and printans
 * prints its entries as the solution x. */
static Vector b;
/* Coefficient rows; MakeA gives row i exactly i+1 entries (columns 0..i). */
static Matrix a;
static int n;     /* system size */
static int blk;   /* size of a blk */
/* Index of the block main has most recently solved (-1 before the first);
 * ThreadProc polls it to learn when that block's values are ready. */
static int WhichBlock;

/*
 * readsize - prompt on stdout and read the problem dimensions from stdin.
 *
 * Reads the number of blocks and the block size, stores the block size in
 * blk and their product in n, then reads the "print all" flag into *printp.
 *
 * Every scanf result is checked, both dimensions must be positive, and
 * their product must fit in an int.  On any violation a message is written
 * to stderr and the program exits with EXIT_FAILURE: a failed scanf would
 * otherwise leave the value indeterminate, and a block size of 0 would
 * reach the n % blk in setnumblks.
 */
void readsize(printp)
  int *printp;
{
  int numblks;

  printf("Enter the size of the block system: ");
  if (scanf("%d", &numblks) != 1 || numblks <= 0) {
    fprintf(stderr, "readsize: number of blocks must be a positive integer\n");
    exit(EXIT_FAILURE);
  }

  printf("Enter the size of a block (integer): ");
  if (scanf("%d", &blk) != 1 || blk <= 0) {
    fprintf(stderr, "readsize: block size must be a positive integer\n");
    exit(EXIT_FAILURE);
  }

  /* Reject sizes whose product would overflow the int that holds n. */
  if (numblks > INT_MAX / blk) {
    fprintf(stderr, "readsize: %d blocks of size %d is too large\n",
            numblks, blk);
    exit(EXIT_FAILURE);
  }
  n = numblks*blk;

  printf("Print all (0 or 1): ");
  if (scanf("%d", printp) != 1) {
    fprintf(stderr, "readsize: print flag must be an integer\n");
    exit(EXIT_FAILURE);
  }
}

  /* Allocate space for b */

Vector MakeB()
{
   return (Vector) malloc(n*sizeof(real));
}

  /* Allocate space for A */

Matrix MakeA()
{
   Matrix t;
   int i;

   t = (Matrix) malloc(n*sizeof(Vector));

   for (i = 0; i < n; i++)
      t[i] = (Vector) malloc((i+1)*sizeof(real));

   return t;
}

  /* Fill a and b with their values (storage comes from MakeA and MakeB) */

/*
 * readsys - fill the already-allocated system with its fixed values.
 *
 * Sets b[i] to i+1 and every stored entry of row i of a (columns 0..i)
 * to 1.0, for each of the n rows.
 */
void readsys()
{
   int row;

   for (row = 0; row < n; row++) {
      Vector coeffs;
      int col = 0;

      b[row] = row + 1.0;
      coeffs = a[row];

      while (col <= row)
        coeffs[col++] = 1.0;
   }
}


  /* compute the number of blocks needed to cover the n rows */

void setnumblks(nb)
  int *nb;
{
   int i = n % blk;

   if (i == 0)
     *nb = n / blk;
   else
     *nb = (n/blk) + 1;
}


  /* Solve a block wb */

void solveblock(wb)
  int wb;
{
  int init = wb * blk;
  int final = init + blk;
  double sum;
  Vector tmp;
  int i, j;

  if (final > n)
    final = n;

  for (i = init; i < final; i++) {
     sum = b[i];
     tmp = a[i];
     for (j = init; j < i; j++)
       sum = sum - tmp[j]*b[j];
     b[i] = sum / tmp[i];
  }
}


   /* Do matrix vector multiply for a block brow, bcol */

/*
 * blkmult - subtract the contribution of column block bcol from the rows
 * of block brow.
 *
 * For every row i in block brow, replaces b[i] with
 * b[i] - sum over j in block bcol of a[i][j]*b[j].
 *
 * Row i of a stores only columns 0..i (see MakeA), so the caller must pass
 * bcol < brow; ThreadProc does so.  Both the row and the column range are
 * clamped to n so a short final block is never overrun; the original
 * clamped only the rows.  The unused local "init" has been removed.
 */
void blkmult(brow, bcol)
   int brow;
   int bcol;
{
   int init_row = brow * blk;
   int end_row = init_row + blk;
   int init_col = bcol * blk;
   int end_col = init_col + blk;
   double sum;
   Vector tmp;
   int i, j;

   if (end_row > n)
     end_row = n;
   if (end_col > n)
     end_col = n;

   for (i = init_row; i < end_row; i++) {
      sum = b[i];
      tmp = a[i];
      for (j = init_col; j < end_col; j++)
        sum = sum - tmp[j]*b[j];
      b[i] = sum;
   }
}


   /* print b */

/*
 * printans - print entries of b as the solution x.
 *
 * When printp is 0 only the single entry at index n/2 is printed;
 * otherwise every entry 0..n-1 is printed, one per line.
 */
void printans(printp)
   int printp;
{
   int first, last;
   int k;

   if (printp != 0) {
      first = 0;
      last = n - 1;
   } else {
      first = last = n/2;
   }

   k = first;
   while (k <= last) {
      printf("x[%d] = %f\n", k, b[k]);
      k++;
   }
}


/* Handles returned by ThreadStart: main stores the worker for block row j
 * in proc[j] (j starts at 1, so slot 0 is unused) and later passes each one
 * to ThreadJoin.  Capacity is fixed at 32 entries. */
Thread *proc[32];   /* Note this has to be shared.  Could put it */
                           /*   on stack in main */
/* hold[i] is the barrier main initializes after solving block i; main and
 * ThreadProc both call BarrierHit on it.  Capacity is fixed at 35 entries,
 * which differs from proc's 32. */
Barrier hold[35];


/*
 * ThreadProc - worker body for block row id.
 *
 * For each column block Count = 0 .. id-1, in order: waits until main has
 * set WhichBlock to Count, subtracts that block's contribution from the
 * rows of block id with blkmult, then hits hold[Count].
 *
 * Changes from the original:
 *  - WhichBlock is read through a volatile lvalue.  The wait loop has an
 *    empty body, so with a plain read an optimizing compiler may load
 *    WhichBlock once and spin forever.  This only forces a fresh load per
 *    iteration; it adds no memory-ordering guarantees.
 *    NOTE(review): confirm that is sufficient on the target, or switch to
 *    an fsynch primitive.
 *  - The loop is bounded by Count < id, so id <= 0 returns immediately
 *    instead of never terminating.  Behavior for id >= 1 is unchanged.
 */
void ThreadProc(id)
   int id;
{
   int Count;

   for (Count = 0; Count < id; Count++) {
      /* busy-wait until main announces that block Count is solved */
      while (*(volatile int *)&WhichBlock != Count) /* skip */ ;

      blkmult(id, Count);
      BarrierHit(&hold[Count]);
   }
}

   
main ()
{
   int printp;
   int BlocksToSolve;
   int i, j;

   WhichBlock = -1;

   readsize(&printp);         /* asserts that blk divides n, but alg */
                              /* does not require this */
   a = MakeA();
   b = MakeB();

   setnumblks(&BlocksToSolve);

   StartClock();

   for (j = 1; j < BlocksToSolve; j++) {
     int *jj;

     jj = (int *) malloc(sizeof(int));
     *jj = j;
     proc[j] = ThreadStart(ThreadProc, 1, "ThreadProc", 1*sizeof(int), jj);
   }

   readsys();

   for (i = 0; i < BlocksToSolve; i++) {
      solveblock(i);
      BarrierInit(&hold[i], BlocksToSolve - i, "hold");
      WhichBlock = i;
      BarrierHit(&hold[i]);
   }

   StopClock();
   StopAll();

   printans(printp);

   for (j = 1; j < BlocksToSolve; j++)
     ThreadJoin(proc[j]);
}
