#include "mpi.h" 
#include "PLA.h"
                       /* macro define storage of A and x to be FORTRAN-like */
#define A(i,j,lda) a[ ((j)-1)*(lda) + (i)-1 ] 
#define X(i)       x[ (i)-1 ]
                          /* define y to be global, to allow it to be passed 
                                                   to routine process_vector */
double *y;

void A_x_fill( PLA_Obj a_global, PLA_Obj x_global )
{
  int me, m, n, lda, i, j, zero_or_one, i_one = 1;
  PLA_Template  template;
  double *a, *x, d_zero = 0.0, d_one = 1.0;

  PLA_Obj_template( a_global, &template );      
  PLA_Temp_zero_or_one( template, &zero_or_one );     /* extract zero_or_one */
  PLA_Temp_comm_all_rank( template, &me );      /* extract this nodes's rank */

  PLA_Obj_set_to_zero( a_global );                             /* zero out A */
  PLA_Obj_set_to_zero( x_global );                             /* zero out x */
  PLA_Obj_global_length( a_global, &m );     /* extract global row dimension */
  PLA_Obj_global_width ( a_global, &n );     /* extract global col dimension */

  PLA_API_begin( );               /* enter application interface (API) state */
    PLA_Obj_API_open( a_global );   /* open matrix A for reading and writing */
    PLA_Obj_API_open( x_global );   /* open vector x for reading and writing */
    if ( me == 0 ){                     /* Fill global matrix A and vector x */
                                /* create local matrix and vector to be used 
                                      on node zero to create a sample matrix */
      lda = m;
      a = ( double * ) malloc( lda * n * sizeof( double ) );
      x = ( double * ) malloc( n * sizeof( double ) );
      y = ( double * ) malloc( m * sizeof( double ) ); 
      for (j=1;j<=n;j++) {               /* fill A and x with random numbers */
        for (i=1;i<=m;i++)
          A(i,j,lda) = (double) i+j/n;
        X( j ) = (double) j/n;
      }
                          /* distribute entries of A and x to global objects */
      PLA_API_axpy_matrix_to_global( m, n, &d_one, a, lda, a_global, 
                                          zero_or_one, zero_or_one );
      PLA_API_axpy_vector_to_global( n, &d_one, x, 1, x_global, zero_or_one );
                             /* compute result y = A x sequentially, to be 
                                                   checked in process_vector */
      PLA_dgemv( "Notranspose", &m, &n, &d_one, a, &lda, x, &i_one, 
                                        &d_zero, y, &i_one ); 
      free( a );        free( x );                   /* free temporary space */
    }
                                   /* close matrix A for reading and writing */
    PLA_Obj_API_close( a_global );    
                                   /* close vector x for reading and writing */
    PLA_Obj_API_close( x_global );   
  PLA_API_end( );                                          /* exit API state */
}
