#include
#include
#include
#include "FLAME.h"
#include "Chol_prototypes.h"
/*
 * Compile-time switches that control what gets timed.
 *
 * Each switch is defined as TRUE or FALSE and is tested by the preprocessor
 * (for example `#if TIME_LAPACK == TRUE` further down), so these must remain
 * plain object-like macros that expand to integer constants.
 */
#define TRUE 1
#define FALSE 0
/* NOTE(review): presumably selects Octave/MATLAB-friendly output (the prompts
   are echoed behind a leading '%'); its use is not visible here -- confirm. */
#define OCTAVE TRUE
/* One switch per unblocked variant (1-3).
   NOTE(review): presumably each one enables the timing section of the
   matching unblocked Cholesky variant -- confirm against the #if guards. */
#define TIME_UNB_VAR1 TRUE
#define TIME_UNB_VAR2 TRUE
#define TIME_UNB_VAR3 TRUE
/* One switch per blocked variant (1-3).
   NOTE(review): presumably each one enables the timing section of the
   matching blocked Cholesky variant -- confirm against the #if guards. */
#define TIME_BLK_VAR1 TRUE
#define TIME_BLK_VAR2 TRUE
#define TIME_BLK_VAR3 TRUE
/* Tested with `#if TIME_LAPACK == TRUE` just before the reference
   implementation is timed; currently disabled. */
#define TIME_LAPACK FALSE
int main(int argc, char *argv[])
{
int n, nfirst, nlast, ninc, nlast_unb, i, irep,
nrepeats, nb_alg;
double
dtime, dtime_best,
gflops, max_gflops,
diff, d_n;
FLA_Obj
A, Aref, Aold, delta;
/* Initialize FLAME */
FLA_Init( );
/* Every timing trial is repeated "nrepeats" times and the fastest run is recorded */
printf( "%% number of repeats:" );
scanf( "%d", &nrepeats );
printf( "%% %d\n", nrepeats );
/* Enter the max GFLOPS attainable
This is used to set the y-axis range for the graphs. Here is how
you figure out what to enter (on Linux machines):
1) more /proc/cpuinfo (this lists the contents of this file).
2) read through this and figure out the clock rate of the machine (in GHz).
3) Find out (from an expert or from the web) the number of floating point
instructions that can be performed per core per clock cycle.
4) Figure out if you are using "multithreaded BLAS" which automatically
parallelize calls to the Basic Linear Algebra Subprograms. If so,
check how many cores are available.
5) Multiply 2) x 3) x 4) and enter this in response to the below.
If you enter a value for max GFLOPS that is lower than the maximum that
is observed in the experiments, then the top of the graph is set to the
observed maximum. Thus, one possibility is to simply set this to 0.0.
*/
printf( "%% enter max GFLOPS:" );
scanf( "%lf", &max_gflops );
printf( "%% %lf\n", max_gflops );
/* Enter the algorithmic block size */
printf( "%% enter nb_alg:" );
scanf( "%d", &nb_alg );
printf( "%% %d\n", nb_alg );
/* Timing trials for matrix sizes n=nfirst to nlast in increments
of ninc will be performed. Unblocked versions are only tested to
nlast_unb */
printf( "%% enter nfirst, nlast, ninc, nlast_unb:" );
scanf( "%d%d%d%d", &nfirst, &nlast, &ninc, &nlast_unb );
printf( "%% %d %d %d %d\n", nfirst, nlast, ninc, nlast_unb );
i = 1;
for ( n=nfirst; n<= nlast; n+=ninc ){
/* Allocate space for the matrices */
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &A );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aref );
FLA_Obj_create( FLA_DOUBLE, n, n, 1, n, &Aold );
FLA_Obj_create( FLA_DOUBLE, 1, 1, 1, 1, &delta );
/* Generate random matrix A and save in Aold */
FLA_Random_matrix( Aold );
/* Add something large to the diagonal to make sure it isn't ill-conditioned */
d_n = ( double ) n;
*( ( double * ) FLA_Obj_buffer_at_view( delta ) ) = d_n;
FLA_Shift_diag( FLA_NO_CONJUGATE, delta, Aold );
/* Set gflops = billions of floating point operations that will be performed */
gflops = 1.0/3.0 * n * n * n * 1.0e-09;
/* Time the reference implementation */
#if TIME_LAPACK == TRUE
#else
// if ( n <= nlast_unb )
#endif
{
for ( irep=0; irep max_gflops )
max_gflops = gflops / dtime_best;
fflush( stdout );
/* Time your implementations */
/* Variant 1 unblocked */
if ( n <= nlast_unb ){
for ( irep=0; irep