libflame  12600
Functions
FLA_Axpy_external_gpu.c File Reference

(r12600)

Functions

FLA_Error FLA_Axpy_external_gpu (FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)

Function Documentation

FLA_Error FLA_Axpy_external_gpu ( FLA_Obj  alpha,
FLA_Obj  A,
void *  A_gpu,
FLA_Obj  B,
void *  B_gpu 
)

References FLA_Axpy_check(), FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLASH_Queue_exec_task_gpu().

{
  FLA_Datatype datatype;
  int          m_B, n_B;
  int          ldim_A, inc_A;
  int          ldim_B, inc_B;
  int          i;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Axpy_check( alpha, A, B );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  ldim_A   = FLA_Obj_length( A );
  inc_A    = 1;

  m_B      = FLA_Obj_length( B );
  n_B      = FLA_Obj_width( B );
  ldim_B   = FLA_Obj_length( B );
  inc_B    = 1;

  switch ( datatype ){

  case FLA_FLOAT:
  {
    float* buff_alpha = ( float* ) FLA_FLOAT_PTR( alpha );
    float* buff_A_gpu = ( float* ) A_gpu;
    float* buff_B_gpu = ( float* ) B_gpu;

    for ( i = 0; i < n_B; i++ )
      cublasSaxpy( m_B,
                   *buff_alpha,
                   buff_A_gpu + i * ldim_A, inc_A,
                   buff_B_gpu + i * ldim_B, inc_B );

    break;
  }

  case FLA_DOUBLE:
  {
    double* buff_alpha = ( double* ) FLA_DOUBLE_PTR( alpha );
    double* buff_A_gpu = ( double* ) A_gpu;
    double* buff_B_gpu = ( double* ) B_gpu;

    for ( i = 0; i < n_B; i++ )
      cublasDaxpy( m_B,
                   *buff_alpha,
                   buff_A_gpu + i * ldim_A, inc_A,
                   buff_B_gpu + i * ldim_B, inc_B );

    break;
  }

  case FLA_COMPLEX:
  {
    cuComplex* buff_alpha = ( cuComplex* ) FLA_COMPLEX_PTR( alpha );
    cuComplex* buff_A_gpu = ( cuComplex* ) A_gpu;
    cuComplex* buff_B_gpu = ( cuComplex* ) B_gpu;

    for ( i = 0; i < n_B; i++ )
      cublasCaxpy( m_B,
                   *buff_alpha,
                   buff_A_gpu + i * ldim_A, inc_A,
                   buff_B_gpu + i * ldim_B, inc_B );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    cuDoubleComplex* buff_alpha = ( cuDoubleComplex* ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    cuDoubleComplex* buff_A_gpu = ( cuDoubleComplex* ) A_gpu;
    cuDoubleComplex* buff_B_gpu = ( cuDoubleComplex* ) B_gpu;

    for ( i = 0; i < n_B; i++ )
      cublasZaxpy( m_B,
                   *buff_alpha,
                   buff_A_gpu + i * ldim_A, inc_A,
                   buff_B_gpu + i * ldim_B, inc_B );

    break;
  }

  }
  
  return FLA_SUCCESS;
}