libflame  12600
Functions
FLA_Scal_external_gpu.c File Reference

(r12600)

Functions

FLA_Error FLA_Scal_external_gpu (FLA_Obj alpha, FLA_Obj A, void *A_gpu)

Function Documentation

FLA_Error FLA_Scal_external_gpu ( FLA_Obj  alpha,
FLA_Obj  A,
void *  A_gpu 
)

References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_equals(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, and FLA_Scal_check().

Referenced by FLA_Gemm_external_gpu() and FLASH_Queue_exec_task_gpu().

{
  /*
   * Scales the buffer A_gpu by alpha, one column per cuBLAS scal call.
   *
   * The dimensions and datatype are taken from the object A, while the
   * data that is actually scaled is the buffer passed in as A_gpu. That
   * buffer is addressed with a column stride of FLA_Obj_length( A ) and
   * a unit stride between consecutive elements of a column.
   *
   * Returns FLA_SUCCESS on every path through this function.
   */
  FLA_Datatype dt_A;
  int          n_rows, n_cols;
  int          col_stride, elem_stride;
  int          j;

  /* Parameter checking is only performed at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Scal_check( alpha, A );

  /* An object with a zero dimension has nothing to scale. */
  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  /* Scaling by FLA_ONE would leave the data unchanged; skip the GPU calls. */
  if ( FLA_Obj_equals( alpha, FLA_ONE ) )
    return FLA_SUCCESS;

  dt_A        = FLA_Obj_datatype( A );

  n_rows      = FLA_Obj_length( A );
  n_cols      = FLA_Obj_width( A );
  col_stride  = FLA_Obj_length( A );
  elem_stride = 1;

  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float  scalar = *( ( float* ) FLA_FLOAT_PTR( alpha ) );
      float* column = ( float* ) A_gpu;

      for ( j = 0; j < n_cols; ++j, column += col_stride )
        cublasSscal( n_rows, scalar, column, elem_stride );

      break;
    }

    case FLA_DOUBLE:
    {
      double  scalar = *( ( double* ) FLA_DOUBLE_PTR( alpha ) );
      double* column = ( double* ) A_gpu;

      for ( j = 0; j < n_cols; ++j, column += col_stride )
        cublasDscal( n_rows, scalar, column, elem_stride );

      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex  scalar = *( ( cuComplex* ) FLA_COMPLEX_PTR( alpha ) );
      cuComplex* column = ( cuComplex* ) A_gpu;

      for ( j = 0; j < n_cols; ++j, column += col_stride )
        cublasCscal( n_rows, scalar, column, elem_stride );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex  scalar = *( ( cuDoubleComplex* ) FLA_DOUBLE_COMPLEX_PTR( alpha ) );
      cuDoubleComplex* column = ( cuDoubleComplex* ) A_gpu;

      for ( j = 0; j < n_cols; ++j, column += col_stride )
        cublasZscal( n_rows, scalar, column, elem_stride );

      break;
    }

    default:
      /* Any other datatype is left untouched. */
      break;
  }

  return FLA_SUCCESS;
}