libflame
Functions
FLA_Error FLA_Scalr_external_gpu (FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void *A_gpu)
FLA_Error FLA_Scalr_external_gpu ( FLA_Uplo  uplo,
                                   FLA_Obj   alpha,
                                   FLA_Obj   A,
                                   void *    A_gpu
                                 )
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_equals(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, and FLA_Scalr_check().
Referenced by FLASH_Queue_exec_task_gpu().
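
This routine scales the triangle of A indicated by uplo by the scalar alpha, operating directly on the device copy of the matrix addressed by A_gpu. It quick-returns if A has a zero dimension or if alpha equals FLA_ONE. The triangle is traversed one column at a time with the appropriate cublas?scal call: for the lower triangle, column i is scaled from the diagonal element downward (m_A - i elements); for the upper triangle, column i is scaled from the first row through the diagonal (min(i + 1, m_A) elements). Column-major storage on the device, with a leading dimension equal to the object's length, is assumed.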
{
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          ldim_A, inc_A;
  int          i;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Scalr_check( uplo, alpha, A );

  /* Quick return if A is empty. */
  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  /* Quick return if alpha is one; scaling would be a no-op. */
  if ( FLA_Obj_equals( alpha, FLA_ONE ) )
  {
    return FLA_SUCCESS;
  }

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  /* The device copy is assumed to be column-major with a leading dimension
     equal to the matrix length. */
  ldim_A   = FLA_Obj_length( A );
  inc_A    = 1;

  if ( uplo == FLA_LOWER_TRIANGULAR )
  {
    switch ( datatype )
    {
      case FLA_FLOAT:
      {
        float* buff_alpha = ( float* ) FLA_FLOAT_PTR( alpha );
        float* buff_A_gpu = ( float* ) A_gpu;

        /* Scale column i from the diagonal element A(i,i) downward:
           m_A - i elements per column. */
        for ( i = 0; i < min( n_A, m_A ); i++ )
          cublasSscal( m_A - i,
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A + i, inc_A );

        break;
      }
      case FLA_DOUBLE:
      {
        double* buff_alpha = ( double* ) FLA_DOUBLE_PTR( alpha );
        double* buff_A_gpu = ( double* ) A_gpu;

        for ( i = 0; i < min( n_A, m_A ); i++ )
          cublasDscal( m_A - i,
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A + i, inc_A );

        break;
      }
      case FLA_COMPLEX:
      {
        cuComplex* buff_alpha = ( cuComplex* ) FLA_COMPLEX_PTR( alpha );
        cuComplex* buff_A_gpu = ( cuComplex* ) A_gpu;

        for ( i = 0; i < min( n_A, m_A ); i++ )
          cublasCscal( m_A - i,
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A + i, inc_A );

        break;
      }
      case FLA_DOUBLE_COMPLEX:
      {
        cuDoubleComplex* buff_alpha = ( cuDoubleComplex* ) FLA_DOUBLE_COMPLEX_PTR( alpha );
        cuDoubleComplex* buff_A_gpu = ( cuDoubleComplex* ) A_gpu;

        for ( i = 0; i < min( n_A, m_A ); i++ )
          cublasZscal( m_A - i,
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A + i, inc_A );

        break;
      }
    }
  }
  else if ( uplo == FLA_UPPER_TRIANGULAR )
  {
    switch ( datatype )
    {
      case FLA_FLOAT:
      {
        float* buff_alpha = ( float* ) FLA_FLOAT_PTR( alpha );
        float* buff_A_gpu = ( float* ) A_gpu;

        /* Scale column i from row 0 through the diagonal:
           min( i + 1, m_A ) elements per column. */
        for ( i = 0; i < n_A; i++ )
          cublasSscal( min( i + 1, m_A ),
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A, inc_A );

        break;
      }
      case FLA_DOUBLE:
      {
        double* buff_alpha = ( double* ) FLA_DOUBLE_PTR( alpha );
        double* buff_A_gpu = ( double* ) A_gpu;

        for ( i = 0; i < n_A; i++ )
          cublasDscal( min( i + 1, m_A ),
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A, inc_A );

        break;
      }
      case FLA_COMPLEX:
      {
        cuComplex* buff_alpha = ( cuComplex* ) FLA_COMPLEX_PTR( alpha );
        cuComplex* buff_A_gpu = ( cuComplex* ) A_gpu;

        for ( i = 0; i < n_A; i++ )
          cublasCscal( min( i + 1, m_A ),
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A, inc_A );

        break;
      }
      case FLA_DOUBLE_COMPLEX:
      {
        cuDoubleComplex* buff_alpha = ( cuDoubleComplex* ) FLA_DOUBLE_COMPLEX_PTR( alpha );
        cuDoubleComplex* buff_A_gpu = ( cuDoubleComplex* ) A_gpu;

        for ( i = 0; i < n_A; i++ )
          cublasZscal( min( i + 1, m_A ),
                       *buff_alpha,
                       buff_A_gpu + i * ldim_A, inc_A );

        break;
      }
    }
  }

  return FLA_SUCCESS;
}
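
Below is a minimal, hypothetical usage sketch; it does not appear in the libflame sources. It assumes the legacy cuBLAS API (cublasInit(), cublasSetMatrix()) that the routine itself targets, standard CUDA allocation, and that the caller stages the matrix on the device before the call. Error checking is omitted for brevity.

#include "FLAME.h"
#include <cuda_runtime.h>
#include <cublas.h>

int main( void )
{
  FLA_Obj A, alpha;
  void*   A_gpu;
  int     m = 4, n = 4;

  FLA_Init();
  cublasInit();

  /* Create an m-by-n double-precision matrix of ones and a scalar alpha = 2.0. */
  FLA_Obj_create( FLA_DOUBLE, m, n, 0, 0, &A );
  FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &alpha );
  FLA_Set( FLA_ONE, A );
  *FLA_DOUBLE_PTR( alpha ) = 2.0;

  /* Copy A to the device. The routine expects a leading dimension equal to
     the matrix length, so the device copy uses ldim = m. */
  cudaMalloc( &A_gpu, m * n * sizeof( double ) );
  cublasSetMatrix( m, n, sizeof( double ),
                   FLA_Obj_buffer_at_view( A ), m, A_gpu, m );

  /* Scale only the lower triangle of the device copy by alpha. */
  FLA_Scalr_external_gpu( FLA_LOWER_TRIANGULAR, alpha, A, A_gpu );

  /* Copy the result back into A's host buffer. */
  cublasGetMatrix( m, n, sizeof( double ),
                   A_gpu, m, FLA_Obj_buffer_at_view( A ), m );

  cudaFree( A_gpu );
  FLA_Obj_free( &A );
  FLA_Obj_free( &alpha );
  cublasShutdown();
  FLA_Finalize();
  return 0;
}

After the call, the strictly upper triangle of A still holds ones while the lower triangle (diagonal included) holds 2.0, mirroring what the column-wise cublasDscal loop above computes.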