|
libflame
12600
|
Go to the source code of this file.
FLA_Error FLA_Gemm( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_error_level(), FLA_Gemm_check(), FLA_Gemm_external(), and FLA_Gemm_internal().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
  // Gemm front end. When the error level is at least FLA_MIN_ERROR_CHECKING
  // the arguments are validated first; the work is then handed to either the
  // control-tree implementation or the external one, chosen at compile time.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Gemm_check( transa, transb, alpha, A, B, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree build: dispatch through FLA_Gemm_internal with the
  // fla_gemm_cntl_mm_op control structure.
  return FLA_Gemm_internal( transa, transb, alpha, A, B, beta, C, fla_gemm_cntl_mm_op );
#else
  // Default build: call the external implementation directly.
  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
#endif
}
FLA_Error FLA_Gemm_cc_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_cc().
{
  // Task wrapper with both trans parameters fixed to FLA_CONJ_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_ch_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_ch().
{
  // Task wrapper: transa = FLA_CONJ_NO_TRANSPOSE, transb = FLA_CONJ_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_check( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), and FLA_Check_valid_trans().
Referenced by FLA_Gemm(), FLA_Gemm_external(), FLA_Gemm_external_gpu(), and FLASH_Gemm().
{
  // Argument validation for Gemm. The checks run in a fixed order and each
  // result is passed to FLA_Check_error_code() before the next check starts.
  // FLA_SUCCESS is returned once every check has been processed.
  FLA_Error status;

  // Both trans parameters must be valid trans values.
  status = FLA_Check_valid_trans( transa );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_trans( transb );
  FLA_Check_error_code( status );

  // A must be a floating, non-constant object.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C must have the same datatype as A.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  // alpha and beta must have datatypes consistent with A.
  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // Dimensions of A, B and C must conform under the requested trans values.
  status = FLA_Check_matrix_matrix_dims( transa, transb, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Gemm_cn_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_cn().
{
  // Task wrapper: transa = FLA_CONJ_NO_TRANSPOSE, transb = FLA_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_ct_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_ct().
{
  // Task wrapper: transa = FLA_CONJ_NO_TRANSPOSE, transb = FLA_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_external( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References bl1_cgemm(), bl1_dgemm(), bl1_sgemm(), bl1_zgemm(), FLA_Check_error_level(), FLA_Gemm_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), and FLA_Scal_external().
Referenced by FLA_Bidiag_UT_u_blf_var4(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Gemm(), FLA_Gemm_cc_task(), FLA_Gemm_ch_task(), FLA_Gemm_cn_task(), FLA_Gemm_ct_task(), FLA_Gemm_hc_task(), FLA_Gemm_hh_task(), FLA_Gemm_hn_task(), FLA_Gemm_ht_task(), FLA_Gemm_nc_task(), FLA_Gemm_nh_task(), FLA_Gemm_nn_task(), FLA_Gemm_nt_task(), FLA_Gemm_task(), FLA_Gemm_tc_task(), FLA_Gemm_th_task(), FLA_Gemm_tn_task(), FLA_Gemm_tt_task(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_LQ_UT_blk_var2(), FLA_QR_UT_blk_var2(), FLA_QR_UT_piv_blk_var2(), FLA_SA_FS_blk(), and FLA_SA_LU_blk().
{
// External Gemm implementation: reads dimensions and strides from the FLA_Obj
// operands, maps the trans parameters to their BLIS counterparts, and calls
// the bl1_?gemm routine selected by the datatype of A.
// Returns FLA_SUCCESS on every path through this body.
FLA_Datatype datatype;
int k_AB;
int m_A, n_A;
int m_C, n_C;
int rs_A, cs_A;
int rs_B, cs_B;
int rs_C, cs_C;
trans1_t blis_transa;
trans1_t blis_transb;
// Arguments are validated here only at the full error-checking level.
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Gemm_check( transa, transb, alpha, A, B, beta, C );
// Nothing to do when C has a zero dimension.
if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
// When A or B has a zero dimension, only FLA_Scal_external( beta, C ) is
// called (presumably C := beta * C -- confirm against FLA_Scal_external).
if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
{
FLA_Scal_external( beta, C );
return FLA_SUCCESS;
}
// The datatype of A selects the kernel below.
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
n_A = FLA_Obj_width( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
rs_B = FLA_Obj_row_stride( B );
cs_B = FLA_Obj_col_stride( B );
m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );
rs_C = FLA_Obj_row_stride( C );
cs_C = FLA_Obj_col_stride( C );
// k_AB is taken from A: its width when transa is (conj-)no-transpose,
// its length otherwise.
if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE )
k_AB = n_A;
else
k_AB = m_A;
FLA_Param_map_flame_to_blis_trans( transa, &blis_transa );
FLA_Param_map_flame_to_blis_trans( transb, &blis_transb );
// Datatypes other than the four cases below fall through the switch without
// any computation; FLA_SUCCESS is still returned.
switch( datatype ){
case FLA_FLOAT:
{
float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
float *buff_C = ( float * ) FLA_FLOAT_PTR( C );
float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
// Dimension arguments are passed in the order m_C, k_AB, n_C.
bl1_sgemm( blis_transa,
blis_transb,
m_C,
k_AB,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE:
{
double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
double *buff_C = ( double * ) FLA_DOUBLE_PTR( C );
double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
bl1_dgemm( blis_transa,
blis_transb,
m_C,
k_AB,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_COMPLEX:
{
scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C );
scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta );
bl1_cgemm( blis_transa,
blis_transb,
m_C,
k_AB,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
bl1_zgemm( blis_transa,
blis_transb,
m_C,
k_AB,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
}
return FLA_SUCCESS;
}
FLA_Error FLA_Gemm_external_gpu( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, void * A_gpu, FLA_Obj B, void * B_gpu, FLA_Obj beta, FLA_Obj C, void * C_gpu )
References FLA_Check_error_level(), FLA_Gemm_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Scal_external_gpu().
Referenced by FLASH_Queue_exec_task_gpu().
{
// GPU variant of the external Gemm: dimensions and the datatype come from the
// FLA_Obj operands, while the matrix data passed to cublas?gemm comes from the
// A_gpu / B_gpu / C_gpu pointers. Returns FLA_SUCCESS on every path.
FLA_Datatype datatype;
int k_AB;
int m_A, n_A;
int m_C, n_C;
int ldim_A;
int ldim_B;
int ldim_C;
char blas_transa;
char blas_transb;
// Arguments are validated here only at the full error-checking level.
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Gemm_check( transa, transb, alpha, A, B, beta, C );
// Nothing to do when C has a zero dimension.
if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
// When A or B has a zero dimension, only FLA_Scal_external_gpu() is called
// on C / C_gpu with beta.
if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
{
FLA_Scal_external_gpu( beta, C, C_gpu );
return FLA_SUCCESS;
}
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
n_A = FLA_Obj_width( A );
// Leading dimensions are set to each object's length.
// NOTE(review): this presumes the GPU buffers are stored without padding
// between columns -- confirm against the code that fills *_gpu.
ldim_A = FLA_Obj_length( A );
ldim_B = FLA_Obj_length( B );
m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );
ldim_C = FLA_Obj_length( C );
// k_AB is taken from A: its width when transa is (conj-)no-transpose,
// its length otherwise.
if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE )
k_AB = n_A;
else
k_AB = m_A;
FLA_Param_map_flame_to_netlib_trans( transa, &blas_transa );
FLA_Param_map_flame_to_netlib_trans( transb, &blas_transb );
// Datatypes other than the four cases below fall through the switch without
// any computation; FLA_SUCCESS is still returned.
switch( datatype ){
case FLA_FLOAT:
{
float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
// alpha and beta are dereferenced and passed by value; dimension arguments
// are passed in the order m_C, n_C, k_AB.
cublasSgemm( blas_transa,
blas_transb,
m_C,
n_C,
k_AB,
*buff_alpha,
( float * ) A_gpu, ldim_A,
( float * ) B_gpu, ldim_B,
*buff_beta,
( float * ) C_gpu, ldim_C );
break;
}
case FLA_DOUBLE:
{
double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
cublasDgemm( blas_transa,
blas_transb,
m_C,
n_C,
k_AB,
*buff_alpha,
( double * ) A_gpu, ldim_A,
( double * ) B_gpu, ldim_B,
*buff_beta,
( double * ) C_gpu, ldim_C );
break;
}
case FLA_COMPLEX:
{
cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta );
cublasCgemm( blas_transa,
blas_transb,
m_C,
n_C,
k_AB,
*buff_alpha,
( cuComplex * ) A_gpu, ldim_A,
( cuComplex * ) B_gpu, ldim_B,
*buff_beta,
( cuComplex * ) C_gpu, ldim_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
cublasZgemm( blas_transa,
blas_transb,
m_C,
n_C,
k_AB,
*buff_alpha,
( cuDoubleComplex * ) A_gpu, ldim_A,
( cuDoubleComplex * ) B_gpu, ldim_B,
*buff_beta,
( cuDoubleComplex * ) C_gpu, ldim_C );
break;
}
}
return FLA_SUCCESS;
}
FLA_Error FLA_Gemm_hc_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_hc().
{
  // Task wrapper: transa = FLA_CONJ_TRANSPOSE, transb = FLA_CONJ_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_hh_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_hh().
{
  // Task wrapper with both trans parameters fixed to FLA_CONJ_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_hn_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_hn().
{
  // Task wrapper: transa = FLA_CONJ_TRANSPOSE, transb = FLA_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_TRANSPOSE;
  const FLA_Trans transb = FLA_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_ht_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_ht().
{
  // Task wrapper: transa = FLA_CONJ_TRANSPOSE, transb = FLA_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_CONJ_TRANSPOSE;
  const FLA_Trans transb = FLA_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_internal_check( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Gemm_internal().
{
  // Checks performed on behalf of FLA_Gemm_internal(): control structure
  // present, matching element types, conformal dimensions. Each result is
  // passed to FLA_Check_error_code() before the next check runs.
  FLA_Error status;

  // A NULL control structure is rejected first.
  status = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( status );

  // B and C must have the same element type as A.
  status = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( status );

  // Conformality of A, B and C. Because element length and width are used
  // rather than scalar length and width, the same check serves both
  // FLA_MATRIX and FLA_SCALAR element types.
  status = FLA_Check_matrix_matrix_dims( transa, transb, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Gemm_nc_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_nc().
{
  // Task wrapper: transa = FLA_NO_TRANSPOSE, transb = FLA_CONJ_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_nh_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_nh().
{
  // Task wrapper: transa = FLA_NO_TRANSPOSE, transb = FLA_CONJ_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_nn_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_nn().
{
  // Task wrapper with both trans parameters fixed to FLA_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_nt_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_nt().
{
  // Task wrapper: transa = FLA_NO_TRANSPOSE, transb = FLA_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_NO_TRANSPOSE;
  const FLA_Trans transb = FLA_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_task( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: forwards the caller's trans parameters and operands
  // unchanged to the external implementation. cntl is not used by this body.
  FLA_Error r_val = FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Gemm_tc_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_tc().
{
  // Task wrapper: transa = FLA_TRANSPOSE, transb = FLA_CONJ_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_th_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_th().
{
  // Task wrapper: transa = FLA_TRANSPOSE, transb = FLA_CONJ_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_TRANSPOSE;
  const FLA_Trans transb = FLA_CONJ_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_tn_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_tn().
{
  // Task wrapper: transa = FLA_TRANSPOSE, transb = FLA_NO_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_TRANSPOSE;
  const FLA_Trans transb = FLA_NO_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemm_tt_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t * cntl )
References FLA_Gemm_external().
Referenced by FLA_Gemm_tt().
{
  // Task wrapper with both trans parameters fixed to FLA_TRANSPOSE.
  // cntl is accepted but not used by this body.
  const FLA_Trans transa = FLA_TRANSPOSE;
  const FLA_Trans transb = FLA_TRANSPOSE;

  return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C );
}
FLA_Error FLA_Gemp( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
FLA_Error FLA_Gepm( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
FLA_Error FLA_Gepp( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
FLA_Error FLA_Hemm( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_error_level(), FLA_Hemm_check(), FLA_Hemm_external(), and FLA_Hemm_internal().
{
  // Hemm front end. When the error level is at least FLA_MIN_ERROR_CHECKING
  // the arguments are validated first; the work is then handed to either the
  // control-tree implementation or the external one, chosen at compile time.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Hemm_check( side, uplo, alpha, A, B, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree build: dispatch through FLA_Hemm_internal with the
  // fla_hemm_cntl_mm control structure.
  return FLA_Hemm_internal( side, uplo, alpha, A, B, beta, C, fla_hemm_cntl_mm );
#else
  // Default build: call the external implementation directly.
  return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );
#endif
}
FLA_Error FLA_Hemm_check( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_consistent_object_datatype(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_leftright_side(), and FLA_Check_valid_uplo().
Referenced by FLA_Hemm(), FLA_Hemm_external(), FLA_Hemm_external_gpu(), and FLASH_Hemm().
{
  // Argument validation for Hemm. The checks run in a fixed order and each
  // result is passed to FLA_Check_error_code() before the next check starts.
  // NOTE(review): FLA_Gemm_check() additionally calls
  // FLA_Check_floating_object( A ); no such check is made here -- confirm
  // that this is intended.
  FLA_Error status;

  // side must be a valid left/right value and uplo a valid uplo value.
  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  // A must be a non-constant object.
  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C must have the same datatype as A.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  // alpha and beta must have datatypes consistent with A.
  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // A must be square.
  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  // Conformality: A is the first factor for FLA_LEFT and the second factor
  // for any other side value.
  if ( side == FLA_LEFT )
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  else
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Hemm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References bl1_chemm(), bl1_dsymm(), bl1_ssymm(), bl1_zhemm(), FLA_Check_error_level(), FLA_Hemm_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_side(), and FLA_Param_map_flame_to_blis_uplo().
Referenced by FLA_Hemm(), FLA_Hemm_ll_task(), FLA_Hemm_lu_task(), FLA_Hemm_rl_task(), FLA_Hemm_ru_task(), and FLA_Hemm_task().
{
// External Hemm implementation: reads dimensions and strides from the FLA_Obj
// operands, maps side and uplo to their BLIS counterparts, and calls the
// kernel selected by the datatype of A (bl1_?symm for the real types,
// bl1_?hemm for the complex types).
// Returns FLA_SUCCESS on every path through this body.
FLA_Datatype datatype;
int m_C, n_C;
int rs_A, cs_A;
int rs_B, cs_B;
int rs_C, cs_C;
side1_t blis_side;
uplo1_t blis_uplo;
// Arguments are validated here only at the full error-checking level.
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Hemm_check( side, uplo, alpha, A, B, beta, C );
// Nothing to do when C has a zero dimension.
if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
datatype = FLA_Obj_datatype( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
rs_B = FLA_Obj_row_stride( B );
cs_B = FLA_Obj_col_stride( B );
// Only the dimensions of C are passed to the kernels.
m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );
rs_C = FLA_Obj_row_stride( C );
cs_C = FLA_Obj_col_stride( C );
FLA_Param_map_flame_to_blis_side( side, &blis_side );
FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );
// Datatypes other than the four cases below fall through the switch without
// any computation; FLA_SUCCESS is still returned.
switch( datatype ){
case FLA_FLOAT:
{
float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
float *buff_C = ( float * ) FLA_FLOAT_PTR( C );
float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
// Real single precision uses the symm kernel.
bl1_ssymm( blis_side,
blis_uplo,
m_C,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE:
{
double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
double *buff_C = ( double * ) FLA_DOUBLE_PTR( C );
double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
// Real double precision uses the symm kernel.
bl1_dsymm( blis_side,
blis_uplo,
m_C,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_COMPLEX:
{
scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C );
scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta );
bl1_chemm( blis_side,
blis_uplo,
m_C,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
bl1_zhemm( blis_side,
blis_uplo,
m_C,
n_C,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
}
return FLA_SUCCESS;
}
FLA_Error FLA_Hemm_external_gpu( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void * A_gpu, FLA_Obj B, void * B_gpu, FLA_Obj beta, FLA_Obj C, void * C_gpu )
References FLA_Check_error_level(), FLA_Hemm_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_side(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{
// GPU variant of the external Hemm: dimensions and the datatype come from the
// FLA_Obj operands, while the matrix data passed to the cuBLAS routine comes
// from the A_gpu / B_gpu / C_gpu pointers (cublas?symm for the real types,
// cublas?hemm for the complex types). Returns FLA_SUCCESS on every path.
FLA_Datatype datatype;
int m_C, n_C;
int ldim_A;
int ldim_B;
int ldim_C;
char blas_side;
char blas_uplo;
// Arguments are validated here only at the full error-checking level.
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Hemm_check( side, uplo, alpha, A, B, beta, C );
// Nothing to do when C has a zero dimension.
if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
datatype = FLA_Obj_datatype( A );
// Leading dimensions are set to each object's length.
// NOTE(review): this presumes the GPU buffers are stored without padding
// between columns -- confirm against the code that fills *_gpu.
ldim_A = FLA_Obj_length( A );
ldim_B = FLA_Obj_length( B );
m_C = FLA_Obj_length( C );
n_C = FLA_Obj_width( C );
ldim_C = FLA_Obj_length( C );
FLA_Param_map_flame_to_netlib_side( side, &blas_side );
FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo );
// Datatypes other than the four cases below fall through the switch without
// any computation; FLA_SUCCESS is still returned.
switch( datatype ){
case FLA_FLOAT:
{
float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
// alpha and beta are dereferenced and passed by value.
cublasSsymm( blas_side,
blas_uplo,
m_C,
n_C,
*buff_alpha,
( float * ) A_gpu, ldim_A,
( float * ) B_gpu, ldim_B,
*buff_beta,
( float * ) C_gpu, ldim_C );
break;
}
case FLA_DOUBLE:
{
double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
cublasDsymm( blas_side,
blas_uplo,
m_C,
n_C,
*buff_alpha,
( double * ) A_gpu, ldim_A,
( double * ) B_gpu, ldim_B,
*buff_beta,
( double * ) C_gpu, ldim_C );
break;
}
case FLA_COMPLEX:
{
cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta );
cublasChemm( blas_side,
blas_uplo,
m_C,
n_C,
*buff_alpha,
( cuComplex * ) A_gpu, ldim_A,
( cuComplex * ) B_gpu, ldim_B,
*buff_beta,
( cuComplex * ) C_gpu, ldim_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );
cublasZhemm( blas_side,
blas_uplo,
m_C,
n_C,
*buff_alpha,
( cuDoubleComplex * ) A_gpu, ldim_A,
( cuDoubleComplex * ) B_gpu, ldim_B,
*buff_beta,
( cuDoubleComplex * ) C_gpu, ldim_C );
break;
}
}
return FLA_SUCCESS;
}
FLA_Error FLA_Hemm_internal_check( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Hemm_internal().
{
  // Checks performed on behalf of FLA_Hemm_internal(): control structure
  // present, matching element types, conformal dimensions. Each result is
  // passed to FLA_Check_error_code() before the next check runs.
  FLA_Error status;

  // A NULL control structure is rejected first.
  status = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( status );

  // B and C must have the same element type as A.
  status = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( status );

  // Conformality of the operands. Because element length and width are used
  // rather than scalar length and width, the same check serves both
  // FLA_MATRIX and FLA_SCALAR element types. A is the first factor for
  // FLA_LEFT and the second factor for any other side value.
  if ( side == FLA_LEFT )
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  else
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Hemm_ll_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Hemm_external().
Referenced by FLA_Hemm_ll().
{
  // Task wrapper: side = FLA_LEFT, uplo = FLA_LOWER_TRIANGULAR.
  // cntl is accepted but not used by this body.
  const FLA_Side side = FLA_LEFT;
  const FLA_Uplo uplo = FLA_LOWER_TRIANGULAR;

  return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );
}
FLA_Error FLA_Hemm_lu_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Hemm_external().
Referenced by FLA_Hemm_lu().
{
  // Task wrapper: side = FLA_LEFT, uplo = FLA_UPPER_TRIANGULAR.
  // cntl is accepted but not used by this body.
  const FLA_Side side = FLA_LEFT;
  const FLA_Uplo uplo = FLA_UPPER_TRIANGULAR;

  return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );
}
FLA_Error FLA_Hemm_rl_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Hemm_external().
Referenced by FLA_Hemm_rl().
{
  // Task wrapper: side = FLA_RIGHT, uplo = FLA_LOWER_TRIANGULAR.
  // cntl is accepted but not used by this body.
  const FLA_Side side = FLA_RIGHT;
  const FLA_Uplo uplo = FLA_LOWER_TRIANGULAR;

  return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );
}
FLA_Error FLA_Hemm_ru_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Hemm_external().
Referenced by FLA_Hemm_ru().
{
  // Task wrapper: side = FLA_RIGHT, uplo = FLA_UPPER_TRIANGULAR.
  // cntl is accepted but not used by this body.
  const FLA_Side side = FLA_RIGHT;
  const FLA_Uplo uplo = FLA_UPPER_TRIANGULAR;

  return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );
}
FLA_Error FLA_Hemm_task( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t * cntl )
References FLA_Hemm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: forwards the caller's side, uplo and operands
  // unchanged to the external implementation. cntl is not used by this body.
  FLA_Error r_val = FLA_Hemm_external( side, uplo, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Her2k( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_error_level(), FLA_Her2k_check(), FLA_Her2k_external(), and FLA_Her2k_internal().
{
  // Her2k front end. When the error level is at least FLA_MIN_ERROR_CHECKING
  // the arguments are validated first; the work is then handed to either the
  // control-tree implementation or the external one, chosen at compile time.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree build: dispatch through FLA_Her2k_internal with the
  // fla_her2k_cntl_mm control structure.
  return FLA_Her2k_internal( uplo, trans, alpha, A, B, beta, C, fla_her2k_cntl_mm );
#else
  // Default build: call the external implementation directly.
  return FLA_Her2k_external( uplo, trans, alpha, A, B, beta, C );
#endif
}
FLA_Error FLA_Her2k_check( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References FLA_Check_consistent_object_datatype(), FLA_Check_identical_object_datatype(), FLA_Check_identical_object_precision(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_real_object(), FLA_Check_square(), FLA_Check_valid_complex_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Her2k(), FLA_Her2k_external(), FLA_Her2k_external_gpu(), and FLASH_Her2k().
{
  // Argument validation for Her2k. The checks run in a fixed order and each
  // result is passed to FLA_Check_error_code() before the next check starts.
  // NOTE(review): FLA_Gemm_check() additionally calls
  // FLA_Check_floating_object( A ); no such check is made here -- confirm
  // that this is intended.
  FLA_Error status;

  // uplo must be a valid uplo value and trans a valid complex trans value.
  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_complex_trans( trans );
  FLA_Check_error_code( status );

  // A must be a non-constant object; beta must be a real object.
  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_real_object( beta );
  FLA_Check_error_code( status );

  // B and C must have the same datatype as A.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  // alpha must have a datatype consistent with A; beta must match A's
  // precision.
  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_precision( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // C must be square.
  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  // Conformality: for FLA_NO_TRANSPOSE the second factor is checked as
  // transposed; for any other trans value the first factor is.
  if ( trans == FLA_NO_TRANSPOSE )
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, B, C );
  else
    status = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Her2k_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
References bl1_cher2k(), bl1_dsyr2k(), bl1_ssyr2k(), bl1_zher2k(), FLA_Check_error_level(), FLA_Her2k_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Scal_external().
Referenced by FLA_Her2k(), FLA_Her2k_lh_task(), FLA_Her2k_ln_task(), FLA_Her2k_task(), FLA_Her2k_uh_task(), FLA_Her2k_un_task(), FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_blk_var3().
{
// External Her2k implementation: reads dimensions and strides from the
// FLA_Obj operands, maps uplo and trans to their BLIS counterparts, and calls
// the kernel selected by the datatype of A (bl1_?syr2k for the real types,
// bl1_?her2k for the complex types).
// Returns FLA_SUCCESS on every path through this body.
FLA_Datatype datatype;
int k_AB;
int m_A, n_A;
int m_C;
int rs_A, cs_A;
int rs_B, cs_B;
int rs_C, cs_C;
uplo1_t blis_uplo;
trans1_t blis_trans;
// Arguments are validated here only at the full error-checking level.
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );
// Nothing to do when C has a zero dimension.
if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;
// When A or B has a zero dimension, only FLA_Scal_external( beta, C ) is
// called (presumably C := beta * C -- confirm against FLA_Scal_external).
if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
{
FLA_Scal_external( beta, C );
return FLA_SUCCESS;
}
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
n_A = FLA_Obj_width( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
rs_B = FLA_Obj_row_stride( B );
cs_B = FLA_Obj_col_stride( B );
// Only the length of C is read; FLA_Her2k_check() requires C to be square.
m_C = FLA_Obj_length( C );
rs_C = FLA_Obj_row_stride( C );
cs_C = FLA_Obj_col_stride( C );
// k_AB is taken from A: its width for FLA_NO_TRANSPOSE, its length otherwise.
if ( trans == FLA_NO_TRANSPOSE )
k_AB = n_A;
else
k_AB = m_A;
FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );
FLA_Param_map_flame_to_blis_trans( trans, &blis_trans );
// Datatypes other than the four cases below fall through the switch without
// any computation; FLA_SUCCESS is still returned.
switch( datatype ){
case FLA_FLOAT:
{
float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
float *buff_C = ( float * ) FLA_FLOAT_PTR( C );
float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
// Real single precision uses the syr2k kernel.
bl1_ssyr2k( blis_uplo,
blis_trans,
m_C,
k_AB,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE:
{
double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
double *buff_C = ( double * ) FLA_DOUBLE_PTR( C );
double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
// Real double precision uses the syr2k kernel.
bl1_dsyr2k( blis_uplo,
blis_trans,
m_C,
k_AB,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_COMPLEX:
{
scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C );
scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
// beta is read as a real (float) scalar even though A is complex;
// FLA_Her2k_check() requires beta to be a real object.
float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta );
bl1_cher2k( blis_uplo,
blis_trans,
m_C,
k_AB,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
// beta is read as a real (double) scalar even though A is complex.
double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta );
bl1_zher2k( blis_uplo,
blis_trans,
m_C,
k_AB,
buff_alpha,
buff_A, rs_A, cs_A,
buff_B, rs_B, cs_B,
buff_beta,
buff_C, rs_C, cs_C );
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Her2k_external_gpu | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | B, | ||
| void * | B_gpu, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| void * | C_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Her2k_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  const int rows_A = FLA_Obj_length( A );
  const int cols_A = FLA_Obj_width( A );

  // Object lengths are handed to cuBLAS in the leading-dimension slots
  // (presumably the device buffers are packed column-major -- TODO confirm).
  const int ld_A = FLA_Obj_length( A );
  const int ld_B = FLA_Obj_length( B );

  const int dim_C = FLA_Obj_length( C );
  const int ld_C  = FLA_Obj_length( C );

  // Inner dimension: width of A when trans is FLA_NO_TRANSPOSE, length otherwise.
  const int k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  // Translate the FLAME parameters into the character codes used by the BLAS calls.
  char uplo_ch;
  char trans_ch;
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      // Real single precision dispatches to the symmetric rank-2k routine.
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyr2k( uplo_ch, trans_ch,
                    dim_C, k,
                    *alpha_p,
                    ( float * ) A_gpu, ld_A,
                    ( float * ) B_gpu, ld_B,
                    *beta_p,
                    ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      // Real double precision dispatches to the symmetric rank-2k routine.
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyr2k( uplo_ch, trans_ch,
                    dim_C, k,
                    *alpha_p,
                    ( double * ) A_gpu, ld_A,
                    ( double * ) B_gpu, ld_B,
                    *beta_p,
                    ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      // alpha is read as a complex scalar, beta as a real one.
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      float     *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasCher2k( uplo_ch, trans_ch,
                    dim_C, k,
                    *alpha_p,
                    ( cuComplex * ) A_gpu, ld_A,
                    ( cuComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      // alpha is read as a complex scalar, beta as a real one.
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      double          *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasZher2k( uplo_ch, trans_ch,
                    dim_C, k,
                    *alpha_p,
                    ( cuDoubleComplex * ) A_gpu, ld_A,
                    ( cuDoubleComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Her2k_internal_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Her2k_internal().
{
  FLA_Error status;

  // The control structure must not be NULL.
  status = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( status );

  // B and C must have the same element type as A.
  status = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( status );

  // Conformality of A, B and C. Element length and width are used rather
  // than scalar length and width, so the check holds for both FLA_MATRIX
  // and FLA_SCALAR element types. The transposition pair passed to the
  // dimension check follows from trans.
  const FLA_Trans trans_first  = ( trans == FLA_NO_TRANSPOSE ) ? FLA_NO_TRANSPOSE
                                                               : FLA_TRANSPOSE;
  const FLA_Trans trans_second = ( trans == FLA_NO_TRANSPOSE ) ? FLA_TRANSPOSE
                                                               : FLA_NO_TRANSPOSE;

  status = FLA_Check_matrix_matrix_dims( trans_first, trans_second, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Her2k_lh_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_lh().
{
  // Lower-triangular, conjugate-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                                        alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Her2k_ln_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_ln().
{
  // Lower-triangular, no-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                                        alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Her2k_task | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Her2k_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Forward uplo and trans unchanged to the external implementation;
  // cntl is not used here.
  FLA_Error r_val = FLA_Her2k_external( uplo, trans, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Her2k_uh_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_uh().
{
  // Upper-triangular, conjugate-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Her2k_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                                        alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Her2k_un_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_her2k_t * | cntl | ||
| ) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_un().
{
  // Upper-triangular, no-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Her2k_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                                        alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Herk | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_error_level(), FLA_Herk_check(), FLA_Herk_external(), and FLA_Herk_internal().
{
  // Validate the operands whenever the error-checking level is at least
  // FLA_MIN_ERROR_CHECKING.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree front end enabled: use the internal implementation.
  return FLA_Herk_internal( uplo, trans, alpha, A, beta, C, fla_herk_cntl_mm );
#else
  // Otherwise call the external implementation directly.
  return FLA_Herk_external( uplo, trans, alpha, A, beta, C );
#endif
}
| FLA_Error FLA_Herk_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_identical_object_datatype(), FLA_Check_identical_object_precision(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_real_object(), FLA_Check_square(), FLA_Check_valid_complex_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Herk(), FLA_Herk_external(), FLA_Herk_external_gpu(), and FLASH_Herk().
{
  FLA_Error status;

  // Enumerated parameters.
  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_complex_trans( trans );
  FLA_Check_error_code( status );

  // A must not be a constant object.
  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // Both scalars must be real.
  status = FLA_Check_real_object( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_real_object( beta );
  FLA_Check_error_code( status );

  // C shares A's datatype; the scalars share A's precision.
  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_precision( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_precision( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // C must be square.
  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  // Conformality of A (used as both operands) with C; the transposition
  // pair passed to the dimension check follows from trans.
  const FLA_Trans trans_first  = ( trans == FLA_NO_TRANSPOSE ) ? FLA_NO_TRANSPOSE
                                                               : FLA_TRANSPOSE;
  const FLA_Trans trans_second = ( trans == FLA_NO_TRANSPOSE ) ? FLA_TRANSPOSE
                                                               : FLA_NO_TRANSPOSE;

  status = FLA_Check_matrix_matrix_dims( trans_first, trans_second, A, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Herk_external | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_cherk(), bl1_dsyrk(), bl1_ssyrk(), bl1_zherk(), FLA_Check_error_level(), FLA_Herk_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Scal_external().
Referenced by FLA_Herk(), FLA_Herk_lh_task(), FLA_Herk_ln_task(), FLA_Herk_task(), FLA_Herk_uh_task(), FLA_Herk_un_task(), FLA_Random_spd_matrix(), and FLA_UDdate_UT_unb_var1().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  // With an empty A, only the scaling of C by beta is performed.
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  const int rows_A = FLA_Obj_length( A );
  const int cols_A = FLA_Obj_width( A );

  const int rstride_A = FLA_Obj_row_stride( A );
  const int cstride_A = FLA_Obj_col_stride( A );

  const int dim_C = FLA_Obj_length( C );

  const int rstride_C = FLA_Obj_row_stride( C );
  const int cstride_C = FLA_Obj_col_stride( C );

  // Inner dimension: width of A when trans is FLA_NO_TRANSPOSE, length otherwise.
  const int k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  // Translate the FLAME parameters into their BLIS counterparts.
  uplo1_t  uplo_blis;
  trans1_t trans_blis;
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      // Real single precision dispatches to the symmetric rank-k routine.
      float *a_ptr     = ( float * ) FLA_FLOAT_PTR( A );
      float *c_ptr     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyrk( uplo_blis, trans_blis,
                 dim_C, k,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE:
    {
      // Real double precision dispatches to the symmetric rank-k routine.
      double *a_ptr     = ( double * ) FLA_DOUBLE_PTR( A );
      double *c_ptr     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyrk( uplo_blis, trans_blis,
                 dim_C, k,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_COMPLEX:
    {
      // Matrix buffers are complex; alpha and beta are read as real scalars.
      scomplex *a_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *c_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      float    *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
      float    *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_cherk( uplo_blis, trans_blis,
                 dim_C, k,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      // Matrix buffers are complex; alpha and beta are read as real scalars.
      dcomplex *a_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *c_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      double   *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
      double   *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_zherk( uplo_blis, trans_blis,
                 dim_C, k,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Herk_external_gpu | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| void * | C_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Herk_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  const int rows_A = FLA_Obj_length( A );
  const int cols_A = FLA_Obj_width( A );

  // Object lengths are handed to cuBLAS in the leading-dimension slots
  // (presumably the device buffers are packed column-major -- TODO confirm).
  const int ld_A = FLA_Obj_length( A );

  const int dim_C = FLA_Obj_length( C );
  const int ld_C  = FLA_Obj_length( C );

  // Inner dimension: width of A when trans is FLA_NO_TRANSPOSE, length otherwise.
  const int k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  // Translate the FLAME parameters into the character codes used by the BLAS calls.
  char uplo_ch;
  char trans_ch;
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      // Real single precision dispatches to the symmetric rank-k routine.
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyrk( uplo_ch, trans_ch,
                   dim_C, k,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   *beta_p,
                   ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      // Real double precision dispatches to the symmetric rank-k routine.
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyrk( uplo_ch, trans_ch,
                   dim_C, k,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   *beta_p,
                   ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      // Device buffers are complex; alpha and beta are read as real scalars.
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasCherk( uplo_ch, trans_ch,
                   dim_C, k,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      // Device buffers are complex; alpha and beta are read as real scalars.
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasZherk( uplo_ch, trans_ch,
                   dim_C, k,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Herk_internal_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Herk_internal().
{
  FLA_Error status;

  // The control structure must not be NULL.
  status = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( status );

  // C must have the same element type as A.
  status = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( status );

  // Conformality of A (used as both operands) with C. Element length and
  // width are used rather than scalar length and width, so the check holds
  // for both FLA_MATRIX and FLA_SCALAR element types. The transposition
  // pair passed to the dimension check follows from trans.
  const FLA_Trans trans_first  = ( trans == FLA_NO_TRANSPOSE ) ? FLA_NO_TRANSPOSE
                                                               : FLA_TRANSPOSE;
  const FLA_Trans trans_second = ( trans == FLA_NO_TRANSPOSE ) ? FLA_TRANSPOSE
                                                               : FLA_NO_TRANSPOSE;

  status = FLA_Check_matrix_matrix_dims( trans_first, trans_second, A, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Herk_lh_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Herk_external().
Referenced by FLA_Herk_lh().
{
  // Lower-triangular, conjugate-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Herk_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                                       alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Herk_ln_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Herk_external().
Referenced by FLA_Herk_ln().
{
  // Lower-triangular, no-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Herk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                                       alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Herk_task | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Herk_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Forward uplo and trans unchanged to the external implementation;
  // cntl is not used here.
  FLA_Error r_val = FLA_Herk_external( uplo, trans, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Herk_uh_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Herk_external().
Referenced by FLA_Herk_uh().
{
  // Upper-triangular, conjugate-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                                       alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Herk_un_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_herk_t * | cntl | ||
| ) |
References FLA_Herk_external().
Referenced by FLA_Herk_un().
{
  // Upper-triangular, no-transpose variant; cntl is not used here.
  FLA_Error r_val = FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                                       alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Symm | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_error_level(), FLA_Symm_check(), FLA_Symm_external(), and FLA_Symm_internal().
{
  // Validate the operands whenever the error-checking level is at least
  // FLA_MIN_ERROR_CHECKING.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree front end enabled: use the internal implementation.
  return FLA_Symm_internal( side, uplo, alpha, A, B, beta, C, fla_symm_cntl_mm );
#else
  // Otherwise call the external implementation directly.
  return FLA_Symm_external( side, uplo, alpha, A, B, beta, C );
#endif
}
| FLA_Error FLA_Symm_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_leftright_side(), and FLA_Check_valid_uplo().
Referenced by FLA_Symm(), FLA_Symm_external(), FLA_Symm_external_gpu(), and FLASH_Symm().
{
  FLA_Error status;

  // Enumerated parameters.
  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  // A must be a floating-point, non-constant object.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C share A's datatype; the scalars must be consistent with it.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // A must be square.
  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  // Conformality: A is the first operand when side is FLA_LEFT, otherwise
  // B takes that position and A becomes the second operand.
  const FLA_Obj first  = ( side == FLA_LEFT ) ? A : B;
  const FLA_Obj second = ( side == FLA_LEFT ) ? B : A;

  status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, first, second, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Symm_external | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_csymm(), bl1_dsymm(), bl1_ssymm(), bl1_zsymm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Symm_check().
Referenced by FLA_Symm(), FLA_Symm_ll_task(), FLA_Symm_lu_task(), FLA_Symm_rl_task(), FLA_Symm_ru_task(), and FLA_Symm_task().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  const int rstride_A = FLA_Obj_row_stride( A );
  const int cstride_A = FLA_Obj_col_stride( A );

  const int rstride_B = FLA_Obj_row_stride( B );
  const int cstride_B = FLA_Obj_col_stride( B );

  const int rows_C    = FLA_Obj_length( C );
  const int cols_C    = FLA_Obj_width( C );
  const int rstride_C = FLA_Obj_row_stride( C );
  const int cstride_C = FLA_Obj_col_stride( C );

  // Translate the FLAME parameters into their BLIS counterparts.
  side1_t side_blis;
  uplo1_t uplo_blis;
  FLA_Param_map_flame_to_blis_side( side, &side_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );

  // Dispatch on A's datatype; every buffer and scalar is read with that type.
  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_ptr     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_ptr     = ( float * ) FLA_FLOAT_PTR( B );
      float *c_ptr     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssymm( side_blis, uplo_blis,
                 rows_C, cols_C,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 b_ptr, rstride_B, cstride_B,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_ptr     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_ptr     = ( double * ) FLA_DOUBLE_PTR( B );
      double *c_ptr     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsymm( side_blis, uplo_blis,
                 rows_C, cols_C,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 b_ptr, rstride_B, cstride_B,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *c_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_ptr = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_ptr  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csymm( side_blis, uplo_blis,
                 rows_C, cols_C,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 b_ptr, rstride_B, cstride_B,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *c_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_ptr = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_ptr  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsymm( side_blis, uplo_blis,
                 rows_C, cols_C,
                 alpha_ptr,
                 a_ptr, rstride_A, cstride_A,
                 b_ptr, rstride_B, cstride_B,
                 beta_ptr,
                 c_ptr, rstride_C, cstride_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Symm_external_gpu | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | B, | ||
| void * | B_gpu, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| void * | C_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Symm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  // Object lengths are handed to cuBLAS in the leading-dimension slots
  // (presumably the device buffers are packed column-major -- TODO confirm).
  const int ld_A = FLA_Obj_length( A );
  const int ld_B = FLA_Obj_length( B );

  const int rows_C = FLA_Obj_length( C );
  const int cols_C = FLA_Obj_width( C );
  const int ld_C   = FLA_Obj_length( C );

  // Translate the FLAME parameters into the character codes used by the BLAS calls.
  char side_ch;
  char uplo_ch;
  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );

  // Dispatch on A's datatype; scalars are passed to cuBLAS by value.
  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   ( float * ) B_gpu, ld_B,
                   *beta_p,
                   ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   ( double * ) B_gpu, ld_B,
                   *beta_p,
                   ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   ( cuComplex * ) B_gpu, ld_B,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   ( cuDoubleComplex * ) B_gpu, ld_B,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Symm_internal_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Symm_internal().
{
  FLA_Error status;

  // The control structure must not be NULL.
  status = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( status );

  // B and C must have the same element type as A.
  status = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( status );

  // Conformality of A, B and C. Element length and width are used rather
  // than scalar length and width, so the check holds for both FLA_MATRIX
  // and FLA_SCALAR element types. A is the first operand when side is
  // FLA_LEFT; otherwise B takes that position.
  const FLA_Obj first  = ( side == FLA_LEFT ) ? A : B;
  const FLA_Obj second = ( side == FLA_LEFT ) ? B : A;

  status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, first, second, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Symm_ll_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Symm_external().
Referenced by FLA_Symm_ll().
{
  // Left-side, lower-triangular variant; cntl is not used here.
  FLA_Error r_val = FLA_Symm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                                       alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Symm_lu_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Symm_external().
Referenced by FLA_Symm_lu().
{
  // Left-side, upper-triangular variant; cntl is not used here.
  FLA_Error r_val = FLA_Symm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                                       alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Symm_rl_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Symm_external().
Referenced by FLA_Symm_rl().
{
  // Right-side, lower-triangular variant; cntl is not used here.
  FLA_Error r_val = FLA_Symm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR,
                                       alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Symm_ru_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Symm_external().
Referenced by FLA_Symm_ru().
{
  // Right-side, upper-triangular variant; cntl is not used here.
  FLA_Error r_val = FLA_Symm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                                       alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Symm_task | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_symm_t * | cntl | ||
| ) |
References FLA_Symm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Forward side and uplo unchanged to the external implementation;
  // cntl is not used here.
  FLA_Error r_val = FLA_Symm_external( side, uplo, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syr2k | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_error_level(), FLA_Syr2k_check(), FLA_Syr2k_external(), and FLA_Syr2k_internal().
{
  // Validate the operands whenever the error-checking level is at least
  // FLA_MIN_ERROR_CHECKING.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree front end enabled: use the internal implementation.
  return FLA_Syr2k_internal( uplo, trans, alpha, A, B, beta, C, fla_syr2k_cntl_mm );
#else
  // Otherwise call the external implementation directly.
  return FLA_Syr2k_external( uplo, trans, alpha, A, B, beta, C );
#endif
}
| FLA_Error FLA_Syr2k_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_real_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Syr2k(), FLA_Syr2k_external(), FLA_Syr2k_external_gpu(), and FLASH_Syr2k().
{
  FLA_Error status;

  // Enumerated parameters.
  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_real_trans( trans );
  FLA_Check_error_code( status );

  // A must be a floating-point, non-constant object.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C share A's datatype; the scalars must be consistent with it.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // C must be square.
  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  // Conformality of A, B and C; the transposition pair passed to the
  // dimension check follows from trans.
  const FLA_Trans trans_first  = ( trans == FLA_NO_TRANSPOSE ) ? FLA_NO_TRANSPOSE
                                                               : FLA_TRANSPOSE;
  const FLA_Trans trans_second = ( trans == FLA_NO_TRANSPOSE ) ? FLA_TRANSPOSE
                                                               : FLA_NO_TRANSPOSE;

  status = FLA_Check_matrix_matrix_dims( trans_first, trans_second, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syr2k_external | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_csyr2k(), bl1_dsyr2k(), bl1_ssyr2k(), bl1_zsyr2k(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), FLA_Scal_external(), and FLA_Syr2k_check().
Referenced by FLA_Syr2k(), FLA_Syr2k_ln_task(), FLA_Syr2k_lt_task(), FLA_Syr2k_task(), FLA_Syr2k_un_task(), and FLA_Syr2k_ut_task().
{
  // The parameter check runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );

  // An empty C leaves nothing to update.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  // With an empty A or B, only the scaling of C by beta is performed.
  if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  const FLA_Datatype dt = FLA_Obj_datatype( A );

  const int rows_A = FLA_Obj_length( A );
  const int cols_A = FLA_Obj_width( A );

  const int rstride_A = FLA_Obj_row_stride( A );
  const int cstride_A = FLA_Obj_col_stride( A );

  const int rstride_B = FLA_Obj_row_stride( B );
  const int cstride_B = FLA_Obj_col_stride( B );

  const int dim_C = FLA_Obj_length( C );

  const int rstride_C = FLA_Obj_row_stride( C );
  const int cstride_C = FLA_Obj_col_stride( C );

  // Inner dimension: width of A when trans is FLA_NO_TRANSPOSE, length otherwise.
  const int k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  // Translate the FLAME parameters into their BLIS counterparts.
  uplo1_t  uplo_blis;
  trans1_t trans_blis;
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );

  // Dispatch on A's datatype; every buffer and scalar is read with that type.
  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_ptr     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_ptr     = ( float * ) FLA_FLOAT_PTR( B );
      float *c_ptr     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyr2k( uplo_blis, trans_blis,
                  dim_C, k,
                  alpha_ptr,
                  a_ptr, rstride_A, cstride_A,
                  b_ptr, rstride_B, cstride_B,
                  beta_ptr,
                  c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_ptr     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_ptr     = ( double * ) FLA_DOUBLE_PTR( B );
      double *c_ptr     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyr2k( uplo_blis, trans_blis,
                  dim_C, k,
                  alpha_ptr,
                  a_ptr, rstride_A, cstride_A,
                  b_ptr, rstride_B, cstride_B,
                  beta_ptr,
                  c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *c_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_ptr = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_ptr  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csyr2k( uplo_blis, trans_blis,
                  dim_C, k,
                  alpha_ptr,
                  a_ptr, rstride_A, cstride_A,
                  b_ptr, rstride_B, cstride_B,
                  beta_ptr,
                  c_ptr, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *c_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_ptr = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_ptr  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsyr2k( uplo_blis, trans_blis,
                  dim_C, k,
                  alpha_ptr,
                  a_ptr, rstride_A, cstride_A,
                  b_ptr, rstride_B, cstride_B,
                  beta_ptr,
                  c_ptr, rstride_C, cstride_C );
      break;
    }

    default:
      // Any other datatype performs no operation.
      break;
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syr2k_external_gpu | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | B, | ||
| void * | B_gpu, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| void * | C_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Syr2k_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Dispatches to the cublas?syr2k routine that matches the datatype of A,
  // operating on the caller-supplied device buffers A_gpu, B_gpu and C_gpu.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt;
  int          order_C;
  int          k;
  int          lda, ldb, ldc;
  char         uplo_ch;
  char         trans_ch;

  // Parameter validation is only performed at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );

  // Nothing to do when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt = FLA_Obj_datatype( A );

  // Leading dimensions are taken from each object's length.
  // NOTE(review): presumably the device buffers are packed column-major
  // copies of the objects -- confirm against the GPU transfer code.
  lda = FLA_Obj_length( A );
  ldb = FLA_Obj_length( B );
  ldc = FLA_Obj_length( C );

  order_C = FLA_Obj_length( C );

  // k is the width of A when trans is FLA_NO_TRANSPOSE, otherwise its length.
  k = ( trans == FLA_NO_TRANSPOSE ) ? FLA_Obj_width( A ) : FLA_Obj_length( A );

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyr2k( uplo_ch, trans_ch,
                    order_C, k,
                    *alpha_p,
                    ( float * ) A_gpu, lda,
                    ( float * ) B_gpu, ldb,
                    *beta_p,
                    ( float * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyr2k( uplo_ch, trans_ch,
                    order_C, k,
                    *alpha_p,
                    ( double * ) A_gpu, lda,
                    ( double * ) B_gpu, ldb,
                    *beta_p,
                    ( double * ) C_gpu, ldc );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsyr2k( uplo_ch, trans_ch,
                    order_C, k,
                    *alpha_p,
                    ( cuComplex * ) A_gpu, lda,
                    ( cuComplex * ) B_gpu, ldb,
                    *beta_p,
                    ( cuComplex * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsyr2k( uplo_ch, trans_ch,
                    order_C, k,
                    *alpha_p,
                    ( cuDoubleComplex * ) A_gpu, lda,
                    ( cuDoubleComplex * ) B_gpu, ldb,
                    *beta_p,
                    ( cuDoubleComplex * ) C_gpu, ldc );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syr2k_internal_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Syr2k_internal().
{
  // Validates the arguments handed to FLA_Syr2k_internal().
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // B and C must each have the same element type as A.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  // The transposition pair passed to the dimension check depends on trans.
  if ( trans == FLA_NO_TRANSPOSE )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, B, C );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syr2k_ln_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_ln().
{
  // Task wrapper: forwards to FLA_Syr2k_external() with uplo fixed to
  // FLA_LOWER_TRIANGULAR and trans fixed to FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syr2k_lt_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_lt().
{
  // Task wrapper: forwards to FLA_Syr2k_external() with uplo fixed to
  // FLA_LOWER_TRIANGULAR and trans fixed to FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syr2k_task | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Syr2k_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes uplo and trans through unchanged to
  // FLA_Syr2k_external(). The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( uplo, trans, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syr2k_un_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_un().
{
  // Task wrapper: forwards to FLA_Syr2k_external() with uplo fixed to
  // FLA_UPPER_TRIANGULAR and trans fixed to FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syr2k_ut_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syr2k_t * | cntl | ||
| ) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_ut().
{
  // Task wrapper: forwards to FLA_Syr2k_external() with uplo fixed to
  // FLA_UPPER_TRIANGULAR and trans fixed to FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Syrk | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_error_level(), FLA_Syrk_check(), FLA_Syrk_external(), and FLA_Syrk_internal().
{
  // Front end for Syrk: optionally validates the arguments, then hands the
  // operation to either the control-tree based internal implementation or
  // the external implementation, depending on how the library was built.

  // Check parameters whenever error checking is at or above the minimum level.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  return FLA_Syrk_internal( uplo, trans, alpha, A, beta, C, fla_syrk_cntl_mm );
#else
  return FLA_Syrk_external( uplo, trans, alpha, A, beta, C );
#endif
}
| FLA_Error FLA_Syrk_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_real_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Syrk(), FLA_Syrk_external(), FLA_Syrk_external_gpu(), and FLASH_Syrk().
{
  // Validates the arguments of a Syrk call. Each check result is passed to
  // FLA_Check_error_code() before the next check runs.
  FLA_Error e_val;

  // --- Enumerated parameters -------------------------------------------------
  e_val = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_real_trans( trans );
  FLA_Check_error_code( e_val );

  // --- Datatypes (A is the reference object) ---------------------------------
  e_val = FLA_Check_floating_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( e_val );

  // --- Shapes ----------------------------------------------------------------
  e_val = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_square( C );
  FLA_Check_error_code( e_val );

  // A is checked against itself; which copy is treated as transposed depends
  // on trans.
  if ( trans == FLA_NO_TRANSPOSE )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, A, C );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, A, C );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syrk_external | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_csyrk(), bl1_dsyrk(), bl1_ssyrk(), bl1_zsyrk(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), FLA_Scal_external(), and FLA_Syrk_check().
Referenced by FLA_Syrk(), FLA_Syrk_ln_task(), FLA_Syrk_lt_task(), FLA_Syrk_task(), FLA_Syrk_un_task(), and FLA_Syrk_ut_task().
{
  // Dispatches to the bl1_?syrk routine that matches the datatype of A.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt;
  int          order_C;
  int          k;
  int          rs_A, cs_A;
  int          rs_C, cs_C;
  uplo1_t      uplo_blis;
  trans1_t     trans_blis;

  // Parameter validation is only performed at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );

  // Nothing to do when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  // When A has a zero dimension, only the scaling of C by beta is performed.
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt = FLA_Obj_datatype( A );

  rs_A = FLA_Obj_row_stride( A );
  cs_A = FLA_Obj_col_stride( A );

  order_C = FLA_Obj_length( C );
  rs_C    = FLA_Obj_row_stride( C );
  cs_C    = FLA_Obj_col_stride( C );

  // k is the width of A when trans is FLA_NO_TRANSPOSE, otherwise its length.
  k = ( trans == FLA_NO_TRANSPOSE ) ? FLA_Obj_width( A ) : FLA_Obj_length( A );

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *C_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyrk( uplo_blis, trans_blis,
                 order_C, k,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 beta_buf,
                 C_buf, rs_C, cs_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *C_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyrk( uplo_blis, trans_blis,
                 order_C, k,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 beta_buf,
                 C_buf, rs_C, cs_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *C_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csyrk( uplo_blis, trans_blis,
                 order_C, k,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 beta_buf,
                 C_buf, rs_C, cs_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *C_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsyrk( uplo_blis, trans_blis,
                 order_C, k,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 beta_buf,
                 C_buf, rs_C, cs_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syrk_external_gpu | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| void * | C_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Syrk_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Dispatches to the cublas?syrk routine that matches the datatype of A,
  // operating on the caller-supplied device buffers A_gpu and C_gpu.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt;
  int          order_C;
  int          k;
  int          lda, ldc;
  char         uplo_ch;
  char         trans_ch;

  // Parameter validation is only performed at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );

  // Nothing to do when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt = FLA_Obj_datatype( A );

  // Leading dimensions are taken from each object's length.
  // NOTE(review): presumably the device buffers are packed column-major
  // copies of the objects -- confirm against the GPU transfer code.
  lda = FLA_Obj_length( A );
  ldc = FLA_Obj_length( C );

  order_C = FLA_Obj_length( C );

  // k is the width of A when trans is FLA_NO_TRANSPOSE, otherwise its length.
  k = ( trans == FLA_NO_TRANSPOSE ) ? FLA_Obj_width( A ) : FLA_Obj_length( A );

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyrk( uplo_ch, trans_ch,
                   order_C, k,
                   *alpha_p,
                   ( float * ) A_gpu, lda,
                   *beta_p,
                   ( float * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyrk( uplo_ch, trans_ch,
                   order_C, k,
                   *alpha_p,
                   ( double * ) A_gpu, lda,
                   *beta_p,
                   ( double * ) C_gpu, ldc );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsyrk( uplo_ch, trans_ch,
                   order_C, k,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, lda,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsyrk( uplo_ch, trans_ch,
                   order_C, k,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ldc );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syrk_internal_check | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Syrk_internal().
{
  // Validates the arguments handed to FLA_Syrk_internal().
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // C must have the same element type as A.
  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  // The transposition pair passed to the dimension check depends on trans.
  if ( trans == FLA_NO_TRANSPOSE )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, A, C );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, A, C );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Syrk_ln_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_ln().
{
  // Task wrapper: forwards to FLA_Syrk_external() with uplo fixed to
  // FLA_LOWER_TRIANGULAR and trans fixed to FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Syrk_lt_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_lt().
{
  // Task wrapper: forwards to FLA_Syrk_external() with uplo fixed to
  // FLA_LOWER_TRIANGULAR and trans fixed to FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Syrk_task | ( | FLA_Uplo | uplo, |
| FLA_Trans | trans, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Syrk_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes uplo and trans through unchanged to
  // FLA_Syrk_external(). The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syrk_external( uplo, trans, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Syrk_un_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_un().
{
  // Task wrapper: forwards to FLA_Syrk_external() with uplo fixed to
  // FLA_UPPER_TRIANGULAR and trans fixed to FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Syrk_ut_task | ( | FLA_Obj | alpha, |
| FLA_Obj | A, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C, | ||
| fla_syrk_t * | cntl | ||
| ) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_ut().
{
  // Task wrapper: forwards to FLA_Syrk_external() with uplo fixed to
  // FLA_UPPER_TRIANGULAR and trans fixed to FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, alpha, A, beta, C );

  return r_val;
}
| FLA_Error FLA_Trmm | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References FLA_Check_error_level(), FLA_Trmm_check(), FLA_Trmm_external(), and FLA_Trmm_internal().
{
  // Front end for Trmm: optionally validates the arguments, then hands the
  // operation to either the control-tree based internal implementation or
  // the external implementation, depending on how the library was built.

  // Check parameters whenever error checking is at or above the minimum level.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  return FLA_Trmm_internal( side, uplo, trans, diag, alpha, A, B, fla_trmm_cntl_mm );
#else
  return FLA_Trmm_external( side, uplo, trans, diag, alpha, A, B );
#endif
}
| FLA_Error FLA_Trmm_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trmm(), FLA_Trmm_external(), FLA_Trmm_external_gpu(), and FLASH_Trmm().
{
  // Validates the arguments of a Trmm call. Each check result is passed to
  // FLA_Check_error_code() before the next check runs.
  FLA_Error e_val;

  // --- Enumerated parameters -------------------------------------------------
  e_val = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( e_val );

  // --- Datatypes (A is the reference object) ---------------------------------
  e_val = FLA_Check_floating_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( e_val );

  // --- Shapes ----------------------------------------------------------------
  e_val = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_square( A );
  FLA_Check_error_code( e_val );

  // A sits on the left of B when side is FLA_LEFT, otherwise on the right;
  // B is also the result object in the dimension check.
  if ( side == FLA_LEFT )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trmm_external | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References bl1_ctrmm(), bl1_dtrmm(), bl1_strmm(), bl1_ztrmm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trmm_check().
Referenced by FLA_LQ_UT_blk_var2(), FLA_QR_UT_blk_var2(), FLA_Trmm(), FLA_Trmm_llc_task(), FLA_Trmm_llh_task(), FLA_Trmm_lln_task(), FLA_Trmm_llt_task(), FLA_Trmm_luc_task(), FLA_Trmm_luh_task(), FLA_Trmm_lun_task(), FLA_Trmm_lut_task(), FLA_Trmm_rlc_task(), FLA_Trmm_rlh_task(), FLA_Trmm_rln_task(), FLA_Trmm_rlt_task(), FLA_Trmm_ruc_task(), FLA_Trmm_ruh_task(), FLA_Trmm_run_task(), FLA_Trmm_rut_task(), and FLA_Trmm_task().
{
  // Dispatches to the bl1_?trmm routine that matches the datatype of A.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt;
  int          rows_B, cols_B;
  int          rs_A, cs_A;
  int          rs_B, cs_B;
  side1_t      side_blis;
  uplo1_t      uplo_blis;
  trans1_t     trans_blis;
  diag1_t      diag_blis;

  // Parameter validation is only performed at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );

  // Nothing to do when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt = FLA_Obj_datatype( A );

  rs_A = FLA_Obj_row_stride( A );
  cs_A = FLA_Obj_col_stride( A );

  rows_B = FLA_Obj_length( B );
  cols_B = FLA_Obj_width( B );
  rs_B   = FLA_Obj_row_stride( B );
  cs_B   = FLA_Obj_col_stride( B );

  FLA_Param_map_flame_to_blis_side( side, &side_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );
  FLA_Param_map_flame_to_blis_diag( diag, &diag_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

      bl1_strmm( side_blis, uplo_blis, trans_blis, diag_blis,
                 rows_B, cols_B,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 B_buf, rs_B, cs_B );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

      bl1_dtrmm( side_blis, uplo_blis, trans_blis, diag_blis,
                 rows_B, cols_B,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 B_buf, rs_B, cs_B );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

      bl1_ctrmm( side_blis, uplo_blis, trans_blis, diag_blis,
                 rows_B, cols_B,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 B_buf, rs_B, cs_B );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      bl1_ztrmm( side_blis, uplo_blis, trans_blis, diag_blis,
                 rows_B, cols_B,
                 alpha_buf,
                 A_buf, rs_A, cs_A,
                 B_buf, rs_B, cs_B );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trmm_external_gpu | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | B, | ||
| void * | B_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_diag(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Trmm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Dispatches to the cublas?trmm routine that matches the datatype of A,
  // operating on the caller-supplied device buffers A_gpu and B_gpu.
  // Always returns FLA_SUCCESS.
  FLA_Datatype dt;
  int          rows_B, cols_B;
  int          lda, ldb;
  char         side_ch;
  char         uplo_ch;
  char         trans_ch;
  char         diag_ch;

  // Parameter validation is only performed at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );

  // Nothing to do when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt = FLA_Obj_datatype( A );

  // Leading dimensions are taken from each object's length.
  // NOTE(review): presumably the device buffers are packed column-major
  // copies of the objects -- confirm against the GPU transfer code.
  lda = FLA_Obj_length( A );
  ldb = FLA_Obj_length( B );

  rows_B = FLA_Obj_length( B );
  cols_B = FLA_Obj_width( B );

  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );
  FLA_Param_map_flame_to_netlib_diag( diag, &diag_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );

      cublasStrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( float * ) A_gpu, lda,
                   ( float * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );

      cublasDtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( double * ) A_gpu, lda,
                   ( double * ) B_gpu, ldb );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );

      cublasCtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, lda,
                   ( cuComplex * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      cublasZtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   ( cuDoubleComplex * ) B_gpu, ldb );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trmm_internal_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Trmm_internal().
{
  // Validates the arguments handed to FLA_Trmm_internal().
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // B must have the same element type as A.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  // The operand order passed to the dimension check depends on side.
  if ( side == FLA_LEFT )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trmm_llc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llc().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_llh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llh().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_CONJ_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_lln_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lln().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_llt_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llt().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_luc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_luc().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_luh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_luh().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_CONJ_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_lun_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lun().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_lut_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lut().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_LEFT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_rlc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlc().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_rlh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlh().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_CONJ_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_rln_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rln().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_rlt_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlt().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_LOWER_TRIANGULAR, trans = FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_ruc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_ruc().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_ruh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_ruh().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_CONJ_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_run_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_run().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_NO_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_rut_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rut().
{
  // Task wrapper: forwards to FLA_Trmm_external() with side = FLA_RIGHT,
  // uplo = FLA_UPPER_TRIANGULAR, trans = FLA_TRANSPOSE.
  // The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmm_task | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trmm_t * | cntl | ||
| ) |
References FLA_Trmm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes side, uplo, trans and diag through unchanged
  // to FLA_Trmm_external(). The cntl argument is not used here.
  FLA_Error r_val;

  r_val = FLA_Trmm_external( side, uplo, trans, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trmmsx | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Trmmsx_external().
{
  // Front end for Trmmsx: every argument is handed unchanged to
  // FLA_Trmmsx_external(), whose result is returned.
  FLA_Error r_val;

  r_val = FLA_Trmmsx_external( side, uplo, trans, diag, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Trmmsx_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trmmsx_external().
{
  // Validates the arguments of a Trmmsx call. Each check result is passed to
  // FLA_Check_error_code() before the next check runs.
  FLA_Error e_val;

  // --- Enumerated parameters -------------------------------------------------
  e_val = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( e_val );

  // --- Datatypes (A is the reference object) ---------------------------------
  e_val = FLA_Check_floating_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( e_val );

  // --- Shapes ----------------------------------------------------------------
  e_val = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_square( A );
  FLA_Check_error_code( e_val );

  // A sits on the left of B when side is FLA_LEFT, otherwise on the right;
  // C is the result object in the dimension check.
  if ( side == FLA_LEFT )
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  else
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
  FLA_Check_error_code( e_val );

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trmmsx_external | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_ctrmmsx(), bl1_dtrmmsx(), bl1_strmmsx(), bl1_ztrmmsx(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trmmsx_check().
Referenced by FLA_Trmmsx().
{
  // Gathers the dimensions of B and the strides of A, B and C, translates the
  // FLAME side/uplo/trans/diag parameters into their BLIS counterparts, and
  // calls the bl1_?trmmsx() routine selected by the datatype of A.
  FLA_Datatype dt_A;
  int          m, n;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  int          C_rs, C_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Operands are validated here only under full error checking.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trmmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C );
  }

  // Early exit: nothing is computed when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt_A = FLA_Obj_datatype( A );

  A_rs = FLA_Obj_row_stride( A );
  A_cs = FLA_Obj_col_stride( A );

  m    = FLA_Obj_length( B );
  n    = FLA_Obj_width( B );
  B_rs = FLA_Obj_row_stride( B );
  B_cs = FLA_Obj_col_stride( B );

  C_rs = FLA_Obj_row_stride( C );
  C_cs = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  // A datatype outside the four cases below falls through the switch and the
  // function still returns FLA_SUCCESS.
  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *C_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_strmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *C_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dtrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *C_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_ctrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *C_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_ztrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsm | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References FLA_Check_error_level(), FLA_Trsm_check(), FLA_Trsm_external(), and FLA_Trsm_internal().
{
  // Front end: optionally validates the operands, then hands the call to
  // either the control-tree implementation or the external implementation,
  // depending on FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES.

  // Parameter checking runs at the minimal error-checking level and above.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );
  }

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  return FLA_Trsm_internal( side, uplo, trans, diag, alpha, A, B, fla_trsm_cntl_mm );
#else
  return FLA_Trsm_external( side, uplo, trans, diag, alpha, A, B );
#endif
}
| FLA_Error FLA_Trsm_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trsm(), FLA_Trsm_external(), FLA_Trsm_external_gpu(), and FLASH_Trsm().
{
  // Runs the argument checks in a fixed order; each result is passed to
  // FLA_Check_error_code() before the next check is made. Returns FLA_SUCCESS
  // once every check has been issued.
  FLA_Error e_val;

  // --- Enumerated parameters -------------------------------------------
  e_val = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( e_val );

  // --- Datatypes: A is the reference object for B and alpha ------------
  e_val = FLA_Check_floating_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( e_val );

  // --- Shapes ----------------------------------------------------------
  e_val = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_square( A );
  FLA_Check_error_code( e_val );

  // The operand order handed to the dimension check depends on the side:
  // ( A, B, B ) for FLA_LEFT, ( B, A, B ) otherwise.
  if ( side != FLA_LEFT )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsm_external | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B | ||
| ) |
References bl1_ctrsm(), bl1_dtrsm(), bl1_strsm(), bl1_ztrsm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trsm_check().
Referenced by FLA_Chol_solve(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_LQ_UT_solve(), FLA_LU_nopiv_blk_var1(), FLA_LU_nopiv_blk_var2(), FLA_LU_nopiv_blk_var3(), FLA_LU_nopiv_solve(), FLA_LU_nopiv_unb_var1(), FLA_LU_nopiv_unb_var2(), FLA_LU_nopiv_unb_var3(), FLA_LU_piv_blk_var3(), FLA_LU_piv_solve(), FLA_LU_piv_unb_var3(), FLA_LU_piv_unb_var3b(), FLA_QR_UT_solve(), FLA_SA_FS_blk(), FLA_SA_LU_blk(), FLA_Trsm(), FLA_Trsm_llc_task(), FLA_Trsm_llh_task(), FLA_Trsm_lln_task(), FLA_Trsm_llt_task(), FLA_Trsm_luc_task(), FLA_Trsm_luh_task(), FLA_Trsm_lun_task(), FLA_Trsm_lut_task(), FLA_Trsm_piv_task(), FLA_Trsm_rlc_task(), FLA_Trsm_rlh_task(), FLA_Trsm_rln_task(), FLA_Trsm_rlt_task(), FLA_Trsm_ruc_task(), FLA_Trsm_ruh_task(), FLA_Trsm_run_task(), FLA_Trsm_rut_task(), FLA_Trsm_task(), and FLA_UDdate_UT_solve().
{
  // Gathers the dimensions of B and the strides of A and B, translates the
  // FLAME side/uplo/trans/diag parameters into their BLIS counterparts, and
  // calls the bl1_?trsm() routine selected by the datatype of A.
  FLA_Datatype dt_A;
  int          m, n;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Operands are validated here only under full error checking.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );
  }

  // Early exit: nothing is computed when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt_A = FLA_Obj_datatype( A );

  A_rs = FLA_Obj_row_stride( A );
  A_cs = FLA_Obj_col_stride( A );

  m    = FLA_Obj_length( B );
  n    = FLA_Obj_width( B );
  B_rs = FLA_Obj_row_stride( B );
  B_cs = FLA_Obj_col_stride( B );

  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  // A datatype outside the four cases below falls through the switch and the
  // function still returns FLA_SUCCESS.
  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

      bl1_strsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 m, n,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

      bl1_dtrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 m, n,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

      bl1_ctrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 m, n,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      bl1_ztrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 m, n,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsm_external_gpu | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| void * | A_gpu, | ||
| FLA_Obj | B, | ||
| void * | B_gpu | ||
| ) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_diag(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Trsm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // GPU variant: the host objects A and B supply only metadata (datatype and
  // dimensions), while the buffers handed to cuBLAS are A_gpu and B_gpu. The
  // FLAME parameters are translated to netlib-style characters and the
  // cublas?trsm() routine matching the datatype of A is invoked.
  FLA_Datatype dt_A;
  int          m, n;
  int          lda;
  int          ldb;
  char         side_ch;
  char         uplo_ch;
  char         trans_ch;
  char         diag_ch;

  // Operands are validated here only under full error checking.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );
  }

  // Early exit: nothing is computed when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt_A = FLA_Obj_datatype( A );

  // The leading dimensions passed to cuBLAS are the object lengths of A and B.
  lda = FLA_Obj_length( A );

  m   = FLA_Obj_length( B );
  n   = FLA_Obj_width( B );
  ldb = FLA_Obj_length( B );

  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );
  FLA_Param_map_flame_to_netlib_diag( diag, &diag_ch );

  // alpha is read from the host object and passed to cuBLAS by value.
  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

      cublasStrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   m, n,
                   *alpha_buf,
                   ( float * ) A_gpu, lda,
                   ( float * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

      cublasDtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   m, n,
                   *alpha_buf,
                   ( double * ) A_gpu, lda,
                   ( double * ) B_gpu, ldb );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_buf = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );

      cublasCtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   m, n,
                   *alpha_buf,
                   ( cuComplex * ) A_gpu, lda,
                   ( cuComplex * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_buf = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      cublasZtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   m, n,
                   *alpha_buf,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   ( cuDoubleComplex * ) B_gpu, ldb );
      break;
    }
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsm_internal_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), FLA_Check_null_pointer(), FLA_Check_object_length_equals(), and FLA_Obj_length().
Referenced by FLA_Trsm_internal().
{
  FLA_Error e_val;

  // The control structure must not be NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // A and B must have identical element types.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  // Conformality between the objects. These checks apply whether the element
  // type is FLA_MATRIX or FLA_SCALAR, since element length and width are used
  // rather than scalar length and width.
  if ( side != FLA_LEFT )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
    FLA_Check_error_code( e_val );
  }
  else
  {
    // Left side: only the length of A is compared against the length of B;
    // a full FLA_Check_matrix_matrix_dims() test on ( A, B, B ) is not
    // performed for this side.
    e_val = FLA_Check_object_length_equals( A, FLA_Obj_length( B ) );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsm_llc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llc().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_CONJ_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_llh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llh().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_CONJ_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_lln_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lln().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_llt_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llt().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_luc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_luc().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_CONJ_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_luh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_luh().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_CONJ_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_lun_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lun().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_lut_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lut().
{
  // Fixed-parameter task wrapper: side = FLA_LEFT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_rlc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlc().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_CONJ_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_rlh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlh().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_CONJ_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_rln_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rln().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_rlt_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlt().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_LOWER_TRIANGULAR,
  // trans = FLA_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_ruc_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_ruc().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_CONJ_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_ruh_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_ruh().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_CONJ_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_run_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_run().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_NO_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_rut_task | ( | FLA_Diag | diag, |
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rut().
{
  // Fixed-parameter task wrapper: side = FLA_RIGHT, uplo = FLA_UPPER_TRIANGULAR,
  // trans = FLA_TRANSPOSE. The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsm_task | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| fla_trsm_t * | cntl | ||
| ) |
References FLA_Trsm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: every parameter except cntl is forwarded unchanged
  // to FLA_Trsm_external(). The cntl argument is not used.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( side, uplo, trans, diag, alpha, A, B );

  return r_val;
}
| FLA_Error FLA_Trsmsx | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Trsmsx_external().
{
  // Thin front end: no parameter checking happens at this level; the call is
  // forwarded unchanged to FLA_Trsmsx_external().
  FLA_Error r_val;

  r_val = FLA_Trsmsx_external( side, uplo, trans, diag, alpha, A, B, beta, C );

  return r_val;
}
| FLA_Error FLA_Trsmsx_check | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trsmsx_external().
{
  // Runs the argument checks in a fixed order; each result is passed to
  // FLA_Check_error_code() before the next check is made. Returns FLA_SUCCESS
  // once every check has been issued.
  FLA_Error e_val;

  // --- Enumerated parameters -------------------------------------------
  e_val = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( e_val );

  // --- Datatypes: A is the reference object for B, C, alpha and beta ---
  e_val = FLA_Check_floating_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( e_val );

  // --- Shapes ----------------------------------------------------------
  e_val = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_square( A );
  FLA_Check_error_code( e_val );

  // The operand order handed to the dimension check depends on the side:
  // ( A, B, C ) for FLA_LEFT, ( B, A, C ) otherwise.
  if ( side != FLA_LEFT )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
| FLA_Error FLA_Trsmsx_external | ( | FLA_Side | side, |
| FLA_Uplo | uplo, | ||
| FLA_Trans | trans, | ||
| FLA_Diag | diag, | ||
| FLA_Obj | alpha, | ||
| FLA_Obj | A, | ||
| FLA_Obj | B, | ||
| FLA_Obj | beta, | ||
| FLA_Obj | C | ||
| ) |
References bl1_ctrsmsx(), bl1_dtrsmsx(), bl1_strsmsx(), bl1_ztrsmsx(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trsmsx_check().
Referenced by FLA_Trsmsx().
{
  // Gathers the dimensions of B and the strides of A, B and C, translates the
  // FLAME side/uplo/trans/diag parameters into their BLIS counterparts, and
  // calls the bl1_?trsmsx() routine selected by the datatype of A.
  FLA_Datatype dt_A;
  int          m, n;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  int          C_rs, C_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Operands are validated here only under full error checking.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trsmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C );
  }

  // Early exit: nothing is computed when B has a zero dimension.
  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt_A = FLA_Obj_datatype( A );

  A_rs = FLA_Obj_row_stride( A );
  A_cs = FLA_Obj_col_stride( A );

  m    = FLA_Obj_length( B );
  n    = FLA_Obj_width( B );
  B_rs = FLA_Obj_row_stride( B );
  B_cs = FLA_Obj_col_stride( B );

  C_rs = FLA_Obj_row_stride( C );
  C_cs = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  // A datatype outside the four cases below falls through the switch and the
  // function still returns FLA_SUCCESS.
  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *C_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_strsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *C_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dtrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *C_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_ctrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *C_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_ztrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   m, n,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
1.7.6.1