libflame
12600
|
Go to the source code of this file.
FLA_Error FLA_Gemm | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Gemm_check(), FLA_Gemm_external(), and FLA_Gemm_internal().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{ FLA_Error r_val = FLA_SUCCESS; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Gemm_check( transa, transb, alpha, A, B, beta, C ); #ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES r_val = FLA_Gemm_internal( transa, transb, alpha, A, B, beta, C, fla_gemm_cntl_mm_op ); #else r_val = FLA_Gemm_external( transa, transb, alpha, A, B, beta, C ); #endif return r_val; }
FLA_Error FLA_Gemm_cc_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_cc().
{ return FLA_Gemm_external( FLA_CONJ_NO_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_ch_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_ch().
{ return FLA_Gemm_external( FLA_CONJ_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_check | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), and FLA_Check_valid_trans().
Referenced by FLA_Gemm(), FLA_Gemm_external(), FLA_Gemm_external_gpu(), and FLASH_Gemm().
{ FLA_Error e_val; e_val = FLA_Check_valid_trans( transa ); FLA_Check_error_code( e_val ); e_val = FLA_Check_valid_trans( transb ); FLA_Check_error_code( e_val ); e_val = FLA_Check_floating_object( A ); FLA_Check_error_code( e_val ); e_val = FLA_Check_nonconstant_object( A ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, B ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, C ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( A, alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( A, beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_matrix_matrix_dims( transa, transb, A, B, C ); FLA_Check_error_code( e_val ); return FLA_SUCCESS; }
FLA_Error FLA_Gemm_cn_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_cn().
{ return FLA_Gemm_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_ct_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_ct().
{ return FLA_Gemm_external( FLA_CONJ_NO_TRANSPOSE, FLA_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_external | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_cgemm(), bl1_dgemm(), bl1_sgemm(), bl1_zgemm(), FLA_Check_error_level(), FLA_Gemm_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), and FLA_Scal_external().
Referenced by FLA_Bidiag_UT_u_blf_var4(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Gemm(), FLA_Gemm_cc_task(), FLA_Gemm_ch_task(), FLA_Gemm_cn_task(), FLA_Gemm_ct_task(), FLA_Gemm_hc_task(), FLA_Gemm_hh_task(), FLA_Gemm_hn_task(), FLA_Gemm_ht_task(), FLA_Gemm_nc_task(), FLA_Gemm_nh_task(), FLA_Gemm_nn_task(), FLA_Gemm_nt_task(), FLA_Gemm_task(), FLA_Gemm_tc_task(), FLA_Gemm_th_task(), FLA_Gemm_tn_task(), FLA_Gemm_tt_task(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_LQ_UT_blk_var2(), FLA_QR_UT_blk_var2(), FLA_QR_UT_piv_blk_var2(), FLA_SA_FS_blk(), and FLA_SA_LU_blk().
{ FLA_Datatype datatype; int k_AB; int m_A, n_A; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; trans1_t blis_transa; trans1_t blis_transb; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemm_check( transa, transb, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) { FLA_Scal_external( beta, C ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE ) k_AB = n_A; else k_AB = m_A; FLA_Param_map_flame_to_blis_trans( transa, &blis_transa ); FLA_Param_map_flame_to_blis_trans( transb, &blis_transb ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_sgemm( blis_transa, blis_transb, m_C, k_AB, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dgemm( blis_transa, blis_transb, m_C, k_AB, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); 
scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_cgemm( blis_transa, blis_transb, m_C, k_AB, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zgemm( blis_transa, blis_transb, m_C, k_AB, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Gemm_external_gpu | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Gemm_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Scal_external_gpu().
Referenced by FLASH_Queue_exec_task_gpu().
{ FLA_Datatype datatype; int k_AB; int m_A, n_A; int m_C, n_C; int ldim_A; int ldim_B; int ldim_C; char blas_transa; char blas_transb; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemm_check( transa, transb, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) { FLA_Scal_external_gpu( beta, C, C_gpu ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); ldim_A = FLA_Obj_length( A ); ldim_B = FLA_Obj_length( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); ldim_C = FLA_Obj_length( C ); if ( transa == FLA_NO_TRANSPOSE || transa == FLA_CONJ_NO_TRANSPOSE ) k_AB = n_A; else k_AB = m_A; FLA_Param_map_flame_to_netlib_trans( transa, &blas_transa ); FLA_Param_map_flame_to_netlib_trans( transb, &blas_transb ); switch( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasSgemm( blas_transa, blas_transb, m_C, n_C, k_AB, *buff_alpha, ( float * ) A_gpu, ldim_A, ( float * ) B_gpu, ldim_B, *buff_beta, ( float * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasDgemm( blas_transa, blas_transb, m_C, n_C, k_AB, *buff_alpha, ( double * ) A_gpu, ldim_A, ( double * ) B_gpu, ldim_B, *buff_beta, ( double * ) C_gpu, ldim_C ); break; } case FLA_COMPLEX: { cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha ); cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta ); cublasCgemm( blas_transa, blas_transb, m_C, n_C, k_AB, *buff_alpha, ( cuComplex * ) A_gpu, ldim_A, ( cuComplex * ) B_gpu, ldim_B, *buff_beta, ( cuComplex * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE_COMPLEX: { cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); cuDoubleComplex *buff_beta = ( 
cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); cublasZgemm( blas_transa, blas_transb, m_C, n_C, k_AB, *buff_alpha, ( cuDoubleComplex * ) A_gpu, ldim_A, ( cuDoubleComplex * ) B_gpu, ldim_B, *buff_beta, ( cuDoubleComplex * ) C_gpu, ldim_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Gemm_hc_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_hc().
{ return FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_hh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_hh().
{ return FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_hn_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_hn().
{ return FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_ht_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_ht().
{ return FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_internal_check | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Gemm_internal().
{ FLA_Error e_val; // Abort if the control structure is NULL. e_val = FLA_Check_null_pointer( ( void* ) cntl ); FLA_Check_error_code( e_val ); // Verify that the object element types are identical. e_val = FLA_Check_identical_object_elemtype( A, B ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_elemtype( A, C ); FLA_Check_error_code( e_val ); // Verify conformality between all the objects. This check works regardless // of whether the element type is FLA_MATRIX or FLA_SCALAR because the // element length and width are used instead of scalar length and width. e_val = FLA_Check_matrix_matrix_dims( transa, transb, A, B, C ); FLA_Check_error_code( e_val ); return FLA_SUCCESS; }
FLA_Error FLA_Gemm_nc_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_nc().
{ return FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_nh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_nh().
{ return FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_nn_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_nn().
{ return FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_nt_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_nt().
{ return FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_task | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{ return FLA_Gemm_external( transa, transb, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_tc_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_tc().
{ return FLA_Gemm_external( FLA_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_th_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_th().
{ return FLA_Gemm_external( FLA_TRANSPOSE, FLA_CONJ_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_tn_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_tn().
{ return FLA_Gemm_external( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemm_tt_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Gemm_external().
Referenced by FLA_Gemm_tt().
{ return FLA_Gemm_external( FLA_TRANSPOSE, FLA_TRANSPOSE, alpha, A, B, beta, C ); }
FLA_Error FLA_Gemp | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
FLA_Error FLA_Gepm | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
FLA_Error FLA_Gepp | ( | FLA_Trans | transa, |
FLA_Trans | transb, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
FLA_Error FLA_Hemm | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Hemm_check(), FLA_Hemm_external(), and FLA_Hemm_internal().
{ FLA_Error r_val = FLA_SUCCESS; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); #ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES r_val = FLA_Hemm_internal( side, uplo, alpha, A, B, beta, C, fla_hemm_cntl_mm ); #else r_val = FLA_Hemm_external( side, uplo, alpha, A, B, beta, C ); #endif return r_val; }
FLA_Error FLA_Hemm_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_leftright_side(), and FLA_Check_valid_uplo().
Referenced by FLA_Hemm(), FLA_Hemm_external(), FLA_Hemm_external_gpu(), and FLASH_Hemm().
{ FLA_Error e_val; e_val = FLA_Check_valid_leftright_side( side ); FLA_Check_error_code( e_val ); e_val = FLA_Check_valid_uplo( uplo ); FLA_Check_error_code( e_val ); e_val = FLA_Check_nonconstant_object( A ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, B ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, C ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( A, alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( A, beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_square( A ); FLA_Check_error_code( e_val ); if ( side == FLA_LEFT ) { e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C ); FLA_Check_error_code( e_val ); } else { e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C ); FLA_Check_error_code( e_val ); } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_chemm(), bl1_dsymm(), bl1_ssymm(), bl1_zhemm(), FLA_Check_error_level(), FLA_Hemm_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_side(), and FLA_Param_map_flame_to_blis_uplo().
Referenced by FLA_Hemm(), FLA_Hemm_ll_task(), FLA_Hemm_lu_task(), FLA_Hemm_rl_task(), FLA_Hemm_ru_task(), and FLA_Hemm_task().
{ FLA_Datatype datatype; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; side1_t blis_side; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); FLA_Param_map_flame_to_blis_side( side, &blis_side ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_ssymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dsymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_chemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case 
FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zhemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_external_gpu | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Hemm_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_side(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{ FLA_Datatype datatype; int m_C, n_C; int ldim_A; int ldim_B; int ldim_C; char blas_side; char blas_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); ldim_A = FLA_Obj_length( A ); ldim_B = FLA_Obj_length( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); ldim_C = FLA_Obj_length( C ); FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasSsymm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( float * ) A_gpu, ldim_A, ( float * ) B_gpu, ldim_B, *buff_beta, ( float * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasDsymm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( double * ) A_gpu, ldim_A, ( double * ) B_gpu, ldim_B, *buff_beta, ( double * ) C_gpu, ldim_C ); break; } case FLA_COMPLEX: { cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha ); cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta ); cublasChemm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( cuComplex * ) A_gpu, ldim_A, ( cuComplex * ) B_gpu, ldim_B, *buff_beta, ( cuComplex * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE_COMPLEX: { cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); cublasZhemm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( cuDoubleComplex * ) A_gpu, ldim_A, ( cuDoubleComplex * ) B_gpu, ldim_B, *buff_beta, ( cuDoubleComplex * ) C_gpu, ldim_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_internal_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Hemm_internal().
{ FLA_Error e_val; // Abort if the control structure is NULL. e_val = FLA_Check_null_pointer( ( void* ) cntl ); FLA_Check_error_code( e_val ); // Verify that the object element types are identical. e_val = FLA_Check_identical_object_elemtype( A, B ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_elemtype( A, C ); FLA_Check_error_code( e_val ); // Verify conformality between all the objects. This check works regardless // of whether the element type is FLA_MATRIX or FLA_SCALAR because the // element length and width are used instead of scalar length and width. if ( side == FLA_LEFT ) { e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C ); FLA_Check_error_code( e_val ); } else { e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C ); FLA_Check_error_code( e_val ); } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_ll_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Hemm_external().
Referenced by FLA_Hemm_ll().
{ return FLA_Hemm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, alpha, A, B, beta, C ); }
FLA_Error FLA_Hemm_lu_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Hemm_external().
Referenced by FLA_Hemm_lu().
{ return FLA_Hemm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, alpha, A, B, beta, C ); }
FLA_Error FLA_Hemm_rl_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Hemm_external().
Referenced by FLA_Hemm_rl().
{ return FLA_Hemm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, alpha, A, B, beta, C ); }
FLA_Error FLA_Hemm_ru_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Hemm_external().
Referenced by FLA_Hemm_ru().
{ return FLA_Hemm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, alpha, A, B, beta, C ); }
FLA_Error FLA_Hemm_task | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_hemm_t * | cntl | ||
) |
References FLA_Hemm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{ return FLA_Hemm_external( side, uplo, alpha, A, B, beta, C ); }
FLA_Error FLA_Her2k | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Her2k_check(), FLA_Her2k_external(), and FLA_Her2k_internal().
{ FLA_Error r_val = FLA_SUCCESS; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C ); #ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES r_val = FLA_Her2k_internal( uplo, trans, alpha, A, B, beta, C, fla_her2k_cntl_mm ); #else r_val = FLA_Her2k_external( uplo, trans, alpha, A, B, beta, C ); #endif return r_val; }
FLA_Error FLA_Her2k_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_identical_object_datatype(), FLA_Check_identical_object_precision(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_real_object(), FLA_Check_square(), FLA_Check_valid_complex_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Her2k(), FLA_Her2k_external(), FLA_Her2k_external_gpu(), and FLASH_Her2k().
{ FLA_Error e_val; e_val = FLA_Check_valid_uplo( uplo ); FLA_Check_error_code( e_val ); e_val = FLA_Check_valid_complex_trans( trans ); FLA_Check_error_code( e_val ); e_val = FLA_Check_nonconstant_object( A ); FLA_Check_error_code( e_val ); e_val = FLA_Check_real_object( beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, B ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_datatype( A, C ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( A, alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_identical_object_precision( A, beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_if_scalar( beta ); FLA_Check_error_code( e_val ); e_val = FLA_Check_square( C ); FLA_Check_error_code( e_val ); if ( trans == FLA_NO_TRANSPOSE ) { e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, B, C ); FLA_Check_error_code( e_val ); } else { e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C ); FLA_Check_error_code( e_val ); } return FLA_SUCCESS; }
FLA_Error FLA_Her2k_external | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_cher2k(), bl1_dsyr2k(), bl1_ssyr2k(), bl1_zher2k(), FLA_Check_error_level(), FLA_Her2k_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Scal_external().
Referenced by FLA_Her2k(), FLA_Her2k_lh_task(), FLA_Her2k_ln_task(), FLA_Her2k_task(), FLA_Her2k_uh_task(), FLA_Her2k_un_task(), FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_blk_var3().
{ FLA_Datatype datatype; int k_AB; int m_A, n_A; int m_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; uplo1_t blis_uplo; trans1_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) { FLA_Scal_external( beta, C ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); if ( trans == FLA_NO_TRANSPOSE ) k_AB = n_A; else k_AB = m_A; FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_ssyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dsyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) 
FLA_COMPLEX_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_cher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_zher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Her2k_external_gpu | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Her2k_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Hermitian rank-2k update dispatched to CUBLAS on device buffers.
  // Real datatypes are routed to the syr2k routines and complex datatypes
  // to the her2k routines. alpha and beta are read from the host-side
  // objects and passed to CUBLAS by value; for the complex cases beta is
  // read through the real-valued pointer macro of matching precision.
  FLA_Datatype dt;
  int          rows_A, cols_A;
  int          ld_A, ld_B, ld_C;
  int          dim_C;
  int          dim_k;
  char         uplo_ch;
  char         trans_ch;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  dim_C  = FLA_Obj_length( C );

  // The object lengths are what this routine hands to CUBLAS as the
  // leading dimensions of the device buffers.
  ld_A   = FLA_Obj_length( A );
  ld_B   = FLA_Obj_length( B );
  ld_C   = FLA_Obj_length( C );

  // Inner dimension: width of A when not transposed, length otherwise.
  dim_k  = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( float * ) A_gpu, ld_A,
                    ( float * ) B_gpu, ld_B,
                    *beta_p,
                    ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( double * ) A_gpu, ld_A,
                    ( double * ) B_gpu, ld_B,
                    *beta_p,
                    ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      float     *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasCher2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( cuComplex * ) A_gpu, ld_A,
                    ( cuComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      double          *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasZher2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( cuDoubleComplex * ) A_gpu, ld_A,
                    ( cuDoubleComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Her2k_internal_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Her2k_internal().
{
  // Sanity checks performed before the internal (control-tree driven)
  // her2k implementation runs: non-NULL control structure, matching
  // element types, and conformal dimensions. Each result is handed to
  // FLA_Check_error_code(); FLA_SUCCESS is returned when control reaches
  // the end of the function.
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // Verify that the object element types are identical.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  if ( trans == FLA_NO_TRANSPOSE )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Her2k_lh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_lh().
{
  // Task wrapper: forwards to FLA_Her2k_external() with the uplo and trans
  // arguments fixed to FLA_LOWER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Her2k_ln_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_ln().
{
  // Task wrapper: forwards to FLA_Her2k_external() with the uplo and trans
  // arguments fixed to FLA_LOWER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Her2k_task | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Her2k_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes uplo and trans through unchanged to
  // FLA_Her2k_external(). The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Her2k_external( uplo, trans, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Her2k_uh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_uh().
{
  // Task wrapper: forwards to FLA_Her2k_external() with the uplo and trans
  // arguments fixed to FLA_UPPER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Her2k_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Her2k_un_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_her2k_t * | cntl | ||
) |
References FLA_Her2k_external().
Referenced by FLA_Her2k_un().
{
  // Task wrapper: forwards to FLA_Her2k_external() with the uplo and trans
  // arguments fixed to FLA_UPPER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Her2k_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Herk | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Herk_check(), FLA_Herk_external(), and FLA_Herk_internal().
{
  // Front-end for the Hermitian rank-k update. Parameters are checked when
  // the error-checking level is at least FLA_MIN_ERROR_CHECKING, then the
  // operation is dispatched either to the control-tree based internal
  // implementation or to the external implementation, depending on
  // whether FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES is defined at build time.
  // Returns whatever the selected implementation returns.
  FLA_Error r_val = FLA_SUCCESS;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  r_val = FLA_Herk_internal( uplo, trans, alpha, A, beta, C, fla_herk_cntl_mm );
#else
  r_val = FLA_Herk_external( uplo, trans, alpha, A, beta, C );
#endif

  return r_val;
}
FLA_Error FLA_Herk_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_identical_object_datatype(), FLA_Check_identical_object_precision(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_real_object(), FLA_Check_square(), FLA_Check_valid_complex_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Herk(), FLA_Herk_external(), FLA_Herk_external_gpu(), and FLASH_Herk().
{
  // Argument validation for the herk operation. Every check result is
  // passed to FLA_Check_error_code(); FLA_SUCCESS is returned when control
  // reaches the end of the function.
  FLA_Error status;
  FLA_Trans trans_left;
  FLA_Trans trans_right;

  // Parameter values.
  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_complex_trans( trans );
  FLA_Check_error_code( status );

  // A must not be a constant object.
  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // alpha and beta must be real objects.
  status = FLA_Check_real_object( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_real_object( beta );
  FLA_Check_error_code( status );

  // Datatype of C and precision of the scalars must agree with A.
  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_precision( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_precision( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // C must be square.
  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  // Conformality of A with itself against C: A is used as both operands,
  // with the transposition placed on the right operand for the
  // no-transpose case and on the left operand otherwise.
  if ( trans == FLA_NO_TRANSPOSE )
  {
    trans_left  = FLA_NO_TRANSPOSE;
    trans_right = FLA_TRANSPOSE;
  }
  else
  {
    trans_left  = FLA_TRANSPOSE;
    trans_right = FLA_NO_TRANSPOSE;
  }

  status = FLA_Check_matrix_matrix_dims( trans_left, trans_right, A, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Herk_external | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_cherk(), bl1_dsyrk(), bl1_ssyrk(), bl1_zherk(), FLA_Check_error_level(), FLA_Herk_check(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Scal_external().
Referenced by FLA_Herk(), FLA_Herk_lh_task(), FLA_Herk_ln_task(), FLA_Herk_task(), FLA_Herk_uh_task(), FLA_Herk_un_task(), FLA_Random_spd_matrix(), and FLA_UDdate_UT_unb_var1().
{
  // Hermitian rank-k update carried out by the bl1_* wrappers. Real
  // datatypes are routed to the syrk routines and complex datatypes to the
  // herk routines; in every case alpha and beta are read through the
  // real-valued pointer macro of matching precision.
  FLA_Datatype dt;
  int          rows_A, cols_A;
  int          row_inc_A, col_inc_A;
  int          row_inc_C, col_inc_C;
  int          dim_C;
  int          dim_k;
  uplo1_t      uplo_bl1;
  trans1_t     trans_bl1;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  // With an empty A the update reduces to scaling C by beta.
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt        = FLA_Obj_datatype( A );

  rows_A    = FLA_Obj_length( A );
  cols_A    = FLA_Obj_width( A );
  row_inc_A = FLA_Obj_row_stride( A );
  col_inc_A = FLA_Obj_col_stride( A );

  dim_C     = FLA_Obj_length( C );
  row_inc_C = FLA_Obj_row_stride( C );
  col_inc_C = FLA_Obj_col_stride( C );

  // Inner dimension: width of A when not transposed, length otherwise.
  dim_k     = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_bl1 );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_bl1 );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_p     = ( float * ) FLA_FLOAT_PTR( A );
      float *c_p     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyrk( uplo_bl1, trans_bl1,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_p     = ( double * ) FLA_DOUBLE_PTR( A );
      double *c_p     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyrk( uplo_bl1, trans_bl1,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_p     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *c_p     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      float    *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float    *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_cherk( uplo_bl1, trans_bl1,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *c_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      double   *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double   *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_zherk( uplo_bl1, trans_bl1,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Herk_external_gpu | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Herk_check(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), and FLA_Param_map_flame_to_netlib_uplo().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Hermitian rank-k update dispatched to CUBLAS on device buffers.
  // Real datatypes are routed to the syrk routines and complex datatypes
  // to the herk routines. alpha and beta are read from the host-side
  // objects through the real-valued pointer macro of matching precision
  // and passed to CUBLAS by value.
  FLA_Datatype dt;
  int          rows_A, cols_A;
  int          ld_A, ld_C;
  int          dim_C;
  int          dim_k;
  char         uplo_ch;
  char         trans_ch;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Herk_check( uplo, trans, alpha, A, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  dim_C  = FLA_Obj_length( C );

  // The object lengths are what this routine hands to CUBLAS as the
  // leading dimensions of the device buffers.
  ld_A   = FLA_Obj_length( A );
  ld_C   = FLA_Obj_length( C );

  // Inner dimension: width of A when not transposed, length otherwise.
  dim_k  = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   *beta_p,
                   ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   *beta_p,
                   ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasCherk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasZherk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Herk_internal_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Herk_internal().
{
  // Sanity checks performed before the internal (control-tree driven)
  // herk implementation runs: non-NULL control structure, matching
  // element types, and conformal dimensions. Each result is handed to
  // FLA_Check_error_code(); FLA_SUCCESS is returned when control reaches
  // the end of the function.
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // Verify that the object element types are identical.
  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  if ( trans == FLA_NO_TRANSPOSE )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, A, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, A, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Herk_lh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Herk_external().
Referenced by FLA_Herk_lh().
{
  // Task wrapper: forwards to FLA_Herk_external() with the uplo and trans
  // arguments fixed to FLA_LOWER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Herk_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Herk_ln_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Herk_external().
Referenced by FLA_Herk_ln().
{
  // Task wrapper: forwards to FLA_Herk_external() with the uplo and trans
  // arguments fixed to FLA_LOWER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Herk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Herk_task | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Herk_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes uplo and trans through unchanged to
  // FLA_Herk_external(). The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Herk_external( uplo, trans, alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Herk_uh_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Herk_external().
Referenced by FLA_Herk_uh().
{
  // Task wrapper: forwards to FLA_Herk_external() with the uplo and trans
  // arguments fixed to FLA_UPPER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Herk_un_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_herk_t * | cntl | ||
) |
References FLA_Herk_external().
Referenced by FLA_Herk_un().
{
  // Task wrapper: forwards to FLA_Herk_external() with the uplo and trans
  // arguments fixed to FLA_UPPER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Herk_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Symm | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Symm_check(), FLA_Symm_external(), and FLA_Symm_internal().
{
  // Front-end for the symmetric matrix-matrix multiply. Parameters are
  // checked when the error-checking level is at least
  // FLA_MIN_ERROR_CHECKING, then the operation is dispatched either to the
  // control-tree based internal implementation or to the external
  // implementation, depending on whether
  // FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES is defined at build time.
  // Returns whatever the selected implementation returns.
  FLA_Error r_val = FLA_SUCCESS;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  r_val = FLA_Symm_internal( side, uplo, alpha, A, B, beta, C, fla_symm_cntl_mm );
#else
  r_val = FLA_Symm_external( side, uplo, alpha, A, B, beta, C );
#endif

  return r_val;
}
FLA_Error FLA_Symm_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_leftright_side(), and FLA_Check_valid_uplo().
Referenced by FLA_Symm(), FLA_Symm_external(), FLA_Symm_external_gpu(), and FLASH_Symm().
{
  // Argument validation for the symm operation. Every check result is
  // passed to FLA_Check_error_code(); FLA_SUCCESS is returned when control
  // reaches the end of the function.
  FLA_Error status;

  // Parameter values.
  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  // A must be a floating-point, non-constant object.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C must share A's datatype; alpha and beta must be consistent
  // with it.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // A must be square.
  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  // Conformality: A is the left operand for FLA_LEFT, and the right
  // operand for any other side value.
  if ( side != FLA_LEFT )
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
    FLA_Check_error_code( status );
  }
  else
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( status );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Symm_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_csymm(), bl1_dsymm(), bl1_ssymm(), bl1_zsymm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Symm_check().
Referenced by FLA_Symm(), FLA_Symm_ll_task(), FLA_Symm_lu_task(), FLA_Symm_rl_task(), FLA_Symm_ru_task(), and FLA_Symm_task().
{
  // Symmetric matrix-matrix multiply carried out by the bl1_?symm
  // wrappers, selected by the datatype of A. alpha and beta are read with
  // the same datatype as the matrices.
  FLA_Datatype dt;
  int          rows_C, cols_C;
  int          row_inc_A, col_inc_A;
  int          row_inc_B, col_inc_B;
  int          row_inc_C, col_inc_C;
  side1_t      side_bl1;
  uplo1_t      uplo_bl1;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  dt        = FLA_Obj_datatype( A );

  row_inc_A = FLA_Obj_row_stride( A );
  col_inc_A = FLA_Obj_col_stride( A );

  row_inc_B = FLA_Obj_row_stride( B );
  col_inc_B = FLA_Obj_col_stride( B );

  rows_C    = FLA_Obj_length( C );
  cols_C    = FLA_Obj_width( C );
  row_inc_C = FLA_Obj_row_stride( C );
  col_inc_C = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &side_bl1 );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_bl1 );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_p     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_p     = ( float * ) FLA_FLOAT_PTR( B );
      float *c_p     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssymm( side_bl1, uplo_bl1,
                 rows_C, cols_C,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 b_p, row_inc_B, col_inc_B,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_p     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_p     = ( double * ) FLA_DOUBLE_PTR( B );
      double *c_p     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsymm( side_bl1, uplo_bl1,
                 rows_C, cols_C,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 b_p, row_inc_B, col_inc_B,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_p     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_p     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *c_p     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_p = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_p  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csymm( side_bl1, uplo_bl1,
                 rows_C, cols_C,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 b_p, row_inc_B, col_inc_B,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *c_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_p  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsymm( side_bl1, uplo_bl1,
                 rows_C, cols_C,
                 alpha_p,
                 a_p, row_inc_A, col_inc_A,
                 b_p, row_inc_B, col_inc_B,
                 beta_p,
                 c_p, row_inc_C, col_inc_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Symm_external_gpu | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Symm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Symmetric matrix-matrix multiply dispatched to the cublas?symm
  // routines on device buffers, selected by the datatype of A. alpha and
  // beta are read from the host-side objects and passed to CUBLAS by value.
  FLA_Datatype dt;
  int          rows_C, cols_C;
  int          ld_A, ld_B, ld_C;
  char         side_ch;
  char         uplo_ch;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  rows_C = FLA_Obj_length( C );
  cols_C = FLA_Obj_width( C );

  // The object lengths are what this routine hands to CUBLAS as the
  // leading dimensions of the device buffers.
  ld_A   = FLA_Obj_length( A );
  ld_B   = FLA_Obj_length( B );
  ld_C   = FLA_Obj_length( C );

  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   ( float * ) B_gpu, ld_B,
                   *beta_p,
                   ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   ( double * ) B_gpu, ld_B,
                   *beta_p,
                   ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   ( cuComplex * ) B_gpu, ld_B,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsymm( side_ch, uplo_ch,
                   rows_C, cols_C,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   ( cuDoubleComplex * ) B_gpu, ld_B,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Symm_internal_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Symm_internal().
{
  // Sanity checks performed before the internal (control-tree driven)
  // symm implementation runs: non-NULL control structure, matching
  // element types, and conformal dimensions. Each result is handed to
  // FLA_Check_error_code(); FLA_SUCCESS is returned when control reaches
  // the end of the function.
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // Verify that the object element types are identical.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  if ( side == FLA_LEFT )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Symm_ll_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Symm_external().
Referenced by FLA_Symm_ll().
{
  // Task wrapper: forwards to FLA_Symm_external() with the side and uplo
  // arguments fixed to FLA_LEFT and FLA_LOWER_TRIANGULAR.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Symm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                             alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Symm_lu_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Symm_external().
Referenced by FLA_Symm_lu().
{
  // Task wrapper: forwards to FLA_Symm_external() with the side and uplo
  // arguments fixed to FLA_LEFT and FLA_UPPER_TRIANGULAR.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Symm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                             alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Symm_rl_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Symm_external().
Referenced by FLA_Symm_rl().
{
  // Task wrapper: forwards to FLA_Symm_external() with the side and uplo
  // arguments fixed to FLA_RIGHT and FLA_LOWER_TRIANGULAR.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Symm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR,
                             alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Symm_ru_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Symm_external().
Referenced by FLA_Symm_ru().
{
  // Task wrapper: forwards to FLA_Symm_external() with the side and uplo
  // arguments fixed to FLA_RIGHT and FLA_UPPER_TRIANGULAR.
  // The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Symm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                             alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Symm_task | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_symm_t * | cntl | ||
) |
References FLA_Symm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Generic task wrapper: passes side and uplo through unchanged to
  // FLA_Symm_external(). The cntl argument is not consulted.
  FLA_Error r_val;

  r_val = FLA_Symm_external( side, uplo, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syr2k | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Syr2k_check(), FLA_Syr2k_external(), and FLA_Syr2k_internal().
{
  // Front-end for the symmetric rank-2k update. Parameters are checked
  // when the error-checking level is at least FLA_MIN_ERROR_CHECKING, then
  // the operation is dispatched either to the control-tree based internal
  // implementation or to the external implementation, depending on
  // whether FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES is defined at build time.
  // Returns whatever the selected implementation returns.
  FLA_Error r_val = FLA_SUCCESS;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  r_val = FLA_Syr2k_internal( uplo, trans, alpha, A, B, beta, C, fla_syr2k_cntl_mm );
#else
  r_val = FLA_Syr2k_external( uplo, trans, alpha, A, B, beta, C );
#endif

  return r_val;
}
FLA_Error FLA_Syr2k_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_real_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Syr2k(), FLA_Syr2k_external(), FLA_Syr2k_external_gpu(), and FLASH_Syr2k().
{
  // Argument validation for the syr2k operation. Every check result is
  // passed to FLA_Check_error_code(); FLA_SUCCESS is returned when control
  // reaches the end of the function.
  FLA_Error status;
  FLA_Trans trans_left;
  FLA_Trans trans_right;

  // Parameter values.
  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_real_trans( trans );
  FLA_Check_error_code( status );

  // A must be a floating-point, non-constant object.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  // B and C must share A's datatype; alpha and beta must be consistent
  // with it.
  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // alpha and beta must be scalars.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  // C must be square.
  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  // Conformality of A and B against C, with the transposition placed on
  // the right operand for the no-transpose case and on the left operand
  // otherwise.
  if ( trans == FLA_NO_TRANSPOSE )
  {
    trans_left  = FLA_NO_TRANSPOSE;
    trans_right = FLA_TRANSPOSE;
  }
  else
  {
    trans_left  = FLA_TRANSPOSE;
    trans_right = FLA_NO_TRANSPOSE;
  }

  status = FLA_Check_matrix_matrix_dims( trans_left, trans_right, A, B, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Syr2k_external | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_csyr2k(), bl1_dsyr2k(), bl1_ssyr2k(), bl1_zsyr2k(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), FLA_Scal_external(), and FLA_Syr2k_check().
Referenced by FLA_Syr2k(), FLA_Syr2k_ln_task(), FLA_Syr2k_lt_task(), FLA_Syr2k_task(), FLA_Syr2k_un_task(), and FLA_Syr2k_ut_task().
{
  // Symmetric rank-2k update carried out by the bl1_?syr2k wrappers,
  // selected by the datatype of A. alpha and beta are read with the same
  // datatype as the matrices.
  FLA_Datatype dt;
  int          rows_A, cols_A;
  int          row_inc_A, col_inc_A;
  int          row_inc_B, col_inc_B;
  int          row_inc_C, col_inc_C;
  int          dim_C;
  int          dim_k;
  uplo1_t      uplo_bl1;
  trans1_t     trans_bl1;

  // Arguments are validated only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );

  // Nothing to update when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  // With an empty A or B the update reduces to scaling C by beta.
  if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt        = FLA_Obj_datatype( A );

  rows_A    = FLA_Obj_length( A );
  cols_A    = FLA_Obj_width( A );
  row_inc_A = FLA_Obj_row_stride( A );
  col_inc_A = FLA_Obj_col_stride( A );

  row_inc_B = FLA_Obj_row_stride( B );
  col_inc_B = FLA_Obj_col_stride( B );

  dim_C     = FLA_Obj_length( C );
  row_inc_C = FLA_Obj_row_stride( C );
  col_inc_C = FLA_Obj_col_stride( C );

  // Inner dimension: width of A when not transposed, length otherwise.
  dim_k     = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_bl1 );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_bl1 );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_p     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_p     = ( float * ) FLA_FLOAT_PTR( B );
      float *c_p     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyr2k( uplo_bl1, trans_bl1,
                  dim_C, dim_k,
                  alpha_p,
                  a_p, row_inc_A, col_inc_A,
                  b_p, row_inc_B, col_inc_B,
                  beta_p,
                  c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_p     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_p     = ( double * ) FLA_DOUBLE_PTR( B );
      double *c_p     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyr2k( uplo_bl1, trans_bl1,
                  dim_C, dim_k,
                  alpha_p,
                  a_p, row_inc_A, col_inc_A,
                  b_p, row_inc_B, col_inc_B,
                  beta_p,
                  c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_p     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_p     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *c_p     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_p = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_p  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csyr2k( uplo_bl1, trans_bl1,
                  dim_C, dim_k,
                  alpha_p,
                  a_p, row_inc_A, col_inc_A,
                  b_p, row_inc_B, col_inc_B,
                  beta_p,
                  c_p, row_inc_C, col_inc_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *c_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_p  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsyr2k( uplo_bl1, trans_bl1,
                  dim_C, dim_k,
                  alpha_p,
                  a_p, row_inc_A, col_inc_A,
                  b_p, row_inc_B, col_inc_B,
                  beta_p,
                  c_p, row_inc_C, col_inc_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syr2k_external_gpu | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Syr2k_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  /*
   * GPU path for Syr2k. Runs FLA_Syr2k_check() when full error checking is
   * enabled, returns immediately when C has a zero dimension, and otherwise
   * dispatches on the datatype of A to the matching cublas?syr2k routine.
   * alpha and beta are dereferenced and passed by value; A_gpu, B_gpu and
   * C_gpu are forwarded as the matrix buffers, with the lengths of A, B and
   * C supplied as the leading dimensions.
   */
  FLA_Datatype dt;
  char         uplo_ch;
  char         trans_ch;
  int          rows_A, cols_A;
  int          ld_A, ld_B, ld_C;
  int          dim_C;
  int          dim_k;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Syr2k_check( uplo, trans, alpha, A, B, beta, C );
  }

  if ( FLA_Obj_has_zero_dim( C ) )
  {
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );
  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  ld_A   = FLA_Obj_length( A );
  ld_B   = FLA_Obj_length( B );
  dim_C  = FLA_Obj_length( C );
  ld_C   = FLA_Obj_length( C );

  /* Inner dimension: width of A for the no-transpose case, length otherwise. */
  dim_k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( float * ) A_gpu, ld_A,
                    ( float * ) B_gpu, ld_B,
                    *beta_p,
                    ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( double * ) A_gpu, ld_A,
                    ( double * ) B_gpu, ld_B,
                    *beta_p,
                    ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( cuComplex * ) A_gpu, ld_A,
                    ( cuComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsyr2k( uplo_ch, trans_ch,
                    dim_C, dim_k,
                    *alpha_p,
                    ( cuDoubleComplex * ) A_gpu, ld_A,
                    ( cuDoubleComplex * ) B_gpu, ld_B,
                    *beta_p,
                    ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syr2k_internal_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Syr2k_internal().
{
  /*
   * Argument checks for the control-tree driven Syr2k: rejects a NULL
   * control structure, requires A, B and C to share an element type, and
   * checks that the dimensions of A, B and C conform for the requested
   * trans case. Always returns FLA_SUCCESS when control reaches the end.
   *
   * Block comments are used throughout so that the code stays valid even
   * if line breaks are lost.
   */
  FLA_Error e_val;

  /* Abort if the control structure is NULL. */
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  /* Verify that the object element types are identical. */
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  /* Verify conformality between all the objects. This check works regardless
     of whether the element type is FLA_MATRIX or FLA_SCALAR because the
     element length and width are used instead of scalar length and width. */
  if ( trans == FLA_NO_TRANSPOSE )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syr2k_ln_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_ln().
{
  /* Lower-triangular, no-transpose task: forwards to FLA_Syr2k_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syr2k_lt_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_lt().
{
  /* Lower-triangular, transpose task: forwards to FLA_Syr2k_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syr2k_task | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Syr2k_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  /* Generic Syr2k task: passes uplo and trans through unchanged to
     FLA_Syr2k_external(). The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( uplo, trans, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syr2k_un_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_un().
{
  /* Upper-triangular, no-transpose task: forwards to FLA_Syr2k_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syr2k_ut_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syr2k_t * | cntl | ||
) |
References FLA_Syr2k_external().
Referenced by FLA_Syr2k_ut().
{
  /* Upper-triangular, transpose task: forwards to FLA_Syr2k_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syr2k_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                              alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Syrk | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_error_level(), FLA_Syrk_check(), FLA_Syrk_external(), and FLA_Syrk_internal().
{
  /*
   * Front end for Syrk. Runs FLA_Syrk_check() when the error-checking level
   * is at least FLA_MIN_ERROR_CHECKING, then hands the operation either to
   * FLA_Syrk_internal() with the fla_syrk_cntl_mm control tree (when
   * FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES is defined) or to
   * FLA_Syrk_external(). Returns whatever the selected back end returns.
   *
   * Each preprocessor directive sits on its own line and the comment is a
   * block comment, so the conditional compilation is well-formed.
   */
  FLA_Error r_val = FLA_SUCCESS;

  /* Check parameters. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  r_val = FLA_Syrk_internal( uplo, trans, alpha, A, beta, C, fla_syrk_cntl_mm );
#else
  r_val = FLA_Syrk_external( uplo, trans, alpha, A, beta, C );
#endif

  return r_val;
}
FLA_Error FLA_Syrk_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_real_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Syrk(), FLA_Syrk_external(), FLA_Syrk_external_gpu(), and FLASH_Syrk().
{
  /*
   * Parameter validation for Syrk. Each check stores its result in `status`
   * and passes it to FLA_Check_error_code(). The checks cover, in order:
   * uplo and trans validity, A being a floating non-constant object, datatype
   * agreement of C, alpha and beta with A, alpha and beta being scalars,
   * C being square, and dimension conformality of A, A and C.
   */
  FLA_Error status;
  FLA_Trans op_left;
  FLA_Trans op_right;

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_real_trans( trans );
  FLA_Check_error_code( status );

  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  status = FLA_Check_square( C );
  FLA_Check_error_code( status );

  /* The dimension check treats the update as a product of A with itself;
     which operand is flagged as transposed depends on trans. */
  if ( trans == FLA_NO_TRANSPOSE )
  {
    op_left  = FLA_NO_TRANSPOSE;
    op_right = FLA_TRANSPOSE;
  }
  else
  {
    op_left  = FLA_TRANSPOSE;
    op_right = FLA_NO_TRANSPOSE;
  }

  status = FLA_Check_matrix_matrix_dims( op_left, op_right, A, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Syrk_external | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_csyrk(), bl1_dsyrk(), bl1_ssyrk(), bl1_zsyrk(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), FLA_Scal_external(), and FLA_Syrk_check().
Referenced by FLA_Syrk(), FLA_Syrk_ln_task(), FLA_Syrk_lt_task(), FLA_Syrk_task(), FLA_Syrk_un_task(), and FLA_Syrk_ut_task().
{
  /*
   * Syrk via the bl1 wrappers. Runs FLA_Syrk_check() when full error
   * checking is enabled. Returns early when C has a zero dimension; when A
   * has a zero dimension, only scales C by beta through FLA_Scal_external().
   * Otherwise dispatches on the datatype of A to bl1_?syrk, passing row and
   * column strides for A and C.
   */
  FLA_Datatype dt;
  uplo1_t      uplo_b;
  trans1_t     trans_b;
  int          rows_A, cols_A;
  int          rstride_A, cstride_A;
  int          rstride_C, cstride_C;
  int          dim_C;
  int          dim_k;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );
  }

  if ( FLA_Obj_has_zero_dim( C ) )
  {
    return FLA_SUCCESS;
  }

  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt        = FLA_Obj_datatype( A );

  rows_A    = FLA_Obj_length( A );
  cols_A    = FLA_Obj_width( A );
  rstride_A = FLA_Obj_row_stride( A );
  cstride_A = FLA_Obj_col_stride( A );

  dim_C     = FLA_Obj_length( C );
  rstride_C = FLA_Obj_row_stride( C );
  cstride_C = FLA_Obj_col_stride( C );

  /* Inner dimension: width of A for the no-transpose case, length otherwise. */
  dim_k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_b );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_b );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_p     = ( float * ) FLA_FLOAT_PTR( A );
      float *c_p     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_ssyrk( uplo_b, trans_b,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 beta_p,
                 c_p, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_p     = ( double * ) FLA_DOUBLE_PTR( A );
      double *c_p     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dsyrk( uplo_b, trans_b,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 beta_p,
                 c_p, rstride_C, cstride_C );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_p     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *c_p     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_p = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_p  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_csyrk( uplo_b, trans_b,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 beta_p,
                 c_p, rstride_C, cstride_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *c_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_p  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_zsyrk( uplo_b, trans_b,
                 dim_C, dim_k,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 beta_p,
                 c_p, rstride_C, cstride_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syrk_external_gpu | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
void * | C_gpu | ||
) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Syrk_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  /*
   * GPU path for Syrk. Runs FLA_Syrk_check() when full error checking is
   * enabled, returns immediately when C has a zero dimension, and otherwise
   * dispatches on the datatype of A to the matching cublas?syrk routine.
   * alpha and beta are dereferenced and passed by value; A_gpu and C_gpu are
   * forwarded as the matrix buffers, with the lengths of A and C supplied as
   * the leading dimensions.
   */
  FLA_Datatype dt;
  char         uplo_ch;
  char         trans_ch;
  int          rows_A, cols_A;
  int          ld_A, ld_C;
  int          dim_C;
  int          dim_k;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );
  }

  if ( FLA_Obj_has_zero_dim( C ) )
  {
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );
  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  ld_A   = FLA_Obj_length( A );
  dim_C  = FLA_Obj_length( C );
  ld_C   = FLA_Obj_length( C );

  /* Inner dimension: width of A for the no-transpose case, length otherwise. */
  dim_k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_p  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   *beta_p,
                   ( float * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_p  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   *beta_p,
                   ( double * ) C_gpu, ld_C );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_p  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasCsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuComplex * ) C_gpu, ld_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_p  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZsyrk( uplo_ch, trans_ch,
                   dim_C, dim_k,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   *beta_p,
                   ( cuDoubleComplex * ) C_gpu, ld_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syrk_internal_check | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Syrk_internal().
{
  /*
   * Argument checks for the control-tree driven Syrk: rejects a NULL
   * control structure, requires A and C to share an element type, and
   * checks that the dimensions of A, A and C conform for the requested
   * trans case. Always returns FLA_SUCCESS when control reaches the end.
   *
   * Block comments are used throughout so that the code stays valid even
   * if line breaks are lost.
   */
  FLA_Error e_val;

  /* Abort if the control structure is NULL. */
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  /* Verify that the object element types are identical. */
  e_val = FLA_Check_identical_object_elemtype( A, C );
  FLA_Check_error_code( e_val );

  /* Verify conformality between all the objects. This check works regardless
     of whether the element type is FLA_MATRIX or FLA_SCALAR because the
     element length and width are used instead of scalar length and width. */
  if ( trans == FLA_NO_TRANSPOSE )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, A, A, C );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, A, A, C );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Syrk_ln_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_ln().
{
  /* Lower-triangular, no-transpose task: forwards to FLA_Syrk_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Syrk_lt_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_lt().
{
  /* Lower-triangular, transpose task: forwards to FLA_Syrk_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Syrk_task | ( | FLA_Uplo | uplo, |
FLA_Trans | trans, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Syrk_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  /* Generic Syrk task: passes uplo and trans through unchanged to
     FLA_Syrk_external(). The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syrk_external( uplo, trans, alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Syrk_un_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_un().
{
  /* Upper-triangular, no-transpose task: forwards to FLA_Syrk_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Syrk_ut_task | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_syrk_t * | cntl | ||
) |
References FLA_Syrk_external().
Referenced by FLA_Syrk_ut().
{
  /* Upper-triangular, transpose task: forwards to FLA_Syrk_external()
     with those two options fixed. The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Syrk_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                             alpha, A, beta, C );

  return r_val;
}
FLA_Error FLA_Trmm | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References FLA_Check_error_level(), FLA_Trmm_check(), FLA_Trmm_external(), and FLA_Trmm_internal().
{
  /*
   * Front end for Trmm. Runs FLA_Trmm_check() when the error-checking level
   * is at least FLA_MIN_ERROR_CHECKING, then hands the operation either to
   * FLA_Trmm_internal() with the fla_trmm_cntl_mm control tree (when
   * FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES is defined) or to
   * FLA_Trmm_external(). Returns whatever the selected back end returns.
   *
   * Each preprocessor directive sits on its own line and the comment is a
   * block comment, so the conditional compilation is well-formed.
   */
  FLA_Error r_val = FLA_SUCCESS;

  /* Check parameters. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  r_val = FLA_Trmm_internal( side, uplo, trans, diag, alpha, A, B, fla_trmm_cntl_mm );
#else
  r_val = FLA_Trmm_external( side, uplo, trans, diag, alpha, A, B );
#endif

  return r_val;
}
FLA_Error FLA_Trmm_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trmm(), FLA_Trmm_external(), FLA_Trmm_external_gpu(), and FLASH_Trmm().
{
  /*
   * Parameter validation for Trmm. Each check stores its result in `status`
   * and passes it to FLA_Check_error_code(). The checks cover, in order:
   * side, uplo, trans and diag validity, A being a floating non-constant
   * object, datatype agreement of B and alpha with A, alpha being a scalar,
   * A being square, and dimension conformality of A and B.
   */
  FLA_Error status;

  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( status );

  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  /* A is the left operand for side == FLA_LEFT and the right operand
     otherwise; B doubles as the result in both cases. */
  if ( side != FLA_LEFT )
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
    FLA_Check_error_code( status );
  }
  else
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
    FLA_Check_error_code( status );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trmm_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References bl1_ctrmm(), bl1_dtrmm(), bl1_strmm(), bl1_ztrmm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trmm_check().
Referenced by FLA_LQ_UT_blk_var2(), FLA_QR_UT_blk_var2(), FLA_Trmm(), FLA_Trmm_llc_task(), FLA_Trmm_llh_task(), FLA_Trmm_lln_task(), FLA_Trmm_llt_task(), FLA_Trmm_luc_task(), FLA_Trmm_luh_task(), FLA_Trmm_lun_task(), FLA_Trmm_lut_task(), FLA_Trmm_rlc_task(), FLA_Trmm_rlh_task(), FLA_Trmm_rln_task(), FLA_Trmm_rlt_task(), FLA_Trmm_ruc_task(), FLA_Trmm_ruh_task(), FLA_Trmm_run_task(), FLA_Trmm_rut_task(), and FLA_Trmm_task().
{
  /*
   * Trmm via the bl1 wrappers. Runs FLA_Trmm_check() when full error
   * checking is enabled, returns early when B has a zero dimension, and
   * otherwise dispatches on the datatype of A to bl1_?trmm, passing the
   * dimensions of B and the row and column strides of A and B.
   */
  FLA_Datatype dt;
  side1_t      side_b;
  uplo1_t      uplo_b;
  trans1_t     trans_b;
  diag1_t      diag_b;
  int          rstride_A, cstride_A;
  int          rows_B, cols_B;
  int          rstride_B, cstride_B;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );
  }

  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt        = FLA_Obj_datatype( A );

  rstride_A = FLA_Obj_row_stride( A );
  cstride_A = FLA_Obj_col_stride( A );

  rows_B    = FLA_Obj_length( B );
  cols_B    = FLA_Obj_width( B );
  rstride_B = FLA_Obj_row_stride( B );
  cstride_B = FLA_Obj_col_stride( B );

  FLA_Param_map_flame_to_blis_side( side, &side_b );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_b );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_b );
  FLA_Param_map_flame_to_blis_diag( diag, &diag_b );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_p     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_p     = ( float * ) FLA_FLOAT_PTR( B );
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );

      bl1_strmm( side_b, uplo_b, trans_b, diag_b,
                 rows_B, cols_B,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 b_p, rstride_B, cstride_B );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_p     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_p     = ( double * ) FLA_DOUBLE_PTR( B );
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );

      bl1_dtrmm( side_b, uplo_b, trans_b, diag_b,
                 rows_B, cols_B,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 b_p, rstride_B, cstride_B );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_p     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_p     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *alpha_p = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

      bl1_ctrmm( side_b, uplo_b, trans_b, diag_b,
                 rows_B, cols_B,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 b_p, rstride_B, cstride_B );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_p     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *alpha_p = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      bl1_ztrmm( side_b, uplo_b, trans_b, diag_b,
                 rows_B, cols_B,
                 alpha_p,
                 a_p, rstride_A, cstride_A,
                 b_p, rstride_B, cstride_B );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trmm_external_gpu | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu | ||
) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_diag(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Trmm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  /*
   * GPU path for Trmm. Runs FLA_Trmm_check() when full error checking is
   * enabled, returns immediately when B has a zero dimension, and otherwise
   * dispatches on the datatype of A to the matching cublas?trmm routine.
   * alpha is dereferenced and passed by value; A_gpu and B_gpu are forwarded
   * as the matrix buffers, with the lengths of A and B supplied as the
   * leading dimensions.
   */
  FLA_Datatype dt;
  char         side_ch;
  char         uplo_ch;
  char         trans_ch;
  char         diag_ch;
  int          ld_A, ld_B;
  int          rows_B, cols_B;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );
  }

  if ( FLA_Obj_has_zero_dim( B ) )
  {
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );
  ld_A   = FLA_Obj_length( A );
  rows_B = FLA_Obj_length( B );
  cols_B = FLA_Obj_width( B );
  ld_B   = FLA_Obj_length( B );

  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );
  FLA_Param_map_flame_to_netlib_diag( diag, &diag_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_p = ( float * ) FLA_FLOAT_PTR( alpha );

      cublasStrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( float * ) A_gpu, ld_A,
                   ( float * ) B_gpu, ld_B );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_p = ( double * ) FLA_DOUBLE_PTR( alpha );

      cublasDtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( double * ) A_gpu, ld_A,
                   ( double * ) B_gpu, ld_B );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_p = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );

      cublasCtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( cuComplex * ) A_gpu, ld_A,
                   ( cuComplex * ) B_gpu, ld_B );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_p = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      cublasZtrmm( side_ch, uplo_ch, trans_ch, diag_ch,
                   rows_B, cols_B,
                   *alpha_p,
                   ( cuDoubleComplex * ) A_gpu, ld_A,
                   ( cuDoubleComplex * ) B_gpu, ld_B );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trmm_internal_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), and FLA_Check_null_pointer().
Referenced by FLA_Trmm_internal().
{
  /*
   * Argument checks for the control-tree driven Trmm: rejects a NULL
   * control structure, requires A and B to share an element type, and
   * checks that the dimensions of A and B conform for the requested side.
   * Always returns FLA_SUCCESS when control reaches the end.
   *
   * Block comments are used throughout so that the code stays valid even
   * if line breaks are lost.
   */
  FLA_Error e_val;

  /* Abort if the control structure is NULL. */
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  /* Verify that the object element types are identical. */
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  /* Verify conformality between all the objects. This check works regardless
     of whether the element type is FLA_MATRIX or FLA_SCALAR because the
     element length and width are used instead of scalar length and width. */
  if ( side == FLA_LEFT )
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trmm_llc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llc().
{
  /* Left side, lower triangular, conjugate-no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_llh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llh().
{
  /* Left side, lower triangular, conjugate-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_lln_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lln().
{
  /* Left side, lower triangular, no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_llt_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_llt().
{
  /* Left side, lower triangular, transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_luc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_luc().
{
  /* Left side, upper triangular, conjugate-no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_luh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_luh().
{
  /* Left side, upper triangular, conjugate-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_lun_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lun().
{
  /* Left side, upper triangular, no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_lut_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_lut().
{
  /* Left side, upper triangular, transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_rlc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlc().
{
  /* Right side, lower triangular, conjugate-no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_rlh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlh().
{
  /* Right side, lower triangular, conjugate-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_rln_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rln().
{
  /* Right side, lower triangular, no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_rlt_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rlt().
{
  /* Right side, lower triangular, transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_ruc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_ruc().
{
  /* Right side, upper triangular, conjugate-no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_ruh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_ruh().
{
  /* Right side, upper triangular, conjugate-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_run_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_run().
{
  /* Right side, upper triangular, no-transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_rut_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLA_Trmm_rut().
{
  /* Right side, upper triangular, transpose task: forwards to
     FLA_Trmm_external() with those options fixed. cntl is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmm_task | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trmm_t * | cntl | ||
) |
References FLA_Trmm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  /* Generic Trmm task: passes side, uplo, trans and diag through unchanged
     to FLA_Trmm_external(). The cntl argument is not used here. */
  FLA_Error r_val;

  r_val = FLA_Trmm_external( side, uplo, trans, diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trmmsx | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Trmmsx_external().
{
  /* Front end for Trmmsx: forwards every argument unchanged to
     FLA_Trmmsx_external() and returns its result. */
  FLA_Error r_val;

  r_val = FLA_Trmmsx_external( side, uplo, trans, diag, alpha, A, B, beta, C );

  return r_val;
}
FLA_Error FLA_Trmmsx_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trmmsx_external().
{
  /*
   * Parameter validation for Trmmsx. Each check stores its result in
   * `status` and passes it to FLA_Check_error_code(). The checks cover, in
   * order: side, uplo, trans and diag validity, A being a floating
   * non-constant object, datatype agreement of B, C, alpha and beta with A,
   * alpha and beta being scalars, A being square, and dimension conformality
   * of A, B and C.
   */
  FLA_Error status;

  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( status );

  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  /* A is the left operand for side == FLA_LEFT and the right operand
     otherwise; C is the result in both cases. */
  if ( side != FLA_LEFT )
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
    FLA_Check_error_code( status );
  }
  else
  {
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
    FLA_Check_error_code( status );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trmmsx_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_ctrmmsx(), bl1_dtrmmsx(), bl1_strmmsx(), bl1_ztrmmsx(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trmmsx_check().
Referenced by FLA_Trmmsx().
{
  // Calls the bl1_?trmmsx kernel selected by the datatype of A, handing it
  // the raw buffers of alpha, A, B, beta and C together with the row and
  // column strides of A, B and C and the dimensions of B.
  //
  // Returns FLA_SUCCESS on every path visible here: after the kernel call,
  // when B has a zero dimension (no kernel is called), and when the
  // datatype of A matches none of the four cases in the switch.
  FLA_Datatype dt_A;
  int          len_B, wid_B;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  int          C_rs, C_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Argument validation runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trmmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C );

  // An empty B means there is nothing to compute.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt_A  = FLA_Obj_datatype( A );

  A_rs  = FLA_Obj_row_stride( A );
  A_cs  = FLA_Obj_col_stride( A );

  len_B = FLA_Obj_length( B );
  wid_B = FLA_Obj_width( B );
  B_rs  = FLA_Obj_row_stride( B );
  B_cs  = FLA_Obj_col_stride( B );

  C_rs  = FLA_Obj_row_stride( C );
  C_cs  = FLA_Obj_col_stride( C );

  // Translate the FLAME parameter values into their BLIS counterparts.
  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *C_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_strmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *C_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dtrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *C_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_ctrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *C_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_ztrmmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsm | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References FLA_Check_error_level(), FLA_Trsm_check(), FLA_Trsm_external(), and FLA_Trsm_internal().
{
  // Front end for the triangular solve: optionally validates the arguments,
  // then hands the operation to one of two back ends chosen at compile time.
  // Returns whatever the selected back end returns.
  FLA_Error r_val = FLA_SUCCESS;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );

#ifdef FLA_ENABLE_BLAS3_FRONT_END_CNTL_TREES
  // Control-tree build: go through FLA_Trsm_internal with fla_trsm_cntl_mm.
  r_val = FLA_Trsm_internal( side, uplo, trans, diag, alpha, A, B, fla_trsm_cntl_mm );
#else
  // Default build: call the external implementation directly.
  r_val = FLA_Trsm_external( side, uplo, trans, diag, alpha, A, B );
#endif

  return r_val;
}
FLA_Error FLA_Trsm_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trsm(), FLA_Trsm_external(), FLA_Trsm_external_gpu(), and FLASH_Trsm().
{
  // Validates the arguments of a triangular solve. Each check's result is
  // passed to FLA_Check_error_code before the next check runs. Returns
  // FLA_SUCCESS when control reaches the end of the sequence.
  FLA_Error status;

  // Enumerated parameters.
  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( status );

  // Datatype requirements on A, B and alpha.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  // Shape requirements.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  // A is the first operand when side is FLA_LEFT and the second operand
  // otherwise; B is both the remaining operand and the result.
  if ( side == FLA_LEFT )
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
  else
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsm_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B | ||
) |
References bl1_ctrsm(), bl1_dtrsm(), bl1_strsm(), bl1_ztrsm(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trsm_check().
Referenced by FLA_Chol_solve(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_LQ_UT_solve(), FLA_LU_nopiv_blk_var1(), FLA_LU_nopiv_blk_var2(), FLA_LU_nopiv_blk_var3(), FLA_LU_nopiv_solve(), FLA_LU_nopiv_unb_var1(), FLA_LU_nopiv_unb_var2(), FLA_LU_nopiv_unb_var3(), FLA_LU_piv_blk_var3(), FLA_LU_piv_solve(), FLA_LU_piv_unb_var3(), FLA_LU_piv_unb_var3b(), FLA_QR_UT_solve(), FLA_SA_FS_blk(), FLA_SA_LU_blk(), FLA_Trsm(), FLA_Trsm_llc_task(), FLA_Trsm_llh_task(), FLA_Trsm_lln_task(), FLA_Trsm_llt_task(), FLA_Trsm_luc_task(), FLA_Trsm_luh_task(), FLA_Trsm_lun_task(), FLA_Trsm_lut_task(), FLA_Trsm_piv_task(), FLA_Trsm_rlc_task(), FLA_Trsm_rlh_task(), FLA_Trsm_rln_task(), FLA_Trsm_rlt_task(), FLA_Trsm_ruc_task(), FLA_Trsm_ruh_task(), FLA_Trsm_run_task(), FLA_Trsm_rut_task(), FLA_Trsm_task(), and FLA_UDdate_UT_solve().
{
  // Calls the bl1_?trsm kernel selected by the datatype of A, handing it
  // the raw buffers of alpha, A and B together with the row and column
  // strides of A and B and the dimensions of B.
  //
  // Returns FLA_SUCCESS on every path visible here: after the kernel call,
  // when B has a zero dimension (no kernel is called), and when the
  // datatype of A matches none of the four cases in the switch.
  FLA_Datatype dt_A;
  int          len_B, wid_B;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Argument validation runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );

  // An empty B means there is nothing to solve.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt_A  = FLA_Obj_datatype( A );

  A_rs  = FLA_Obj_row_stride( A );
  A_cs  = FLA_Obj_col_stride( A );

  len_B = FLA_Obj_length( B );
  wid_B = FLA_Obj_width( B );
  B_rs  = FLA_Obj_row_stride( B );
  B_cs  = FLA_Obj_col_stride( B );

  // Translate the FLAME parameter values into their BLIS counterparts.
  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

      bl1_strsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 len_B, wid_B,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

      bl1_dtrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 len_B, wid_B,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

      bl1_ctrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 len_B, wid_B,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      bl1_ztrsm( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                 len_B, wid_B,
                 alpha_buf,
                 A_buf, A_rs, A_cs,
                 B_buf, B_rs, B_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsm_external_gpu | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
void * | A_gpu, | ||
FLA_Obj | B, | ||
void * | B_gpu | ||
) |
References FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), FLA_Param_map_flame_to_netlib_diag(), FLA_Param_map_flame_to_netlib_side(), FLA_Param_map_flame_to_netlib_trans(), FLA_Param_map_flame_to_netlib_uplo(), and FLA_Trsm_check().
Referenced by FLASH_Queue_exec_task_gpu().
{
  // Calls the cublas?trsm routine selected by the datatype of A, operating
  // on the caller-supplied buffers A_gpu and B_gpu. alpha is read from the
  // FLA_Obj and passed by value.
  //
  // Returns FLA_SUCCESS on every path visible here: after the cuBLAS call,
  // when B has a zero dimension (no call is made), and when the datatype
  // of A matches none of the four cases in the switch.
  FLA_Datatype dt_A;
  int          len_B, wid_B;
  int          lda;
  int          ldb;
  char         side_ch;
  char         uplo_ch;
  char         trans_ch;
  char         diag_ch;

  // Argument validation runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );

  // An empty B means there is nothing to solve.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt_A  = FLA_Obj_datatype( A );

  // The leading dimensions handed to cuBLAS are the object lengths of A
  // and B, not strides queried from the objects.
  lda   = FLA_Obj_length( A );

  len_B = FLA_Obj_length( B );
  wid_B = FLA_Obj_width( B );
  ldb   = FLA_Obj_length( B );

  // Translate the FLAME parameter values into netlib-style characters.
  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );
  FLA_Param_map_flame_to_netlib_diag( diag, &diag_ch );

  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

      cublasStrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   len_B, wid_B,
                   *alpha_buf,
                   ( float * ) A_gpu, lda,
                   ( float * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

      cublasDtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   len_B, wid_B,
                   *alpha_buf,
                   ( double * ) A_gpu, lda,
                   ( double * ) B_gpu, ldb );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_buf = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );

      cublasCtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   len_B, wid_B,
                   *alpha_buf,
                   ( cuComplex * ) A_gpu, lda,
                   ( cuComplex * ) B_gpu, ldb );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_buf = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

      cublasZtrsm( side_ch, uplo_ch, trans_ch, diag_ch,
                   len_B, wid_B,
                   *alpha_buf,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   ( cuDoubleComplex * ) B_gpu, ldb );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsm_internal_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Check_identical_object_elemtype(), FLA_Check_matrix_matrix_dims(), FLA_Check_null_pointer(), FLA_Check_object_length_equals(), and FLA_Obj_length().
Referenced by FLA_Trsm_internal().
{
  // Validates the arguments of FLA_Trsm_internal: the control structure
  // must be non-NULL, A and B must have identical element types, and their
  // dimensions must be conformal for the requested side. Each check's
  // result is passed to FLA_Check_error_code. Returns FLA_SUCCESS when
  // control reaches the end. uplo, trans, diag and alpha are not examined
  // in this body.
  FLA_Error e_val;

  // Abort if the control structure is NULL.
  e_val = FLA_Check_null_pointer( ( void* ) cntl );
  FLA_Check_error_code( e_val );

  // Verify that the object element types are identical.
  e_val = FLA_Check_identical_object_elemtype( A, B );
  FLA_Check_error_code( e_val );

  // Verify conformality between all the objects. This check works regardless
  // of whether the element type is FLA_MATRIX or FLA_SCALAR because the
  // element length and width are used instead of scalar length and width.
  if ( side == FLA_LEFT )
  {
    // The full matrix-matrix dimension check is disabled for the left-side
    // case; only the length of A is compared against the length of B.
    //e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, B );
    //FLA_Check_error_code( e_val );
    e_val = FLA_Check_object_length_equals( A, FLA_Obj_length( B ) );
    FLA_Check_error_code( e_val );
  }
  else
  {
    e_val = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, B );
    FLA_Check_error_code( e_val );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsm_llc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llc().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_LOWER_TRIANGULAR and FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_llh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llh().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_LOWER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_lln_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lln().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_LOWER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_llt_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_llt().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_LOWER_TRIANGULAR and FLA_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_luc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_luc().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_UPPER_TRIANGULAR and FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_luh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_luh().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_UPPER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_lun_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lun().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_UPPER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_lut_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_lut().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_LEFT, FLA_UPPER_TRIANGULAR and FLA_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_LEFT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_rlc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlc().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_LOWER_TRIANGULAR and FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_rlh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlh().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_LOWER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_rln_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rln().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_LOWER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_rlt_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rlt().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_LOWER_TRIANGULAR and FLA_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_LOWER_TRIANGULAR,
                             FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_ruc_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_ruc().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_UPPER_TRIANGULAR and FLA_CONJ_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_CONJ_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_ruh_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_ruh().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_UPPER_TRIANGULAR and FLA_CONJ_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_CONJ_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_run_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_run().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_UPPER_TRIANGULAR and FLA_NO_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_NO_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_rut_task | ( | FLA_Diag | diag, |
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLA_Trsm_rut().
{
  // Forwards to FLA_Trsm_external with side, uplo and trans fixed to
  // FLA_RIGHT, FLA_UPPER_TRIANGULAR and FLA_TRANSPOSE.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( FLA_RIGHT,
                             FLA_UPPER_TRIANGULAR,
                             FLA_TRANSPOSE,
                             diag, alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsm_task | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
fla_trsm_t * | cntl | ||
) |
References FLA_Trsm_external().
Referenced by FLASH_Queue_exec_task(), and FLASH_Queue_exec_task_gpu().
{
  // Forwards side, uplo, trans, diag, alpha, A and B unchanged to
  // FLA_Trsm_external and returns its result.
  // The cntl argument is not referenced in this body.
  FLA_Error r_val;

  r_val = FLA_Trsm_external( side, uplo, trans, diag,
                             alpha, A, B );

  return r_val;
}
FLA_Error FLA_Trsmsx | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Trsmsx_external().
{
  // Forwards every argument unchanged to FLA_Trsmsx_external and returns
  // its result; no checking is done in this body.
  FLA_Error r_val;

  r_val = FLA_Trsmsx_external( side, uplo, trans, diag,
                               alpha, A, B,
                               beta, C );

  return r_val;
}
FLA_Error FLA_Trsmsx_check | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References FLA_Check_consistent_object_datatype(), FLA_Check_floating_object(), FLA_Check_identical_object_datatype(), FLA_Check_if_scalar(), FLA_Check_matrix_matrix_dims(), FLA_Check_nonconstant_object(), FLA_Check_square(), FLA_Check_valid_diag(), FLA_Check_valid_leftright_side(), FLA_Check_valid_trans(), and FLA_Check_valid_uplo().
Referenced by FLA_Trsmsx_external().
{
  // Validates the arguments of FLA_Trsmsx_external. Each check's result is
  // passed to FLA_Check_error_code before the next check runs. Returns
  // FLA_SUCCESS when control reaches the end of the sequence.
  FLA_Error status;

  // Enumerated parameters.
  status = FLA_Check_valid_leftright_side( side );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_uplo( uplo );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_trans( trans );
  FLA_Check_error_code( status );

  status = FLA_Check_valid_diag( diag );
  FLA_Check_error_code( status );

  // Datatype requirements on A, B, C, alpha and beta.
  status = FLA_Check_floating_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_nonconstant_object( A );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, B );
  FLA_Check_error_code( status );

  status = FLA_Check_identical_object_datatype( A, C );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_consistent_object_datatype( A, beta );
  FLA_Check_error_code( status );

  // Shape requirements.
  status = FLA_Check_if_scalar( alpha );
  FLA_Check_error_code( status );

  status = FLA_Check_if_scalar( beta );
  FLA_Check_error_code( status );

  status = FLA_Check_square( A );
  FLA_Check_error_code( status );

  // A is the first operand when side is FLA_LEFT and the second operand
  // otherwise; C is the result operand in both cases.
  if ( side == FLA_LEFT )
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, A, B, C );
  else
    status = FLA_Check_matrix_matrix_dims( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, B, A, C );
  FLA_Check_error_code( status );

  return FLA_SUCCESS;
}
FLA_Error FLA_Trsmsx_external | ( | FLA_Side | side, |
FLA_Uplo | uplo, | ||
FLA_Trans | trans, | ||
FLA_Diag | diag, | ||
FLA_Obj | alpha, | ||
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C | ||
) |
References bl1_ctrsmsx(), bl1_dtrsmsx(), bl1_strsmsx(), bl1_ztrsmsx(), FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Param_map_flame_to_blis_diag(), FLA_Param_map_flame_to_blis_side(), FLA_Param_map_flame_to_blis_trans(), FLA_Param_map_flame_to_blis_uplo(), and FLA_Trsmsx_check().
Referenced by FLA_Trsmsx().
{
  // Calls the bl1_?trsmsx kernel selected by the datatype of A, handing it
  // the raw buffers of alpha, A, B, beta and C together with the row and
  // column strides of A, B and C and the dimensions of B.
  //
  // Returns FLA_SUCCESS on every path visible here: after the kernel call,
  // when B has a zero dimension (no kernel is called), and when the
  // datatype of A matches none of the four cases in the switch.
  FLA_Datatype dt_A;
  int          len_B, wid_B;
  int          A_rs, A_cs;
  int          B_rs, B_cs;
  int          C_rs, C_cs;
  side1_t      mapped_side;
  uplo1_t      mapped_uplo;
  trans1_t     mapped_trans;
  diag1_t      mapped_diag;

  // Argument validation runs only at the full error-checking level.
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trsmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C );

  // An empty B means there is nothing to compute.
  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt_A  = FLA_Obj_datatype( A );

  A_rs  = FLA_Obj_row_stride( A );
  A_cs  = FLA_Obj_col_stride( A );

  len_B = FLA_Obj_length( B );
  wid_B = FLA_Obj_width( B );
  B_rs  = FLA_Obj_row_stride( B );
  B_cs  = FLA_Obj_col_stride( B );

  C_rs  = FLA_Obj_row_stride( C );
  C_cs  = FLA_Obj_col_stride( C );

  // Translate the FLAME parameter values into their BLIS counterparts.
  FLA_Param_map_flame_to_blis_side( side, &mapped_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &mapped_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &mapped_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &mapped_diag );

  switch ( dt_A )
  {
    case FLA_FLOAT:
    {
      float *A_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *B_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *C_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bl1_strsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *A_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *B_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *C_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bl1_dtrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *A_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *B_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *C_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bl1_ctrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *A_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *B_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *C_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bl1_ztrsmsx( mapped_side, mapped_uplo, mapped_trans, mapped_diag,
                   len_B, wid_B,
                   alpha_buf,
                   A_buf, A_rs, A_cs,
                   B_buf, B_rs, B_cs,
                   beta_buf,
                   C_buf, C_rs, C_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}