libflame
12600
|
Functions | |
void | blas_set_parameter (void) |
fla_blocksize_t * | FLA_Blocksize_create (dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z) |
void | FLA_Blocksize_set (fla_blocksize_t *bp, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z) |
void | FLA_Blocksize_scale (fla_blocksize_t *bp, double factor) |
fla_blocksize_t * | FLA_Blocksize_create_copy (fla_blocksize_t *bp) |
void | FLA_Blocksize_free (fla_blocksize_t *bp) |
dim_t | FLA_Blocksize_extract (FLA_Datatype dt, fla_blocksize_t *bp) |
fla_blocksize_t * | FLA_Query_blocksizes (FLA_Dimension dim) |
dim_t | FLA_Query_blocksize (FLA_Datatype dt, FLA_Dimension dim) |
dim_t | FLA_Determine_blocksize (FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *bp) |
dim_t | FLA_determine_matrix_size (FLA_Obj A_unproc, FLA_Quadrant to_dir) |
Variables | |
long | sgemm_p |
long | sgemm_q |
long | sgemm_r |
long | dgemm_p |
long | dgemm_q |
long | dgemm_r |
long | cgemm_p |
long | cgemm_q |
long | cgemm_r |
long | zgemm_p |
long | zgemm_q |
long | zgemm_r |
long | fla_goto_gemm_blocksize [4][4] |
void blas_set_parameter ( void )
fla_blocksize_t* FLA_Blocksize_create ( dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_malloc(), FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLA_Query_blocksizes(), FLASH_Apply_CAQ2_UT_cntl_init(), FLASH_Apply_CAQ_UT_inc_cntl_init(), FLASH_Apply_pivots_cntl_init(), FLASH_Apply_Q2_UT_cntl_init(), FLASH_Apply_Q_UT_cntl_init(), FLASH_Apply_Q_UT_inc_cntl_init(), FLASH_Apply_QUD_UT_cntl_init(), FLASH_Apply_QUD_UT_inc_cntl_init(), FLASH_Axpy_cntl_init(), FLASH_Axpyt_cntl_init(), FLASH_CAQR2_UT_cntl_init(), FLASH_CAQR_UT_inc_cntl_init(), FLASH_Chol_cntl_init(), FLASH_Copy_cntl_init(), FLASH_Copyr_cntl_init(), FLASH_Copyt_cntl_init(), FLASH_Eig_gest_cntl_init(), FLASH_Gemm_cntl_init(), FLASH_Gemv_cntl_init(), FLASH_Hemm_cntl_init(), FLASH_Her2k_cntl_init(), FLASH_Herk_cntl_init(), FLASH_LQ_UT_cntl_init(), FLASH_LU_incpiv_cntl_init(), FLASH_LU_nopiv_cntl_init(), FLASH_LU_piv_cntl_init(), FLASH_Lyap_cntl_init(), FLASH_QR2_UT_cntl_init(), FLASH_QR_UT_cntl_init(), FLASH_QR_UT_inc_cntl_init(), FLASH_Scal_cntl_init(), FLASH_Scalr_cntl_init(), FLASH_Sylv_cntl_init(), FLASH_Symm_cntl_init(), FLASH_Syr2k_cntl_init(), FLASH_Syrk_cntl_init(), FLASH_Trinv_cntl_init(), FLASH_Trmm_cntl_init(), FLASH_Trsm_cntl_init(), FLASH_Trsv_cntl_init(), FLASH_Ttmm_cntl_init(), FLASH_UDdate_UT_cntl_init(), and FLASH_UDdate_UT_inc_cntl_init().
{
  // Build a new blocksize structure holding the four supplied values.
  // Storage comes from FLA_malloc(); the pointer is returned to the caller.
  fla_blocksize_t* blocksize = ( fla_blocksize_t* ) FLA_malloc( sizeof( *blocksize ) );

  // Store each argument in the field of the same suffix:
  // b_s -> s, b_d -> d, b_c -> c, b_z -> z.
  blocksize->s = b_s;
  blocksize->d = b_d;
  blocksize->c = b_c;
  blocksize->z = b_z;

  // Hand the populated structure back.
  return blocksize;
}
fla_blocksize_t* FLA_Blocksize_create_copy ( fla_blocksize_t * bp )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_Check_error_level(), FLA_Check_null_pointer(), FLA_malloc(), FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLA_Chol_cntl_init(), FLA_LU_nopiv_cntl_init(), and FLA_LU_piv_cntl_init().
{
  // Produce a newly allocated blocksize structure whose s, d, c and z
  // fields match those of bp.
  fla_blocksize_t* duplicate;
  FLA_Error        status;

  // When the error-checking level is at least FLA_MIN_ERROR_CHECKING,
  // run the null-pointer check on bp and report its result.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    status = FLA_Check_null_pointer( bp );
    FLA_Check_error_code( status );
  }

  // Obtain storage for the duplicate.
  duplicate = ( fla_blocksize_t* ) FLA_malloc( sizeof( *duplicate ) );

  // Copy the source object's values field by field.
  duplicate->s = bp->s;
  duplicate->d = bp->d;
  duplicate->c = bp->c;
  duplicate->z = bp->z;

  // Return the new structure.
  return duplicate;
}
dim_t FLA_Blocksize_extract ( FLA_Datatype dt, fla_blocksize_t * bp )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_Check_error_level(), FLA_Check_null_pointer(), FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLA_Check_blocksize_object(), FLA_Determine_blocksize(), FLA_Trinv(), and FLA_Ttmm().
{
  // Select the field of bp that corresponds to datatype dt.
  // Returns 0 when dt matches none of the four datatypes tested below.
  FLA_Error status;

  // When the error-checking level is at least FLA_MIN_ERROR_CHECKING,
  // run the null-pointer check on bp and report its result.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    status = FLA_Check_null_pointer( bp );
    FLA_Check_error_code( status );
  }

  // Return as soon as the datatype is recognized.
  if ( dt == FLA_FLOAT )
  {
    return bp->s;
  }
  if ( dt == FLA_DOUBLE )
  {
    return bp->d;
  }
  if ( dt == FLA_COMPLEX )
  {
    return bp->c;
  }
  if ( dt == FLA_DOUBLE_COMPLEX )
  {
    return bp->z;
  }

  // No datatype matched.
  return 0;
}
void FLA_Blocksize_free ( fla_blocksize_t * bp )
References FLA_free().
Referenced by FLA_Apply_CAQ2_UT_cntl_finalize(), FLA_Apply_Q2_UT_cntl_finalize(), FLA_Apply_Q_UT_cntl_finalize(), FLA_Apply_QUD_UT_cntl_finalize(), FLA_Bidiag_UT_cntl_finalize(), FLA_CAQR2_UT_cntl_finalize(), FLA_Chol_cntl_finalize(), FLA_Eig_gest_cntl_finalize(), FLA_Gemm_cntl_finalize(), FLA_Hemm_cntl_finalize(), FLA_Her2k_cntl_finalize(), FLA_Herk_cntl_finalize(), FLA_Hess_UT_cntl_finalize(), FLA_LQ_UT_cntl_finalize(), FLA_LU_nopiv_cntl_finalize(), FLA_LU_piv_cntl_finalize(), FLA_Lyap_cntl_finalize(), FLA_QR2_UT_cntl_finalize(), FLA_QR_UT_cntl_finalize(), FLA_SPDinv_cntl_finalize(), FLA_Sylv_cntl_finalize(), FLA_Symm_cntl_finalize(), FLA_Syr2k_cntl_finalize(), FLA_Syrk_cntl_finalize(), FLA_Transpose_cntl_finalize(), FLA_Tridiag_UT_cntl_finalize(), FLA_Trinv_cntl_finalize(), FLA_Trmm_cntl_finalize(), FLA_Trsm_cntl_finalize(), FLA_Ttmm_cntl_finalize(), FLA_UDdate_UT_cntl_finalize(), FLASH_Apply_CAQ2_UT_cntl_finalize(), FLASH_Apply_CAQ_UT_inc_cntl_finalize(), FLASH_Apply_pivots_cntl_finalize(), FLASH_Apply_Q2_UT_cntl_finalize(), FLASH_Apply_Q_UT_cntl_finalize(), FLASH_Apply_Q_UT_inc_cntl_finalize(), FLASH_Apply_QUD_UT_cntl_finalize(), FLASH_Apply_QUD_UT_inc_cntl_finalize(), FLASH_Axpy_cntl_finalize(), FLASH_Axpyt_cntl_finalize(), FLASH_CAQR2_UT_cntl_finalize(), FLASH_CAQR_UT_inc_cntl_finalize(), FLASH_Chol_cntl_finalize(), FLASH_Copy_cntl_finalize(), FLASH_Copyr_cntl_finalize(), FLASH_Copyt_cntl_finalize(), FLASH_Eig_gest_cntl_finalize(), FLASH_Gemm_cntl_finalize(), FLASH_Gemv_cntl_finalize(), FLASH_Hemm_cntl_finalize(), FLASH_Her2k_cntl_finalize(), FLASH_Herk_cntl_finalize(), FLASH_LQ_UT_cntl_finalize(), FLASH_LU_incpiv_cntl_finalize(), FLASH_LU_nopiv_cntl_finalize(), FLASH_LU_piv_cntl_finalize(), FLASH_Lyap_cntl_finalize(), FLASH_QR2_UT_cntl_finalize(), FLASH_QR_UT_cntl_finalize(), FLASH_QR_UT_inc_cntl_finalize(), FLASH_Scal_cntl_finalize(), FLASH_Scalr_cntl_finalize(), FLASH_SPDinv_cntl_finalize(), FLASH_Sylv_cntl_finalize(), FLASH_Symm_cntl_finalize(), 
FLASH_Syr2k_cntl_finalize(), FLASH_Syrk_cntl_finalize(), FLASH_Trinv_cntl_finalize(), FLASH_Trmm_cntl_finalize(), FLASH_Trsm_cntl_finalize(), FLASH_Trsv_cntl_finalize(), FLASH_Ttmm_cntl_finalize(), FLASH_UDdate_UT_cntl_finalize(), and FLASH_UDdate_UT_inc_cntl_finalize().
{
  // Release the blocksize structure by passing bp to FLA_free().
  FLA_free( bp );
}
void FLA_Blocksize_scale ( fla_blocksize_t * bp, double factor )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_Check_error_level(), FLA_Check_null_pointer(), FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLA_Apply_CAQ2_UT_cntl_init(), FLA_Apply_Q2_UT_cntl_init(), FLA_Apply_Q_UT_cntl_init(), FLA_Apply_QUD_UT_cntl_init(), FLA_Bidiag_UT_cntl_init(), FLA_CAQR2_UT_cntl_init(), FLA_Chol_cntl_init(), FLA_Hess_UT_cntl_init(), FLA_LQ_UT_cntl_init(), FLA_LU_nopiv_cntl_init(), FLA_LU_piv_cntl_init(), FLA_QR2_UT_cntl_init(), FLA_QR_UT_cntl_init(), FLA_Tridiag_UT_cntl_init(), and FLA_UDdate_UT_cntl_init().
{
  // Multiply every blocksize stored in bp by factor, in place.
  FLA_Error status;

  // When the error-checking level is at least FLA_MIN_ERROR_CHECKING,
  // run the null-pointer check on bp and report its result.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    status = FLA_Check_null_pointer( bp );
    FLA_Check_error_code( status );
  }

  // Each product is formed in double precision and then converted back
  // to dim_t by the cast.
  bp->s = ( dim_t )( factor * ( double ) bp->s );
  bp->d = ( dim_t )( factor * ( double ) bp->d );
  bp->c = ( dim_t )( factor * ( double ) bp->c );
  bp->z = ( dim_t )( factor * ( double ) bp->z );
}
void FLA_Blocksize_set ( fla_blocksize_t * bp, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLASH_Apply_Q_UT().
dim_t FLA_Determine_blocksize ( FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t * bp )
References FLA_Blocksize_extract(), FLA_Check_blocksize_value(), FLA_Check_error_level(), FLA_determine_matrix_size(), and FLA_Obj_datatype().
Referenced by FLA_Apply_CAQ2_UT_lhfc_blk_var2(), FLA_Apply_CAQ2_UT_lhfc_blk_var3(), FLA_Apply_CAQ_UT_inc_lhfc_blk_var1(), FLA_Apply_pivots_ln_blk_var1(), FLA_Apply_pivots_ln_blk_var2(), FLA_Apply_Q2_UT_lhfc_blk_var2(), FLA_Apply_Q2_UT_lhfc_blk_var3(), FLA_Apply_Q2_UT_lnfc_blk_var2(), FLA_Apply_Q2_UT_lnfc_blk_var3(), FLA_Apply_Q_UT_inc_lhfc_blk_var1(), FLA_Apply_Q_UT_inc_lnfc_blk_var1(), FLA_Apply_Q_UT_lhbc_blk_var2(), FLA_Apply_Q_UT_lhbc_blk_var3(), FLA_Apply_Q_UT_lhbr_blk_var2(), FLA_Apply_Q_UT_lhbr_blk_var3(), FLA_Apply_Q_UT_lhfc_blk_var2(), FLA_Apply_Q_UT_lhfc_blk_var3(), FLA_Apply_Q_UT_lhfr_blk_var2(), FLA_Apply_Q_UT_lhfr_blk_var3(), FLA_Apply_Q_UT_lnbc_blk_var2(), FLA_Apply_Q_UT_lnbc_blk_var3(), FLA_Apply_Q_UT_lnbr_blk_var2(), FLA_Apply_Q_UT_lnbr_blk_var3(), FLA_Apply_Q_UT_lnfc_blk_var2(), FLA_Apply_Q_UT_lnfc_blk_var3(), FLA_Apply_Q_UT_lnfr_blk_var2(), FLA_Apply_Q_UT_lnfr_blk_var3(), FLA_Apply_Q_UT_rhbc_blk_var2(), FLA_Apply_Q_UT_rhbc_blk_var3(), FLA_Apply_Q_UT_rhbr_blk_var2(), FLA_Apply_Q_UT_rhbr_blk_var3(), FLA_Apply_Q_UT_rhfc_blk_var2(), FLA_Apply_Q_UT_rhfc_blk_var3(), FLA_Apply_Q_UT_rhfr_blk_var2(), FLA_Apply_Q_UT_rhfr_blk_var3(), FLA_Apply_Q_UT_rnbc_blk_var2(), FLA_Apply_Q_UT_rnbc_blk_var3(), FLA_Apply_Q_UT_rnbr_blk_var2(), FLA_Apply_Q_UT_rnbr_blk_var3(), FLA_Apply_Q_UT_rnfc_blk_var2(), FLA_Apply_Q_UT_rnfc_blk_var3(), FLA_Apply_Q_UT_rnfr_blk_var2(), FLA_Apply_Q_UT_rnfr_blk_var3(), FLA_Apply_QUD_UT_inc_lhfc_blk_var1(), FLA_Apply_QUD_UT_lhfc_blk_var2(), FLA_Apply_QUD_UT_lhfc_blk_var3(), FLA_Axpy_blk_var1(), FLA_Axpy_blk_var2(), FLA_Axpy_blk_var3(), FLA_Axpy_blk_var4(), FLA_Axpyt_c_blk_var1(), FLA_Axpyt_c_blk_var2(), FLA_Axpyt_c_blk_var3(), FLA_Axpyt_c_blk_var4(), FLA_Axpyt_h_blk_var1(), FLA_Axpyt_h_blk_var2(), FLA_Axpyt_h_blk_var3(), FLA_Axpyt_h_blk_var4(), FLA_Axpyt_n_blk_var1(), FLA_Axpyt_n_blk_var2(), FLA_Axpyt_n_blk_var3(), FLA_Axpyt_n_blk_var4(), FLA_Axpyt_t_blk_var1(), FLA_Axpyt_t_blk_var2(), FLA_Axpyt_t_blk_var3(), FLA_Axpyt_t_blk_var4(), 
FLA_CAQR2_UT_blk_var2(), FLA_CAQR_UT_inc_blk_var1(), FLA_Chol_l_blk_var1(), FLA_Chol_l_blk_var2(), FLA_Chol_l_blk_var3(), FLA_Chol_u_blk_var1(), FLA_Chol_u_blk_var2(), FLA_Chol_u_blk_var3(), FLA_Copy_blk_var1(), FLA_Copy_blk_var2(), FLA_Copy_blk_var3(), FLA_Copy_blk_var4(), FLA_Copyr_l_blk_var1(), FLA_Copyr_l_blk_var2(), FLA_Copyr_l_blk_var3(), FLA_Copyr_l_blk_var4(), FLA_Copyr_u_blk_var1(), FLA_Copyr_u_blk_var2(), FLA_Copyr_u_blk_var3(), FLA_Copyr_u_blk_var4(), FLA_Copyt_c_blk_var1(), FLA_Copyt_c_blk_var2(), FLA_Copyt_c_blk_var3(), FLA_Copyt_c_blk_var4(), FLA_Copyt_h_blk_var1(), FLA_Copyt_h_blk_var2(), FLA_Copyt_h_blk_var3(), FLA_Copyt_h_blk_var4(), FLA_Copyt_n_blk_var1(), FLA_Copyt_n_blk_var2(), FLA_Copyt_n_blk_var3(), FLA_Copyt_n_blk_var4(), FLA_Copyt_t_blk_var1(), FLA_Copyt_t_blk_var2(), FLA_Copyt_t_blk_var3(), FLA_Copyt_t_blk_var4(), FLA_Eig_gest_il_blk_var1(), FLA_Eig_gest_il_blk_var2(), FLA_Eig_gest_il_blk_var3(), FLA_Eig_gest_il_blk_var4(), FLA_Eig_gest_il_blk_var5(), FLA_Eig_gest_iu_blk_var1(), FLA_Eig_gest_iu_blk_var2(), FLA_Eig_gest_iu_blk_var3(), FLA_Eig_gest_iu_blk_var4(), FLA_Eig_gest_iu_blk_var5(), FLA_Eig_gest_nl_blk_var1(), FLA_Eig_gest_nl_blk_var2(), FLA_Eig_gest_nl_blk_var4(), FLA_Eig_gest_nl_blk_var5(), FLA_Eig_gest_nu_blk_var1(), FLA_Eig_gest_nu_blk_var2(), FLA_Eig_gest_nu_blk_var4(), FLA_Eig_gest_nu_blk_var5(), FLA_Gemm_cc_blk_var1(), FLA_Gemm_cc_blk_var2(), FLA_Gemm_cc_blk_var3(), FLA_Gemm_cc_blk_var4(), FLA_Gemm_cc_blk_var5(), FLA_Gemm_cc_blk_var6(), FLA_Gemm_ch_blk_var1(), FLA_Gemm_ch_blk_var2(), FLA_Gemm_ch_blk_var3(), FLA_Gemm_ch_blk_var4(), FLA_Gemm_ch_blk_var5(), FLA_Gemm_ch_blk_var6(), FLA_Gemm_cn_blk_var1(), FLA_Gemm_cn_blk_var2(), FLA_Gemm_cn_blk_var3(), FLA_Gemm_cn_blk_var4(), FLA_Gemm_cn_blk_var5(), FLA_Gemm_cn_blk_var6(), FLA_Gemm_ct_blk_var1(), FLA_Gemm_ct_blk_var2(), FLA_Gemm_ct_blk_var3(), FLA_Gemm_ct_blk_var4(), FLA_Gemm_ct_blk_var5(), FLA_Gemm_ct_blk_var6(), FLA_Gemm_hc_blk_var1(), FLA_Gemm_hc_blk_var2(), 
FLA_Gemm_hc_blk_var3(), FLA_Gemm_hc_blk_var4(), FLA_Gemm_hc_blk_var5(), FLA_Gemm_hc_blk_var6(), FLA_Gemm_hh_blk_var1(), FLA_Gemm_hh_blk_var2(), FLA_Gemm_hh_blk_var3(), FLA_Gemm_hh_blk_var4(), FLA_Gemm_hh_blk_var5(), FLA_Gemm_hh_blk_var6(), FLA_Gemm_hn_blk_var1(), FLA_Gemm_hn_blk_var2(), FLA_Gemm_hn_blk_var3(), FLA_Gemm_hn_blk_var4(), FLA_Gemm_hn_blk_var5(), FLA_Gemm_hn_blk_var6(), FLA_Gemm_ht_blk_var1(), FLA_Gemm_ht_blk_var2(), FLA_Gemm_ht_blk_var3(), FLA_Gemm_ht_blk_var4(), FLA_Gemm_ht_blk_var5(), FLA_Gemm_ht_blk_var6(), FLA_Gemm_nc_blk_var1(), FLA_Gemm_nc_blk_var2(), FLA_Gemm_nc_blk_var3(), FLA_Gemm_nc_blk_var4(), FLA_Gemm_nc_blk_var5(), FLA_Gemm_nc_blk_var6(), FLA_Gemm_nh_blk_var1(), FLA_Gemm_nh_blk_var2(), FLA_Gemm_nh_blk_var3(), FLA_Gemm_nh_blk_var4(), FLA_Gemm_nh_blk_var5(), FLA_Gemm_nh_blk_var6(), FLA_Gemm_nn_blk_var1(), FLA_Gemm_nn_blk_var2(), FLA_Gemm_nn_blk_var3(), FLA_Gemm_nn_blk_var4(), FLA_Gemm_nn_blk_var5(), FLA_Gemm_nn_blk_var6(), FLA_Gemm_nt_blk_var1(), FLA_Gemm_nt_blk_var2(), FLA_Gemm_nt_blk_var3(), FLA_Gemm_nt_blk_var4(), FLA_Gemm_nt_blk_var5(), FLA_Gemm_nt_blk_var6(), FLA_Gemm_tc_blk_var1(), FLA_Gemm_tc_blk_var2(), FLA_Gemm_tc_blk_var3(), FLA_Gemm_tc_blk_var4(), FLA_Gemm_tc_blk_var5(), FLA_Gemm_tc_blk_var6(), FLA_Gemm_th_blk_var1(), FLA_Gemm_th_blk_var2(), FLA_Gemm_th_blk_var3(), FLA_Gemm_th_blk_var4(), FLA_Gemm_th_blk_var5(), FLA_Gemm_th_blk_var6(), FLA_Gemm_tn_blk_var1(), FLA_Gemm_tn_blk_var2(), FLA_Gemm_tn_blk_var3(), FLA_Gemm_tn_blk_var4(), FLA_Gemm_tn_blk_var5(), FLA_Gemm_tn_blk_var6(), FLA_Gemm_tt_blk_var1(), FLA_Gemm_tt_blk_var2(), FLA_Gemm_tt_blk_var3(), FLA_Gemm_tt_blk_var4(), FLA_Gemm_tt_blk_var5(), FLA_Gemm_tt_blk_var6(), FLA_Gemv_h_blk_var1(), FLA_Gemv_h_blk_var2(), FLA_Gemv_h_blk_var5(), FLA_Gemv_h_blk_var6(), FLA_Gemv_n_blk_var1(), FLA_Gemv_n_blk_var2(), FLA_Gemv_n_blk_var5(), FLA_Gemv_n_blk_var6(), FLA_Gemv_t_blk_var1(), FLA_Gemv_t_blk_var2(), FLA_Gemv_t_blk_var5(), FLA_Gemv_t_blk_var6(), FLA_Hemm_ll_blk_var1(), 
FLA_Hemm_ll_blk_var10(), FLA_Hemm_ll_blk_var2(), FLA_Hemm_ll_blk_var3(), FLA_Hemm_ll_blk_var4(), FLA_Hemm_ll_blk_var5(), FLA_Hemm_ll_blk_var6(), FLA_Hemm_ll_blk_var7(), FLA_Hemm_ll_blk_var8(), FLA_Hemm_ll_blk_var9(), FLA_Hemm_lu_blk_var1(), FLA_Hemm_lu_blk_var10(), FLA_Hemm_lu_blk_var2(), FLA_Hemm_lu_blk_var3(), FLA_Hemm_lu_blk_var4(), FLA_Hemm_lu_blk_var5(), FLA_Hemm_lu_blk_var6(), FLA_Hemm_lu_blk_var7(), FLA_Hemm_lu_blk_var8(), FLA_Hemm_lu_blk_var9(), FLA_Hemm_rl_blk_var1(), FLA_Hemm_rl_blk_var10(), FLA_Hemm_rl_blk_var2(), FLA_Hemm_rl_blk_var3(), FLA_Hemm_rl_blk_var4(), FLA_Hemm_rl_blk_var5(), FLA_Hemm_rl_blk_var6(), FLA_Hemm_rl_blk_var7(), FLA_Hemm_rl_blk_var8(), FLA_Hemm_rl_blk_var9(), FLA_Hemm_ru_blk_var1(), FLA_Hemm_ru_blk_var10(), FLA_Hemm_ru_blk_var2(), FLA_Hemm_ru_blk_var3(), FLA_Hemm_ru_blk_var4(), FLA_Hemm_ru_blk_var5(), FLA_Hemm_ru_blk_var6(), FLA_Hemm_ru_blk_var7(), FLA_Hemm_ru_blk_var8(), FLA_Hemm_ru_blk_var9(), FLA_Her2k_lh_blk_var1(), FLA_Her2k_lh_blk_var10(), FLA_Her2k_lh_blk_var2(), FLA_Her2k_lh_blk_var3(), FLA_Her2k_lh_blk_var4(), FLA_Her2k_lh_blk_var5(), FLA_Her2k_lh_blk_var6(), FLA_Her2k_lh_blk_var7(), FLA_Her2k_lh_blk_var8(), FLA_Her2k_lh_blk_var9(), FLA_Her2k_ln_blk_var1(), FLA_Her2k_ln_blk_var10(), FLA_Her2k_ln_blk_var2(), FLA_Her2k_ln_blk_var3(), FLA_Her2k_ln_blk_var4(), FLA_Her2k_ln_blk_var5(), FLA_Her2k_ln_blk_var6(), FLA_Her2k_ln_blk_var7(), FLA_Her2k_ln_blk_var8(), FLA_Her2k_ln_blk_var9(), FLA_Her2k_uh_blk_var1(), FLA_Her2k_uh_blk_var10(), FLA_Her2k_uh_blk_var2(), FLA_Her2k_uh_blk_var3(), FLA_Her2k_uh_blk_var4(), FLA_Her2k_uh_blk_var5(), FLA_Her2k_uh_blk_var6(), FLA_Her2k_uh_blk_var7(), FLA_Her2k_uh_blk_var8(), FLA_Her2k_uh_blk_var9(), FLA_Her2k_un_blk_var1(), FLA_Her2k_un_blk_var10(), FLA_Her2k_un_blk_var2(), FLA_Her2k_un_blk_var3(), FLA_Her2k_un_blk_var4(), FLA_Her2k_un_blk_var5(), FLA_Her2k_un_blk_var6(), FLA_Her2k_un_blk_var7(), FLA_Her2k_un_blk_var8(), FLA_Her2k_un_blk_var9(), FLA_Herk_lh_blk_var1(), FLA_Herk_lh_blk_var2(), 
FLA_Herk_lh_blk_var3(), FLA_Herk_lh_blk_var4(), FLA_Herk_lh_blk_var5(), FLA_Herk_lh_blk_var6(), FLA_Herk_ln_blk_var1(), FLA_Herk_ln_blk_var2(), FLA_Herk_ln_blk_var3(), FLA_Herk_ln_blk_var4(), FLA_Herk_ln_blk_var5(), FLA_Herk_ln_blk_var6(), FLA_Herk_uh_blk_var1(), FLA_Herk_uh_blk_var2(), FLA_Herk_uh_blk_var3(), FLA_Herk_uh_blk_var4(), FLA_Herk_uh_blk_var5(), FLA_Herk_uh_blk_var6(), FLA_Herk_un_blk_var1(), FLA_Herk_un_blk_var2(), FLA_Herk_un_blk_var3(), FLA_Herk_un_blk_var4(), FLA_Herk_un_blk_var5(), FLA_Herk_un_blk_var6(), FLA_LQ_UT_blk_var2(), FLA_LQ_UT_blk_var3(), FLA_LU_nopiv_blk_var1(), FLA_LU_nopiv_blk_var2(), FLA_LU_nopiv_blk_var3(), FLA_LU_nopiv_blk_var4(), FLA_LU_nopiv_blk_var5(), FLA_LU_piv_blk_var3(), FLA_LU_piv_blk_var4(), FLA_LU_piv_blk_var5(), FLA_Lyap_h_blk_var1(), FLA_Lyap_h_blk_var2(), FLA_Lyap_h_blk_var3(), FLA_Lyap_h_blk_var4(), FLA_Lyap_n_blk_var1(), FLA_Lyap_n_blk_var2(), FLA_Lyap_n_blk_var3(), FLA_Lyap_n_blk_var4(), FLA_QR2_UT_blk_var2(), FLA_QR_UT_blk_var2(), FLA_QR_UT_blk_var3(), FLA_QR_UT_inc_blk_var1(), FLA_QR_UT_inc_blk_var2(), FLA_Scal_blk_var1(), FLA_Scal_blk_var2(), FLA_Scal_blk_var3(), FLA_Scal_blk_var4(), FLA_Scalr_l_blk_var1(), FLA_Scalr_l_blk_var2(), FLA_Scalr_l_blk_var3(), FLA_Scalr_l_blk_var4(), FLA_Scalr_u_blk_var1(), FLA_Scalr_u_blk_var2(), FLA_Scalr_u_blk_var3(), FLA_Scalr_u_blk_var4(), FLA_Swap_t_blk_var1(), FLA_Swap_t_blk_var2(), FLA_Sylv_hh_blk_var1(), FLA_Sylv_hh_blk_var10(), FLA_Sylv_hh_blk_var11(), FLA_Sylv_hh_blk_var12(), FLA_Sylv_hh_blk_var13(), FLA_Sylv_hh_blk_var14(), FLA_Sylv_hh_blk_var15(), FLA_Sylv_hh_blk_var16(), FLA_Sylv_hh_blk_var17(), FLA_Sylv_hh_blk_var18(), FLA_Sylv_hh_blk_var2(), FLA_Sylv_hh_blk_var3(), FLA_Sylv_hh_blk_var4(), FLA_Sylv_hh_blk_var5(), FLA_Sylv_hh_blk_var6(), FLA_Sylv_hh_blk_var7(), FLA_Sylv_hh_blk_var8(), FLA_Sylv_hh_blk_var9(), FLA_Sylv_hn_blk_var1(), FLA_Sylv_hn_blk_var10(), FLA_Sylv_hn_blk_var11(), FLA_Sylv_hn_blk_var12(), FLA_Sylv_hn_blk_var13(), FLA_Sylv_hn_blk_var14(), 
FLA_Sylv_hn_blk_var15(), FLA_Sylv_hn_blk_var16(), FLA_Sylv_hn_blk_var17(), FLA_Sylv_hn_blk_var18(), FLA_Sylv_hn_blk_var2(), FLA_Sylv_hn_blk_var3(), FLA_Sylv_hn_blk_var4(), FLA_Sylv_hn_blk_var5(), FLA_Sylv_hn_blk_var6(), FLA_Sylv_hn_blk_var7(), FLA_Sylv_hn_blk_var8(), FLA_Sylv_hn_blk_var9(), FLA_Sylv_nh_blk_var1(), FLA_Sylv_nh_blk_var10(), FLA_Sylv_nh_blk_var11(), FLA_Sylv_nh_blk_var12(), FLA_Sylv_nh_blk_var13(), FLA_Sylv_nh_blk_var14(), FLA_Sylv_nh_blk_var15(), FLA_Sylv_nh_blk_var16(), FLA_Sylv_nh_blk_var17(), FLA_Sylv_nh_blk_var18(), FLA_Sylv_nh_blk_var2(), FLA_Sylv_nh_blk_var3(), FLA_Sylv_nh_blk_var4(), FLA_Sylv_nh_blk_var5(), FLA_Sylv_nh_blk_var6(), FLA_Sylv_nh_blk_var7(), FLA_Sylv_nh_blk_var8(), FLA_Sylv_nh_blk_var9(), FLA_Sylv_nn_blk_var1(), FLA_Sylv_nn_blk_var10(), FLA_Sylv_nn_blk_var11(), FLA_Sylv_nn_blk_var12(), FLA_Sylv_nn_blk_var13(), FLA_Sylv_nn_blk_var14(), FLA_Sylv_nn_blk_var15(), FLA_Sylv_nn_blk_var16(), FLA_Sylv_nn_blk_var17(), FLA_Sylv_nn_blk_var18(), FLA_Sylv_nn_blk_var2(), FLA_Sylv_nn_blk_var3(), FLA_Sylv_nn_blk_var4(), FLA_Sylv_nn_blk_var5(), FLA_Sylv_nn_blk_var6(), FLA_Sylv_nn_blk_var7(), FLA_Sylv_nn_blk_var8(), FLA_Sylv_nn_blk_var9(), FLA_Symm_ll_blk_var1(), FLA_Symm_ll_blk_var10(), FLA_Symm_ll_blk_var2(), FLA_Symm_ll_blk_var3(), FLA_Symm_ll_blk_var4(), FLA_Symm_ll_blk_var5(), FLA_Symm_ll_blk_var6(), FLA_Symm_ll_blk_var7(), FLA_Symm_ll_blk_var8(), FLA_Symm_ll_blk_var9(), FLA_Symm_lu_blk_var1(), FLA_Symm_lu_blk_var10(), FLA_Symm_lu_blk_var2(), FLA_Symm_lu_blk_var3(), FLA_Symm_lu_blk_var4(), FLA_Symm_lu_blk_var5(), FLA_Symm_lu_blk_var6(), FLA_Symm_lu_blk_var7(), FLA_Symm_lu_blk_var8(), FLA_Symm_lu_blk_var9(), FLA_Symm_rl_blk_var1(), FLA_Symm_rl_blk_var10(), FLA_Symm_rl_blk_var2(), FLA_Symm_rl_blk_var3(), FLA_Symm_rl_blk_var4(), FLA_Symm_rl_blk_var5(), FLA_Symm_rl_blk_var6(), FLA_Symm_rl_blk_var7(), FLA_Symm_rl_blk_var8(), FLA_Symm_rl_blk_var9(), FLA_Symm_ru_blk_var1(), FLA_Symm_ru_blk_var10(), FLA_Symm_ru_blk_var2(), FLA_Symm_ru_blk_var3(), 
FLA_Symm_ru_blk_var4(), FLA_Symm_ru_blk_var5(), FLA_Symm_ru_blk_var6(), FLA_Symm_ru_blk_var7(), FLA_Symm_ru_blk_var8(), FLA_Symm_ru_blk_var9(), FLA_Syr2k_ln_blk_var1(), FLA_Syr2k_ln_blk_var10(), FLA_Syr2k_ln_blk_var2(), FLA_Syr2k_ln_blk_var3(), FLA_Syr2k_ln_blk_var4(), FLA_Syr2k_ln_blk_var5(), FLA_Syr2k_ln_blk_var6(), FLA_Syr2k_ln_blk_var7(), FLA_Syr2k_ln_blk_var8(), FLA_Syr2k_ln_blk_var9(), FLA_Syr2k_lt_blk_var1(), FLA_Syr2k_lt_blk_var10(), FLA_Syr2k_lt_blk_var2(), FLA_Syr2k_lt_blk_var3(), FLA_Syr2k_lt_blk_var4(), FLA_Syr2k_lt_blk_var5(), FLA_Syr2k_lt_blk_var6(), FLA_Syr2k_lt_blk_var7(), FLA_Syr2k_lt_blk_var8(), FLA_Syr2k_lt_blk_var9(), FLA_Syr2k_un_blk_var1(), FLA_Syr2k_un_blk_var10(), FLA_Syr2k_un_blk_var2(), FLA_Syr2k_un_blk_var3(), FLA_Syr2k_un_blk_var4(), FLA_Syr2k_un_blk_var5(), FLA_Syr2k_un_blk_var6(), FLA_Syr2k_un_blk_var7(), FLA_Syr2k_un_blk_var8(), FLA_Syr2k_un_blk_var9(), FLA_Syr2k_ut_blk_var1(), FLA_Syr2k_ut_blk_var10(), FLA_Syr2k_ut_blk_var2(), FLA_Syr2k_ut_blk_var3(), FLA_Syr2k_ut_blk_var4(), FLA_Syr2k_ut_blk_var5(), FLA_Syr2k_ut_blk_var6(), FLA_Syr2k_ut_blk_var7(), FLA_Syr2k_ut_blk_var8(), FLA_Syr2k_ut_blk_var9(), FLA_Syrk_ln_blk_var1(), FLA_Syrk_ln_blk_var2(), FLA_Syrk_ln_blk_var3(), FLA_Syrk_ln_blk_var4(), FLA_Syrk_ln_blk_var5(), FLA_Syrk_ln_blk_var6(), FLA_Syrk_lt_blk_var1(), FLA_Syrk_lt_blk_var2(), FLA_Syrk_lt_blk_var3(), FLA_Syrk_lt_blk_var4(), FLA_Syrk_lt_blk_var5(), FLA_Syrk_lt_blk_var6(), FLA_Syrk_un_blk_var1(), FLA_Syrk_un_blk_var2(), FLA_Syrk_un_blk_var3(), FLA_Syrk_un_blk_var4(), FLA_Syrk_un_blk_var5(), FLA_Syrk_un_blk_var6(), FLA_Syrk_ut_blk_var1(), FLA_Syrk_ut_blk_var2(), FLA_Syrk_ut_blk_var3(), FLA_Syrk_ut_blk_var4(), FLA_Syrk_ut_blk_var5(), FLA_Syrk_ut_blk_var6(), FLA_Transpose_blk_var1(), FLA_Transpose_blk_var2(), FLA_Trinv_ln_blk_var1(), FLA_Trinv_ln_blk_var2(), FLA_Trinv_ln_blk_var3(), FLA_Trinv_ln_blk_var4(), FLA_Trinv_lu_blk_var1(), FLA_Trinv_lu_blk_var2(), FLA_Trinv_lu_blk_var3(), FLA_Trinv_lu_blk_var4(), 
FLA_Trinv_un_blk_var1(), FLA_Trinv_un_blk_var2(), FLA_Trinv_un_blk_var3(), FLA_Trinv_un_blk_var4(), FLA_Trinv_uu_blk_var1(), FLA_Trinv_uu_blk_var2(), FLA_Trinv_uu_blk_var3(), FLA_Trinv_uu_blk_var4(), FLA_Trmm_llc_blk_var1(), FLA_Trmm_llc_blk_var2(), FLA_Trmm_llc_blk_var3(), FLA_Trmm_llc_blk_var4(), FLA_Trmm_llh_blk_var1(), FLA_Trmm_llh_blk_var2(), FLA_Trmm_llh_blk_var3(), FLA_Trmm_llh_blk_var4(), FLA_Trmm_lln_blk_var1(), FLA_Trmm_lln_blk_var2(), FLA_Trmm_lln_blk_var3(), FLA_Trmm_lln_blk_var4(), FLA_Trmm_llt_blk_var1(), FLA_Trmm_llt_blk_var2(), FLA_Trmm_llt_blk_var3(), FLA_Trmm_llt_blk_var4(), FLA_Trmm_luc_blk_var1(), FLA_Trmm_luc_blk_var2(), FLA_Trmm_luc_blk_var3(), FLA_Trmm_luc_blk_var4(), FLA_Trmm_luh_blk_var1(), FLA_Trmm_luh_blk_var2(), FLA_Trmm_luh_blk_var3(), FLA_Trmm_luh_blk_var4(), FLA_Trmm_lun_blk_var1(), FLA_Trmm_lun_blk_var2(), FLA_Trmm_lun_blk_var3(), FLA_Trmm_lun_blk_var4(), FLA_Trmm_lut_blk_var1(), FLA_Trmm_lut_blk_var2(), FLA_Trmm_lut_blk_var3(), FLA_Trmm_lut_blk_var4(), FLA_Trmm_rlc_blk_var1(), FLA_Trmm_rlc_blk_var2(), FLA_Trmm_rlc_blk_var3(), FLA_Trmm_rlc_blk_var4(), FLA_Trmm_rlh_blk_var1(), FLA_Trmm_rlh_blk_var2(), FLA_Trmm_rlh_blk_var3(), FLA_Trmm_rlh_blk_var4(), FLA_Trmm_rln_blk_var1(), FLA_Trmm_rln_blk_var2(), FLA_Trmm_rln_blk_var3(), FLA_Trmm_rln_blk_var4(), FLA_Trmm_rlt_blk_var1(), FLA_Trmm_rlt_blk_var2(), FLA_Trmm_rlt_blk_var3(), FLA_Trmm_rlt_blk_var4(), FLA_Trmm_ruc_blk_var1(), FLA_Trmm_ruc_blk_var2(), FLA_Trmm_ruc_blk_var3(), FLA_Trmm_ruc_blk_var4(), FLA_Trmm_ruh_blk_var1(), FLA_Trmm_ruh_blk_var2(), FLA_Trmm_ruh_blk_var3(), FLA_Trmm_ruh_blk_var4(), FLA_Trmm_run_blk_var1(), FLA_Trmm_run_blk_var2(), FLA_Trmm_run_blk_var3(), FLA_Trmm_run_blk_var4(), FLA_Trmm_rut_blk_var1(), FLA_Trmm_rut_blk_var2(), FLA_Trmm_rut_blk_var3(), FLA_Trmm_rut_blk_var4(), FLA_Trsm_llc_blk_var1(), FLA_Trsm_llc_blk_var2(), FLA_Trsm_llc_blk_var3(), FLA_Trsm_llc_blk_var4(), FLA_Trsm_llh_blk_var1(), FLA_Trsm_llh_blk_var2(), FLA_Trsm_llh_blk_var3(), FLA_Trsm_llh_blk_var4(), 
FLA_Trsm_lln_blk_var1(), FLA_Trsm_lln_blk_var2(), FLA_Trsm_lln_blk_var3(), FLA_Trsm_lln_blk_var4(), FLA_Trsm_llt_blk_var1(), FLA_Trsm_llt_blk_var2(), FLA_Trsm_llt_blk_var3(), FLA_Trsm_llt_blk_var4(), FLA_Trsm_luc_blk_var1(), FLA_Trsm_luc_blk_var2(), FLA_Trsm_luc_blk_var3(), FLA_Trsm_luc_blk_var4(), FLA_Trsm_luh_blk_var1(), FLA_Trsm_luh_blk_var2(), FLA_Trsm_luh_blk_var3(), FLA_Trsm_luh_blk_var4(), FLA_Trsm_lun_blk_var1(), FLA_Trsm_lun_blk_var2(), FLA_Trsm_lun_blk_var3(), FLA_Trsm_lun_blk_var4(), FLA_Trsm_lut_blk_var1(), FLA_Trsm_lut_blk_var2(), FLA_Trsm_lut_blk_var3(), FLA_Trsm_lut_blk_var4(), FLA_Trsm_rlc_blk_var1(), FLA_Trsm_rlc_blk_var2(), FLA_Trsm_rlc_blk_var3(), FLA_Trsm_rlc_blk_var4(), FLA_Trsm_rlh_blk_var1(), FLA_Trsm_rlh_blk_var2(), FLA_Trsm_rlh_blk_var3(), FLA_Trsm_rlh_blk_var4(), FLA_Trsm_rln_blk_var1(), FLA_Trsm_rln_blk_var2(), FLA_Trsm_rln_blk_var3(), FLA_Trsm_rln_blk_var4(), FLA_Trsm_rlt_blk_var1(), FLA_Trsm_rlt_blk_var2(), FLA_Trsm_rlt_blk_var3(), FLA_Trsm_rlt_blk_var4(), FLA_Trsm_ruc_blk_var1(), FLA_Trsm_ruc_blk_var2(), FLA_Trsm_ruc_blk_var3(), FLA_Trsm_ruc_blk_var4(), FLA_Trsm_ruh_blk_var1(), FLA_Trsm_ruh_blk_var2(), FLA_Trsm_ruh_blk_var3(), FLA_Trsm_ruh_blk_var4(), FLA_Trsm_run_blk_var1(), FLA_Trsm_run_blk_var2(), FLA_Trsm_run_blk_var3(), FLA_Trsm_run_blk_var4(), FLA_Trsm_rut_blk_var1(), FLA_Trsm_rut_blk_var2(), FLA_Trsm_rut_blk_var3(), FLA_Trsm_rut_blk_var4(), FLA_Trsv_lc_blk_var1(), FLA_Trsv_lc_blk_var2(), FLA_Trsv_ln_blk_var1(), FLA_Trsv_ln_blk_var2(), FLA_Trsv_lt_blk_var1(), FLA_Trsv_lt_blk_var2(), FLA_Trsv_uc_blk_var1(), FLA_Trsv_uc_blk_var2(), FLA_Trsv_un_blk_var1(), FLA_Trsv_un_blk_var2(), FLA_Trsv_ut_blk_var1(), FLA_Trsv_ut_blk_var2(), FLA_Ttmm_l_blk_var1(), FLA_Ttmm_l_blk_var2(), FLA_Ttmm_l_blk_var3(), FLA_Ttmm_u_blk_var1(), FLA_Ttmm_u_blk_var2(), FLA_Ttmm_u_blk_var3(), FLA_UDdate_UT_blk_var2(), and FLA_UDdate_UT_inc_blk_var1().
{
  // Compute the blocksize to use for the next step: the smaller of the
  // unprocessed extent of A_unproc (along to_dir) and the blocksize that
  // bp holds for the matrix's datatype.

  // Extent of the part of the matrix that remains to be processed.
  dim_t        remaining = FLA_determine_matrix_size( A_unproc, to_dir );

  // Datatype of the matrix.
  FLA_Datatype dt        = FLA_Obj_datatype( A_unproc );

  // Raw blocksize stored in bp for that datatype.
  dim_t        b_typed   = FLA_Blocksize_extract( dt, bp );

  FLA_Error    status;

  // When the error-checking level is at least FLA_MIN_ERROR_CHECKING,
  // validate the extracted blocksize value and report the result.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    status = FLA_Check_blocksize_value( b_typed );
    FLA_Check_error_code( status );
  }

  // If the unprocessed partition is smaller than the blocksize allows,
  // its length/width is used instead.
  return min( remaining, b_typed );
}
dim_t FLA_determine_matrix_size ( FLA_Obj A_unproc, FLA_Quadrant to_dir )
References FLA_Obj_length(), FLA_Obj_min_dim(), and FLA_Obj_width().
Referenced by FLA_Determine_blocksize().
{
  // Report the size of A_unproc along the direction of movement to_dir.
  // Returns 0 when to_dir is none of the direction constants listed below.
  switch ( to_dir )
  {
    // Moving vertically: use the length of the matrix.
    case FLA_TOP:
    case FLA_BOTTOM:
      return FLA_Obj_length( A_unproc );

    // Moving horizontally: use the width of the matrix.
    case FLA_LEFT:
    case FLA_RIGHT:
      return FLA_Obj_width( A_unproc );

    // Moving diagonally: the matrix might be rectangular, so the
    // minimum dimension is used.
    case FLA_TL:
    case FLA_TR:
    case FLA_BL:
    case FLA_BR:
      return FLA_Obj_min_dim( A_unproc );

    default:
      break;
  }

  return 0;
}
dim_t FLA_Query_blocksize ( FLA_Datatype dt, FLA_Dimension dim )
References cgemm_p, cgemm_q, cgemm_r, dgemm_p, dgemm_q, dgemm_r, fla_goto_gemm_blocksize, sgemm_p, sgemm_q, sgemm_r, zgemm_p, zgemm_q, and zgemm_r.
Referenced by FLA_Apply_Q_blk_external(), FLA_Bidiag_blk_external(), FLA_Bidiag_UT_create_T(), FLA_Check_householder_panel_dims(), FLA_Hess_blk_external(), FLA_Hess_UT_create_T(), FLA_LQ_blk_external(), FLA_LQ_UT_create_T(), FLA_QR_blk_external(), FLA_QR_UT_create_T(), FLA_Query_blocksizes(), FLA_Tridiag_blk_external(), FLA_Tridiag_form_Q_external(), FLA_Tridiag_UT_create_T(), and FLA_UDdate_UT_create_T().
{
  // Look up the blocksize for datatype dt along dimension dim.
  // With FLA_ENABLE_GOTO_INTERFACES defined, the value comes from the
  // fla_goto_gemm_blocksize table, which is filled from the ?gemm_p/q/r
  // variables the first time through. Otherwise compile-time defaults
  // are used and dt does not affect the result.
  dim_t blocksize = 0;

#ifdef FLA_ENABLE_GOTO_INTERFACES
  int dt_index;
  int dim_index;

  // first_time is not declared in this snippet; presumably a file-scope
  // flag -- verify against the full source file.
  if ( first_time )
  {
    long* s_row = fla_goto_gemm_blocksize[FLA_S_INDEX];
    long* d_row = fla_goto_gemm_blocksize[FLA_D_INDEX];
    long* c_row = fla_goto_gemm_blocksize[FLA_C_INDEX];
    long* z_row = fla_goto_gemm_blocksize[FLA_Z_INDEX];

    // For each datatype: p -> M index, q -> K index, r -> N index, and
    // min( p, q ) -> MIN index.
    // NOTE(review): the MIN entry here ignores the r value, whereas the
    // default path below takes the minimum over M, K and N -- confirm
    // that this difference is intended.
    s_row[FLA_DIM_M_INDEX]   = sgemm_p;
    s_row[FLA_DIM_K_INDEX]   = sgemm_q;
    s_row[FLA_DIM_N_INDEX]   = sgemm_r;
    s_row[FLA_DIM_MIN_INDEX] = min( sgemm_p, sgemm_q );

    d_row[FLA_DIM_M_INDEX]   = dgemm_p;
    d_row[FLA_DIM_K_INDEX]   = dgemm_q;
    d_row[FLA_DIM_N_INDEX]   = dgemm_r;
    d_row[FLA_DIM_MIN_INDEX] = min( dgemm_p, dgemm_q );

    c_row[FLA_DIM_M_INDEX]   = cgemm_p;
    c_row[FLA_DIM_K_INDEX]   = cgemm_q;
    c_row[FLA_DIM_N_INDEX]   = cgemm_r;
    c_row[FLA_DIM_MIN_INDEX] = min( cgemm_p, cgemm_q );

    z_row[FLA_DIM_M_INDEX]   = zgemm_p;
    z_row[FLA_DIM_K_INDEX]   = zgemm_q;
    z_row[FLA_DIM_N_INDEX]   = zgemm_r;
    z_row[FLA_DIM_MIN_INDEX] = min( zgemm_p, zgemm_q );

    // The table is now populated; skip this block on later calls.
    first_time = FALSE;
  }

  // Derive the table indices from the datatype and dimension constants.
  dt_index  = dt & FLA_DTYPE_INDEX_MASK;
  dim_index = dim & FLA_DIM_INDEX_MASK;

  // Read the requested entry from the table.
  blocksize = ( dim_t ) fla_goto_gemm_blocksize[dt_index][dim_index];
#else
  // The libgoto symbols are not available in this configuration, so
  // fall back to the default blocksize constants.
  if ( dim == FLA_DIMENSION_M )
  {
    blocksize = FLA_DEFAULT_M_BLOCKSIZE;
  }
  else if ( dim == FLA_DIMENSION_K )
  {
    blocksize = FLA_DEFAULT_K_BLOCKSIZE;
  }
  else if ( dim == FLA_DIMENSION_N )
  {
    blocksize = FLA_DEFAULT_N_BLOCKSIZE;
  }
  else if ( dim == FLA_DIMENSION_MIN )
  {
    // Smallest of the three defaults.
    blocksize = min( FLA_DEFAULT_M_BLOCKSIZE, FLA_DEFAULT_K_BLOCKSIZE );
    blocksize = min( blocksize, FLA_DEFAULT_N_BLOCKSIZE );
  }
#endif

  // Return the blocksize (0 if nothing above assigned one).
  return blocksize;
}
fla_blocksize_t* FLA_Query_blocksizes ( FLA_Dimension dim )
References FLA_Blocksize_s::c, FLA_Blocksize_s::d, FLA_Blocksize_create(), FLA_Query_blocksize(), FLA_Blocksize_s::s, and FLA_Blocksize_s::z.
Referenced by FLA_Apply_CAQ2_UT_cntl_init(), FLA_Apply_Q2_UT_cntl_init(), FLA_Apply_Q_UT_cntl_init(), FLA_Apply_QUD_UT_cntl_init(), FLA_Bidiag_UT_cntl_init(), FLA_CAQR2_UT_cntl_init(), FLA_Chol_cntl_init(), FLA_Eig_gest_cntl_init(), FLA_Gemm_cntl_init(), FLA_Hemm_cntl_init(), FLA_Her2k_cntl_init(), FLA_Herk_cntl_init(), FLA_Hess_UT_cntl_init(), FLA_LQ_UT_cntl_init(), FLA_LU_nopiv_cntl_init(), FLA_LU_piv_cntl_init(), FLA_Lyap_cntl_init(), FLA_QR2_UT_cntl_init(), FLA_QR_UT_cntl_init(), FLA_SPDinv_cntl_init(), FLA_Sylv_cntl_init(), FLA_Symm_cntl_init(), FLA_Syr2k_cntl_init(), FLA_Syrk_cntl_init(), FLA_Transpose_cntl_init(), FLA_Tridiag_UT_cntl_init(), FLA_Trinv_cntl_init(), FLA_Trmm_cntl_init(), FLA_Trsm_cntl_init(), FLA_Ttmm_cntl_init(), FLA_UDdate_UT_cntl_init(), and FLASH_SPDinv_cntl_init().
{
  // Build a blocksize structure holding, for each of the four datatypes,
  // the value FLA_Query_blocksize() reports for dimension dim.

  // Start from a structure with all four fields set to zero.
  fla_blocksize_t* sizes = FLA_Blocksize_create( 0, 0, 0, 0 );

  // Fill in one field per datatype, in s, d, c, z order.
  sizes->s = FLA_Query_blocksize( FLA_FLOAT, dim );
  sizes->d = FLA_Query_blocksize( FLA_DOUBLE, dim );
  sizes->c = FLA_Query_blocksize( FLA_COMPLEX, dim );
  sizes->z = FLA_Query_blocksize( FLA_DOUBLE_COMPLEX, dim );

  // Return the populated structure.
  return sizes;
}
long cgemm_p |
Referenced by FLA_Query_blocksize().
long cgemm_q |
Referenced by FLA_Query_blocksize().
long cgemm_r |
Referenced by FLA_Query_blocksize().
long dgemm_p |
Referenced by FLA_Query_blocksize().
long dgemm_q |
Referenced by FLA_Query_blocksize().
long dgemm_r |
Referenced by FLA_Query_blocksize().
long fla_goto_gemm_blocksize[4][4] |
Referenced by FLA_Query_blocksize().
long sgemm_p |
Referenced by FLA_Query_blocksize().
long sgemm_q |
Referenced by FLA_Query_blocksize().
long sgemm_r |
Referenced by FLA_Query_blocksize().
long zgemm_p |
Referenced by FLA_Query_blocksize().
long zgemm_q |
Referenced by FLA_Query_blocksize().
long zgemm_r |
Referenced by FLA_Query_blocksize().