libflame
Functions
float | bl1_s2 (void) |
double | bl1_d2 (void) |
scomplex | bl1_c2 (void) |
dcomplex | bl1_z2 (void) |
float | bl1_s1 (void) |
double | bl1_d1 (void) |
scomplex | bl1_c1 (void) |
dcomplex | bl1_z1 (void) |
float | bl1_s1h (void) |
double | bl1_d1h (void) |
scomplex | bl1_c1h (void) |
dcomplex | bl1_z1h (void) |
float | bl1_s0 (void) |
double | bl1_d0 (void) |
scomplex | bl1_c0 (void) |
dcomplex | bl1_z0 (void) |
float | bl1_sm1h (void) |
double | bl1_dm1h (void) |
scomplex | bl1_cm1h (void) |
dcomplex | bl1_zm1h (void) |
float | bl1_sm1 (void) |
double | bl1_dm1 (void) |
scomplex | bl1_cm1 (void) |
dcomplex | bl1_zm1 (void) |
float | bl1_sm2 (void) |
double | bl1_dm2 (void) |
scomplex | bl1_cm2 (void) |
dcomplex | bl1_zm2 (void) |
void * | bl1_vallocv (unsigned int n_elem, unsigned int elem_size) |
int * | bl1_iallocv (unsigned int n_elem) |
float * | bl1_sallocv (unsigned int n_elem) |
double * | bl1_dallocv (unsigned int n_elem) |
scomplex * | bl1_callocv (unsigned int n_elem) |
dcomplex * | bl1_zallocv (unsigned int n_elem) |
void * | bl1_vallocm (unsigned int m, unsigned int n, unsigned int elem_size) |
int * | bl1_iallocm (unsigned int m, unsigned int n) |
float * | bl1_sallocm (unsigned int m, unsigned int n) |
double * | bl1_dallocm (unsigned int m, unsigned int n) |
scomplex * | bl1_callocm (unsigned int m, unsigned int n) |
dcomplex * | bl1_zallocm (unsigned int m, unsigned int n) |
void | bl1_sapdiagmv (side1_t side, conj1_t conj, int m, int n, float *x, int incx, float *a, int a_rs, int a_cs) |
void | bl1_dapdiagmv (side1_t side, conj1_t conj, int m, int n, double *x, int incx, double *a, int a_rs, int a_cs) |
void | bl1_csapdiagmv (side1_t side, conj1_t conj, int m, int n, float *x, int incx, scomplex *a, int a_rs, int a_cs) |
void | bl1_capdiagmv (side1_t side, conj1_t conj, int m, int n, scomplex *x, int incx, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdapdiagmv (side1_t side, conj1_t conj, int m, int n, double *x, int incx, dcomplex *a, int a_rs, int a_cs) |
void | bl1_zapdiagmv (side1_t side, conj1_t conj, int m, int n, dcomplex *x, int incx, dcomplex *a, int a_rs, int a_cs) |
void | bl1_screate_contigm (int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dcreate_contigm (int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_ccreate_contigm (int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zcreate_contigm (int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_screate_contigmt (trans1_t trans_dims, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dcreate_contigmt (trans1_t trans_dims, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_ccreate_contigmt (trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zcreate_contigmt (trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_screate_contigmr (uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dcreate_contigmr (uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_ccreate_contigmr (uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zcreate_contigmr (uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_screate_contigmsr (side1_t side, uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dcreate_contigmsr (side1_t side, uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_ccreate_contigmsr (side1_t side, uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zcreate_contigmsr (side1_t side, uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_sfree_contigm (float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dfree_contigm (double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_cfree_contigm (scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zfree_contigm (dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_sfree_saved_contigm (int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dfree_saved_contigm (int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_cfree_saved_contigm (int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zfree_saved_contigm (int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_sfree_saved_contigmr (uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dfree_saved_contigmr (uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_cfree_saved_contigmr (uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zfree_saved_contigmr (uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_sfree_saved_contigmsr (side1_t side, uplo1_t uplo, int m, int n, float *a_save, int a_rs_save, int a_cs_save, float **a, int *a_rs, int *a_cs) |
void | bl1_dfree_saved_contigmsr (side1_t side, uplo1_t uplo, int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs) |
void | bl1_cfree_saved_contigmsr (side1_t side, uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs) |
void | bl1_zfree_saved_contigmsr (side1_t side, uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs) |
void | bl1_sewinvscalv (conj1_t conj, int n, float *x, int incx, float *y, int incy) |
void | bl1_dewinvscalv (conj1_t conj, int n, double *x, int incx, double *y, int incy) |
void | bl1_csewinvscalv (conj1_t conj, int n, float *x, int incx, scomplex *y, int incy) |
void | bl1_cewinvscalv (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zdewinvscalv (conj1_t conj, int n, double *x, int incx, dcomplex *y, int incy) |
void | bl1_zewinvscalv (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_sewinvscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dewinvscalmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_csewinvscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cewinvscalmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zdewinvscalmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zewinvscalmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sewscalv (conj1_t conj, int n, float *x, int incx, float *y, int incy) |
void | bl1_dewscalv (conj1_t conj, int n, double *x, int incx, double *y, int incy) |
void | bl1_csewscalv (conj1_t conj, int n, float *x, int incx, scomplex *y, int incy) |
void | bl1_cewscalv (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zdewscalv (conj1_t conj, int n, double *x, int incx, dcomplex *y, int incy) |
void | bl1_zewscalv (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_sewscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dewscalmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_csewscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cewscalmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zdewscalmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zewscalmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_vfree (void *p) |
void | bl1_ifree (int *p) |
void | bl1_sfree (float *p) |
void | bl1_dfree (double *p) |
void | bl1_cfree (scomplex *p) |
void | bl1_zfree (dcomplex *p) |
void | bl1_sinverts (conj1_t conj, float *alpha) |
void | bl1_dinverts (conj1_t conj, double *alpha) |
void | bl1_cinverts (conj1_t conj, scomplex *alpha) |
void | bl1_zinverts (conj1_t conj, dcomplex *alpha) |
void | bl1_sinvert2s (conj1_t conj, float *alpha, float *beta) |
void | bl1_dinvert2s (conj1_t conj, double *alpha, double *beta) |
void | bl1_cinvert2s (conj1_t conj, scomplex *alpha, scomplex *beta) |
void | bl1_zinvert2s (conj1_t conj, dcomplex *alpha, dcomplex *beta) |
void | bl1_sinvertv (conj1_t conj, int n, float *x, int incx) |
void | bl1_dinvertv (conj1_t conj, int n, double *x, int incx) |
void | bl1_cinvertv (conj1_t conj, int n, scomplex *x, int incx) |
void | bl1_zinvertv (conj1_t conj, int n, dcomplex *x, int incx) |
void | bl1_sident (int m, float *a, int a_rs, int a_cs) |
void | bl1_dident (int m, double *a, int a_rs, int a_cs) |
void | bl1_cident (int m, scomplex *a, int a_rs, int a_cs) |
void | bl1_zident (int m, dcomplex *a, int a_rs, int a_cs) |
void | bl1_smaxabsv (int n, float *x, int incx, float *maxabs) |
void | bl1_dmaxabsv (int n, double *x, int incx, double *maxabs) |
void | bl1_cmaxabsv (int n, scomplex *x, int incx, float *maxabs) |
void | bl1_zmaxabsv (int n, dcomplex *x, int incx, double *maxabs) |
void | bl1_smaxabsm (int m, int n, float *a, int a_rs, int a_cs, float *maxabs) |
void | bl1_dmaxabsm (int m, int n, double *a, int a_rs, int a_cs, double *maxabs) |
void | bl1_cmaxabsm (int m, int n, scomplex *a, int a_rs, int a_cs, float *maxabs) |
void | bl1_zmaxabsm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *maxabs) |
void | bl1_smaxabsmr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *maxabs) |
void | bl1_dmaxabsmr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *maxabs) |
void | bl1_cmaxabsmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *maxabs) |
void | bl1_zmaxabsmr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *maxabs) |
void | bl1_srands (float *alpha) |
void | bl1_drands (double *alpha) |
void | bl1_crands (scomplex *alpha) |
void | bl1_zrands (dcomplex *alpha) |
void | bl1_srandv (int n, float *x, int incx) |
void | bl1_drandv (int n, double *x, int incx) |
void | bl1_crandv (int n, scomplex *x, int incx) |
void | bl1_zrandv (int n, dcomplex *x, int incx) |
void | bl1_srandm (int m, int n, float *a, int a_rs, int a_cs) |
void | bl1_drandm (int m, int n, double *a, int a_rs, int a_cs) |
void | bl1_crandm (int m, int n, scomplex *a, int a_rs, int a_cs) |
void | bl1_zrandm (int m, int n, dcomplex *a, int a_rs, int a_cs) |
void | bl1_srandmr (uplo1_t uplo, diag1_t diag, int m, int n, float *a, int a_rs, int a_cs) |
void | bl1_drandmr (uplo1_t uplo, diag1_t diag, int m, int n, double *a, int a_rs, int a_cs) |
void | bl1_crandmr (uplo1_t uplo, diag1_t diag, int m, int n, scomplex *a, int a_rs, int a_cs) |
void | bl1_zrandmr (uplo1_t uplo, diag1_t diag, int m, int n, dcomplex *a, int a_rs, int a_cs) |
void | bl1_set_contig_strides (int m, int n, int *rs, int *cs) |
void | bl1_set_dim_with_side (side1_t side, int m, int n, int *dim_new) |
void | bl1_set_dims_with_trans (trans1_t trans, int m, int n, int *m_new, int *n_new) |
void | bl1_isetv (int m, int *sigma, int *x, int incx) |
void | bl1_ssetv (int m, float *sigma, float *x, int incx) |
void | bl1_dsetv (int m, double *sigma, double *x, int incx) |
void | bl1_csetv (int m, scomplex *sigma, scomplex *x, int incx) |
void | bl1_zsetv (int m, dcomplex *sigma, dcomplex *x, int incx) |
void | bl1_isetm (int m, int n, int *sigma, int *a, int a_rs, int a_cs) |
void | bl1_ssetm (int m, int n, float *sigma, float *a, int a_rs, int a_cs) |
void | bl1_dsetm (int m, int n, double *sigma, double *a, int a_rs, int a_cs) |
void | bl1_csetm (int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zsetm (int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_ssetmr (uplo1_t uplo, int m, int n, float *sigma, float *a, int a_rs, int a_cs) |
void | bl1_dsetmr (uplo1_t uplo, int m, int n, double *sigma, double *a, int a_rs, int a_cs) |
void | bl1_csetmr (uplo1_t uplo, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zsetmr (uplo1_t uplo, int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_isetdiag (int offset, int m, int n, int *sigma, int *a, int a_rs, int a_cs) |
void | bl1_ssetdiag (int offset, int m, int n, float *sigma, float *a, int a_rs, int a_cs) |
void | bl1_dsetdiag (int offset, int m, int n, double *sigma, double *a, int a_rs, int a_cs) |
void | bl1_csetdiag (int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zsetdiag (int offset, int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_sscalediag (conj1_t conj, int offset, int m, int n, float *sigma, float *a, int a_rs, int a_cs) |
void | bl1_dscalediag (conj1_t conj, int offset, int m, int n, double *sigma, double *a, int a_rs, int a_cs) |
void | bl1_cscalediag (conj1_t conj, int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zscalediag (conj1_t conj, int offset, int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_csscalediag (conj1_t conj, int offset, int m, int n, float *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdscalediag (conj1_t conj, int offset, int m, int n, double *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_sshiftdiag (conj1_t conj, int offset, int m, int n, float *sigma, float *a, int a_rs, int a_cs) |
void | bl1_dshiftdiag (conj1_t conj, int offset, int m, int n, double *sigma, double *a, int a_rs, int a_cs) |
void | bl1_cshiftdiag (conj1_t conj, int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zshiftdiag (conj1_t conj, int offset, int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_csshiftdiag (conj1_t conj, int offset, int m, int n, float *sigma, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdshiftdiag (conj1_t conj, int offset, int m, int n, double *sigma, dcomplex *a, int a_rs, int a_cs) |
void | bl1_ssymmize (conj1_t conj, uplo1_t uplo, int m, float *a, int a_rs, int a_cs) |
void | bl1_dsymmize (conj1_t conj, uplo1_t uplo, int m, double *a, int a_rs, int a_cs) |
void | bl1_csymmize (conj1_t conj, uplo1_t uplo, int m, scomplex *a, int a_rs, int a_cs) |
void | bl1_zsymmize (conj1_t conj, uplo1_t uplo, int m, dcomplex *a, int a_rs, int a_cs) |
scomplex bl1_c0 (void)
References bl1_s0(), scomplex::imag, and scomplex::real.
Referenced by bl1_cgemm(), bl1_cgemv(), bl1_chemm(), bl1_chemv(), bl1_crandmr(), bl1_csymm(), FLA_QR_UT_form_Q_opc_var1(), and FLA_Tridiag_UT_shift_U_l_opc().
scomplex bl1_c1 (void)
References bl1_s0(), bl1_s1(), scomplex::imag, and scomplex::real.
Referenced by bl1_cgemm(), bl1_cgemv(), bl1_chemm(), bl1_chemv(), bl1_cher2k(), bl1_cherk(), bl1_crandmr(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_ops_var1(), FLA_QR_UT_form_Q_opc_var1(), and FLA_Tridiag_UT_shift_U_l_opc().
scomplex bl1_c1h (void)
References bl1_s0(), bl1_s1h(), scomplex::imag, and scomplex::real.
scomplex bl1_c2 (void)
References bl1_s0(), bl1_s2(), scomplex::imag, and scomplex::real.
scomplex* bl1_callocm (unsigned int m, unsigned int n)
scomplex* bl1_callocv (unsigned int n_elem)
Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpyv(), bl1_cgemv(), bl1_cger(), bl1_chemv(), bl1_cher(), bl1_cher2(), bl1_csymv_blas(), bl1_csyr2_blas(), bl1_csyr_blas(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsv(), and bl1_ctrsvsx().
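A minimal usage sketch for the vector allocator (the blis1.h header name and the early-return error handling are assumptions; bl1_c0(), bl1_csetv(), and bl1_cfree() are taken from the listing above):

#include "blis1.h"  /* assumed name of the header declaring the bl1_* prototypes */

/* Allocate a length-n complex workspace, clear it, and release it. */
void example_cworkspace( int n )
{
    scomplex  zero = bl1_c0();
    scomplex* w    = bl1_callocv( ( unsigned int ) n );

    if ( w == NULL ) return;          /* allocation-failure policy is an assumption */

    bl1_csetv( n, &zero, w, 1 );      /* unit stride: contiguous vector */

    bl1_cfree( w );
}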
void bl1_capdiagmv (side1_t side, conj1_t conj, int m, int n, scomplex *x, int incx, scomplex *a, int a_rs, int a_cs)
References bl1_cewscalv(), bl1_cscalv(), bl1_is_left(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Apply_diag_matrix().
{ scomplex* chi; scomplex* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then we can proceed as if the // operation were transposed (applying the diagonal values in x from the // opposite side) for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_toggle_side( side ); } if ( bl1_is_left( side ) ) { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_cewscalv( conj, n_elem, x, incx, a_begin, inca ); } } else { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; chi = x + j*incx; bl1_cscalv( conj, n_elem, chi, a_begin, inca ); } } }
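A usage sketch for the left-side case above: scaling the rows of a column-major matrix by a vector of diagonal entries. BLIS1_LEFT is an assumed side1_t enumerator (only BLIS1_NO_CONJUGATE and BLIS1_NO_TRANSPOSE appear on this page), and the blis1.h header name is an assumption:

#include "blis1.h"  /* assumed header name */

/* A := diag(x) * A for a column-major m-by-n matrix A; x has length m.
   With column-major storage the row stride is 1 and the column stride is m. */
void scale_rows_c( int m, int n, scomplex* x, scomplex* a )
{
    bl1_capdiagmv( BLIS1_LEFT, BLIS1_NO_CONJUGATE,   /* BLIS1_LEFT is assumed */
                   m, n, x, 1, a, 1, m );
}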
void bl1_ccreate_contigm (int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_callocm(), bl1_ccopymt(), bl1_is_gen_storage(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), bl1_cgemv(), bl1_cger(), bl1_chemm(), bl1_csymm(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrsm(), and bl1_ctrsmsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_callocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_ccopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
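The create/free pair is typically used as a wrapper around a computation on a general-stride matrix, as in the level-3 routines listed above. A sketch of that pattern (blis1.h header name assumed; the commented-out do_work() call is a placeholder, not a library routine):

#include "blis1.h"  /* assumed header name */

/* Make a contiguous working copy of A if it has general stride, operate on
   the copy, then write the result back and restore the original buffer. */
void with_contiguous_copy( int m, int n, scomplex* a, int a_rs, int a_cs )
{
    scomplex* a_save    = a;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;

    bl1_ccreate_contigm( m, n, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );

    /* ... do_work( m, n, a, a_rs, a_cs ); ...  (placeholder) */

    bl1_cfree_saved_contigm( m, n, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
}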
void bl1_ccreate_contigmr (uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_callocm(), bl1_ccopymr(), bl1_is_gen_storage(), and bl1_set_contig_strides().
Referenced by bl1_ccreate_contigmsr(), bl1_chemm(), bl1_chemv(), bl1_cher(), bl1_cher2(), bl1_cher2k(), bl1_cherk(), bl1_csymm(), bl1_csymv(), bl1_csyr(), bl1_csyr2(), bl1_csyr2k(), bl1_csyrk(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsm(), bl1_ctrsmsx(), bl1_ctrsv(), and bl1_ctrsvsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_callocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_ccopymr( uplo, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_ccreate_contigmsr (side1_t side, uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_ccreate_contigmr(), and bl1_is_left().
{
    int dim_a;

    // Choose the dimension of the matrix based on the side parameter.
    if ( bl1_is_left( side ) ) dim_a = m;
    else                       dim_a = n;

    // Call the simple version with chosen dimensions.
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          a,      a_rs,      a_cs );
}
void bl1_ccreate_contigmt (trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_callocm(), bl1_ccopymt(), bl1_does_trans(), bl1_is_gen_storage(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), bl1_cher2k(), bl1_cherk(), bl1_csyr2k(), and bl1_csyrk().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Transpose the dimensions if requested. if ( bl1_does_trans( trans_dims ) ) bl1_swap_ints( m, n ); // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_callocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_ccopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_cewinvscalmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
References bl1_cewinvscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_elemwise().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_cewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_cewinvscalv (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy)
References bl1_is_conj().
Referenced by bl1_cewinvscalmt().
{
    scomplex* chi;
    scomplex* psi;
    scomplex  conjchi;
    int       i;

    if ( bl1_is_conj( conj ) )
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_ccopyconj( chi, &conjchi );
            bl1_cinvscals( &conjchi, psi );
        }
    }
    else
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_cinvscals( chi, psi );
        }
    }
}
void bl1_cewscalmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
References bl1_cewscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Scal_elemwise().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_cewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_cewscalv (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy)
References bl1_is_conj().
Referenced by bl1_capdiagmv(), and bl1_cewscalmt().
{
    scomplex* chi;
    scomplex* psi;
    scomplex  conjchi;
    int       i;

    if ( bl1_is_conj( conj ) )
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_ccopyconj( chi, &conjchi );
            bl1_cscals( &conjchi, psi );
        }
    }
    else
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_cscals( chi, psi );
        }
    }
}
void bl1_cfree (scomplex *p)
Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpyv(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmr(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_cgemv(), bl1_cger(), bl1_chemm(), bl1_chemv(), bl1_cher(), bl1_cher2(), bl1_cher2k(), bl1_cherk(), bl1_csymm(), bl1_csymv_blas(), bl1_csyr2_blas(), bl1_csyr2k(), bl1_csyr_blas(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsm(), bl1_ctrsmsx(), bl1_ctrsv(), and bl1_ctrsvsx().
{
free( ( void* ) p );
}
void bl1_cfree_contigm (scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_cfree(), and bl1_is_gen_storage().
Referenced by bl1_cgemm(), bl1_cgemv(), bl1_chemm(), bl1_chemv(), bl1_cher2k(), bl1_cherk(), bl1_csymm(), bl1_csymv(), bl1_csyr2k(), bl1_csyrk(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsm(), bl1_ctrsmsx(), bl1_ctrsv(), and bl1_ctrsvsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Free the temporary contiguous storage for the matrix.
        bl1_cfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_cfree_saved_contigm (int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_ccopymt(), bl1_cfree(), bl1_is_gen_storage(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), bl1_cger(), bl1_chemm(), bl1_cher(), bl1_cher2(), bl1_csymm(), bl1_csyr(), bl1_csyr2(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrsm(), and bl1_ctrsmsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                     m, n,
                     *a,     *a_rs,     *a_cs,
                     a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_cfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_cfree_saved_contigmr (uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_ccopymr(), bl1_cfree(), and bl1_is_gen_storage().
Referenced by bl1_cher2k(), bl1_cherk(), bl1_csyr2k(), and bl1_csyrk().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_ccopymr( uplo,
                     m, n,
                     *a,     *a_rs,     *a_cs,
                     a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_cfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_cfree_saved_contigmsr (side1_t side, uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
References bl1_ccopymt(), bl1_cfree(), bl1_is_gen_storage(), and bl1_is_left().
{
    int dim_a;

    // Choose the dimension of the matrix based on the side parameter.
    if ( bl1_is_left( side ) ) dim_a = m;
    else                       dim_a = n;

    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_ccopymt( uplo,
                     dim_a, dim_a,
                     *a,     *a_rs,     *a_cs,
                     a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_cfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_cident (int m, scomplex *a, int a_rs, int a_cs)
References scomplex::imag, and scomplex::real.
Referenced by FLA_UDdate_UT_opc_var1().
void bl1_cinvert2s (conj1_t conj, scomplex *alpha, scomplex *beta)
References bl1_is_conj(), scomplex::imag, and scomplex::real.
Referenced by bl1_cinvscalm(), and bl1_cinvscalv().
void bl1_cinverts (conj1_t conj, scomplex *alpha)
void bl1_cinvertv (conj1_t conj, int n, scomplex *x, int incx)
References bl1_is_conj(), scomplex::imag, and scomplex::real.
Referenced by FLA_Invert().
{
    float     one = 1.0F;
    float     temp;
    float     s, xr_s, xi_s;
    float     conjsign;
    scomplex* chi;
    int       i;

    if ( bl1_is_conj( conj ) ) conjsign =  one;
    else                       conjsign = -one;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        s    = bl1_fmaxabs( chi->real, chi->imag );
        xr_s = chi->real / s;
        xi_s = chi->imag / s;
        temp = xr_s * chi->real + xi_s * chi->imag;

        chi->real = xr_s / temp;
        chi->imag = conjsign * xi_s / temp;
    }
}
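A usage sketch (only the blis1.h header name is assumed; BLIS1_NO_CONJUGATE appears elsewhere on this page):

#include "blis1.h"  /* assumed header name */

/* x(i) := 1 / x(i) for a contiguous complex vector. */
void reciprocal_cv( int n, scomplex* x )
{
    bl1_cinvertv( BLIS1_NO_CONJUGATE, n, x, 1 );
}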
scomplex bl1_cm1 (void)
References bl1_s0(), bl1_sm1(), scomplex::imag, and scomplex::real.
scomplex bl1_cm1h (void)
References bl1_s0(), bl1_sm1h(), scomplex::imag, and scomplex::real.
scomplex bl1_cm2 (void)
References bl1_s0(), bl1_sm2(), scomplex::imag, and scomplex::real.
void bl1_cmaxabsm (int m, int n, scomplex *a, int a_rs, int a_cs, float *maxabs)
References bl1_cmaxabsv(), bl1_is_row_storage(), bl1_s0(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value().
{ float zero = bl1_s0(); scomplex* a_begin; float maxabs_cand; float maxabs_temp; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } // Initialize the maximum absolute value candidate to the first element. bl1_csabsval2( a, &maxabs_cand ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_cmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } *maxabs = maxabs_cand; }
void bl1_cmaxabsmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *maxabs)
References bl1_cmaxabsv(), bl1_d0(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value_herm().
{ float zero = bl1_d0(); scomplex* a_begin; float maxabs_cand; float maxabs_temp; int inca, lda; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize the maximum absolute value candidate to the first element. bl1_csabsval2( a, &maxabs_cand ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_cmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; bl1_cmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } *maxabs = maxabs_cand; }
void bl1_cmaxabsv (int n, scomplex *x, int incx, float *maxabs)
Referenced by bl1_cmaxabsm(), and bl1_cmaxabsmr().
{
    scomplex* chi;
    float     maxabs_cand;
    float     maxabs_temp;
    int       i;

    bl1_csabsval2( x, &maxabs_cand );

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_csabsval2( chi, &maxabs_temp );
        if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp;
    }

    *maxabs = maxabs_cand;
}
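A usage sketch (blis1.h header name assumed). Because the scan seeds the candidate with the first element, n should be at least 1:

#include "blis1.h"  /* assumed header name */

/* Return the largest absolute value of a strided complex vector (n >= 1). */
float max_abs_cv( int n, scomplex* x, int incx )
{
    float maxabs;
    bl1_cmaxabsv( n, x, incx, &maxabs );
    return maxabs;
}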
void bl1_crandm (int m, int n, scomplex *a, int a_rs, int a_cs)
References bl1_crandv(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Random_matrix().
{ scomplex* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_crandv( n_elem, a_begin, inca ); } }
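A sketch combining the allocator, stride helper, randomizer, and reducer documented on this page (only the blis1.h header name and the omitted allocation check are assumptions):

#include "blis1.h"  /* assumed header name */

/* Fill a freshly allocated column-major m-by-n matrix with random values
   and return its largest absolute entry. */
float random_cmatrix_maxabs( int m, int n )
{
    int       a_rs, a_cs;
    float     maxabs;
    scomplex* a = bl1_callocm( ( unsigned int ) m, ( unsigned int ) n );

    bl1_set_contig_strides( m, n, &a_rs, &a_cs );  /* column-major strides */

    bl1_crandm( m, n, a, a_rs, a_cs );
    bl1_cmaxabsm( m, n, a, a_rs, a_cs, &maxabs );

    bl1_cfree( a );
    return maxabs;
}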
void bl1_crandmr (uplo1_t uplo, diag1_t diag, int m, int n, scomplex *a, int a_rs, int a_cs)
References bl1_c0(), bl1_c1(), bl1_cinvscalv(), bl1_crands(), bl1_crandv(), bl1_csetv(), bl1_is_nonunit_diag(), bl1_is_row_storage(), bl1_is_unit_diag(), bl1_is_upper(), bl1_is_zero_diag(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and scomplex::real.
Referenced by FLA_Random_tri_matrix().
{ scomplex* a_begin; scomplex* ajj; scomplex one; scomplex zero; scomplex ord; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize some scalars. one = bl1_c1(); zero = bl1_c0(); ord = bl1_c0(); ord.real = ( float ) bl1_max( m, n ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Randomize super-diagonal elements. bl1_crandv( n_elem, a_begin, inca ); // Normalize super-diagonal elements by order of the matrix. bl1_cinvscalv( BLIS1_NO_CONJUGATE, n_elem, &ord, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_crands( ajj ); bl1_cabsval2( ajj, ajj ); bl1_cadd3( ajj, &one, ajj ); } // Initialize sub-diagonal elements to zero. bl1_csetv( n_elem_max - j - 1, &zero, ajj + inca, inca ); } } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Initialize super-diagonal to zero. bl1_csetv( n_elem, &zero, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_crands( ajj ); bl1_cabsval2( ajj, ajj ); bl1_cadd3( ajj, &one, ajj ); } // Randomize sub-diagonal elements. bl1_crandv( n_elem_max - j - 1, ajj + inca, inca ); // Normalize sub-diagonal elements by order of the matrix. bl1_cinvscalv( BLIS1_NO_CONJUGATE, n_elem_max - j - 1, &ord, ajj + inca, inca ); } } } }
void bl1_crands (scomplex *alpha)
References bl1_srands(), scomplex::imag, and scomplex::real.
Referenced by bl1_crandmr(), and bl1_crandv().
{
    bl1_srands( &(alpha->real) );
    bl1_srands( &(alpha->imag) );
}
void bl1_crandv (int n, scomplex *x, int incx)
References bl1_crands().
Referenced by bl1_crandm(), and bl1_crandmr().
{
    scomplex* chi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_crands( chi );
    }
}
void bl1_csapdiagmv (side1_t side, conj1_t conj, int m, int n, float *x, int incx, scomplex *a, int a_rs, int a_cs)
References bl1_csewscalv(), bl1_csscalv(), bl1_is_left(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Apply_diag_matrix().
{ float* chi; scomplex* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then we can proceed as if the // operation were transposed (applying the diagonal values in x from the // opposite side) for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_toggle_side( side ); } if ( bl1_is_left( side ) ) { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_csewscalv( conj, n_elem, x, incx, a_begin, inca ); } } else { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; chi = x + j*incx; bl1_csscalv( conj, n_elem, chi, a_begin, inca ); } } }
void bl1_cscalediag (conj1_t conj, int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Referenced by FLA_Scale_diag(), and FLA_UDdate_UT_opc_var1().
void bl1_csetdiag (int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
References scomplex::imag, and scomplex::real.
Referenced by FLA_Set_diag(), FLA_Set_offdiag(), and FLA_Triangularize().
void bl1_csetm (int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
References scomplex::imag, and scomplex::real.
Referenced by FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_ops_var1(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Set(), FLA_Tridiag_UT_l_step_ofc_var3(), and FLA_Tridiag_UT_l_step_opc_var3().
void bl1_csetmr (uplo1_t uplo, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
References bl1_csetv(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Setr(), and FLA_Triangularize().
{ scomplex* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; bl1_csetv( n_elem, sigma, a_begin, inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j - 1 ); a_begin = a + j*lda + (j + 1)*inca; bl1_csetv( n_elem, sigma, a_begin, inca ); } } }
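Judging from the loops above, the uplo argument selects which strict triangle (excluding the diagonal) is overwritten with *sigma. A sketch that zeroes the strictly upper triangle of a column-major matrix; BLIS1_UPPER_TRIANGULAR is an assumed uplo1_t enumerator and blis1.h is an assumed header name:

#include "blis1.h"  /* assumed header name */

/* Zero every element strictly above the diagonal of a column-major
   m-by-n matrix, leaving the diagonal and lower triangle untouched. */
void zero_strictly_upper_c( int m, int n, scomplex* a )
{
    scomplex zero = bl1_c0();

    bl1_csetmr( BLIS1_UPPER_TRIANGULAR,  /* assumed enumerator */
                m, n, &zero, a, 1, m );
}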
void bl1_csetv (int m, scomplex *sigma, scomplex *x, int incx)
References scomplex::imag, and scomplex::real.
Referenced by bl1_crandmr(), bl1_csetmr(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_subdiagonal_opt(), FLA_Tridiag_UT_shift_U_l_opc(), and FLA_Tridiag_UT_u_realify_opt().
void bl1_csewinvscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
References bl1_csewinvscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ float* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_csewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_csewinvscalv (conj1_t conj, int n, float *x, int incx, scomplex *y, int incy)
Referenced by bl1_csewinvscalmt().
{
    float*    chi;
    scomplex* psi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_csinvscals( chi, psi );
    }
}
void bl1_csewscalmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
References bl1_csewscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ float* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_csewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_csewscalv (conj1_t conj, int n, float *x, int incx, scomplex *y, int incy)
Referenced by bl1_csapdiagmv(), and bl1_csewscalmt().
{
    float*    chi;
    scomplex* psi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_csscals( chi, psi );
    }
}
void bl1_cshiftdiag (conj1_t conj, int offset, int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
References scomplex::imag, and scomplex::real.
Referenced by FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), and FLA_Shift_diag().
{
    scomplex* alpha;
    scomplex  sigma_conj;
    int       i, j;

    bl1_ccopys( conj, sigma, &sigma_conj );

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        alpha->real += sigma_conj.real;
        alpha->imag += sigma_conj.imag;

        ++i;
        ++j;
    }
}
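A usage sketch: adding a real shift to the main diagonal (offset 0; negative offsets select subdiagonals and positive offsets superdiagonals, per the loop above). Only the blis1.h header name is assumed:

#include "blis1.h"  /* assumed header name */

/* a(i,i) := a(i,i) + mu for a column-major m-by-m matrix. */
void shift_main_diagonal_c( int m, scomplex* a, float mu )
{
    scomplex sigma;

    sigma.real = mu;
    sigma.imag = 0.0F;

    bl1_cshiftdiag( BLIS1_NO_CONJUGATE, 0, m, m, &sigma, a, 1, m );
}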
void bl1_csscalediag (conj1_t conj, int offset, int m, int n, float *sigma, scomplex *a, int a_rs, int a_cs)
References scomplex::imag, and scomplex::real.
Referenced by FLA_Scale_diag().
void bl1_csshiftdiag (conj1_t conj, int offset, int m, int n, float *sigma, scomplex *a, int a_rs, int a_cs)
References scomplex::real.
Referenced by FLA_Shift_diag().
void bl1_csymmize (conj1_t conj, uplo1_t uplo, int m, scomplex *a, int a_rs, int a_cs)
References bl1_ccopyv(), bl1_is_col_storage(), bl1_is_conj(), bl1_is_gen_storage(), bl1_is_lower(), bl1_is_row_storage(), bl1_is_upper(), bl1_s0(), bl1_zero_dim1(), and scomplex::imag.
Referenced by FLA_Hermitianize(), and FLA_Symmetrize().
{ scomplex* a_src; scomplex* a_dst; scomplex* a_jj; int rs_src, cs_src, inc_src; int rs_dst, cs_dst, inc_dst; int n_iter; int j; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Assume A is square. n_iter = m; // Initialize with appropriate values based on storage. if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 1; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1; } else if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = a_cs; rs_src = 0; inc_src = 1; cs_dst = 1; rs_dst = 0; inc_dst = a_cs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 0; rs_src = a_rs; inc_src = 1; cs_dst = 0; rs_dst = 1; inc_dst = a_rs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = 0; rs_src = 1; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1; } else if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = 1 * a_rs; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1 * a_rs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = a_rs; inc_src = 1 * a_cs; cs_dst = 0; rs_dst = 1 * a_cs; inc_dst = a_rs; } } else // if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = a_cs; rs_src = 0; inc_src = 1 * a_rs; cs_dst = 1 * a_rs; rs_dst = 0; inc_dst = a_cs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = 1 * a_cs; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1 * a_cs; } } for ( j = 0; j < n_iter; j++ ) { a_src = a + j*cs_src + j*rs_src; a_dst = a + j*cs_dst + j*rs_dst; bl1_ccopyv( conj, j, a_src, inc_src, a_dst, inc_dst ); if ( bl1_is_conj( conj ) ) { a_jj = a + j*a_rs + j*a_cs; a_jj->imag = bl1_s0(); } } }
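A sketch of the Hermitian case: mirror the stored lower triangle into the upper triangle with conjugation (the conjugate branch above also clears the imaginary part of each diagonal element). BLIS1_CONJUGATE and BLIS1_LOWER_TRIANGULAR are assumed enumerators, and blis1.h is an assumed header name:

#include "blis1.h"  /* assumed header name */

/* Make a column-major m-by-m matrix explicitly Hermitian from its
   stored lower triangle. */
void hermitianize_from_lower( int m, scomplex* a )
{
    bl1_csymmize( BLIS1_CONJUGATE,           /* assumed enumerator */
                  BLIS1_LOWER_TRIANGULAR,    /* assumed enumerator */
                  m, a, 1, m );
}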
double bl1_d0 (void)
Referenced by bl1_cmaxabsmr(), bl1_dgemm(), bl1_dmaxabsm(), bl1_dmaxabsmr(), bl1_drandmr(), bl1_dsymm(), bl1_z0(), bl1_z1(), bl1_z1h(), bl1_z2(), bl1_zher2k(), bl1_zherk(), bl1_zm1(), bl1_zm1h(), bl1_zm2(), bl1_zmaxabsm(), bl1_zmaxabsmr(), bl1_zsymmize(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asz_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_opz_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), FLA_Apply_G_rf_opz_var9(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), FLA_Bsvd_v_opz_var2(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Pythag2_opd(), FLA_Pythag3_opd(), FLA_Tevd_find_submatrix_opd(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opz_var2(), and FLA_Tridiag_UT_shift_U_l_opd().
{
    double x;

    x = 0.0;

    return x;
}
double bl1_d1 (void)
Referenced by bl1_dgemm(), bl1_drandmr(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), bl1_z1(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asz_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_opz_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), FLA_Apply_G_rf_opz_var9(), FLA_Bsvd_francis_v_opd_var1(), FLA_Bsvd_sinval_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Pythag2_opd(), FLA_QR_UT_form_Q_opd_var1(), FLA_QR_UT_form_Q_ops_var1(), FLA_Tevd_compute_scaling_opd(), FLA_Tevd_n_opz_var1(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opz_var2(), and FLA_Tridiag_UT_shift_U_l_opd().
{
    double x;

    x = 1.0;

    return x;
}
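The scalar constant helpers are typically used to build arguments for the set routines listed above; a small sketch (only the blis1.h header name is assumed):

#include "blis1.h"  /* assumed header name */

/* Clear a column-major m-by-n matrix and place ones on its main diagonal. */
void set_identity_like_d( int m, int n, double* a )
{
    double zero = bl1_d0();
    double one  = bl1_d1();

    bl1_dsetm( m, n, &zero, a, 1, m );
    bl1_dsetdiag( 0, m, n, &one, a, 1, m );
}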
double* bl1_dallocm (unsigned int m, unsigned int n)
Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dgemm(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), and bl1_dtrsmsx().
{
    return ( double* ) BLIS1_MALLOC( m * n * sizeof( double ) );
}
double* bl1_dallocv (unsigned int n_elem)
Referenced by bl1_dtrmvsx(), and bl1_dtrsvsx().
{
    return ( double* ) BLIS1_MALLOC( n_elem * sizeof( double ) );
}
void bl1_dapdiagmv (side1_t side, conj1_t conj, int m, int n, double *x, int incx, double *a, int a_rs, int a_cs)
References bl1_dewscalv(), bl1_dscalv(), bl1_is_left(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Apply_diag_matrix().
{ double* chi; double* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then we can proceed as if the // operation were transposed (applying the diagonal values in x from the // opposite side) for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_toggle_side( side ); } if ( bl1_is_left( side ) ) { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_dewscalv( conj, n_elem, x, incx, a_begin, inca ); } } else { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; chi = x + j*incx; bl1_dscalv( conj, n_elem, chi, a_begin, inca ); } } }
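A sketch of the right-side case, which scales the columns of A by the entries of x (conjugation has no effect on real data). BLIS1_RIGHT is an assumed side1_t enumerator and blis1.h is an assumed header name:

#include "blis1.h"  /* assumed header name */

/* A := A * diag(x) for a column-major m-by-n matrix A; x has length n. */
void scale_cols_d( int m, int n, double* x, double* a )
{
    bl1_dapdiagmv( BLIS1_RIGHT, BLIS1_NO_CONJUGATE,  /* BLIS1_RIGHT is assumed */
                   m, n, x, 1, a, 1, m );
}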
void bl1_dcreate_contigm (int m, int n, double *a_save, int a_rs_save, int a_cs_save, double **a, int *a_rs, int *a_cs)
References bl1_dallocm(), bl1_dcopymt(), bl1_is_gen_storage(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dgemm(), bl1_dgemv(), bl1_dger(), bl1_dsymm(), bl1_dtrmm(), bl1_dtrmmsx(), bl1_dtrsm(), and bl1_dtrsmsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_dallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_dcopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_dcreate_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dallocm(), bl1_dcopymr(), bl1_is_gen_storage(), and bl1_set_contig_strides().
Referenced by bl1_dcreate_contigmsr(), bl1_dsymm(), bl1_dsymv(), bl1_dsyr(), bl1_dsyr2(), bl1_dsyr2k(), bl1_dsyrk(), bl1_dtrmm(), bl1_dtrmmsx(), bl1_dtrmv(), bl1_dtrmvsx(), bl1_dtrsm(), bl1_dtrsmsx(), bl1_dtrsv(), and bl1_dtrsvsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_dallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_dcopymr( uplo, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_dcreate_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dcreate_contigmr(), and bl1_is_left().
{
    int dim_a;

    // Choose the dimension of the matrix based on the side parameter.
    if ( bl1_is_left( side ) ) dim_a = m;
    else                       dim_a = n;

    // Call the simple version with chosen dimensions.
    bl1_dcreate_contigmr( uplo,
                          dim_a, dim_a,
                          a_save, a_rs_save, a_cs_save,
                          a,      a_rs,      a_cs );
}
void bl1_dcreate_contigmt | ( | trans1_t | trans_dims, |
int | m, | ||
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dallocm(), bl1_dcopymt(), bl1_does_trans(), bl1_is_gen_storage(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dgemm(), bl1_dsyr2k(), and bl1_dsyrk().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Transpose the dimensions if requested. if ( bl1_does_trans( trans_dims ) ) bl1_swap_ints( m, n ); // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_dallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_dcopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_dewinvscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dewinvscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_elemwise().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_dewinvscalv | ( | conj1_t | conj, |
int | n, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
Referenced by bl1_dewinvscalmt().
{
    double* chi;
    double* psi;
    int     i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_dinvscals( chi, psi );
    }
}
void bl1_dewscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dewscalv(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Scal_elemwise().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_dewscalv | ( | conj1_t | conj, |
int | n, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
Referenced by bl1_dapdiagmv(), and bl1_dewscalmt().
{
    double* chi;
    double* psi;
    int     i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_dscals( chi, psi );
    }
}
void bl1_dfree | ( | double * | p | ) |
Referenced by bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmr(), bl1_dfree_saved_contigmsr(), bl1_dgemm(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrmvsx(), bl1_dtrsmsx(), and bl1_dtrsvsx().
{
free( ( void* ) p );
}
void bl1_dfree_contigm | ( | double * | a_save, |
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dfree(), and bl1_is_gen_storage().
Referenced by bl1_dgemm(), bl1_dgemv(), bl1_dsymm(), bl1_dsymv(), bl1_dsyr2k(), bl1_dsyrk(), bl1_dtrmm(), bl1_dtrmmsx(), bl1_dtrmv(), bl1_dtrmvsx(), bl1_dtrsm(), bl1_dtrsmsx(), bl1_dtrsv(), and bl1_dtrsvsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Free the temporary contiguous storage for the matrix.
        bl1_dfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_dfree_saved_contigm | ( | int | m, |
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dcopymt(), bl1_dfree(), bl1_is_gen_storage(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dgemm(), bl1_dger(), bl1_dsymm(), bl1_dsyr(), bl1_dsyr2(), bl1_dtrmm(), bl1_dtrmmsx(), bl1_dtrsm(), and bl1_dtrsmsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_dcopymt( BLIS1_NO_TRANSPOSE,
                     m, n,
                     *a,     *a_rs,     *a_cs,
                     a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_dfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_dfree_saved_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dcopymr(), bl1_dfree(), and bl1_is_gen_storage().
Referenced by bl1_dsyr2k(), and bl1_dsyrk().
{ if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Copy the contents of the temporary matrix back to the original. bl1_dcopymr( uplo, m, n, *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save ); // Free the temporary contiguous storage for the matrix. bl1_dfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_dfree_saved_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
double * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
double ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_dcopymt(), bl1_dfree(), bl1_is_gen_storage(), and bl1_is_left().
{ int dim_a; // Choose the dimension of the matrix based on the side parameter. if ( bl1_is_left( side ) ) dim_a = m; else dim_a = n; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Copy the contents of the temporary matrix back to the original. bl1_dcopymt( uplo, dim_a, dim_a, *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save ); // Free the temporary contiguous storage for the matrix. bl1_dfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_dident | ( | int | m, |
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opz_var2(), and FLA_UDdate_UT_opd_var1().
{
    double* alpha;
    int     i, j;

    for ( j = 0; j < m; ++j )
    {
        for ( i = 0; i < m; ++i )
        {
            alpha = a + i*a_rs + j*a_cs;

            *alpha = 0.0;
            if ( i == j ) *alpha = 1.0;
        }
    }
}
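For instance, building a 4-by-4 identity in a column-major buffer (a minimal sketch; the header name blis1.h is an assumption):
#include "blis1.h"  // assumed header name
void example_ident( void )
{
    double q[ 16 ];
    // Form the 4-by-4 identity with row stride 1 and column stride 4.
    bl1_dident( 4, q, 1, 4 );
}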
void bl1_dinvert2s | ( | conj1_t | conj, |
double * | alpha, | ||
double * | beta | ||
) |
Referenced by bl1_dinvscalm(), and bl1_zdinvscalm().
{
double one = 1.0;
*beta = one / *alpha;
}
void bl1_dinverts | ( | conj1_t | conj, |
double * | alpha | ||
) |
Referenced by FLA_Trinv_ln_opd_var1(), FLA_Trinv_ln_opd_var2(), FLA_Trinv_ln_opd_var3(), FLA_Trinv_ln_opd_var4(), FLA_Trinv_un_opd_var1(), FLA_Trinv_un_opd_var2(), FLA_Trinv_un_opd_var3(), and FLA_Trinv_un_opd_var4().
{
double one = 1.0;
*alpha = one / *alpha;
}
void bl1_dinvertv | ( | conj1_t | conj, |
int | n, | ||
double * | x, | ||
int | incx | ||
) |
Referenced by FLA_Invert().
{
    double  one = 1.0;
    double* chi;
    int     i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        *chi = one / *chi;
    }
}
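A small sketch of inverting a vector in place; as the body above shows, the conj argument is ignored for real data, and blis1.h is an assumed header name.
#include "blis1.h"  // assumed header name
void example_invertv( void )
{
    double d[] = { 2.0, 4.0, 8.0 };
    // Overwrite each element with its reciprocal: d becomes { 0.5, 0.25, 0.125 }.
    bl1_dinvertv( BLIS1_NO_CONJUGATE, 3, d, 1 );
}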
double bl1_dm1 | ( | void | ) |
Referenced by bl1_zconjm(), bl1_zconjmr(), bl1_zconjv(), bl1_zm1(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), FLA_Bsvd_v_opz_var2(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1().
{ double x; x = -1.0; return x; }
double bl1_dm1h | ( | void | ) |
Referenced by bl1_zm1h().
{ double x; x = -0.5; return x; }
void bl1_dmaxabsm | ( | int | m, |
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | maxabs | ||
) |
References bl1_d0(), bl1_dmaxabsv(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value().
{ double zero = bl1_d0(); double* a_begin; double maxabs_cand; double maxabs_temp; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } // Initialize the maximum absolute value candidate to the first element. bl1_dabsval2( a, &maxabs_cand ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_dmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } *maxabs = maxabs_cand; }
void bl1_dmaxabsmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | maxabs | ||
) |
References bl1_d0(), bl1_dmaxabsv(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value_herm().
{ double zero = bl1_d0(); double* a_begin; double maxabs_cand; double maxabs_temp; int inca, lda; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize the maximum absolute value candidate to the first element. bl1_dabsval2( a, &maxabs_cand ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_dmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; bl1_dmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } *maxabs = maxabs_cand; }
void bl1_dmaxabsv | ( | int | n, |
double * | x, | ||
int | incx, | ||
double * | maxabs | ||
) |
Referenced by bl1_dmaxabsm(), and bl1_dmaxabsmr().
{
    double* chi;
    double  maxabs_cand;
    double  maxabs_temp;
    int     i;

    bl1_dabsval2( x, &maxabs_cand );

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_dabsval2( chi, &maxabs_temp );

        if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp;
    }

    *maxabs = maxabs_cand;
}
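A sketch of querying the largest magnitude in a small column-major matrix via bl1_dmaxabsm() (documented above); blis1.h is an assumed header name.
#include "blis1.h"  // assumed header name
void example_maxabs( void )
{
    // A is 2-by-2, column-major: [ 1.0 3.0 ; -7.5 2.0 ].
    double a[] = { 1.0, -7.5,
                   3.0,  2.0 };
    double maxabs;
    bl1_dmaxabsm( 2, 2, a, 1, 2, &maxabs );  // maxabs becomes 7.5
}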
void bl1_drandm | ( | int | m, |
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_drandv(), bl1_is_row_storage(), and bl1_zero_dim2().
Referenced by FLA_Random_matrix().
{ double* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_drandv( n_elem, a_begin, inca ); } }
void bl1_drandmr | ( | uplo1_t | uplo, |
diag1_t | diag, | ||
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_d0(), bl1_d1(), bl1_dinvscalv(), bl1_drands(), bl1_drandv(), bl1_dsetv(), bl1_is_nonunit_diag(), bl1_is_row_storage(), bl1_is_unit_diag(), bl1_is_upper(), bl1_is_zero_diag(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Random_tri_matrix().
{ double* a_begin; double* ajj; double one; double zero; double ord; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize some scalars. one = bl1_d1(); zero = bl1_d0(); ord = ( double ) bl1_max( m, n ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Randomize super-diagonal elements. bl1_drandv( n_elem, a_begin, inca ); // Normalize super-diagonal elements by order of the matrix. bl1_dinvscalv( BLIS1_NO_CONJUGATE, n_elem, &ord, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_drands( ajj ); bl1_dabsval2( ajj, ajj ); bl1_dadd3( ajj, &one, ajj ); } // Initialize sub-diagonal elements to zero. bl1_dsetv( n_elem_max - j - 1, &zero, ajj + inca, inca ); } } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Initialize super-diagonal to zero. bl1_dsetv( n_elem, &zero, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_drands( ajj ); bl1_dabsval2( ajj, ajj ); bl1_dadd3( ajj, &one, ajj ); } // Randomize sub-diagonal elements. bl1_drandv( n_elem_max - j - 1, ajj + inca, inca ); // Normalize sub-diagonal elements by order of the matrix. bl1_dinvscalv( BLIS1_NO_CONJUGATE, n_elem_max - j - 1, &ord, ajj + inca, inca ); } } } }
void bl1_drands | ( | double * | alpha | ) |
Referenced by bl1_drandmr(), bl1_drandv(), and bl1_zrands().
{ *alpha = ( ( double ) rand() / ( ( double ) RAND_MAX / 2.0 ) ) - 1.0; }
void bl1_drandv | ( | int | n, |
double * | x, | ||
int | incx | ||
) |
References bl1_drands().
Referenced by bl1_drandm(), and bl1_drandmr().
{
    double* chi;
    int     i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_drands( chi );
    }
}
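Because bl1_drands() draws from the C library's rand() (see its body above), values fall roughly in [-1, 1] and reproducibility is controlled by seeding with srand(). A sketch, with blis1.h as an assumed header name:
#include <stdlib.h>
#include "blis1.h"  // assumed header name
void example_randm( void )
{
    double a[ 20 ];
    srand( 12345 );  // seed the underlying rand() stream for reproducibility
    // Fill a 5-by-4 column-major matrix with pseudo-random values.
    bl1_drandm( 5, 4, a, 1, 5 );
}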
void bl1_dscalediag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
double * | sigma, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Scale_diag(), and FLA_UDdate_UT_opd_var1().
{
    double* alpha;
    int     i, j;

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        *alpha *= *sigma;

        ++i;
        ++j;
    }
}
void bl1_dsetdiag | ( | int | offset, |
int | m, | ||
int | n, | ||
double * | sigma, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Set_diag(), FLA_Set_offdiag(), and FLA_Triangularize().
{
    double* alpha;
    int     i, j;

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        *alpha = *sigma;

        ++i;
        ++j;
    }
}
void bl1_dsetm | ( | int | m, |
int | n, | ||
double * | sigma, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Set(), FLA_Tridiag_UT_l_step_ofd_var3(), and FLA_Tridiag_UT_l_step_opd_var3().
{
    double* alpha;
    int     i, j;

    for ( j = 0; j < n; ++j )
    {
        for ( i = 0; i < m; ++i )
        {
            alpha = a + i*a_rs + j*a_cs;

            *alpha = *sigma;
        }
    }
}
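A sketch combining bl1_dsetm() and bl1_dsetdiag() (both documented above) to initialize a matrix to a constant on the diagonal and zero elsewhere; blis1.h is an assumed header name.
#include "blis1.h"  // assumed header name
void example_setm_setdiag( void )
{
    double a[ 12 ];
    double zero = 0.0;
    double two  = 2.0;
    // Zero a 4-by-3 column-major matrix, then put 2.0 on its main diagonal.
    bl1_dsetm( 4, 3, &zero, a, 1, 4 );
    bl1_dsetdiag( 0, 4, 3, &two, a, 1, 4 );
}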
void bl1_dsetmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | sigma, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dsetv(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Setr(), and FLA_Triangularize().
{ double* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; bl1_dsetv( n_elem, sigma, a_begin, inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j - 1 ); a_begin = a + j*lda + (j + 1)*inca; bl1_dsetv( n_elem, sigma, a_begin, inca ); } } }
void bl1_dsetv | ( | int | m, |
double * | sigma, | ||
double * | x, | ||
int | incx | ||
) |
Referenced by bl1_drandmr(), bl1_dsetmr(), FLA_Bidiag_UT_l_realify_opt(), FLA_Bidiag_UT_realify_diagonals_opt(), FLA_Bidiag_UT_u_realify_opt(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Obj_extract_imag_part(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_subdiagonal_opt(), FLA_Tridiag_UT_shift_U_l_opd(), and FLA_Tridiag_UT_u_realify_opt().
{
    double* chi;
    int     i;

    // Set each of the m elements of x to *sigma.
    for ( i = 0; i < m; ++i )
    {
        chi = x + i*incx;

        *chi = *sigma;
    }
}
void bl1_dshiftdiag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
double * | sigma, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), and FLA_Shift_diag().
{
    double* alpha;
    int     i, j;

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        *alpha += *sigma;

        ++i;
        ++j;
    }
}
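A typical use is adding a scalar to the main diagonal, i.e. A := A + sigma*I for a square matrix. A minimal sketch (the conj argument is ignored for real data; blis1.h is an assumed header name):
#include "blis1.h"  // assumed header name
void example_shiftdiag( double* a, int n, int lda )
{
    double sigma = 1.0e-8;
    // Add sigma to each main-diagonal entry of the n-by-n column-major A.
    bl1_dshiftdiag( BLIS1_NO_CONJUGATE, 0, n, n, &sigma, a, 1, lda );
}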
void bl1_dsymmize | ( | conj1_t | conj, |
uplo1_t | uplo, | ||
int | m, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dcopyv(), bl1_is_col_storage(), bl1_is_gen_storage(), bl1_is_lower(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim1().
Referenced by FLA_Hermitianize(), and FLA_Symmetrize().
{ double* a_src; double* a_dst; int rs_src, cs_src, inc_src; int rs_dst, cs_dst, inc_dst; int n_iter; int j; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Assume A is square. n_iter = m; // Initialize with appropriate values based on storage. if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 1; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1; } else if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = a_cs; rs_src = 0; inc_src = 1; cs_dst = 1; rs_dst = 0; inc_dst = a_cs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 0; rs_src = a_rs; inc_src = 1; cs_dst = 0; rs_dst = 1; inc_dst = a_rs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = 0; rs_src = 1; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1; } else if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = 1 * a_rs; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1 * a_rs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = a_rs; inc_src = 1 * a_cs; cs_dst = 0; rs_dst = 1 * a_cs; inc_dst = a_rs; } } else // if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = a_cs; rs_src = 0; inc_src = 1 * a_rs; cs_dst = 1 * a_rs; rs_dst = 0; inc_dst = a_cs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = 1 * a_cs; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1 * a_cs; } } for ( j = 0; j < n_iter; j++ ) { a_src = a + j*cs_src + j*rs_src; a_dst = a + j*cs_dst + j*rs_dst; bl1_dcopyv( conj, j, a_src, inc_src, a_dst, inc_dst ); } }
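A sketch of symmetrizing a column-major matrix whose lower triangle holds the data; per the loop above, the stored triangle is mirrored into the unstored one. The enum spelling BLIS1_LOWER_TRIANGULAR for the lower-triangular uplo1_t value is an assumption, as is the blis1.h header name.
#include "blis1.h"  // assumed header name
void example_symmize( double* a, int m, int lda )
{
    // Copy the lower triangle of the m-by-m matrix A into its strictly
    // upper triangle, making A symmetric.
    bl1_dsymmize( BLIS1_NO_CONJUGATE, BLIS1_LOWER_TRIANGULAR, m, a, 1, lda );
}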
int* bl1_iallocm | ( | unsigned int | m, |
unsigned int | n | ||
) |
{ return ( int* ) BLIS1_MALLOC( m * n * sizeof( int ) ); }
int* bl1_iallocv | ( | unsigned int | n_elem | ) |
{ return ( int* ) BLIS1_MALLOC( n_elem * sizeof( int ) ); }
void bl1_ifree | ( | int * | p | ) |
{
free( ( void* ) p );
}
void bl1_isetdiag | ( | int | offset, |
int | m, | ||
int | n, | ||
int * | sigma, | ||
int * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Set_diag(), and FLA_Set_offdiag().
{ int* alpha; int i, j; i = j = 0; if ( offset < 0 ) i = -offset; else if ( offset > 0 ) j = offset; while ( i < m && j < n ) { alpha = a + i*a_rs + j*a_cs; *alpha = *sigma; ++i; ++j; } }
void bl1_isetm | ( | int | m, |
int | n, | ||
int * | sigma, | ||
int * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Set().
{ int* alpha; int i, j; for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { alpha = a + i*a_rs + j*a_cs; *alpha = *sigma; } } }
void bl1_isetv | ( | int | m, |
int * | sigma, | ||
int * | x, | ||
int | incx | ||
) |
{
    int* chi;
    int  i;

    // Set each of the m elements of x to *sigma.
    for ( i = 0; i < m; ++i )
    {
        chi = x + i*incx;

        *chi = *sigma;
    }
}
float bl1_s0 | ( | void | ) |
Referenced by bl1_c0(), bl1_c1(), bl1_c1h(), bl1_c2(), bl1_cher2k(), bl1_cherk(), bl1_cm1(), bl1_cm1h(), bl1_cm2(), bl1_cmaxabsm(), bl1_csymmize(), bl1_sgemm(), bl1_smaxabsm(), bl1_smaxabsmr(), bl1_srandmr(), bl1_ssymm(), FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_ops_var1(), FLA_Pythag2_ops(), FLA_Pythag3_ops(), and FLA_Tridiag_UT_shift_U_l_ops().
{ float x; x = 0.0F; return x; }
float bl1_s1 | ( | void | ) |
Referenced by bl1_c1(), bl1_sgemm(), bl1_srandmr(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), FLA_Bsvd_francis_v_ops_var1(), FLA_Bsvd_sinval_v_ops_var1(), FLA_Pythag2_ops(), FLA_Tevd_compute_scaling_ops(), and FLA_Tridiag_UT_shift_U_l_ops().
{ float x; x = 1.0F; return x; }
float* bl1_sallocm | ( | unsigned int | m, |
unsigned int | n | ||
) |
Referenced by bl1_screate_contigm(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_sgemm(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), and bl1_strsmsx().
{ return ( float* ) BLIS1_MALLOC( m * n * sizeof( float ) ); }
float* bl1_sallocv | ( | unsigned int | n_elem | ) |
Referenced by bl1_strmvsx(), and bl1_strsvsx().
{ return ( float* ) BLIS1_MALLOC( n_elem * sizeof( float ) ); }
void bl1_sapdiagmv | ( | side1_t | side, |
conj1_t | conj, | ||
int | m, | ||
int | n, | ||
float * | x, | ||
int | incx, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_left(), bl1_is_row_storage(), bl1_sewscalv(), bl1_sscalv(), and bl1_zero_dim2().
Referenced by FLA_Apply_diag_matrix().
{ float* chi; float* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then we can proceed as if the // operation were transposed (applying the diagonal values in x from the // opposite side) for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_toggle_side( side ); } if ( bl1_is_left( side ) ) { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_sewscalv( conj, n_elem, x, incx, a_begin, inca ); } } else { for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; chi = x + j*incx; bl1_sscalv( conj, n_elem, chi, a_begin, inca ); } } }
void bl1_screate_contigm | ( | int | m, |
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_sallocm(), bl1_scopymt(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_sgemm(), bl1_sgemv(), bl1_sger(), bl1_ssymm(), bl1_strmm(), bl1_strmmsx(), bl1_strsm(), and bl1_strsmsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_sallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_scopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_screate_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_sallocm(), bl1_scopymr(), and bl1_set_contig_strides().
Referenced by bl1_screate_contigmsr(), bl1_ssymm(), bl1_ssymv(), bl1_ssyr(), bl1_ssyr2(), bl1_ssyr2k(), bl1_ssyrk(), bl1_strmm(), bl1_strmmsx(), bl1_strmv(), bl1_strmvsx(), bl1_strsm(), bl1_strsmsx(), bl1_strsv(), and bl1_strsvsx().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_sallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_scopymr( uplo, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_screate_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_left(), and bl1_screate_contigmr().
{ int dim_a; // Choose the dimension of the matrix based on the side parameter. if ( bl1_is_left( side ) ) dim_a = m; else dim_a = n; // Call the simple version with chosen dimensions. bl1_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, a, a_rs, a_cs ); }
void bl1_screate_contigmt | ( | trans1_t | trans_dims, |
int | m, | ||
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_does_trans(), bl1_is_gen_storage(), bl1_sallocm(), bl1_scopymt(), bl1_set_contig_strides(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_sgemm(), bl1_ssyr2k(), and bl1_ssyrk().
{ int m_contig, n_contig; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Transpose the dimensions if requested. if ( bl1_does_trans( trans_dims ) ) bl1_swap_ints( m, n ); // Initialize dimensions assuming no transposition needed during copy. m_contig = m; n_contig = n; /* // Transpose the dimensions of the contiguous matrix, if requested. if ( bl1_does_trans( trans_copy ) ) { m_contig = n; n_contig = m; } */ // Allocate temporary contiguous storage for the matrix. *a = bl1_sallocm( m_contig, n_contig ); // Set the row and column strides for the temporary matrix. bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs ); // Initialize the contiguous matrix with the contents of the original. bl1_scopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig, a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs ); } }
void bl1_set_contig_strides | ( | int | m, |
int | n, | ||
int * | rs, | ||
int * | cs | ||
) |
Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), and bl1_zcreate_contigmt().
{
    // Default to column-major order.
    *rs = 1;
    *cs = m;

    // Handle special cases first.
    // Check the strides, and modify them if needed.
    if ( *rs == 1 && *cs == 1 )
    {
        // If both strides are unit, we are probably trying to create a
        // 1-by-n matrix in column-major order, or an m-by-1 matrix in
        // row-major order. We have decided to "reserve" the case where
        // rs == cs == 1 for scalars only, as having unit strides can
        // upset the BLAS error checking when attempting to induce a
        // row-major operation.
        if ( m > 1 && n == 1 )
        {
            // Set the column stride to indicate that this is an m-by-1
            // matrix (or vector) stored in column-major order. This is
            // necessary because, in some cases, we have to satisfy error
            // checking in the underlying BLAS library, which expects the
            // leading dimension to be set to at least m, even if it will
            // never be used for indexing since there is only one column
            // of data. Note that rs is already set to 1.
            *cs = m;
        }
        else if ( m == 1 && 1 < n )
        {
            // Set the row stride to indicate that this is a 1-by-n matrix
            // stored in row-major order. Note that cs is already set to 1.
            *rs = n;
        }
        else
        {
            // If m == n == 1, then we are dealing with a scalar. Since rs
            // and cs do not exceed m and n, we don't have to do anything.
        }
    }
}
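The resulting strides for a few shapes, as implied by the code above (a sketch; blis1.h is an assumed header name):
#include "blis1.h"  // assumed header name
void example_contig_strides( void )
{
    int rs, cs;
    bl1_set_contig_strides( 5, 4, &rs, &cs );  // rs = 1, cs = 5 (column-major)
    bl1_set_contig_strides( 5, 1, &rs, &cs );  // rs = 1, cs = 5 (column vector)
    bl1_set_contig_strides( 1, 4, &rs, &cs );  // rs = 4, cs = 1 (row vector)
    bl1_set_contig_strides( 1, 1, &rs, &cs );  // rs = 1, cs = 1 (scalar)
}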
void bl1_set_dim_with_side | ( | side1_t | side, |
int | m, | ||
int | n, | ||
int * | dim_new | ||
) |
References bl1_is_left().
Referenced by bl1_chemm(), bl1_csymm(), bl1_ctrmm(), bl1_ctrmmsx(), bl1_ctrsm(), bl1_ctrsmsx(), bl1_dsymm(), bl1_dtrmm(), bl1_dtrmmsx(), bl1_dtrsm(), bl1_dtrsmsx(), bl1_ssymm(), bl1_strmm(), bl1_strmmsx(), bl1_strsm(), bl1_strsmsx(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrsm(), and bl1_ztrsmsx().
{
    if ( bl1_is_left( side ) )
    {
        *dim_new = m;
    }
    else // if ( bl1_is_right( side ) )
    {
        *dim_new = n;
    }
}
void bl1_set_dims_with_trans | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
int * | m_new, | ||
int * | n_new | ||
) |
References bl1_does_trans().
Referenced by bl1_cher2k(), bl1_csyr2k(), bl1_dsyr2k(), bl1_ssyr2k(), bl1_zher2k(), and bl1_zsyr2k().
{
    if ( bl1_does_trans( trans ) )
    {
        *m_new = n;
        *n_new = m;
    }
    else
    {
        *m_new = m;
        *n_new = n;
    }
}
void bl1_sewinvscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sewinvscalv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_elemwise().
{ float* a_begin; float* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_sewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_sewinvscalv | ( | conj1_t | conj, |
int | n, | ||
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
Referenced by bl1_sewinvscalmt().
{ float* chi; float* psi; int i; for ( i = 0; i < n; ++i ) { chi = x + i*incx; psi = y + i*incy; bl1_sinvscals( chi, psi ); } }
void bl1_sewscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sewscalv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Scal_elemwise().
{ float* a_begin; float* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_sewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_sewscalv | ( | conj1_t | conj, |
int | n, | ||
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
Referenced by bl1_sapdiagmv(), and bl1_sewscalmt().
{ float* chi; float* psi; int i; for ( i = 0; i < n; ++i ) { chi = x + i*incx; psi = y + i*incy; bl1_sscals( chi, psi ); } }
void bl1_sfree | ( | float * | p | ) |
Referenced by bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmr(), bl1_sfree_saved_contigmsr(), bl1_sgemm(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strmvsx(), bl1_strsmsx(), and bl1_strsvsx().
{
free( ( void* ) p );
}
void bl1_sfree_contigm | ( | float * | a_save, |
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), and bl1_sfree().
Referenced by bl1_sgemm(), bl1_sgemv(), bl1_ssymm(), bl1_ssymv(), bl1_ssyr2k(), bl1_ssyrk(), bl1_strmm(), bl1_strmmsx(), bl1_strmv(), bl1_strmvsx(), bl1_strsm(), bl1_strsmsx(), bl1_strsv(), and bl1_strsvsx().
{ if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Free the temporary contiguous storage for the matrix. bl1_sfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_sfree_saved_contigm | ( | int | m, |
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_scopymt(), bl1_sfree(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_sgemm(), bl1_sger(), bl1_ssymm(), bl1_ssyr(), bl1_ssyr2(), bl1_strmm(), bl1_strmmsx(), bl1_strsm(), and bl1_strsmsx().
{ if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Copy the contents of the temporary matrix back to the original. bl1_scopymt( BLIS1_NO_TRANSPOSE, m, n, *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save ); // Free the temporary contiguous storage for the matrix. bl1_sfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_sfree_saved_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_scopymr(), and bl1_sfree().
Referenced by bl1_ssyr2k(), and bl1_ssyrk().
{ if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Copy the contents of the temporary matrix back to the original. bl1_scopymr( uplo, m, n, *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save ); // Free the temporary contiguous storage for the matrix. bl1_sfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_sfree_saved_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
float * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
float ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_is_left(), bl1_scopymt(), and bl1_sfree().
{ int dim_a; // Choose the dimension of the matrix based on the side parameter. if ( bl1_is_left( side ) ) dim_a = m; else dim_a = n; if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) ) { // Copy the contents of the temporary matrix back to the original. bl1_scopymt( uplo, dim_a, dim_a, *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save ); // Free the temporary contiguous storage for the matrix. bl1_sfree( *a ); // Restore the original matrix address. *a = a_save; // Restore the original row and column strides. *a_rs = a_rs_save; *a_cs = a_cs_save; } }
void bl1_sident | ( | int | m, |
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_UDdate_UT_ops_var1().
{ float* alpha; int i, j; for ( j = 0; j < m; ++j ) { for ( i = 0; i < m; ++i ) { alpha = a + i*a_rs + j*a_cs; *alpha = 0.0F; if ( i == j ) *alpha = 1.0F; } } }
void bl1_sinvert2s | ( | conj1_t | conj, |
float * | alpha, | ||
float * | beta | ||
) |
Referenced by bl1_csinvscalm(), and bl1_sinvscalm().
{
float one = 1.0F;
*beta = one / *alpha;
}
void bl1_sinverts | ( | conj1_t | conj, |
float * | alpha | ||
) |
Referenced by FLA_Trinv_ln_ops_var1(), FLA_Trinv_ln_ops_var2(), FLA_Trinv_ln_ops_var3(), FLA_Trinv_ln_ops_var4(), FLA_Trinv_un_ops_var1(), FLA_Trinv_un_ops_var2(), FLA_Trinv_un_ops_var3(), and FLA_Trinv_un_ops_var4().
{
float one = 1.0F;
*alpha = one / *alpha;
}
void bl1_sinvertv | ( | conj1_t | conj, |
int | n, | ||
float * | x, | ||
int | incx | ||
) |
Referenced by FLA_Invert().
{ float one = 1.0F; float* chi; int i; for ( i = 0; i < n; ++i ) { chi = x + i*incx; *chi = one / *chi; } }
float bl1_sm1 | ( | void | ) |
Referenced by bl1_cconjm(), bl1_cconjmr(), bl1_cconjv(), bl1_cm1(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().
{ float x; x = -1.0F; return x; }
float bl1_sm1h | ( | void | ) |
Referenced by bl1_cm1h().
{ float x; x = -0.5F; return x; }
void bl1_smaxabsm | ( | int | m, |
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | maxabs | ||
) |
References bl1_is_row_storage(), bl1_s0(), bl1_smaxabsv(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value().
{ float zero = bl1_s0(); float* a_begin; float maxabs_cand; float maxabs_temp; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } // Initialize the maximum absolute value candidate to the first element. bl1_sabsval2( a, &maxabs_cand ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_smaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } *maxabs = maxabs_cand; }
void bl1_smaxabsmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | maxabs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_s0(), bl1_smaxabsv(), and bl1_zero_dim2().
Referenced by FLA_Max_abs_value_herm().
{ float zero = bl1_s0(); float* a_begin; float maxabs_cand; float maxabs_temp; int inca, lda; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize the maximum absolute value candidate to the first element. bl1_sabsval2( a, &maxabs_cand ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_smaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; bl1_smaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } *maxabs = maxabs_cand; }
void bl1_smaxabsv | ( | int | n, |
float * | x, | ||
int | incx, | ||
float * | maxabs | ||
) |
Referenced by bl1_smaxabsm(), and bl1_smaxabsmr().
{ float* chi; float maxabs_cand; float maxabs_temp; int i; bl1_sabsval2( x, &maxabs_cand ); for ( i = 0; i < n; ++i ) { chi = x + i*incx; bl1_sabsval2( chi, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } *maxabs = maxabs_cand; }
void bl1_srandm | ( | int | m, |
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_srandv(), and bl1_zero_dim2().
Referenced by FLA_Random_matrix().
{ float* a_begin; int inca, lda; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. inca = a_rs; lda = a_cs; n_iter = n; n_elem = m; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_srandv( n_elem, a_begin, inca ); } }
void bl1_srandmr | ( | uplo1_t | uplo, |
diag1_t | diag, | ||
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_nonunit_diag(), bl1_is_row_storage(), bl1_is_unit_diag(), bl1_is_upper(), bl1_is_zero_diag(), bl1_s0(), bl1_s1(), bl1_sinvscalv(), bl1_srands(), bl1_srandv(), bl1_ssetv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Random_tri_matrix().
{ float* a_begin; float* ajj; float one; float zero; float ord; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize some scalars. one = bl1_s1(); zero = bl1_s0(); ord = ( float ) bl1_max( m, n ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Randomize super-diagonal elements. bl1_srandv( n_elem, a_begin, inca ); // Normalize super-diagonal elements by order of the matrix. bl1_sinvscalv( BLIS1_NO_CONJUGATE, n_elem, &ord, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_srands( ajj ); bl1_sabsval2( ajj, ajj ); bl1_sadd3( ajj, &one, ajj ); } // Initialize sub-diagonal elements to zero. bl1_ssetv( n_elem_max - j - 1, &zero, ajj + inca, inca ); } } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Initialize super-diagonal to zero. bl1_ssetv( n_elem, &zero, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_srands( ajj ); bl1_sabsval2( ajj, ajj ); bl1_sadd3( ajj, &one, ajj ); } // Randomize sub-diagonal elements. bl1_srandv( n_elem_max - j - 1, ajj + inca, inca ); // Normalize sub-diagonal elements by order of the matrix. bl1_sinvscalv( BLIS1_NO_CONJUGATE, n_elem_max - j - 1, &ord, ajj + inca, inca ); } } } }
void bl1_srands | ( | float * | alpha | ) |
Referenced by bl1_crands(), bl1_srandmr(), and bl1_srandv().
{
*alpha = ( float ) ( ( double ) rand() / ( ( double ) RAND_MAX / 2.0F ) ) - 1.0F;
}
void bl1_srandv | ( | int | n, |
float * | x, | ||
int | incx | ||
) |
References bl1_srands().
Referenced by bl1_srandm(), and bl1_srandmr().
{ float* chi; int i; for ( i = 0; i < n; ++i ) { chi = x + i*incx; bl1_srands( chi ); } }
void bl1_sscalediag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
float * | sigma, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Scale_diag(), and FLA_UDdate_UT_ops_var1().
{ float* alpha; int i, j; i = j = 0; if ( offset < 0 ) i = -offset; else if ( offset > 0 ) j = offset; while ( i < m && j < n ) { alpha = a + i*a_rs + j*a_cs; *alpha *= *sigma; ++i; ++j; } }
void bl1_ssetdiag | ( | int | offset, |
int | m, | ||
int | n, | ||
float * | sigma, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Set_diag(), FLA_Set_offdiag(), and FLA_Triangularize().
{ float* alpha; int i, j; i = j = 0; if ( offset < 0 ) i = -offset; else if ( offset > 0 ) j = offset; while ( i < m && j < n ) { alpha = a + i*a_rs + j*a_cs; *alpha = *sigma; ++i; ++j; } }
void bl1_ssetm | ( | int | m, |
int | n, | ||
float * | sigma, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Set(), FLA_Tridiag_UT_l_step_ofs_var3(), and FLA_Tridiag_UT_l_step_ops_var3().
{ float* alpha; int i, j; for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { alpha = a + i*a_rs + j*a_cs; *alpha = *sigma; } } }
void bl1_ssetmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | sigma, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_ssetv(), and bl1_zero_dim2().
Referenced by FLA_Setr(), and FLA_Triangularize().
{ float* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; bl1_ssetv( n_elem, sigma, a_begin, inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j - 1 ); a_begin = a + j*lda + (j + 1)*inca; bl1_ssetv( n_elem, sigma, a_begin, inca ); } } }
void bl1_ssetv | ( | int | m, |
float * | sigma, | ||
float * | x, | ||
int | incx | ||
) |
Referenced by bl1_srandmr(), bl1_ssetmr(), FLA_Bidiag_UT_l_realify_opt(), FLA_Bidiag_UT_realify_diagonals_opt(), FLA_Bidiag_UT_u_realify_opt(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Obj_extract_imag_part(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_subdiagonal_opt(), FLA_Tridiag_UT_shift_U_l_ops(), and FLA_Tridiag_UT_u_realify_opt().
{
    float* chi;
    int    i;

    for ( i = 0; i < m; ++i )
    {
        chi = x + i*incx;

        *chi = *sigma;
    }
}
void bl1_sshiftdiag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
float * | sigma, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), and FLA_Shift_diag().
{
    float* alpha;
    int    i, j;

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        *alpha += *sigma;

        ++i;
        ++j;
    }
}
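Example (illustrative sketch): bl1_sshiftdiag() adds *sigma to each diagonal entry, i.e. A := A + sigma*I when offset is 0, which is the update FLA_Shift_diag() relies on. The conj argument is a no-op for real data; BLIS1_NO_CONJUGATE is used as elsewhere on this page.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int   m = 3, n = 3;
    int   a_rs = 1, a_cs = m;                 /* column-major strides */
    float a[ 9 ] = { 4.0F, 1.0F, 0.0F,        /* column 0 */
                     1.0F, 3.0F, 1.0F,        /* column 1 */
                     0.0F, 1.0F, 2.0F };      /* column 2 */
    float sigma = 0.5F;
    int   i, j;

    /* A := A + sigma*I. */
    bl1_sshiftdiag( BLIS1_NO_CONJUGATE, 0, m, n, &sigma, a, a_rs, a_cs );

    for ( i = 0; i < m; ++i )
    {
        for ( j = 0; j < n; ++j ) printf( "%5.2f ", a[ i*a_rs + j*a_cs ] );
        printf( "\n" );
    }
    return 0;
}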
void bl1_ssymmize | ( | conj1_t | conj, |
uplo1_t | uplo, | ||
int | m, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_col_storage(), bl1_is_gen_storage(), bl1_is_lower(), bl1_is_row_storage(), bl1_is_upper(), bl1_scopyv(), and bl1_zero_dim1().
Referenced by FLA_Hermitianize(), and FLA_Symmetrize().
{ float* a_src; float* a_dst; int rs_src, cs_src, inc_src; int rs_dst, cs_dst, inc_dst; int n_iter; int j; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Assume A is square. n_iter = m; // Initialize with appropriate values based on storage. if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 1; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1; } else if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = a_cs; rs_src = 0; inc_src = 1; cs_dst = 1; rs_dst = 0; inc_dst = a_cs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 0; rs_src = a_rs; inc_src = 1; cs_dst = 0; rs_dst = 1; inc_dst = a_rs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = 0; rs_src = 1; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1; } else if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = 1 * a_rs; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1 * a_rs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = a_rs; inc_src = 1 * a_cs; cs_dst = 0; rs_dst = 1 * a_cs; inc_dst = a_rs; } } else // if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = a_cs; rs_src = 0; inc_src = 1 * a_rs; cs_dst = 1 * a_rs; rs_dst = 0; inc_dst = a_cs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = 1 * a_cs; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1 * a_cs; } } for ( j = 0; j < n_iter; j++ ) { a_src = a + j*cs_src + j*rs_src; a_dst = a + j*cs_dst + j*rs_dst; bl1_scopyv( conj, j, a_src, inc_src, a_dst, inc_dst ); } }
void* bl1_vallocm | ( | unsigned int | m, |
unsigned int | n, | ||
unsigned int | elem_size | ||
) |
{ return ( void* ) BLIS1_MALLOC( m * n * elem_size ); }
void* bl1_vallocv | ( | unsigned int | n_elem, |
unsigned int | elem_size | ||
) |
{ return ( void* ) BLIS1_MALLOC( n_elem * elem_size ); }
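Note that, as the bodies above show, bl1_vallocv() and bl1_vallocm() pass the computed byte count straight to the allocator and do not check the result, so the caller handles a NULL return. Example (illustrative sketch, assuming "blis1.h" and libflame):

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int       m = 100, n = 50;

    /* Analogous to bl1_dallocm( m, n ): one contiguous m-by-n double buffer. */
    double*   a = ( double* ) bl1_vallocm( m, n, sizeof( double ) );

    /* Analogous to bl1_zallocv( n ): n dcomplex elements. */
    dcomplex* x = ( dcomplex* ) bl1_vallocv( n, sizeof( dcomplex ) );

    if ( a == NULL || x == NULL )    /* the wrappers do not check for failure */
    {
        fprintf( stderr, "allocation failed\n" );
        return 1;
    }

    /* ... use a (column-major, leading dimension m) and x here ... */

    bl1_vfree( a );
    bl1_vfree( x );
    return 0;
}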
void bl1_vfree | ( | void * | p | ) |
{
free( ( void* ) p );
}
dcomplex bl1_z0 | ( | void | )
References bl1_d0(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_zgemm(), bl1_zgemv(), bl1_zhemm(), bl1_zhemv(), bl1_zrandmr(), bl1_zsymm(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_QR_UT_form_Q_opz_var1(), and FLA_Tridiag_UT_shift_U_l_opz().
dcomplex bl1_z1 | ( | void | )
References bl1_d0(), bl1_d1(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_zgemm(), bl1_zgemv(), bl1_zhemm(), bl1_zhemv(), bl1_zher2k(), bl1_zherk(), bl1_zrandmr(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), FLA_Bsvd_v_opz_var2(), FLA_QR_UT_form_Q_opz_var1(), FLA_Tevd_n_opz_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opz_var1(), FLA_Tevd_v_opz_var2(), and FLA_Tridiag_UT_shift_U_l_opz().
dcomplex bl1_z1h | ( | void | )
References bl1_d0(), bl1_d1h(), dcomplex::imag, and dcomplex::real.
dcomplex bl1_z2 | ( | void | )
References bl1_d0(), bl1_d2(), dcomplex::imag, and dcomplex::real.
dcomplex* bl1_zallocm | ( | unsigned int | m, |
unsigned int | n | ||
) |
dcomplex* bl1_zallocv | ( | unsigned int | n_elem | ) |
Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpyv(), bl1_zgemv(), bl1_zger(), bl1_zhemv(), bl1_zher(), bl1_zher2(), bl1_zsymv_blas(), bl1_zsyr2_blas(), bl1_zsyr_blas(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsv(), and bl1_ztrsvsx().
void bl1_zapdiagmv | ( | side1_t | side, |
conj1_t | conj, | ||
int | m, | ||
int | n, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_left(), bl1_is_row_storage(), bl1_zero_dim2(), bl1_zewscalv(), and bl1_zscalv().
Referenced by FLA_Apply_diag_matrix().
{
    dcomplex* chi;
    dcomplex* a_begin;
    int       inca, lda;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize with optimal values for column-major storage.
    inca   = a_rs;
    lda    = a_cs;
    n_iter = n;
    n_elem = m;

    // An optimization: if A is row-major, then we can proceed as if the
    // operation were transposed (applying the diagonal values in x from the
    // opposite side) for increased spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem );
        bl1_swap_ints( lda, inca );
        bl1_toggle_side( side );
    }

    if ( bl1_is_left( side ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;

            bl1_zewscalv( conj, n_elem, x, incx, a_begin, inca );
        }
    }
    else
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            chi     = x + j*incx;

            bl1_zscalv( conj, n_elem, chi, a_begin, inca );
        }
    }
}
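Example (illustrative sketch): with side = left, bl1_zapdiagmv() computes A := diag(x) * A (row i of A is scaled by x[i]); with side = right it computes A := A * diag(x). The enum values BLIS1_LEFT and BLIS1_RIGHT are assumed here; BLIS1_NO_CONJUGATE appears elsewhere on this page.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int      m = 2, n = 2;
    int      a_rs = 1, a_cs = m;                      /* column-major strides */
    dcomplex a[ 4 ] = { { 1.0, 0.0 }, { 2.0, 0.0 },   /* column 0 */
                        { 3.0, 0.0 }, { 4.0, 0.0 } }; /* column 1 */
    dcomplex x[ 2 ] = { { 2.0, 0.0 }, { 0.0, 1.0 } };
    int      i, j;

    /* A := diag(x) * A: row 0 is scaled by 2, row 1 by i. */
    bl1_zapdiagmv( BLIS1_LEFT, BLIS1_NO_CONJUGATE,
                   m, n, x, 1, a, a_rs, a_cs );

    for ( i = 0; i < m; ++i )
    {
        for ( j = 0; j < n; ++j )
        {
            dcomplex* aij = a + i*a_rs + j*a_cs;
            printf( "(% .1f,% .1f) ", aij->real, aij->imag );
        }
        printf( "\n" );
    }
    return 0;
}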
void bl1_zcreate_contigm | ( | int | m, |
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_set_contig_strides(), bl1_zallocm(), bl1_zcopymt(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zgemm(), bl1_zgemv(), bl1_zger(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrsm(), and bl1_ztrsmsx().
{
    int m_contig, n_contig;

    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Initialize dimensions assuming no transposition needed during copy.
        m_contig = m;
        n_contig = n;

        /*
        // Transpose the dimensions of the contiguous matrix, if requested.
        if ( bl1_does_trans( trans_copy ) )
        {
            m_contig = n;
            n_contig = m;
        }
        */

        // Allocate temporary contiguous storage for the matrix.
        *a = bl1_zallocm( m_contig, n_contig );

        // Set the row and column strides for the temporary matrix.
        bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs );

        // Initialize the contiguous matrix with the contents of the original.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig,
                     a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs );
    }
}
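Example (illustrative sketch): the create/free pairs implement a "pack to contiguous storage, operate, copy back" idiom for matrices with general (non-unit) strides. The caller saves the original fields and passes the working ones by address, as the wrappers listed above do; if the input is already row- or column-major the calls leave everything untouched. bl1_zfree_saved_contigm() is documented further below.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int       m = 3, n = 2;

    /* A 3-by-2 view with general strides: every other row and every other
       column of a 6-by-4 column-major array. */
    dcomplex  big[ 6 * 4 ];
    dcomplex* a    = big;
    int       a_rs = 2;                  /* row stride    */
    int       a_cs = 2 * 6;              /* column stride */

    dcomplex* a_save    = a;             /* remember the original fields */
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       i;

    for ( i = 0; i < 6 * 4; ++i ) { big[ i ].real = ( double ) i; big[ i ].imag = 0.0; }

    /* General stride, so a is redirected to a fresh contiguous copy and
       a_rs/a_cs are overwritten with contiguous strides. */
    bl1_zcreate_contigm( m, n, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );

    /* ... operate on the contiguous matrix ( a, a_rs, a_cs ) ... */
    a[ 0 ].real += 1.0;

    /* Copy the (possibly modified) contents back and release the copy. */
    bl1_zfree_saved_contigm( m, n, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );

    printf( "big[0].real = %.1f\n", big[ 0 ].real );   /* now 1.0 */
    return 0;
}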
void bl1_zcreate_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_set_contig_strides(), bl1_zallocm(), and bl1_zcopymr().
Referenced by bl1_zcreate_contigmsr(), bl1_zhemm(), bl1_zhemv(), bl1_zher(), bl1_zher2(), bl1_zher2k(), bl1_zherk(), bl1_zsymm(), bl1_zsymv(), bl1_zsyr(), bl1_zsyr2(), bl1_zsyr2k(), bl1_zsyrk(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsm(), bl1_ztrsmsx(), bl1_ztrsv(), and bl1_ztrsvsx().
{
    int m_contig, n_contig;

    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Initialize dimensions assuming no transposition needed during copy.
        m_contig = m;
        n_contig = n;

        /*
        // Transpose the dimensions of the contiguous matrix, if requested.
        if ( bl1_does_trans( trans_copy ) )
        {
            m_contig = n;
            n_contig = m;
        }
        */

        // Allocate temporary contiguous storage for the matrix.
        *a = bl1_zallocm( m_contig, n_contig );

        // Set the row and column strides for the temporary matrix.
        bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs );

        // Initialize the contiguous matrix with the contents of the original.
        bl1_zcopymr( uplo, m_contig, n_contig,
                     a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs );
    }
}
void bl1_zcreate_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_left(), and bl1_zcreate_contigmr().
{
    int dim_a;

    // Choose the dimension of the matrix based on the side parameter.
    if ( bl1_is_left( side ) ) dim_a = m;
    else                       dim_a = n;

    // Call the simple version with chosen dimensions.
    bl1_zcreate_contigmr( uplo, dim_a, dim_a,
                          a_save, a_rs_save, a_cs_save, a, a_rs, a_cs );
}
void bl1_zcreate_contigmt | ( | trans1_t | trans_dims, |
int | m, | ||
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_does_trans(), bl1_is_gen_storage(), bl1_set_contig_strides(), bl1_zallocm(), bl1_zcopymt(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zgemm(), bl1_zher2k(), bl1_zherk(), bl1_zsyr2k(), and bl1_zsyrk().
{
    int m_contig, n_contig;

    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Transpose the dimensions if requested.
        if ( bl1_does_trans( trans_dims ) )
            bl1_swap_ints( m, n );

        // Initialize dimensions assuming no transposition needed during copy.
        m_contig = m;
        n_contig = n;

        /*
        // Transpose the dimensions of the contiguous matrix, if requested.
        if ( bl1_does_trans( trans_copy ) )
        {
            m_contig = n;
            n_contig = m;
        }
        */

        // Allocate temporary contiguous storage for the matrix.
        *a = bl1_zallocm( m_contig, n_contig );

        // Set the row and column strides for the temporary matrix.
        bl1_set_contig_strides( m_contig, n_contig, a_rs, a_cs );

        // Initialize the contiguous matrix with the contents of the original.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE, m_contig, n_contig,
                     a_save, a_rs_save, a_cs_save, *a, *a_rs, *a_cs );
    }
}
void bl1_zdapdiagmv | ( | side1_t | side, |
conj1_t | conj, | ||
int | m, | ||
int | n, | ||
double * | x, | ||
int | incx, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_left(), bl1_is_row_storage(), bl1_zdewscalv(), bl1_zdscalv(), and bl1_zero_dim2().
Referenced by FLA_Apply_diag_matrix().
{
    double*   chi;
    dcomplex* a_begin;
    int       inca, lda;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize with optimal values for column-major storage.
    inca   = a_rs;
    lda    = a_cs;
    n_iter = n;
    n_elem = m;

    // An optimization: if A is row-major, then we can proceed as if the
    // operation were transposed (applying the diagonal values in x from the
    // opposite side) for increased spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem );
        bl1_swap_ints( lda, inca );
        bl1_toggle_side( side );
    }

    if ( bl1_is_left( side ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;

            bl1_zdewscalv( conj, n_elem, x, incx, a_begin, inca );
        }
    }
    else
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            chi     = x + j*incx;

            bl1_zdscalv( conj, n_elem, chi, a_begin, inca );
        }
    }
}
void bl1_zdewinvscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdewinvscalv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ double* a_begin; dcomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_zdewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_zdewinvscalv | ( | conj1_t | conj, |
int | n, | ||
double * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
Referenced by bl1_zdewinvscalmt().
{
    double*   chi;
    dcomplex* psi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_zdinvscals( chi, psi );
    }
}
void bl1_zdewscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdewscalv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ double* a_begin; dcomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_zdewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_zdewscalv | ( | conj1_t | conj, |
int | n, | ||
double * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
Referenced by bl1_zdapdiagmv(), and bl1_zdewscalmt().
{
    double*   chi;
    dcomplex* psi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;
        psi = y + i*incy;

        bl1_zdscals( chi, psi );
    }
}
void bl1_zdscalediag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
double * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by FLA_Scale_diag().
void bl1_zdshiftdiag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
double * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::real.
Referenced by FLA_Shift_diag().
void bl1_zewinvscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zewinvscalv(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_elemwise().
{ dcomplex* a_begin; dcomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewinvscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_zewinvscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_zewinvscalv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_is_conj().
Referenced by bl1_zewinvscalmt().
{
    dcomplex* chi;
    dcomplex* psi;
    dcomplex  conjchi;
    int       i;

    if ( bl1_is_conj( conj ) )
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_zcopyconj( chi, &conjchi );
            bl1_zinvscals( &conjchi, psi );
        }
    }
    else
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_zinvscals( chi, psi );
        }
    }
}
void bl1_zewscalmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zewscalv(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Scal_elemwise().
{ dcomplex* a_begin; dcomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying ewscal // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrices by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_zewscalv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_zewscalv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_is_conj().
Referenced by bl1_zapdiagmv(), and bl1_zewscalmt().
{
    dcomplex* chi;
    dcomplex* psi;
    dcomplex  conjchi;
    int       i;

    if ( bl1_is_conj( conj ) )
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_zcopyconj( chi, &conjchi );
            bl1_zscals( &conjchi, psi );
        }
    }
    else
    {
        for ( i = 0; i < n; ++i )
        {
            chi = x + i*incx;
            psi = y + i*incy;

            bl1_zscals( chi, psi );
        }
    }
}
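Example (illustrative sketch): bl1_zewscalv() scales y elementwise by x, conjugating x first when conjugation is requested, i.e. y[i] := conj(x[i]) * y[i]. The enum value BLIS1_CONJUGATE is an assumption here (only BLIS1_NO_CONJUGATE appears on this page).

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    dcomplex x[ 2 ] = { { 0.0, 1.0 }, { 2.0, 0.0 } };
    dcomplex y[ 2 ] = { { 1.0, 1.0 }, { 1.0, 1.0 } };
    int      i;

    /* y[0] := conj(i) * (1+i) = (1,-1); y[1] := 2 * (1+i) = (2,2). */
    bl1_zewscalv( BLIS1_CONJUGATE, 2, x, 1, y, 1 );

    for ( i = 0; i < 2; ++i )
        printf( "y[%d] = (% .1f,% .1f)\n", i, y[ i ].real, y[ i ].imag );
    return 0;
}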
void bl1_zfree | ( | dcomplex * | p | )
Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpyv(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zfree_saved_contigmr(), bl1_zfree_saved_contigmsr(), bl1_zgemm(), bl1_zgemv(), bl1_zger(), bl1_zhemm(), bl1_zhemv(), bl1_zher(), bl1_zher2(), bl1_zher2k(), bl1_zherk(), bl1_zsymm(), bl1_zsymv_blas(), bl1_zsyr2_blas(), bl1_zsyr2k(), bl1_zsyr_blas(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsm(), bl1_ztrsmsx(), bl1_ztrsv(), and bl1_ztrsvsx().
{
free( ( void* ) p );
}
void bl1_zfree_contigm | ( | dcomplex * | a_save, |
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), and bl1_zfree().
Referenced by bl1_zgemm(), bl1_zgemv(), bl1_zhemm(), bl1_zhemv(), bl1_zher2k(), bl1_zherk(), bl1_zsymm(), bl1_zsymv(), bl1_zsyr2k(), bl1_zsyrk(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsm(), bl1_ztrsmsx(), bl1_ztrsv(), and bl1_ztrsvsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Free the temporary contiguous storage for the matrix.
        bl1_zfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_zfree_saved_contigm | ( | int | m, |
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_zcopymt(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zgemm(), bl1_zger(), bl1_zhemm(), bl1_zher(), bl1_zher2(), bl1_zsymm(), bl1_zsyr(), bl1_zsyr2(), bl1_ztrmm(), bl1_ztrmmsx(), bl1_ztrsm(), and bl1_ztrsmsx().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE, m, n,
                     *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_zfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_zfree_saved_contigmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_zcopymr(), and bl1_zfree().
Referenced by bl1_zher2k(), bl1_zherk(), bl1_zsyr2k(), and bl1_zsyrk().
{
    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_zcopymr( uplo, m, n,
                     *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_zfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_zfree_saved_contigmsr | ( | side1_t | side, |
uplo1_t | uplo, | ||
int | m, | ||
int | n, | ||
dcomplex * | a_save, | ||
int | a_rs_save, | ||
int | a_cs_save, | ||
dcomplex ** | a, | ||
int * | a_rs, | ||
int * | a_cs | ||
) |
References bl1_is_gen_storage(), bl1_is_left(), bl1_zcopymr(), and bl1_zfree().
{
    int dim_a;

    // Choose the dimension of the matrix based on the side parameter.
    if ( bl1_is_left( side ) ) dim_a = m;
    else                       dim_a = n;

    if ( bl1_is_gen_storage( a_rs_save, a_cs_save ) )
    {
        // Copy the contents of the temporary matrix back to the original.
        bl1_zcopymr( uplo, dim_a, dim_a,
                     *a, *a_rs, *a_cs, a_save, a_rs_save, a_cs_save );

        // Free the temporary contiguous storage for the matrix.
        bl1_zfree( *a );

        // Restore the original matrix address.
        *a = a_save;

        // Restore the original row and column strides.
        *a_rs = a_rs_save;
        *a_cs = a_cs_save;
    }
}
void bl1_zident | ( | int | m, |
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by FLA_UDdate_UT_opz_var1().
void bl1_zinvert2s | ( | conj1_t | conj, |
dcomplex * | alpha, | ||
dcomplex * | beta | ||
) |
References bl1_is_conj(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_zinvscalm(), and bl1_zinvscalv().
void bl1_zinverts | ( | conj1_t | conj, |
dcomplex * | alpha | ||
) |
void bl1_zinvertv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_is_conj(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Invert().
{
    double    one = 1.0;
    double    temp;
    double    s, xr_s, xi_s;
    double    conjsign;
    dcomplex* chi;
    int       i;

    if ( bl1_is_conj( conj ) ) conjsign =  one;
    else                       conjsign = -one;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        s    = bl1_fmaxabs( chi->real, chi->imag );
        xr_s = chi->real / s;
        xi_s = chi->imag / s;
        temp = xr_s * chi->real + xi_s * chi->imag;

        chi->real = xr_s            / temp;
        chi->imag = conjsign * xi_s / temp;
    }
}
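Example (illustrative sketch): bl1_zinvertv() overwrites each element with its reciprocal (or the reciprocal of its conjugate when conjugation is requested), using the scaled formulation above to avoid intermediate overflow.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    dcomplex x[ 2 ] = { { 0.0, 2.0 }, { 3.0, 4.0 } };
    int      i;

    /* x[0] := 1/(2i) = (0,-0.5); x[1] := 1/(3+4i) = (0.12,-0.16). */
    bl1_zinvertv( BLIS1_NO_CONJUGATE, 2, x, 1 );

    for ( i = 0; i < 2; ++i )
        printf( "x[%d] = (% .2f,% .2f)\n", i, x[ i ].real, x[ i ].imag );
    return 0;
}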
dcomplex bl1_zm1 | ( | void | )
References bl1_d0(), bl1_dm1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Fused_Ahx_Axpy_Ax_opz_var1(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1().
dcomplex bl1_zm1h | ( | void | )
References bl1_d0(), bl1_dm1h(), dcomplex::imag, and dcomplex::real.
dcomplex bl1_zm2 | ( | void | )
References bl1_d0(), bl1_dm2(), dcomplex::imag, and dcomplex::real.
void bl1_zmaxabsm | ( | int | m, |
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | maxabs | ||
) |
References bl1_d0(), bl1_is_row_storage(), bl1_zero_dim2(), and bl1_zmaxabsv().
Referenced by FLA_Max_abs_value().
{
    double    zero = bl1_d0();
    dcomplex* a_begin;
    double    maxabs_cand;
    double    maxabs_temp;
    int       inca, lda;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) )
    {
        *maxabs = zero;
        return;
    }

    // Initialize with optimal values for column-major storage.
    inca   = a_rs;
    lda    = a_cs;
    n_iter = n;
    n_elem = m;

    // An optimization: if A is row-major, then let's access the matrix by
    // rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem );
        bl1_swap_ints( lda, inca );
    }

    // Initialize the maximum absolute value candidate to the first element.
    bl1_zdabsval2( a, &maxabs_cand );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zmaxabsv( n_elem, a_begin, inca, &maxabs_temp );

        if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp;
    }

    *maxabs = maxabs_cand;
}
void bl1_zmaxabsmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | maxabs | ||
) |
References bl1_d0(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and bl1_zmaxabsv().
Referenced by FLA_Max_abs_value_herm().
{ double zero = bl1_d0(); dcomplex* a_begin; double maxabs_cand; double maxabs_temp; int inca, lda; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) { *maxabs = zero; return; } // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize the maximum absolute value candidate to the first element. bl1_zdabsval2( a, &maxabs_cand ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_zmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; bl1_zmaxabsv( n_elem, a_begin, inca, &maxabs_temp ); if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp; } } *maxabs = maxabs_cand; }
void bl1_zmaxabsv | ( | int | n, |
dcomplex * | x, | ||
int | incx, | ||
double * | maxabs | ||
) |
Referenced by bl1_zmaxabsm(), and bl1_zmaxabsmr().
{
    dcomplex* chi;
    double    maxabs_cand;
    double    maxabs_temp;
    int       i;

    bl1_zdabsval2( x, &maxabs_cand );

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_zdabsval2( chi, &maxabs_temp );

        if ( maxabs_temp > maxabs_cand ) maxabs_cand = maxabs_temp;
    }

    *maxabs = maxabs_cand;
}
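Example (illustrative sketch): bl1_zmaxabsv() writes the largest absolute value found in the vector to *maxabs.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    dcomplex x[ 3 ] = { { 1.0, 0.0 }, { 3.0, 4.0 }, { 0.0, -2.0 } };
    double   maxabs;

    bl1_zmaxabsv( 3, x, 1, &maxabs );      /* expected: 5.0, the modulus of 3+4i */

    printf( "max |x_i| = %.1f\n", maxabs );
    return 0;
}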
void bl1_zrandm | ( | int | m, |
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_zero_dim2(), and bl1_zrandv().
Referenced by FLA_Random_matrix().
{
    dcomplex* a_begin;
    int       inca, lda;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize with optimal values for column-major storage.
    inca   = a_rs;
    lda    = a_cs;
    n_iter = n;
    n_elem = m;

    // An optimization: if A is row-major, then let's access the matrix by
    // rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem );
        bl1_swap_ints( lda, inca );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zrandv( n_elem, a_begin, inca );
    }
}
void bl1_zrandmr | ( | uplo1_t | uplo, |
diag1_t | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_nonunit_diag(), bl1_is_row_storage(), bl1_is_unit_diag(), bl1_is_upper(), bl1_is_zero_diag(), bl1_z0(), bl1_z1(), bl1_zero_dim2(), bl1_zinvscalv(), bl1_zrands(), bl1_zrandv(), bl1_zsetv(), BLIS1_NO_CONJUGATE, and dcomplex::real.
Referenced by FLA_Random_tri_matrix().
{ dcomplex* a_begin; dcomplex* ajj; dcomplex one; dcomplex zero; dcomplex ord; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } // Initialize some scalars. one = bl1_z1(); zero = bl1_z0(); ord = bl1_z0(); ord.real = ( double ) bl1_max( m, n ); if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Randomize super-diagonal elements. bl1_zrandv( n_elem, a_begin, inca ); // Normalize super-diagonal elements by order of the matrix. bl1_zinvscalv( BLIS1_NO_CONJUGATE, n_elem, &ord, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_zrands( ajj ); bl1_zabsval2( ajj, ajj ); bl1_zadd3( ajj, &one, ajj ); } // Initialize sub-diagonal elements to zero. bl1_zsetv( n_elem_max - j - 1, &zero, ajj + inca, inca ); } } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j, n_elem_max ); a_begin = a + j*lda; // Initialize super-diagonal to zero. bl1_zsetv( n_elem, &zero, a_begin, inca ); // Initialize diagonal and sub-diagonal elements only if there are // elements left in the column (ie: j < n_elem_max). if ( j < n_elem_max ) { ajj = a_begin + j*inca; // Initialize diagonal element. if ( bl1_is_unit_diag( diag ) ) *ajj = one; else if ( bl1_is_zero_diag( diag ) ) *ajj = zero; else if ( bl1_is_nonunit_diag( diag ) ) { // We want positive diagonal elements between 1 and 2. bl1_zrands( ajj ); bl1_zabsval2( ajj, ajj ); bl1_zadd3( ajj, &one, ajj ); } // Randomize sub-diagonal elements. bl1_zrandv( n_elem_max - j - 1, ajj + inca, inca ); // Normalize sub-diagonal elements by order of the matrix. bl1_zinvscalv( BLIS1_NO_CONJUGATE, n_elem_max - j - 1, &ord, ajj + inca, inca ); } } } }
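Example (illustrative sketch): bl1_zrandmr() builds a random triangular matrix — the triangle opposite uplo is set to zero, the diagonal is set according to diag (here non-unit, i.e. positive real entries), and the remaining strict triangle is randomized and scaled by 1/max(m,n), as in FLA_Random_tri_matrix(). The enum values BLIS1_LOWER_TRIANGULAR and BLIS1_NONUNIT_DIAG are assumptions.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "blis1.h"

int main( void )
{
    int       m = 4, n = 4;
    int       a_rs = 1, a_cs = m;              /* column-major strides */
    dcomplex* a = bl1_zallocm( m, n );
    int       i, j;

    srand( ( unsigned int ) time( NULL ) );

    bl1_zrandmr( BLIS1_LOWER_TRIANGULAR, BLIS1_NONUNIT_DIAG,
                 m, n, a, a_rs, a_cs );

    for ( i = 0; i < m; ++i )
    {
        for ( j = 0; j < n; ++j )
        {
            dcomplex* aij = a + i*a_rs + j*a_cs;
            printf( "(% .2f,% .2f) ", aij->real, aij->imag );
        }
        printf( "\n" );
    }

    bl1_zfree( a );
    return 0;
}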
void bl1_zrands | ( | dcomplex * | alpha | ) |
References bl1_drands(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_zrandmr(), and bl1_zrandv().
{
    bl1_drands( &(alpha->real) );
    bl1_drands( &(alpha->imag) );
}
void bl1_zrandv | ( | int | n, |
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_zrands().
Referenced by bl1_zrandm(), and bl1_zrandmr().
{
    dcomplex* chi;
    int       i;

    for ( i = 0; i < n; ++i )
    {
        chi = x + i*incx;

        bl1_zrands( chi );
    }
}
void bl1_zscalediag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
dcomplex * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
Referenced by FLA_Scale_diag(), and FLA_UDdate_UT_opz_var1().
void bl1_zsetdiag | ( | int | offset, |
int | m, | ||
int | n, | ||
dcomplex * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by FLA_Set_diag(), FLA_Set_offdiag(), and FLA_Triangularize().
void bl1_zsetm | ( | int | m, |
int | n, | ||
dcomplex * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), FLA_Bsvd_v_opz_var2(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Set(), FLA_Tevd_n_opz_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opz_var1(), FLA_Tevd_v_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), and FLA_Tridiag_UT_l_step_opz_var3().
void bl1_zsetmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and bl1_zsetv().
Referenced by FLA_Setr(), and FLA_Triangularize().
{
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize with optimal values for column-major storage.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix by
    // rows instead of by columns to increase spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j, n_elem_max );
            a_begin = a + j*lda;

            bl1_zsetv( n_elem, sigma, a_begin, inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j - 1 );
            a_begin = a + j*lda + (j + 1)*inca;

            bl1_zsetv( n_elem, sigma, a_begin, inca );
        }
    }
}
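Example (illustrative sketch): bl1_zsetmr() assigns *sigma to the strict triangle named by uplo and leaves the diagonal alone, which is the kind of update FLA_Triangularize() needs. BLIS1_UPPER_TRIANGULAR is an assumed enum value.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int      m = 3, n = 3;
    int      a_rs = 1, a_cs = m;          /* column-major strides */
    dcomplex a[ 9 ];
    dcomplex one  = { 1.0, 0.0 };
    dcomplex zero = { 0.0, 0.0 };
    int      i, j;

    bl1_zsetm( m, n, &one, a, a_rs, a_cs );     /* A := all ones */

    /* Zero the strictly upper triangle; the diagonal is untouched. */
    bl1_zsetmr( BLIS1_UPPER_TRIANGULAR, m, n, &zero, a, a_rs, a_cs );

    for ( i = 0; i < m; ++i )
    {
        for ( j = 0; j < n; ++j )
            printf( "%.0f ", a[ i*a_rs + j*a_cs ].real );
        printf( "\n" );
    }
    return 0;
}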
void bl1_zsetv | ( | int | m, |
dcomplex * | sigma, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by bl1_zrandmr(), bl1_zsetmr(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_subdiagonal_opt(), FLA_Tridiag_UT_shift_U_l_opz(), and FLA_Tridiag_UT_u_realify_opt().
void bl1_zshiftdiag | ( | conj1_t | conj, |
int | offset, | ||
int | m, | ||
int | n, | ||
dcomplex * | sigma, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References dcomplex::imag, and dcomplex::real.
Referenced by FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), and FLA_Shift_diag().
{
    dcomplex* alpha;
    dcomplex  sigma_conj;
    int       i, j;

    bl1_zcopys( conj, sigma, &sigma_conj );

    i = j = 0;

    if      ( offset < 0 ) i = -offset;
    else if ( offset > 0 ) j =  offset;

    while ( i < m && j < n )
    {
        alpha = a + i*a_rs + j*a_cs;

        alpha->real += sigma_conj.real;
        alpha->imag += sigma_conj.imag;

        ++i;
        ++j;
    }
}
void bl1_zsymmize | ( | conj1_t | conj, |
uplo1_t | uplo, | ||
int | m, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_d0(), bl1_is_col_storage(), bl1_is_conj(), bl1_is_gen_storage(), bl1_is_lower(), bl1_is_row_storage(), bl1_is_upper(), bl1_zcopyv(), bl1_zero_dim1(), and dcomplex::imag.
Referenced by FLA_Hermitianize(), and FLA_Symmetrize().
{ dcomplex* a_src; dcomplex* a_dst; dcomplex* a_jj; int rs_src, cs_src, inc_src; int rs_dst, cs_dst, inc_dst; int n_iter; int j; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Assume A is square. n_iter = m; // Initialize with appropriate values based on storage. if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 1; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1; } else if ( bl1_is_col_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = a_cs; rs_src = 0; inc_src = 1; cs_dst = 1; rs_dst = 0; inc_dst = a_cs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { cs_src = 0; rs_src = a_rs; inc_src = 1; cs_dst = 0; rs_dst = 1; inc_dst = a_rs; } else if ( bl1_is_row_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { cs_src = 0; rs_src = 1; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1; } else if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_lower( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = 1 * a_rs; rs_src = 0; inc_src = a_cs; cs_dst = a_cs; rs_dst = 0; inc_dst = 1 * a_rs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = a_rs; inc_src = 1 * a_cs; cs_dst = 0; rs_dst = 1 * a_cs; inc_dst = a_rs; } } else // if ( bl1_is_gen_storage( a_rs, a_cs ) && bl1_is_upper( uplo ) ) { // General stride with column-major tilt looks similar to column-major. // General stride with row-major tilt looks similar to row-major. if ( a_rs < a_cs ) { cs_src = a_cs; rs_src = 0; inc_src = 1 * a_rs; cs_dst = 1 * a_rs; rs_dst = 0; inc_dst = a_cs; } else // if ( a_rs > a_cs ) { cs_src = 0; rs_src = 1 * a_cs; inc_src = a_rs; cs_dst = 0; rs_dst = a_rs; inc_dst = 1 * a_cs; } } for ( j = 0; j < n_iter; j++ ) { a_src = a + j*cs_src + j*rs_src; a_dst = a + j*cs_dst + j*rs_dst; bl1_zcopyv( conj, j, a_src, inc_src, a_dst, inc_dst ); if ( bl1_is_conj( conj ) ) { a_jj = a + j*a_rs + j*a_cs; a_jj->imag = bl1_d0(); } } }
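Example (illustrative sketch): bl1_zsymmize() copies the triangle named by uplo onto the opposite triangle; when conjugation is requested the copy is conjugated and the imaginary parts of the diagonal are zeroed, producing a Hermitian matrix from data stored in one triangle (see FLA_Hermitianize() / FLA_Symmetrize()). BLIS1_CONJUGATE and BLIS1_LOWER_TRIANGULAR are assumed enum values.

#include <stdio.h>
#include "blis1.h"

int main( void )
{
    int      m = 2;
    int      a_rs = 1, a_cs = m;            /* column-major strides */
    /* Only the lower triangle (and diagonal) is meaningful on input. */
    dcomplex a[ 4 ] = { { 2.0, 0.5 },       /* a(0,0): imag will be zeroed  */
                        { 3.0, 1.0 },       /* a(1,0)                       */
                        { 9.0, 9.0 },       /* a(0,1): garbage, overwritten */
                        { 4.0, 0.0 } };     /* a(1,1)                       */
    int      i, j;

    /* Mirror conj of the lower triangle into the upper triangle. */
    bl1_zsymmize( BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, m, a, a_rs, a_cs );

    for ( i = 0; i < m; ++i )
    {
        for ( j = 0; j < m; ++j )
        {
            dcomplex* aij = a + i*a_rs + j*a_cs;
            printf( "(% .1f,% .1f) ", aij->real, aij->imag );
        }
        printf( "\n" );
    }
    return 0;
}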