libflame
12600
|
Functions | |
void | bl1_sgemv (trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy) |
void | bl1_dgemv (trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy) |
void | bl1_cgemv (trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy) |
void | bl1_zgemv (trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy) |
void | bl1_sgemv_blas (trans1_t transa, int m, int n, float *alpha, float *a, int lda, float *x, int incx, float *beta, float *y, int incy) |
void | bl1_dgemv_blas (trans1_t transa, int m, int n, double *alpha, double *a, int lda, double *x, int incx, double *beta, double *y, int incy) |
void | bl1_cgemv_blas (trans1_t transa, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy) |
void | bl1_zgemv_blas (trans1_t transa, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy) |
void bl1_cgemv( trans1_t  transa,
                conj1_t   conjx,
                int       m,
                int       n,
                scomplex* alpha,
                scomplex* a,
                int       a_rs,
                int       a_cs,
                scomplex* x,
                int       incx,
                scomplex* beta,
                scomplex* y,
                int       incy )
References bl1_c0(), bl1_c1(), bl1_callocv(), bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_ccreate_contigm(), bl1_cfree(), bl1_cfree_contigm(), bl1_cgemv_blas(), bl1_cscalv(), bl1_does_trans(), bl1_is_conj(), bl1_is_conjnotrans(), bl1_is_row_storage(), bl1_zero_dim2(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Accum_T_UT_fc_opc_var1(), FLA_Accum_T_UT_fr_opc_var1(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_CAQR2_UT_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var2(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var1(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_LQ_UT_opc_var2(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_QR2_UT_opc_var1(), FLA_QR_UT_opc_var2(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Ttmm_l_opc_var2(), and FLA_Ttmm_u_opc_var2().
{
    // Single-precision complex matrix-vector multiply built on top of
    // bl1_cgemv_blas(). Arbitrary row/column strides for A and optional
    // conjugation of x and/or A are handled here before delegating.

    // Original matrix description, kept so the contiguous-copy helpers
    // can be handed the caller's values again at cleanup time.
    scomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    scomplex  c_zero    = bl1_c0();
    scomplex  c_one     = bl1_c1();
    scomplex* x_work;
    scomplex* ax_buf;
    int       lda, inca;
    int       x_len;
    int       incx_work;
    int       incax;
    int       x_is_copied;

    // Nothing to multiply when a dimension is zero: only y := beta * y
    // remains, over n elements if A is transposed and m otherwise.
    if ( bl1_zero_dim2( m, n ) )
    {
        int y_len = bl1_does_trans( transa ) ? n : m;

        bl1_cscalv( BLIS1_NO_CONJUGATE, y_len, beta, y, incy );
        return;
    }

    // Work on a temporary contiguous copy of A if one is required;
    // a, a_rs and a_cs are updated to describe whichever matrix is used.
    bl1_ccreate_contigm( m, n, a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;

    // A row-major matrix is passed to the column-major BLAS by swapping
    // the dimensions and strides and toggling the transposition.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( m, n );
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( transa );
    }

    // A private copy of x is needed whenever x must be conjugated, and
    // also when A is conjugated without being transposed.
    x_is_copied = ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) );

    if ( x_is_copied )
    {
        x_len     = bl1_does_trans( transa ) ? m : n;
        x_work    = bl1_callocv( x_len );
        incx_work = 1;

        bl1_ccopyv( conjx, x_len, x, incx, x_work, incx_work );
    }
    else
    {
        // No conjugation involved: use the caller's x directly.
        x_work    = x;
        incx_work = incx;
    }

    if ( !bl1_is_conjnotrans( transa ) )
    {
        // notrans, trans, or conjtrans: hand straight to the BLAS wrapper.
        bl1_cgemv_blas( transa,
                        m, n,
                        alpha,
                        a, lda,
                        x_work, incx_work,
                        beta,
                        y, incy );
    }
    else
    {
        // conjnotrans is computed without explicitly conjugating A, using
        // conj(A) * x == conj( A * conj(x) ).

        // Scratch vector for A * conj(x); no transposition here, so it
        // has the same length as y, namely m.
        ax_buf = bl1_callocv( m );
        incax  = 1;

        // Conjugate the private copy of x in place.
        bl1_cconjv( n, x_work, incx_work );

        // ax_buf := A * conj(x)
        bl1_cgemv_blas( BLIS1_NO_TRANSPOSE,
                        m, n,
                        &c_one,
                        a, lda,
                        x_work, incx_work,
                        &c_zero,
                        ax_buf, incax );

        // y := beta * y
        bl1_cscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

        // y := y + alpha * conj( ax_buf )
        bl1_caxpyv( BLIS1_CONJUGATE, m, alpha, ax_buf, incax, y, incy );

        bl1_cfree( ax_buf );
    }

    // Release the private copy of x, if one was made.
    if ( x_is_copied )
        bl1_cfree( x_work );

    // Release the temporary contiguous matrix.
    bl1_cfree_contigm( a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );
}
void bl1_cgemv_blas( trans1_t  transa,
                     int       m,
                     int       n,
                     scomplex* alpha,
                     scomplex* a,
                     int       lda,
                     scomplex* x,
                     int       incx,
                     scomplex* beta,
                     scomplex* y,
                     int       incy )
References bl1_param_map_to_netlib_trans(), cblas_cgemv(), CblasColMajor, and F77_cgemv().
Referenced by bl1_cgemv().
{
    // Thin dispatch to the underlying single-precision complex GEMV:
    // CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise the
    // Fortran-77 BLAS interface. A is always passed as column-major.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the trans1_t value into its CBLAS enum counterpart.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    cblas_cgemv( CblasColMajor,
                 netlib_trans,
                 m, n,
                 alpha,
                 a, lda,
                 x, incx,
                 beta,
                 y, incy );
#else
    char netlib_trans;

    // Translate the trans1_t value into the BLAS character code.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // The Fortran interface takes every scalar argument by address.
    F77_cgemv( &netlib_trans,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bl1_dgemv( trans1_t transa,
                conj1_t  conjx,
                int      m,
                int      n,
                double*  alpha,
                double*  a,
                int      a_rs,
                int      a_cs,
                double*  x,
                int      incx,
                double*  beta,
                double*  y,
                int      incy )
References bl1_dcreate_contigm(), bl1_dfree_contigm(), bl1_dgemv_blas(), bl1_does_trans(), bl1_dscalv(), bl1_is_row_storage(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Accum_T_UT_fc_opd_var1(), FLA_Accum_T_UT_fr_opd_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_CAQR2_UT_opd_var1(), FLA_Chol_l_opd_var2(), FLA_Chol_u_opd_var2(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var1(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_LQ_UT_opd_var2(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_QR2_UT_opd_var1(), FLA_QR_UT_opd_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Ttmm_l_opd_var2(), and FLA_Ttmm_u_opd_var2().
{
    // Double-precision real matrix-vector multiply built on top of
    // bl1_dgemv_blas(). Arbitrary row/column strides for A are handled
    // here; conjx is not consulted by this routine.

    // Original matrix description, kept so the contiguous-copy helpers
    // can be handed the caller's values again at cleanup time.
    double* a_orig    = a;
    int     a_rs_orig = a_rs;
    int     a_cs_orig = a_cs;
    int     lda, inca;

    // Nothing to multiply when a dimension is zero: only y := beta * y
    // remains, over n elements if A is transposed and m otherwise.
    if ( bl1_zero_dim2( m, n ) )
    {
        int y_len = bl1_does_trans( transa ) ? n : m;

        bl1_dscalv( BLIS1_NO_CONJUGATE, y_len, beta, y, incy );
        return;
    }

    // Work on a temporary contiguous copy of A if one is required;
    // a, a_rs and a_cs are updated to describe whichever matrix is used.
    bl1_dcreate_contigm( m, n, a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;

    // A row-major matrix is passed to the column-major BLAS by swapping
    // the dimensions and strides and toggling the transposition.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( m, n );
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( transa );
    }

    bl1_dgemv_blas( transa,
                    m, n,
                    alpha,
                    a, lda,
                    x, incx,
                    beta,
                    y, incy );

    // Release the temporary contiguous matrix.
    bl1_dfree_contigm( a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );
}
void bl1_dgemv_blas( trans1_t transa,
                     int      m,
                     int      n,
                     double*  alpha,
                     double*  a,
                     int      lda,
                     double*  x,
                     int      incx,
                     double*  beta,
                     double*  y,
                     int      incy )
References bl1_param_map_to_netlib_trans(), cblas_dgemv(), CblasColMajor, and F77_dgemv().
Referenced by bl1_dgemv().
{
    // Thin dispatch to the underlying double-precision real GEMV:
    // CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise the
    // Fortran-77 BLAS interface. A is always passed as column-major.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the trans1_t value into its CBLAS enum counterpart.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // CBLAS takes the real scalars by value, hence the dereferences.
    cblas_dgemv( CblasColMajor,
                 netlib_trans,
                 m, n,
                 *alpha,
                 a, lda,
                 x, incx,
                 *beta,
                 y, incy );
#else
    char netlib_trans;

    // Translate the trans1_t value into the BLAS character code.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // The Fortran interface takes every scalar argument by address.
    F77_dgemv( &netlib_trans,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bl1_sgemv( trans1_t transa,
                conj1_t  conjx,
                int      m,
                int      n,
                float*   alpha,
                float*   a,
                int      a_rs,
                int      a_cs,
                float*   x,
                int      incx,
                float*   beta,
                float*   y,
                int      incy )
References bl1_does_trans(), bl1_is_row_storage(), bl1_screate_contigm(), bl1_sfree_contigm(), bl1_sgemv_blas(), bl1_sscalv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Accum_T_UT_fc_ops_var1(), FLA_Accum_T_UT_fr_ops_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_CAQR2_UT_ops_var1(), FLA_Chol_l_ops_var2(), FLA_Chol_u_ops_var2(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var1(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_LQ_UT_ops_var2(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_QR2_UT_ops_var1(), FLA_QR_UT_ops_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Ttmm_l_ops_var2(), and FLA_Ttmm_u_ops_var2().
{
    // Single-precision real matrix-vector multiply built on top of
    // bl1_sgemv_blas(). Arbitrary row/column strides for A are handled
    // here; conjx is not consulted by this routine.

    // Original matrix description, kept so the contiguous-copy helpers
    // can be handed the caller's values again at cleanup time.
    float* a_orig    = a;
    int    a_rs_orig = a_rs;
    int    a_cs_orig = a_cs;
    int    lda, inca;

    // Nothing to multiply when a dimension is zero: only y := beta * y
    // remains, over n elements if A is transposed and m otherwise.
    if ( bl1_zero_dim2( m, n ) )
    {
        int y_len = bl1_does_trans( transa ) ? n : m;

        bl1_sscalv( BLIS1_NO_CONJUGATE, y_len, beta, y, incy );
        return;
    }

    // Work on a temporary contiguous copy of A if one is required;
    // a, a_rs and a_cs are updated to describe whichever matrix is used.
    bl1_screate_contigm( m, n, a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;

    // A row-major matrix is passed to the column-major BLAS by swapping
    // the dimensions and strides and toggling the transposition.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( m, n );
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( transa );
    }

    bl1_sgemv_blas( transa,
                    m, n,
                    alpha,
                    a, lda,
                    x, incx,
                    beta,
                    y, incy );

    // Release the temporary contiguous matrix.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );
}
void bl1_sgemv_blas( trans1_t transa,
                     int      m,
                     int      n,
                     float*   alpha,
                     float*   a,
                     int      lda,
                     float*   x,
                     int      incx,
                     float*   beta,
                     float*   y,
                     int      incy )
References bl1_param_map_to_netlib_trans(), cblas_sgemv(), CblasColMajor, and F77_sgemv().
Referenced by bl1_sgemv().
{
    // Thin dispatch to the underlying single-precision real GEMV:
    // CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise the
    // Fortran-77 BLAS interface. A is always passed as column-major.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the trans1_t value into its CBLAS enum counterpart.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // CBLAS takes the real scalars by value, hence the dereferences.
    cblas_sgemv( CblasColMajor,
                 netlib_trans,
                 m, n,
                 *alpha,
                 a, lda,
                 x, incx,
                 *beta,
                 y, incy );
#else
    char netlib_trans;

    // Translate the trans1_t value into the BLAS character code.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // The Fortran interface takes every scalar argument by address.
    F77_sgemv( &netlib_trans,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bl1_zgemv( trans1_t  transa,
                conj1_t   conjx,
                int       m,
                int       n,
                dcomplex* alpha,
                dcomplex* a,
                int       a_rs,
                int       a_cs,
                dcomplex* x,
                int       incx,
                dcomplex* beta,
                dcomplex* y,
                int       incy )
References bl1_does_trans(), bl1_is_conj(), bl1_is_conjnotrans(), bl1_is_row_storage(), bl1_z0(), bl1_z1(), bl1_zallocv(), bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zcreate_contigm(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zgemv_blas(), bl1_zscalv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Accum_T_UT_fc_opz_var1(), FLA_Accum_T_UT_fr_opz_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_CAQR2_UT_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var2(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var1(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_LQ_UT_opz_var2(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_QR2_UT_opz_var1(), FLA_QR_UT_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), FLA_Ttmm_l_opz_var2(), and FLA_Ttmm_u_opz_var2().
{
    // Double-precision complex matrix-vector multiply built on top of
    // bl1_zgemv_blas(). Arbitrary row/column strides for A and optional
    // conjugation of x and/or A are handled here before delegating.

    // Original matrix description, kept so the contiguous-copy helpers
    // can be handed the caller's values again at cleanup time.
    dcomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    dcomplex  z_zero    = bl1_z0();
    dcomplex  z_one     = bl1_z1();
    dcomplex* x_work;
    dcomplex* ax_buf;
    int       lda, inca;
    int       x_len;
    int       incx_work;
    int       incax;
    int       x_is_copied;

    // Nothing to multiply when a dimension is zero: only y := beta * y
    // remains, over n elements if A is transposed and m otherwise.
    if ( bl1_zero_dim2( m, n ) )
    {
        int y_len = bl1_does_trans( transa ) ? n : m;

        bl1_zscalv( BLIS1_NO_CONJUGATE, y_len, beta, y, incy );
        return;
    }

    // Work on a temporary contiguous copy of A if one is required;
    // a, a_rs and a_cs are updated to describe whichever matrix is used.
    bl1_zcreate_contigm( m, n, a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;

    // A row-major matrix is passed to the column-major BLAS by swapping
    // the dimensions and strides and toggling the transposition.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( m, n );
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( transa );
    }

    // A private copy of x is needed whenever x must be conjugated, and
    // also when A is conjugated without being transposed.
    x_is_copied = ( bl1_is_conj( conjx ) || bl1_is_conjnotrans( transa ) );

    if ( x_is_copied )
    {
        x_len     = bl1_does_trans( transa ) ? m : n;
        x_work    = bl1_zallocv( x_len );
        incx_work = 1;

        bl1_zcopyv( conjx, x_len, x, incx, x_work, incx_work );
    }
    else
    {
        // No conjugation involved: use the caller's x directly.
        x_work    = x;
        incx_work = incx;
    }

    if ( !bl1_is_conjnotrans( transa ) )
    {
        // notrans, trans, or conjtrans: hand straight to the BLAS wrapper.
        bl1_zgemv_blas( transa,
                        m, n,
                        alpha,
                        a, lda,
                        x_work, incx_work,
                        beta,
                        y, incy );
    }
    else
    {
        // conjnotrans is computed without explicitly conjugating A, using
        // conj(A) * x == conj( A * conj(x) ).

        // Scratch vector for A * conj(x); no transposition here, so it
        // has the same length as y, namely m.
        ax_buf = bl1_zallocv( m );
        incax  = 1;

        // Conjugate the private copy of x in place.
        bl1_zconjv( n, x_work, incx_work );

        // ax_buf := A * conj(x)
        bl1_zgemv_blas( BLIS1_NO_TRANSPOSE,
                        m, n,
                        &z_one,
                        a, lda,
                        x_work, incx_work,
                        &z_zero,
                        ax_buf, incax );

        // y := beta * y
        bl1_zscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

        // y := y + alpha * conj( ax_buf )
        bl1_zaxpyv( BLIS1_CONJUGATE, m, alpha, ax_buf, incax, y, incy );

        bl1_zfree( ax_buf );
    }

    // Release the private copy of x, if one was made.
    if ( x_is_copied )
        bl1_zfree( x_work );

    // Release the temporary contiguous matrix.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig, &a, &a_rs, &a_cs );
}
void bl1_zgemv_blas( trans1_t  transa,
                     int       m,
                     int       n,
                     dcomplex* alpha,
                     dcomplex* a,
                     int       lda,
                     dcomplex* x,
                     int       incx,
                     dcomplex* beta,
                     dcomplex* y,
                     int       incy )
References bl1_param_map_to_netlib_trans(), cblas_zgemv(), CblasColMajor, and F77_zgemv().
Referenced by bl1_zgemv().
{
    // Thin dispatch to the underlying double-precision complex GEMV:
    // CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise the
    // Fortran-77 BLAS interface. A is always passed as column-major.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE netlib_trans;

    // Translate the trans1_t value into its CBLAS enum counterpart.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    cblas_zgemv( CblasColMajor,
                 netlib_trans,
                 m, n,
                 alpha,
                 a, lda,
                 x, incx,
                 beta,
                 y, incy );
#else
    char netlib_trans;

    // Translate the trans1_t value into the BLAS character code.
    bl1_param_map_to_netlib_trans( transa, &netlib_trans );

    // The Fortran interface takes every scalar argument by address.
    F77_zgemv( &netlib_trans,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}