libflame
12600
|
Go to the source code of this file.
Functions | |
void | bl1_samax (int n, float *x, int incx, int *index) |
void | bl1_damax (int n, double *x, int incx, int *index) |
void | bl1_camax (int n, scomplex *x, int incx, int *index) |
void | bl1_zamax (int n, dcomplex *x, int incx, int *index) |
void | bl1_sasum (int n, float *x, int incx, float *norm) |
void | bl1_dasum (int n, double *x, int incx, double *norm) |
void | bl1_casum (int n, scomplex *x, int incx, float *norm) |
void | bl1_zasum (int n, dcomplex *x, int incx, double *norm) |
void | bl1_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy) |
void | bl1_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy) |
void | bl1_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_saxpyv (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy) |
void | bl1_daxpyv (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy) |
void | bl1_caxpyv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zaxpyv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_saxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_daxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_caxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zaxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy) |
void | bl1_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy) |
void | bl1_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy) |
void | bl1_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy) |
void | bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs) |
void | bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs) |
void | bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs) |
void | bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sconjv (int m, float *x, int incx) |
void | bl1_dconjv (int m, double *x, int incx) |
void | bl1_cconjv (int m, scomplex *x, int incx) |
void | bl1_zconjv (int m, dcomplex *x, int incx) |
void | bl1_sconjm (int m, int n, float *a, int a_rs, int a_cs) |
void | bl1_dconjm (int m, int n, double *a, int a_rs, int a_cs) |
void | bl1_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs) |
void | bl1_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs) |
void | bl1_sconjmr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs) |
void | bl1_dconjmr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs) |
void | bl1_cconjmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs) |
void | bl1_zconjmr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs) |
void | bl1_scopy (int m, float *x, int incx, float *y, int incy) |
void | bl1_dcopy (int m, double *x, int incx, double *y, int incy) |
void | bl1_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_icopyv (conj1_t conj, int m, int *x, int incx, int *y, int incy) |
void | bl1_scopyv (conj1_t conj, int m, float *x, int incx, float *y, int incy) |
void | bl1_dcopyv (conj1_t conj, int m, double *x, int incx, double *y, int incy) |
void | bl1_ccopyv (conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zcopyv (conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_sdcopyv (conj1_t conj, int m, float *x, int incx, double *y, int incy) |
void | bl1_dscopyv (conj1_t conj, int m, double *x, int incx, float *y, int incy) |
void | bl1_sccopyv (conj1_t conj, int m, float *x, int incx, scomplex *y, int incy) |
void | bl1_cscopyv (conj1_t conj, int m, scomplex *x, int incx, float *y, int incy) |
void | bl1_szcopyv (conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy) |
void | bl1_zscopyv (conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy) |
void | bl1_dccopyv (conj1_t conj, int m, double *x, int incx, scomplex *y, int incy) |
void | bl1_cdcopyv (conj1_t conj, int m, scomplex *x, int incx, double *y, int incy) |
void | bl1_dzcopyv (conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy) |
void | bl1_zdcopyv (conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy) |
void | bl1_czcopyv (conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_zccopyv (conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy) |
void | bl1_scopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_ccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sscopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sdcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dscopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sccopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cscopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_szcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zscopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_ddcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dccopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cdcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dzcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zdcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_cccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_czcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zccopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zzcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_scopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_ccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_sccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_szcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_dscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_ddcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_dzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_cscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_cdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_cccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_czcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_zdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_zccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_icopymt (trans1_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs) |
void | bl1_scopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_ccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sscopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sdcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dscopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sccopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cscopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_szcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zscopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_ddcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dccopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cdcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dzcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zdcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_cccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_czcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zccopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zzcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_cdot_in (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho) |
void | bl1_zdot_in (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho) |
void | bl1_sdot (conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho) |
void | bl1_ddot (conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho) |
void | bl1_cdot (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho) |
void | bl1_zdot (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho) |
void | bl1_sdots (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho) |
void | bl1_ddots (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho) |
void | bl1_cdots (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho) |
void | bl1_zdots (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho) |
void | bl1_sdot2s (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho) |
void | bl1_ddot2s (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho) |
void | bl1_cdot2s (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho) |
void | bl1_zdot2s (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho) |
void | bl1_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm) |
void | bl1_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm) |
void | bl1_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm) |
void | bl1_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm) |
void | bl1_sinvscalv (conj1_t conj, int n, float *alpha, float *x, int incx) |
void | bl1_dinvscalv (conj1_t conj, int n, double *alpha, double *x, int incx) |
void | bl1_csinvscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx) |
void | bl1_cinvscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx) |
void | bl1_zdinvscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx) |
void | bl1_zinvscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx) |
void | bl1_sinvscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
void | bl1_dinvscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
void | bl1_csinvscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_cinvscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdinvscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_zinvscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_snrm2 (int n, float *x, int incx, float *norm) |
void | bl1_dnrm2 (int n, double *x, int incx, double *norm) |
void | bl1_cnrm2 (int n, scomplex *x, int incx, float *norm) |
void | bl1_znrm2 (int n, dcomplex *x, int incx, double *norm) |
void | bl1_sscal (int n, float *alpha, float *x, int incx) |
void | bl1_dscal (int n, double *alpha, double *x, int incx) |
void | bl1_csscal (int n, float *alpha, scomplex *x, int incx) |
void | bl1_cscal (int n, scomplex *alpha, scomplex *x, int incx) |
void | bl1_zdscal (int n, double *alpha, dcomplex *x, int incx) |
void | bl1_zscal (int n, dcomplex *alpha, dcomplex *x, int incx) |
void | bl1_sscalv (conj1_t conj, int n, float *alpha, float *x, int incx) |
void | bl1_dscalv (conj1_t conj, int n, double *alpha, double *x, int incx) |
void | bl1_csscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx) |
void | bl1_cscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx) |
void | bl1_zdscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx) |
void | bl1_zscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx) |
void | bl1_sscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
void | bl1_dscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
void | bl1_csscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_cscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_zscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_sscalmr (uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs) |
void | bl1_dscalmr (uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs) |
void | bl1_csscalmr (uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_cscalmr (uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs) |
void | bl1_zdscalmr (uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_zscalmr (uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs) |
void | bl1_sswap (int n, float *x, int incx, float *y, int incy) |
void | bl1_dswap (int n, double *x, int incx, double *y, int incy) |
void | bl1_cswap (int n, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_sswapv (int n, float *x, int incx, float *y, int incy) |
void | bl1_dswapv (int n, double *x, int incx, double *y, int incy) |
void | bl1_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy) |
void | bl1_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy) |
void | bl1_sswapmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dswapmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_cswapmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zswapmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
References cblas_icamax(), and F77_icamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().
// bl1_camax: find the zero-based index of the element of the single-precision
// complex vector x (length n, stride incx) with the largest absolute value.
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS variant takes arguments by value and already returns a
    // zero-based index.
    *index = cblas_icamax( n, x, incx );
#else
    // Fortran-77 interface takes arguments by address and returns a
    // one-based index, so subtract 1 to convert to zero-based.
    *index = F77_icamax( &n, x, &incx ) - 1;
#endif
}
References cblas_scasum(), and F77_scasum().
Referenced by FLA_Asum_external().
// bl1_casum: compute the sum of the absolute values of the real and imaginary
// components of the n elements of complex vector x (stride incx), storing the
// (real-valued) result in *norm.
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS variant takes n and incx by value.
    *norm = cblas_scasum( n, x, incx );
#else
    // Fortran-77 interface takes integer arguments by address.
    *norm = F77_scasum( &n, x, &incx );
#endif
}
References cblas_caxpy(), and F77_caxpy().
Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpysv(), and bl1_caxpyv().
// bl1_caxpy: y := y + alpha * x for single-precision complex vectors of
// length n with strides incx and incy. Thin wrapper that dispatches to either
// the CBLAS or the Fortran-77 BLAS interface.
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS variant takes n and the strides by value.
    cblas_caxpy( n, alpha, x, incx, y, incy );
#else
    // Fortran-77 interface takes integer arguments by address.
    F77_caxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
void bl1_caxpymrt( uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs )
References bl1_caxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Axpyrt_external().
// bl1_caxpymrt: B := B + alpha * trans( A ), updating only the triangle of B
// selected by uplo. A is m-by-n with row/column strides a_rs/a_cs; B uses
// b_rs/b_cs. Implemented as a sequence of axpyv calls on partial rows or
// columns of the selected triangle.
{
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;               // number of axpyv invocations
    int       n_elem;               // length of the current partial vector
    int       n_elem_max;           // longest partial vector length
    int       n_elem_is_descending; // TRUE if vector lengths shrink with j
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Each case picks the traversal (by column or by row) that walks B's
    // stored triangle contiguously, and records whether the partial vector
    // lengths shrink or grow as j advances.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter; it is forwarded to axpyv
    // so conjugation (if requested) happens element-wise there.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the diagonal; the partial vector shortens by one
            // element per iteration.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_caxpyv( conj, n_elem, alpha, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the first element; the partial vector grows by one
            // per iteration up to n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_caxpyv( conj, n_elem, alpha, a_begin, inca, b_begin, incb );
        }
    }
}
void bl1_caxpymt( trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs )
References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
// bl1_caxpymt: B := B + alpha * trans( A ) for full m-by-n matrices with
// general row/column strides. Conjugation (when trans requests it) is applied
// by staging each partial vector of A through a temporary buffer.
{
    scomplex* a_begin;
    scomplex* b_begin;
    scomplex* a_temp;     // scratch vector for the conjugated copy of A
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;     // number of axpy invocations
    int       n_elem;     // length of each axpy
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // Allocate a contiguous scratch vector; each partial vector of A is
        // conjugate-copied into it before the (non-conjugating) axpy.
        a_temp = bl1_callocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_ccopyv( conj, n_elem, a_begin, inca, a_temp, inca_temp );
            bl1_caxpy( n_elem, alpha, a_temp, inca_temp, b_begin, incb );
        }

        bl1_cfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        // No conjugation: axpy directly from A.
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_caxpy( n_elem, alpha, a_begin, inca, b_begin, incb );
        }
    }
}
void bl1_caxpysmt( trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs )
References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.
Referenced by FLA_Axpys_external().
// bl1_caxpysmt: B := beta * B + (alpha0 * alpha1) * trans( A ) for full
// m-by-n matrices with general row/column strides. The two scalars are
// pre-multiplied once, then each partial vector of B is scaled by beta before
// the axpy update.
{
    scomplex* a_begin;
    scomplex* b_begin;
    scomplex* a_temp;     // scratch vector for the conjugated copy of A
    scomplex  alpha_prod; // alpha0 * alpha1, computed once up front
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Complex product alpha_prod = alpha0 * alpha1, expanded by hand.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // Stage conjugated partial vectors of A through contiguous scratch.
        a_temp = bl1_callocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_ccopyv( conj, n_elem, a_begin, inca, a_temp, inca_temp );
            // Scale B by beta first, then accumulate alpha_prod * conj(A).
            bl1_cscal( n_elem, beta, b_begin, incb );
            bl1_caxpy( n_elem, &alpha_prod, a_temp, inca_temp, b_begin, incb );
        }

        bl1_cfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Scale B by beta first, then accumulate alpha_prod * A.
            bl1_cscal( n_elem, beta, b_begin, incb );
            bl1_caxpy( n_elem, &alpha_prod, a_begin, inca, b_begin, incb );
        }
    }
}
void bl1_caxpysv( int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy )
References bl1_caxpy(), bl1_cscal(), bl1_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().
// bl1_caxpysv: y := beta * y + (alpha0 * alpha1) * x for complex vectors of
// length n with strides incx and incy.
{
    scomplex alpha_prod; // alpha0 * alpha1, computed once

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;

    // Complex product alpha_prod = alpha0 * alpha1, expanded by hand.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // Scale y by beta first, then accumulate alpha_prod * x.
    bl1_cscal( n, beta, y, incy );
    bl1_caxpy( n, &alpha_prod, x, incx, y, incy );
}
void bl1_caxpyv( conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy )
References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_is_conj(), and bl1_zero_dim1().
Referenced by bl1_caxpymrt(), bl1_cgemv(), bl1_chemv(), bl1_ctrmvsx(), bl1_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().
// bl1_caxpyv: y := y + alpha * conj?( x ) for complex vectors of length n.
// When conjugation is requested, x is conjugate-copied into a temporary
// contiguous buffer before the underlying (non-conjugating) axpy.
{
    scomplex* x_copy;
    int       incx_copy;

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;

    // By default, operate on x in place.
    x_copy    = x;
    incx_copy = incx;

    if ( bl1_is_conj( conj ) )
    {
        // Conjugation requested: make a conjugated contiguous copy of x.
        x_copy    = bl1_callocv( n );
        incx_copy = 1;

        bl1_ccopyv( conj, n, x, incx, x_copy, incx_copy );
    }

    bl1_caxpy( n, alpha, x_copy, incx_copy, y, incy );

    // Free the temporary copy only if one was allocated.
    if ( bl1_is_conj( conj ) )
        bl1_cfree( x_copy );
}
void bl1_cccopymr( uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs )
References bl1_ccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
// bl1_cccopymr: copy the triangle of m-by-n complex matrix A selected by uplo
// into the corresponding triangle of B, without conjugation. A uses strides
// a_rs/a_cs; B uses b_rs/b_cs.
{
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;     // number of partial-vector copies
    int       n_elem_max; // longest partial vector length
    int       n_elem;     // length of the current partial vector
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Traversing by rows instead of columns swaps the roles of upper and
    // lower, hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Upper triangle: partial vectors grow with j (capped at
            // n_elem_max) and start at the top of each column.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_ccopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Lower triangle: partial vectors shrink with j and start at
            // the diagonal. Stop once the triangle is exhausted.
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_ccopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
}
void bl1_cccopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_ccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_ccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_cccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_ccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage of B. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; ++j ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_ccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_cconjm | ( | int | m, |
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_sm1(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), and FLA_Conjugate().
{ float m1 = bl1_sm1(); float* a_conj; int lda, inca; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A is a vector to ensure that the underlying axpy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for a vector. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } for ( j = 0; j < n_iter; ++j ) { a_conj = ( float* )( a + j*lda ) + 1; bl1_sscal( n_elem, &m1, a_conj, 2*inca ); } }
void bl1_cconjmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_sm1(), bl1_sscal(), and bl1_zero_dim2().
Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), and FLA_Conjugate_r().
{ float m1 = bl1_sm1(); float* a_conj; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; ++j ) { n_elem = bl1_min( j + 1, n_elem_max ); a_conj = ( float* )( a + j*lda ) + 1; bl1_sscal( n_elem, &m1, a_conj, 2*inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; ++j ) { n_elem = bl1_max( 0, n_elem_max - j ); a_conj = ( float* )( a + j*lda + j*inca ) + 1; if ( n_elem <= 0 ) break; bl1_sscal( n_elem, &m1, a_conj, 2*inca ); } } }
void bl1_cconjv | ( | int | m, |
scomplex * | x, | ||
int | incx | ||
) |
References cblas_ccopy(), and F77_ccopy().
Referenced by bl1_ccopymr(), bl1_ccopymt(), bl1_ccopyv(), and FLA_SA_LU_unb().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_ccopy( m, x, incx, y, incy ); #else F77_ccopy( &m, x, &incx, y, &incy ); #endif }
void bl1_ccopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_ccopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by bl1_ccreate_contigmr(), bl1_cfree_saved_contigmr(), and FLA_Copyr_external().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // An optimization: if A and B are both row-major, then let's access the // matrices by rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_ccopy( n_elem, a_begin, inca, b_begin, incb ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; if ( n_elem <= 0 ) break; bl1_ccopy( n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_ccopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_ccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_ccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_ccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cconjv(), bl1_ccopy(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_chemm(), bl1_cher2k(), bl1_csymm(), bl1_csyr2k(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{ scomplex* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_ccopy( n_elem, a_begin, inca, b_begin, incb ); if ( bl1_does_conj( trans ) ) bl1_cconjv( n_elem, b_begin, incb ); } }
void bl1_ccopyv | ( | conj1_t | conj, |
int | m, | ||
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy | ||
) |
References bl1_cconjv(), bl1_ccopy(), bl1_is_conj(), and bl1_zero_dim1().
Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpyv(), bl1_cccopymr(), bl1_cccopymrt(), bl1_cccopymt(), bl1_ccopymrt(), bl1_cgemv(), bl1_cger(), bl1_chemv(), bl1_cher(), bl1_cher2(), bl1_csymmize(), bl1_csymv_blas(), bl1_csyr2_blas(), bl1_csyr_blas(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsv(), bl1_ctrsvsx(), FLA_Accum_T_UT_fc_opc_var1(), FLA_Accum_T_UT_fr_opc_var1(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_CAQR2_UT_opc_var1(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var3(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_LQ_UT_opc_var2(), FLA_QR_UT_opc_var2(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), and FLA_Tridiag_UT_shift_U_l_opc().
{ // Return early if possible. if ( bl1_zero_dim1( m ) ) return; bl1_ccopy( m, x, incx, y, incy ); if ( bl1_is_conj( conj ) ) bl1_cconjv( m, y, incy ); }
void bl1_cdcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cdcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{ scomplex* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // An optimization: if B is row-major, then let's access the matrix // by rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_cdcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; if ( n_elem <= 0 ) break; bl1_cdcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_cdcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cdcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{ scomplex* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_cdcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_cdcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_cdcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cdcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{ scomplex* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage of B. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; ++j ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_cdcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_cdcopyv | ( | conj1_t | conj, |
int | m, | ||
scomplex * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), and scomplex::real.
Referenced by bl1_cdcopymr(), bl1_cdcopymrt(), and bl1_cdcopymt().
{ scomplex* chi; double* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { *psi = chi->real; chi += incx; psi += incy; } }
void bl1_cdot | ( | conj1_t | conj, |
int | n, | ||
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy, | ||
scomplex * | rho | ||
) |
References bl1_cdot_in(), bl1_is_conj(), cblas_cdotc_sub(), and cblas_cdotu_sub().
Referenced by bl1_cdot2s(), bl1_cdots(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hn_opc_var1(), FLA_Sylv_nh_opc_var1(), FLA_Sylv_nn_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES if ( bl1_is_conj( conj ) ) { cblas_cdotc_sub( n, x, incx, y, incy, rho ); } else // if ( !bl1_is_conj( conj ) ) { cblas_cdotu_sub( n, x, incx, y, incy, rho ); } #else bl1_cdot_in( conj, n, x, incx, y, incy, rho ); #endif }
void bl1_cdot2s | ( | conj1_t | conj, |
int | n, | ||
scomplex * | alpha, | ||
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy, | ||
scomplex * | beta, | ||
scomplex * | rho | ||
) |
References bl1_cdot(), scomplex::imag, and scomplex::real.
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().
{ scomplex dotxy; scomplex dotyx; scomplex alpha_d = *alpha; scomplex alphac_d = *alpha; scomplex beta_d = *beta; scomplex rho_d = *rho; alphac_d.imag *= -1.0F; bl1_cdot( conj, n, x, incx, y, incy, &dotxy ); bl1_cdot( conj, n, y, incy, x, incx, &dotyx ); rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag + alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag + alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag; rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real + alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real + alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real; }
void bl1_cdot_in | ( | conj1_t | conj, |
int | n, | ||
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy, | ||
scomplex * | rho | ||
) |
References bl1_is_conj(), scomplex::imag, and scomplex::real.
Referenced by bl1_cdot().
{ scomplex* xip; scomplex* yip; scomplex xi; scomplex yi; scomplex rho_temp; int i; rho_temp.real = 0.0F; rho_temp.imag = 0.0F; xip = x; yip = y; if ( bl1_is_conj( conj ) ) { for ( i = 0; i < n; ++i ) { xi.real = xip->real; xi.imag = xip->imag; yi.real = yip->real; yi.imag = yip->imag; rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag; rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real; xip += incx; yip += incy; } } else // if ( !bl1_is_conj( conj ) ) { for ( i = 0; i < n; ++i ) { xi.real = xip->real; xi.imag = xip->imag; yi.real = yip->real; yi.imag = yip->imag; rho_temp.real += xi.real * yi.real - xi.imag * yi.imag; rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real; xip += incx; yip += incy; } } rho->real = rho_temp.real; rho->imag = rho_temp.imag; }
void bl1_cdots | ( | conj1_t | conj, |
int | n, | ||
scomplex * | alpha, | ||
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy, | ||
scomplex * | beta, | ||
scomplex * | rho | ||
) |
References bl1_cdot(), scomplex::imag, and scomplex::real.
Referenced by FLA_Chol_l_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var1(), FLA_Chol_u_opc_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Hess_UT_step_opc_var5(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Ttmm_l_opc_var2(), FLA_Ttmm_l_opc_var3(), FLA_Ttmm_u_opc_var2(), and FLA_Ttmm_u_opc_var3().
{ scomplex rho_orig = *rho; scomplex dot_prod; bl1_cdot( conj, n, x, incx, y, incy, &dot_prod ); rho->real = beta->real * rho_orig.real - beta->imag * rho_orig.imag + alpha->real * dot_prod.real - alpha->imag * dot_prod.imag; rho->imag = beta->real * rho_orig.imag + beta->imag * rho_orig.real + alpha->real * dot_prod.imag + alpha->imag * dot_prod.real; }
void bl1_cfnorm | ( | int | m, |
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | norm | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.
Referenced by FLA_Norm_frob().
{ scomplex* a_ij; float sum; int lda, inca; int n_iter; int n_elem; int i, j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A is a vector separately. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } // Initialize the accumulator variable. sum = 0.0F; for ( j = 0; j < n_iter; j++ ) { for ( i = 0; i < n_elem; i++ ) { a_ij = a + i*inca + j*lda; sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag; } } // Compute the norm and store the result. *norm = ( float ) sqrt( sum ); }
void bl1_cinvscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_cinvert2s(), bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{ scomplex alpha_inv; scomplex* a_begin; int lda, inca; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_ceq1( alpha ) ) return; // Handle cases where A is a vector to ensure that the underlying axpy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for a vector. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } bl1_cinvert2s( conj, alpha, &alpha_inv ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_cscal( n_elem, &alpha_inv, a_begin, inca ); } }
void bl1_cinvscalv | ( | conj1_t | conj, |
int | n, | ||
scomplex * | alpha, | ||
scomplex * | x, | ||
int | incx | ||
) |
References bl1_cinvert2s(), and bl1_cscal().
Referenced by bl1_crandmr(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Chol_l_opc_var2(), FLA_Chol_l_opc_var3(), FLA_Chol_u_opc_var2(), FLA_Chol_u_opc_var3(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Househ2_UT_l_opc(), FLA_Househ3UD_UT_opc(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_nopiv_opc_var5(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), FLA_Trinv_ln_opc_var1(), FLA_Trinv_ln_opc_var2(), FLA_Trinv_ln_opc_var3(), FLA_Trinv_un_opc_var1(), FLA_Trinv_un_opc_var2(), and FLA_Trinv_un_opc_var3().
{ scomplex alpha_inv; if ( bl1_ceq1( alpha ) ) return; bl1_cinvert2s( conj, alpha, &alpha_inv ); bl1_cscal( n, &alpha_inv, x, incx ); }
References cblas_scnrm2(), and F77_scnrm2().
Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES *norm = cblas_scnrm2( n, x, incx ); #else *norm = F77_scnrm2( &n, x, &incx ); #endif }
References cblas_cscal(), and F77_cscal().
Referenced by bl1_caxpysmt(), bl1_caxpysv(), bl1_cinvscalm(), bl1_cinvscalv(), bl1_cscalm(), bl1_cscalmr(), bl1_cscalv(), and FLA_SA_LU_unb().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_cscal( n, alpha, x, incx ); #else F77_cscal( &n, alpha, x, &incx ); #endif }
void bl1_cscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{ scomplex alpha_conj; scomplex* a_begin; int lda, inca; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_ceq1( alpha ) ) return; // Handle cases where A is a vector to ensure that the underlying axpy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for a vector. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } bl1_ccopys( conj, alpha, &alpha_conj ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_cscal( n_elem, &alpha_conj, a_begin, inca ); } }
void bl1_cscalmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_cscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Scalr_external().
{ scomplex* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_ceq1( alpha ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_cscal( n_elem, alpha, a_begin, inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; if ( n_elem <= 0 ) break; bl1_cscal( n_elem, alpha, a_begin, inca ); } } }
void bl1_cscalv | ( | conj1_t | conj, |
int | n, | ||
scomplex * | alpha, | ||
scomplex * | x, | ||
int | incx | ||
) |
References bl1_cscal(), and bl1_zero_dim1().
Referenced by bl1_capdiagmv(), bl1_cgemv(), bl1_chemv(), bl1_ctrmvsx(), bl1_ctrsvsx(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_QR_UT_form_Q_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Trinv_ln_opc_var4(), FLA_Trinv_lu_opc_var1(), FLA_Trinv_lu_opc_var2(), FLA_Trinv_lu_opc_var3(), FLA_Trinv_lu_opc_var4(), FLA_Trinv_un_opc_var4(), FLA_Trinv_uu_opc_var1(), FLA_Trinv_uu_opc_var2(), FLA_Trinv_uu_opc_var3(), FLA_Trinv_uu_opc_var4(), FLA_Ttmm_l_opc_var1(), FLA_Ttmm_l_opc_var2(), FLA_Ttmm_u_opc_var1(), and FLA_Ttmm_u_opc_var2().
{
    // Scale vector x by alpha (optionally conjugated, per conj).
    scomplex alpha_conj;

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;
    if ( bl1_ceq1( alpha ) ) return;    // scaling by 1 is a no-op

    // Make a local copy of alpha, conjugated if conj requests it, so the
    // caller's alpha is left untouched.
    bl1_ccopys( conj, alpha, &alpha_conj );

    bl1_cscal( n, &alpha_conj, x, incx );
}
void bl1_cscopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    // Copy the triangle of complex matrix A selected by uplo into real
    // matrix B. Only real components survive, since bl1_cscopyv copies
    // chi->real and discards the imaginary parts.
    scomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Row-wise iteration sees the opposite triangle; compensate.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_cscopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_cscopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_cscopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cscopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the uplo triangle of (optionally transposed) complex A into
    // real B, column by column or row by row depending on B's storage.
    // Imaginary parts are discarded by bl1_cscopyv.
    scomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // The four cases choose an iteration order that walks B contiguously
    // and record whether segment lengths shrink or grow across iterations.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;   // start at the diagonal
            b_begin = b + j*ldb + j*incb;

            bl1_cscopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_cscopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_cscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cscopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy (optionally transposed) complex matrix A into real matrix B.
    // bl1_cscopyv keeps only the real parts.
    scomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_cscopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_cscopyv | ( | conj1_t | conj, |
int | m, | ||
scomplex * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), and scomplex::real.
Referenced by bl1_cscopymr(), bl1_cscopymrt(), and bl1_cscopymt().
{
    // Copy the real components of the m elements of complex vector x
    // into real vector y. The imaginary parts are discarded, which also
    // makes the conj argument irrelevant here.
    int i;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ] = x[ i*incx ].real;
    }
}
void bl1_csinvscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    // Scale complex matrix A by 1/alpha, where alpha is a real scalar.
    float     alpha_inv;
    scomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;    // dividing by 1 is a no-op

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Compute 1/alpha once, honoring conj (a no-op for real scalars,
    // presumably kept for interface symmetry with the complex variants).
    bl1_sinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_csscal( n_elem,
                    &alpha_inv,
                    a_begin, inca );
    }
}
void bl1_csinvscalv | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
scomplex * | x, | ||
int | incx | ||
) |
References bl1_csscal().
{
    // Scale complex vector x by 1/alpha, where alpha is a real scalar.
    // conj is ignored: conjugation of a real scalar is a no-op.
    float alpha_inv;

    // Return early if possible. Sibling routines (e.g. bl1_csscalv,
    // bl1_cscalv) guard against empty vectors; do the same here for
    // consistency and to skip the pointless divide.
    if ( n <= 0 ) return;
    if ( bl1_seq1( alpha ) ) return;    // dividing by 1 is a no-op

    alpha_inv = 1.0F / *alpha;

    bl1_csscal( n,
                &alpha_inv,
                x, incx );
}
void bl1_csscal | ( | int | n, |
float * | alpha, | ||
scomplex * | x, | ||
int | incx | ||
) |
References cblas_csscal(), and F77_csscal().
Referenced by bl1_csinvscalm(), bl1_csinvscalv(), bl1_csscalm(), bl1_csscalmr(), and bl1_csscalv().
{
    // Thin dispatch wrapper around csscal (scale complex vector by a
    // real scalar): Fortran-77 interface by default, CBLAS when enabled.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_csscal( &n,
                alpha,
                x, &incx );
#else
    cblas_csscal( n,
                  *alpha,
                  x, incx );
#endif
}
void bl1_csscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Scal_external(), and FLA_Scalc_external().
{
    // Scale complex matrix A by the real scalar alpha.
    float     alpha_conj;
    scomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;    // scaling by 1 is a no-op

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Copy alpha locally, honoring conj (a no-op for real scalars,
    // presumably kept for interface symmetry with the complex variants).
    bl1_scopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_csscal( n_elem,
                    &alpha_conj,
                    a_begin, inca );
    }
}
void bl1_csscalmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_csscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Scalr_external().
{
    // Scale the triangle of complex matrix A selected by uplo with the
    // real scalar alpha.
    scomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;    // scaling by 1 is a no-op

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        // Row-wise iteration sees the opposite triangle; compensate.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;

            bl1_csscal( n_elem,
                        alpha,
                        a_begin, inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;

            if ( n_elem <= 0 ) break;

            bl1_csscal( n_elem,
                        alpha,
                        a_begin, inca );
        }
    }
}
void bl1_csscalv | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
scomplex * | x, | ||
int | incx | ||
) |
References bl1_csscal(), and bl1_zero_dim1().
Referenced by bl1_csapdiagmv(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().
{
    // Scale complex vector x by the real scalar alpha. conj has no
    // effect on a real scalar, so it is ignored.
    // Invoke the underlying scal only when it would actually do work:
    // skip empty vectors and unit scaling factors.
    if ( !bl1_zero_dim1( n ) && !bl1_seq1( alpha ) )
        bl1_csscal( n, alpha, x, incx );
}
References cblas_cswap(), and F77_cswap().
Referenced by bl1_cswapmt(), bl1_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
    // Thin dispatch wrapper around cswap (exchange two complex
    // vectors): Fortran-77 interface by default, CBLAS when enabled.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_cswap( &n,
               x, &incx,
               y, &incy );
#else
    cblas_cswap( n,
                 x, incx,
                 y, incy );
#endif
}
void bl1_cswapmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cconjv(), bl1_cswap(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
    // Swap the contents of complex matrices A (optionally transposed /
    // conjugated) and B, column by column or row by row.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       do_conj;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // The conjugation test is loop-invariant, so evaluate it once here
    // instead of twice per iteration (as the original code did).
    do_conj = bl1_does_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_cswap( n_elem,
                   a_begin, inca,
                   b_begin, incb );

        // When trans carries conjugation, conjugate both swapped
        // segments in place.
        if ( do_conj )
        {
            bl1_cconjv( n_elem, a_begin, inca );
            bl1_cconjv( n_elem, b_begin, incb );
        }
    }
}
void bl1_cswapv | ( | int | n, |
scomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy | ||
) |
References bl1_cswap(), and bl1_zero_dim1().
Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opc(), and FLA_Sort_bsvd_ext_f_opc().
{
    // Exchange the contents of complex vectors x and y. Skip the call
    // entirely for empty vectors.
    if ( bl1_zero_dim1( n ) ) return;

    bl1_cswap( n,
               x, incx,
               y, incy );
}
void bl1_czcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_czcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    // Copy the triangle of single-precision complex A selected by uplo
    // into double-precision complex B (precision promotion via
    // bl1_czcopyv).
    scomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Row-wise iteration sees the opposite triangle; compensate.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_czcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_czcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_czcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_czcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the uplo triangle of (optionally transposed/conjugated)
    // single-precision complex A into double-precision complex B.
    scomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Each case selects an iteration order that walks B contiguously and
    // records whether segment lengths shrink or grow across iterations.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;   // start at the diagonal
            b_begin = b + j*ldb + j*incb;

            bl1_czcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_czcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_czcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_czcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy (optionally transposed/conjugated) single-precision complex
    // matrix A into double-precision complex matrix B.
    scomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_czcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_czcopyv | ( | conj1_t | conj, |
int | m, | ||
scomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_is_conj(), bl1_zconjv(), bl1_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by bl1_czcopymr(), bl1_czcopymrt(), and bl1_czcopymt().
{
    // Copy the m elements of single-precision complex vector x into
    // double-precision complex vector y, conjugating the result in
    // place afterwards if conj requests it.
    int i;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    // Promote each element, real and imaginary parts separately.
    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ].real = x[ i*incx ].real;
        y[ i*incy ].imag = x[ i*incx ].imag;
    }

    if ( bl1_is_conj( conj ) )
        bl1_zconjv( m, y, incy );
}
void bl1_damax | ( | int | n, |
double * | x, | ||
int | incx, | ||
int * | index | ||
) |
References cblas_idamax(), and F77_idamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().
{
    // Find the index of the element of x with largest absolute value.
    // The Fortran-77 routine returns a 1-based index, hence the -1 to
    // normalize to 0-based; CBLAS is already 0-based.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    *index = F77_idamax( &n,
                         x, &incx ) - 1;
#else
    *index = cblas_idamax( n,
                           x, incx );
#endif
}
void bl1_dasum | ( | int | n, |
double * | x, | ||
int | incx, | ||
double * | norm | ||
) |
References cblas_dasum(), and F77_dasum().
Referenced by FLA_Asum_external().
{
    // Compute the sum of absolute values of x into *norm, dispatching
    // to the Fortran-77 interface by default or CBLAS when enabled.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    *norm = F77_dasum( &n,
                       x, &incx );
#else
    *norm = cblas_dasum( n,
                         x, incx );
#endif
}
void bl1_daxpy | ( | int | n, |
double * | alpha, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References cblas_daxpy(), and F77_daxpy().
Referenced by bl1_daxpymt(), bl1_daxpysmt(), bl1_daxpysv(), and bl1_daxpyv().
{
    // y := alpha*x + y, dispatching to the Fortran-77 interface by
    // default or CBLAS when enabled.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_daxpy( &n,
               alpha,
               x, &incx,
               y, &incy );
#else
    cblas_daxpy( n,
                 *alpha,
                 x, incx,
                 y, incy );
#endif
}
void bl1_daxpymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_daxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Axpyrt_external().
{
    // B := B + alpha * uplo_triangle( op(A) ), real double precision.
    double* a_begin;
    double* b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     n_elem_max;
    int     n_elem_is_descending;
    int     j;
    conj1_t conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Each case selects an iteration order that walks B contiguously and
    // records whether segment lengths shrink or grow across iterations.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs; inca = a_rs;
            ldb        = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs; inca = a_cs;
            ldb        = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter. (For real data the
    // conjugation is a no-op; bl1_daxpyv presumably ignores it.)
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;   // start at the diagonal
            b_begin = b + j*ldb + j*incb;

            bl1_daxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_daxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bl1_daxpymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    // B := B + alpha * op(A), real double precision, general strides.
    double* a_begin;
    double* b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_daxpy( n_elem,
                   alpha,
                   a_begin, inca,
                   b_begin, incb );
    }
}
void bl1_daxpysmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | alpha0, | ||
double * | alpha1, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | beta, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Axpys_external().
{
    // B := beta * B + (alpha0 * alpha1) * op(A), real double precision.
    double* a_begin;
    double* b_begin;
    double  alpha_prod;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Fold the two scalar factors into one up front.
    alpha_prod = (*alpha0) * (*alpha1);

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // Scale the destination segment by beta first, then accumulate.
        bl1_dscal( n_elem,
                   beta,
                   b_begin, incb );

        bl1_daxpy( n_elem,
                   &alpha_prod,
                   a_begin, inca,
                   b_begin, incb );
    }
}
void bl1_daxpysv | ( | int | n, |
double * | alpha0, | ||
double * | alpha1, | ||
double * | x, | ||
int | incx, | ||
double * | beta, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_daxpy(), bl1_dscal(), and bl1_zero_dim1().
Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().
{
    // y := beta * y + (alpha0 * alpha1) * x, real double precision.
    double scale;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( n ) ) return;

    // Fold the two scalar factors into one.
    scale = (*alpha0) * (*alpha1);

    // Scale the destination first, then accumulate the scaled x.
    bl1_dscal( n, beta, y, incy );
    bl1_daxpy( n, &scale, x, incx, y, incy );
}
void bl1_daxpyv | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_daxpy().
Referenced by bl1_daxpymrt(), bl1_dtrmvsx(), bl1_dtrsvsx(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_il_opd_var4(), FLA_Eig_gest_il_opd_var5(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_iu_opd_var4(), FLA_Eig_gest_iu_opd_var5(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nl_opd_var4(), FLA_Eig_gest_nl_opd_var5(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Eig_gest_nu_opd_var4(), FLA_Eig_gest_nu_opd_var5(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), and FLA_Tridiag_UT_l_step_opd_var3().
{
    // y := alpha*x + y. The conj argument is ignored: conjugation is a
    // no-op for real data, so this forwards directly to bl1_daxpy.
    bl1_daxpy( n, alpha, x, incx, y, incy );
}
void bl1_dccopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    // Copy the triangle of real matrix A selected by uplo into complex
    // matrix B (element conversion done by bl1_dccopyv).
    double*   a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Row-wise iteration sees the opposite triangle; compensate.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_dccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_dccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_dccopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{ double* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_dccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_dccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{ double* a_begin; scomplex* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage of B. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; ++j ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dccopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_dccopyv | ( | conj1_t | conj, |
int | m, | ||
double * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by bl1_dccopymr(), bl1_dccopymrt(), and bl1_dccopymt().
{ double* chi; scomplex* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { psi->real = *chi; psi->imag = 0.0F; chi += incx; psi += incy; } }
// Conjugate real matrix A in place. Real numbers are their own conjugates,
// so this is a deliberate no-op; it exists so type-dispatched callers can
// invoke bl1_?conjm uniformly across datatypes.
void bl1_dconjm( int m, int n, double* a, int a_rs, int a_cs )
{
	// Intentionally empty.
	return;
}
void bl1_dconjmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
{
return;
}
// Conjugate real vector x in place. A no-op by design: real values equal
// their own conjugates. Present so generic code can call bl1_?conjv for
// every datatype.
void bl1_dconjv( int m, double* x, int incx )
{
	// Intentionally empty.
	return;
}
// Copy m elements of x into y via the external BLAS dcopy. Dispatches to the
// CBLAS interface when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise
// to the Fortran-77 interface (which takes arguments by address).
void bl1_dcopy( int m, double* x, int incx, double* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	cblas_dcopy( m, x, incx, y, incy );
#else
	F77_dcopy( &m, x, &incx, y, &incy );
#endif
}
void bl1_dcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by bl1_dcreate_contigmr(), bl1_dfree_saved_contigmr(), and FLA_Copyr_external().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // An optimization: if A and B are both row-major, then let's access the // matrices by rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopy( n_elem, a_begin, inca, b_begin, incb ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; if ( n_elem <= 0 ) break; bl1_dcopy( n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_dcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_dcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_dcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmsr(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opd_var2().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major and if A is effectively row-major // after a possible transposition, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) || ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } } for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopy( n_elem, a_begin, inca, b_begin, incb ); } }
void bl1_dcopyv | ( | conj1_t | conj, |
int | m, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_dcopy().
Referenced by bl1_dcopymrt(), bl1_ddcopymr(), bl1_ddcopymrt(), bl1_ddcopymt(), bl1_dsymmize(), bl1_dtrmvsx(), bl1_dtrsvsx(), FLA_Accum_T_UT_fc_opd_var1(), FLA_Accum_T_UT_fr_opd_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_CAQR2_UT_opd_var1(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var3(), FLA_Fused_UYx_ZVx_opd_var1(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_LQ_UT_opd_var2(), FLA_QR_UT_opd_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), and FLA_Tridiag_UT_shift_U_l_opd().
{ bl1_dcopy( m, x, incx, y, incy ); }
void bl1_ddcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // An optimization: if B is row-major, then let's access the matrix // by rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; if ( n_elem <= 0 ) break; bl1_dcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_ddcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_dcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_ddcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{ double* a_begin; double* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A and B are vectors to ensure that the underlying copy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( trans, m, n, a_rs, a_cs ); ldb = 1; // multiplied by zero when n_iter == 1; not needed. incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs ); } else // matrix case { // Initialize with optimal values for column-major storage of B. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Handle the transposition of A. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // An optimization: if B is row-major, then let's access the matrix by rows // instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); } } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); for ( j = 0; j < n_iter; ++j ) { a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dcopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } }
References cblas_ddot(), and F77_ddot().
Referenced by bl1_ddot2s(), bl1_ddots(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Sylv_hh_opd_var1(), FLA_Sylv_hn_opd_var1(), FLA_Sylv_nh_opd_var1(), FLA_Sylv_nn_opd_var1(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), and FLA_Tridiag_UT_l_step_opd_var3().
// bl1_ddot body: computes the dot product of x and y into *rho by calling
// the external BLAS ddot — the CBLAS interface when
// BLIS1_ENABLE_CBLAS_INTERFACES is defined, the Fortran-77 interface
// (arguments by address) otherwise.
// NOTE(review): the preprocessor directives below were flattened onto one
// line by documentation extraction; in the real source each
// #ifdef/#else/#endif occupies its own line. The signature rows for this
// function are missing from this excerpt.
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES *rho = cblas_ddot( n, x, incx, y, incy ); #else *rho = F77_ddot( &n, x, &incx, y, &incy ); #endif }
void bl1_ddot2s | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy, | ||
double * | beta, | ||
double * | rho | ||
) |
References bl1_ddot().
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), and FLA_Lyap_n_opd_var3().
{ double dot; bl1_ddot( conj, n, x, incx, y, incy, &dot ); *rho = (*beta) * (*rho) + 2.0 * (*alpha) * dot; }
void bl1_ddots | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
double * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy, | ||
double * | beta, | ||
double * | rho | ||
) |
References bl1_ddot().
Referenced by FLA_Chol_l_opd_var1(), FLA_Chol_l_opd_var2(), FLA_Chol_u_opd_var1(), FLA_Chol_u_opd_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opd_var5(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_Ttmm_l_opd_var2(), FLA_Ttmm_l_opd_var3(), FLA_Ttmm_u_opd_var2(), and FLA_Ttmm_u_opd_var3().
{ double dot_prod; bl1_ddot( conj, n, x, incx, y, incy, &dot_prod ); *rho = (*beta) * (*rho) + (*alpha) * dot_prod; }
void bl1_dfnorm | ( | int | m, |
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | norm | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Norm_frob().
{ double* a_ij; double sum; int lda, inca; int n_iter; int n_elem; int i, j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Handle cases where A is a vector separately. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for vectors. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } // Initialize the accumulator variable. sum = 0.0; for ( j = 0; j < n_iter; j++ ) { for ( i = 0; i < n_elem; i++ ) { a_ij = a + i*inca + j*lda; sum += (*a_ij) * (*a_ij); } } // Compute the norm and store the result. *norm = sqrt( sum ); }
void bl1_dinvscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dinvert2s(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{ double alpha_inv; double* a_begin; int lda, inca; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_deq1( alpha ) ) return; // Handle cases where A is a vector to ensure that the underlying axpy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for a vector. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } bl1_dinvert2s( conj, alpha, &alpha_inv ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_dscal( n_elem, &alpha_inv, a_begin, inca ); } }
void bl1_dinvscalv | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
double * | x, | ||
int | incx | ||
) |
References bl1_dscal().
Referenced by bl1_drandmr(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Chol_l_opd_var2(), FLA_Chol_l_opd_var3(), FLA_Chol_u_opd_var2(), FLA_Chol_u_opd_var3(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_il_opd_var4(), FLA_Eig_gest_il_opd_var5(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_iu_opd_var4(), FLA_Eig_gest_iu_opd_var5(), FLA_Househ2_UT_l_opd(), FLA_Househ3UD_UT_opd(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_nopiv_opd_var5(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), FLA_Trinv_ln_opd_var1(), FLA_Trinv_ln_opd_var2(), FLA_Trinv_ln_opd_var3(), FLA_Trinv_un_opd_var1(), FLA_Trinv_un_opd_var2(), and FLA_Trinv_un_opd_var3().
{ double alpha_inv; if ( bl1_deq1( alpha ) ) return; alpha_inv = 1.0 / *alpha; bl1_dscal( n, &alpha_inv, x, incx ); }
// Store the Euclidean (2-)norm of x into *norm via the external BLAS dnrm2.
// Uses CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise the
// Fortran-77 interface (arguments passed by address).
void bl1_dnrm2( int n, double* x, int incx, double* norm )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	*norm = cblas_dnrm2( n, x, incx );
#else
	*norm = F77_dnrm2( &n, x, &incx );
#endif
}
// Scale vector x by *alpha in place via the external BLAS dscal. Dispatches
// to CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, otherwise to the
// Fortran-77 interface (arguments by address).
void bl1_dscal( int n, double* alpha, double* x, int incx )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	cblas_dscal( n, *alpha, x, incx );
#else
	F77_dscal( &n, alpha, x, &incx );
#endif
}
void bl1_dscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{ double alpha_conj; double* a_begin; int lda, inca; int n_iter; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_deq1( alpha ) ) return; // Handle cases where A is a vector to ensure that the underlying axpy // gets invoked only once. if ( bl1_is_vector( m, n ) ) { // Initialize with values appropriate for a vector. n_iter = 1; n_elem = bl1_vector_dim( m, n ); lda = 1; // multiplied by zero when n_iter == 1; not needed. inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs ); } else // matrix case { // Initialize with optimal values for column-major storage. n_iter = n; n_elem = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem ); bl1_swap_ints( lda, inca ); } } bl1_dcopys( conj, alpha, &alpha_conj ); for ( j = 0; j < n_iter; j++ ) { a_begin = a + j*lda; bl1_dscal( n_elem, &alpha_conj, a_begin, inca ); } }
void bl1_dscalmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by FLA_Scalr_external().
{ double* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; if ( bl1_deq1( alpha ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bl1_is_row_storage( a_rs, a_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; bl1_dscal( n_elem, alpha, a_begin, inca ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; if ( n_elem <= 0 ) break; bl1_dscal( n_elem, alpha, a_begin, inca ); } } }
void bl1_dscalv | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
double * | x, | ||
int | incx | ||
) |
References bl1_dscal(), and bl1_zero_dim1().
Referenced by bl1_dapdiagmv(), bl1_dgemv(), bl1_dtrmvsx(), bl1_dtrsvsx(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nl_opd_var4(), FLA_Eig_gest_nl_opd_var5(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Eig_gest_nu_opd_var4(), FLA_Eig_gest_nu_opd_var5(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_QR_UT_form_Q_opd_var1(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Trinv_ln_opd_var4(), FLA_Trinv_lu_opd_var1(), FLA_Trinv_lu_opd_var2(), FLA_Trinv_lu_opd_var3(), FLA_Trinv_lu_opd_var4(), FLA_Trinv_un_opd_var4(), FLA_Trinv_uu_opd_var1(), FLA_Trinv_uu_opd_var2(), FLA_Trinv_uu_opd_var3(), FLA_Trinv_uu_opd_var4(), FLA_Ttmm_l_opd_var1(), FLA_Ttmm_l_opd_var2(), FLA_Ttmm_u_opd_var1(), and FLA_Ttmm_u_opd_var2().
{ // Return early if possible. if ( bl1_zero_dim1( n ) ) return; if ( bl1_deq1( alpha ) ) return; bl1_dscal( n, alpha, x, incx ); }
void bl1_dscopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{ double* a_begin; float* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // An optimization: if B is row-major, then let's access the matrix // by rows instead of by columns for increased spatial locality. if ( bl1_is_row_storage( b_rs, b_cs ) ) { bl1_swap_ints( n_iter, n_elem_max ); bl1_swap_ints( lda, inca ); bl1_swap_ints( ldb, incb ); bl1_toggle_uplo( uplo ); } if ( bl1_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dscopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( bl1_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_max( 0, n_elem_max - j ); a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; if ( n_elem <= 0 ) break; bl1_dscopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb ); } } }
void bl1_dscopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_dscopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{ double* a_begin; float* b_begin; int lda, inca; int ldb, incb; int n_iter; int n_elem; int n_elem_max; int n_elem_is_descending; int j; conj1_t conj; // Return early if possible. if ( bl1_zero_dim2( m, n ) ) return; // Initialize variables based on storage format of B and value of uplo. if ( bl1_is_col_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = m; lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = TRUE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = n; n_elem_max = bl1_min( m, n ); lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; n_elem_is_descending = FALSE; } } else // if ( bl1_is_row_storage( b_rs, b_cs ) ) { if ( bl1_is_lower( uplo ) ) { n_iter = m; n_elem_max = bl1_min( m, n ); lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = FALSE; } else // if ( bl1_is_upper( uplo ) ) { n_iter = bl1_min( m, n ); n_elem_max = n; lda = a_rs; inca = a_cs; ldb = b_rs; incb = b_cs; n_elem_is_descending = TRUE; } } // Swap lda and inca if we're doing a transpose. if ( bl1_does_trans( trans ) ) { bl1_swap_ints( lda, inca ); } // Extract conj component from trans parameter. conj = bl1_proj_trans1_to_conj( trans ); // Choose the loop based on whether n_elem will be shrinking or growing // with each iteration. if ( n_elem_is_descending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = n_elem_max - j; a_begin = a + j*lda + j*inca; b_begin = b + j*ldb + j*incb; bl1_dscopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } else // if ( n_elem_is_ascending ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bl1_min( j + 1, n_elem_max ); a_begin = a + j*lda; b_begin = b + j*ldb; bl1_dscopyv( conj, n_elem, a_begin, inca, b_begin, incb ); } } }
// Copy a double-precision real matrix A into a single-precision real matrix B
// (the "ds" prefix: double source, single destination), applying the optional
// transposition encoded in trans.  A is m-by-n with generalized row/column
// strides (a_rs, a_cs); likewise B with (b_rs, b_cs).
// Called by FLA_Copy_external() and FLA_Copyt_external().
void bl1_dscopymt( trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    double*   a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Copy one column (or row) of A per iteration.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_dscopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_dscopyv | ( | conj1_t | conj, |
int | m, | ||
double * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1().
Referenced by bl1_dscopymr(), bl1_dscopymrt(), and bl1_dscopymt().
{ double* chi; float* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { *psi = *chi; chi += incx; psi += incy; } }
// Swap the contents of two double-precision vectors x and y by invoking the
// external BLAS dswap routine through whichever interface the build enables.
// Called by bl1_dswapmt(), bl1_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
void bl1_dswap( int n, double* x, int incx, double* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    cblas_dswap( n, x, incx, y, incy );
#else
    // The Fortran-77 interface passes all scalar arguments by address.
    F77_dswap( &n, x, &incx, y, &incy );
#endif
}
// Swap the contents of double-precision matrices A and B, where A is accessed
// through the optional transposition encoded in trans.  Both matrices use
// generalized row/column strides.
// Called by FLA_Swap_external() and FLA_Swapt_external().
void bl1_dswapmt( trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
    double*   a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Swap one column (or row) per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_dswap( n_elem,
                   a_begin, inca,
                   b_begin, incb );
    }
}
// Swap two double-precision vectors of length n with strides incx and incy.
// Called by FLA_Apply_pivots_macro_external() and the FLA_Sort_*_opd() routines.
void bl1_dswapv( int n, double* x, int incx, double* y, int incy )
{
    // Forward to the swap kernel unless the vectors are empty.
    if ( !bl1_zero_dim1( n ) )
        bl1_dswap( n, x, incx, y, incy );
}
// Copy the uplo-specified triangle of a double-precision real matrix A into
// the corresponding triangle of a double-precision complex matrix B
// (imaginary parts of B are set to zero by the underlying copy).
// Called by FLA_Copyr_external().
void bl1_dzcopymr( uplo1_t uplo, int m, int n, double* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversal direction flipped, so the triangle flips too.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements
        // starting at the top of the column.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_dzcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal and shrinks.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal passes the bottom edge there is nothing left.
            if ( n_elem <= 0 ) break;

            bl1_dzcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copy the uplo-specified triangle of a double-precision real matrix A,
// optionally transposed per trans, into a double-precision complex matrix B.
// Called by FLA_Copyrt_external().
void bl1_dzcopymrt( uplo1_t uplo, trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by columns: lengths start at m and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by columns: lengths grow up to min(m,n).
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by rows: lengths grow up to min(m,n).
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by rows: lengths start at n and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            // Shrinking partial columns/rows begin on the diagonal.
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_dzcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_dzcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copy a double-precision real matrix A into a double-precision complex
// matrix B, applying the optional transposition encoded in trans.  Imaginary
// parts of B are zeroed by the underlying vector copy.
// Called by FLA_Copy_external() and FLA_Copyt_external().
void bl1_dzcopymt( trans1_t trans, int m, int n, double* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Copy one column (or row) per iteration.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_dzcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_dzcopyv | ( | conj1_t | conj, |
int | m, | ||
double * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_dzcopymr(), bl1_dzcopymrt(), and bl1_dzcopymt().
{ double* chi; dcomplex* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { psi->real = *chi; psi->imag = 0.0; chi += incx; psi += incy; } }
// Copy an integer matrix A into an integer matrix B, applying the optional
// transposition encoded in trans.
// Called by FLA_Copy_external() and FLA_Copyt_external().
void bl1_icopymt( trans1_t trans, int m, int n, int* a, int a_rs, int a_cs, int* b, int b_rs, int b_cs )
{
    int*      a_begin;
    int*      b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Copy one column (or row) per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_icopyv( bl1_proj_trans1_to_conj( trans ),
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
void bl1_icopyv | ( | conj1_t | conj, |
int | m, | ||
int * | x, | ||
int | incx, | ||
int * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1().
Referenced by bl1_icopymt().
{ int* chi; int* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { *psi = *chi; chi += incx; psi += incy; } }
// Find the index of the element of x with the largest absolute value, storing
// the zero-based result in *index.
// Called by FLA_Amax_external(), the FLA_LU_piv_ops variants, and FLA_SA_LU_unb().
void bl1_samax( int n, float* x, int incx, int* index )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS already returns a zero-based index.
    *index = cblas_isamax( n, x, incx );
#else
    // The Fortran-77 routine returns a one-based index; convert to zero-based.
    *index = F77_isamax( &n, x, &incx ) - 1;
#endif
}
// Compute the sum of the absolute values of the elements of x, storing the
// result in *norm (the 1-norm of the vector).
// Called by FLA_Asum_external().
void bl1_sasum( int n, float* x, int incx, float* norm )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    *norm = cblas_sasum( n, x, incx );
#else
    // The Fortran-77 interface passes all scalar arguments by address.
    *norm = F77_sasum( &n, x, &incx );
#endif
}
// Compute y := y + (*alpha) * x for single-precision vectors via the external
// BLAS saxpy routine.
// Called by bl1_saxpymt(), bl1_saxpysmt(), bl1_saxpysv(), and bl1_saxpyv().
void bl1_saxpy( int n, float* alpha, float* x, int incx, float* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes alpha by value.
    cblas_saxpy( n, *alpha, x, incx, y, incy );
#else
    // The Fortran-77 interface passes all scalar arguments by address.
    F77_saxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
// Accumulate B := B + alpha * A over the uplo-specified triangle, where A is
// optionally transposed per trans.  Single-precision real variant.
// Called by FLA_Axpyrt_external().
void bl1_saxpymrt( uplo1_t uplo, trans1_t trans, int m, int n, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    float*    a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by columns: lengths start at m and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by columns: lengths grow up to min(m,n).
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by rows: lengths grow up to min(m,n).
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by rows: lengths start at n and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            // Shrinking partial columns/rows begin on the diagonal.
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_saxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_saxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
// Accumulate B := B + alpha * A, where A is optionally transposed per trans.
// Single-precision real variant.
// Called by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(),
// FLA_Axpy_external(), and FLA_Axpyt_external().
void bl1_saxpymt( trans1_t trans, int m, int n, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    float*    a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Accumulate one column (or row) per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_saxpy( n_elem,
                   alpha,
                   a_begin, inca,
                   b_begin, incb );
    }
}
// Compute B := beta * B + (alpha0 * alpha1) * A, where A is optionally
// transposed per trans.  Single-precision real variant.
// Called by FLA_Axpys_external().
void bl1_saxpysmt( trans1_t trans, int m, int n, float* alpha0, float* alpha1, float* a, int a_rs, int a_cs, float* beta, float* b, int b_rs, int b_cs )
{
    float*    a_begin;
    float*    b_begin;
    float     alpha_prod;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Fold the two alpha scalars into a single scaling factor.
    alpha_prod = (*alpha0) * (*alpha1);

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // For each column (or row): scale B by beta, then accumulate scaled A.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_sscal( n_elem,
                   beta,
                   b_begin, incb );

        bl1_saxpy( n_elem,
                   &alpha_prod,
                   a_begin, inca,
                   b_begin, incb );
    }
}
// Compute y := beta * y + (alpha0 * alpha1) * x for single-precision vectors.
// Called by the FLA_Lyap_h_ops and FLA_Lyap_n_ops variants.
void bl1_saxpysv( int n, float* alpha0, float* alpha1, float* x, int incx, float* beta, float* y, int incy )
{
    float scale;

    // Empty vectors require no work.
    if ( bl1_zero_dim1( n ) ) return;

    // Fold the two alpha scalars into a single scaling factor.
    scale = ( *alpha0 ) * ( *alpha1 );

    // First scale y by beta, then accumulate the scaled x into it.
    bl1_sscal( n, beta, y, incy );
    bl1_saxpy( n, &scale, x, incx, y, incy );
}
// Compute y := y + (*alpha) * x for single-precision vectors.  The conj
// argument is accepted for interface uniformity but has no effect on real
// data, so this simply forwards to the axpy kernel.
// Called by bl1_saxpymrt(), bl1_strmvsx(), bl1_strsvsx(), and numerous
// FLA_*_ops/_ofs level-3 and factorization variants.
void bl1_saxpyv( conj1_t conj, int n, float* alpha, float* x, int incx, float* y, int incy )
{
    bl1_saxpy( n,
               alpha,
               x, incx,
               y, incy );
}
// Copy the uplo-specified triangle of a single-precision real matrix A into
// the corresponding triangle of a single-precision complex matrix B
// (imaginary parts of B are set to zero by the underlying copy).
// Called by FLA_Copyr_external().
void bl1_sccopymr( uplo1_t uplo, int m, int n, float* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs )
{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversal direction flipped, so the triangle flips too.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_sccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal and shrinks.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal passes the bottom edge there is nothing left.
            if ( n_elem <= 0 ) break;

            bl1_sccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copy the uplo-specified triangle of a single-precision real matrix A,
// optionally transposed per trans, into a single-precision complex matrix B.
// Called by FLA_Copyrt_external().
void bl1_sccopymrt( uplo1_t uplo, trans1_t trans, int m, int n, float* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs )
{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by columns: lengths start at m and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by columns: lengths grow up to min(m,n).
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by rows: lengths grow up to min(m,n).
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by rows: lengths start at n and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            // Shrinking partial columns/rows begin on the diagonal.
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copy a single-precision real matrix A into a single-precision complex
// matrix B, applying the optional transposition encoded in trans.  Imaginary
// parts of B are zeroed by the underlying vector copy.
// Called by FLA_Copy_external() and FLA_Copyt_external().
void bl1_sccopymt( trans1_t trans, int m, int n, float* a, int a_rs, int a_cs, scomplex* b, int b_rs, int b_cs )
{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Copy one column (or row) per iteration.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_sccopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_sccopyv | ( | conj1_t | conj, |
int | m, | ||
float * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), scomplex::imag, and scomplex::real.
Referenced by bl1_sccopymr(), bl1_sccopymrt(), and bl1_sccopymt().
{ float* chi; scomplex* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { psi->real = *chi; psi->imag = 0.0F; chi += incx; psi += incy; } }
// Conjugate a single-precision real matrix: intentionally a no-op, since real
// data has no imaginary part.  Provided so real and complex code paths share
// a uniform interface.
void bl1_sconjm( int m, int n, float* a, int a_rs, int a_cs )
{
	return;
}
// Conjugate the uplo-specified triangle of a single-precision real matrix:
// intentionally a no-op, since real data has no imaginary part.  Provided so
// real and complex code paths share a uniform interface.
void bl1_sconjmr( uplo1_t uplo, int m, int n, float* a, int a_rs, int a_cs )
{
	return;
}
// Conjugate a single-precision real vector: intentionally a no-op, since real
// data has no imaginary part.  Provided so real and complex code paths share
// a uniform interface.
// Called by FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var3(),
// and FLA_Bidiag_UT_u_step_ops_var4().
void bl1_sconjv( int m, float* x, int incx )
{
	return;
}
// Copy m elements of single-precision vector x into vector y via the external
// BLAS scopy routine.
// Called by bl1_scopymr(), bl1_scopymt(), bl1_scopyv(),
// FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().
void bl1_scopy( int m, float* x, int incx, float* y, int incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    cblas_scopy( m, x, incx, y, incy );
#else
    // The Fortran-77 interface passes all scalar arguments by address.
    F77_scopy( &m, x, &incx, y, &incy );
#endif
}
// Copy the uplo-specified triangle of single-precision matrix A into the
// corresponding triangle of single-precision matrix B.
// Called by bl1_screate_contigmr(), bl1_sfree_saved_contigmr(), and
// FLA_Copyr_external().
void bl1_scopymr( uplo1_t uplo, int m, int n, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    float*    a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if A and B are both row-major, then let's access the
    // matrices by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversal direction flipped, so the triangle flips too.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_scopy( n_elem,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal and shrinks.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal passes the bottom edge there is nothing left.
            if ( n_elem <= 0 ) break;

            bl1_scopy( n_elem,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
}
// Copy the uplo-specified triangle of single-precision matrix A, optionally
// transposed per trans, into single-precision matrix B.
// Called by FLA_Copyrt_external() and the FLA_Lyap_h_ops/FLA_Lyap_n_ops variants.
void bl1_scopymrt( uplo1_t uplo, trans1_t trans, int m, int n, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    float*    a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by columns: lengths start at m and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by columns: lengths grow up to min(m,n).
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Lower triangle by rows: lengths grow up to min(m,n).
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Upper triangle by rows: lengths start at n and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            // Shrinking partial columns/rows begin on the diagonal.
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_scopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_scopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bl1_scopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_scopy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmsr(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy matrix A, optionally transposed, into matrix B: B := op(A).
    // Both matrices are float and are addressed with generic row/column
    // strides (a_rs/a_cs, b_rs/b_cs); m x n are the dimensions of B.
    float* a_begin;
    float* b_begin;
    int    lda, inca;   // column stride / element stride for A
    int    ldb, incb;   // column stride / element stride for B
    int    n_iter;      // number of copy invocations
    int    n_elem;      // number of elements per invocation
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Copy one column (or row) of A into B per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_scopy( n_elem,
                   a_begin, inca,
                   b_begin, incb );
    }
}
void bl1_scopyv | ( | conj1_t | conj, |
int | m, | ||
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References bl1_scopy().
Referenced by bl1_scopymrt(), bl1_sscopymr(), bl1_sscopymrt(), bl1_sscopymt(), bl1_ssymmize(), bl1_strmvsx(), bl1_strsvsx(), FLA_Accum_T_UT_fc_ops_var1(), FLA_Accum_T_UT_fr_ops_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_CAQR2_UT_ops_var1(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var3(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_LQ_UT_ops_var2(), FLA_QR_UT_ops_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), and FLA_Tridiag_UT_shift_U_l_ops().
{
    // Copy vector x into vector y: y := x. The conj argument is not used
    // in this body (conjugation is a no-op for real data).
    bl1_scopy( m,
               x, incx,
               y, incy );
}
void bl1_sdcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_sdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    // Copy the uplo-specified triangle (including the diagonal) of float
    // matrix A into double matrix B, promoting each element: B := A.
    float*  a_begin;
    double* b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;      // number of columns (or rows) visited
    int     n_elem_max;  // longest column (or row) length
    int     n_elem;      // elements copied this iteration
    int     j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversing by rows flips which triangle we walk through.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements,
        // starting at the top of the column.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_sdcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal element (j,j)
        // and shrinks as j grows.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal runs out, no later column has any elements.
            if ( n_elem <= 0 ) break;

            bl1_sdcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_sdcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the uplo triangle of op(A) (float) into double matrix B,
    // promoting each element: B := uplo( op(A) ).
    float*  a_begin;
    double* b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     n_elem_max;
    int     n_elem_is_descending; // TRUE when column length shrinks with j
    int     j;
    conj1_t conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Each partial column begins at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_sdcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Each partial column begins at the top and grows until capped.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_sdcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_sdcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy op(A) (float) into matrix B (double), promoting each element:
    // B := op(A). m x n are the dimensions of B.
    float*  a_begin;
    double* b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     j;
    conj1_t conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_sdcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_sdcopyv | ( | conj1_t | conj, |
int | m, | ||
float * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1().
Referenced by bl1_sdcopymr(), bl1_sdcopymrt(), and bl1_sdcopymt().
{
    // Copy float vector x into double vector y, promoting each element.
    // conj is accepted for interface uniformity but has no effect on
    // real data, so this body never reads it.
    int i;

    // An empty vector leaves y untouched.
    if ( bl1_zero_dim1( m ) ) return;

    // Walk both vectors by their respective strides.
    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ] = ( double ) x[ i*incx ];
    }
}
References cblas_sdot(), and F77_sdot().
Referenced by bl1_sdot2s(), bl1_sdots(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Sylv_hh_ops_var1(), FLA_Sylv_hn_ops_var1(), FLA_Sylv_nh_ops_var1(), FLA_Sylv_nn_ops_var1(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().
{
    // Compute the dot product rho := x' * y by deferring to an external
    // BLAS implementation.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: scalar arguments are passed by value.
    *rho = cblas_sdot( n,
                       x, incx,
                       y, incy );
#else
    // Fortran-77 interface: scalar arguments are passed by reference.
    *rho = F77_sdot( &n,
                     x, &incx,
                     y, &incy );
#endif
}
void bl1_sdot2s | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy, | ||
float * | beta, | ||
float * | rho | ||
) |
References bl1_sdot().
Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), and FLA_Lyap_n_ops_var3().
{
    // Compute rho := beta * rho + alpha * x'y + alpha * y'x.
    // For real data x'y == y'x, so the two alpha terms reduce to a
    // single 2 * alpha * (x'y) contribution.
    float dot_xy;

    bl1_sdot( conj,
              n,
              x, incx,
              y, incy,
              &dot_xy );

    *rho = (*beta) * (*rho) + 2.0F * (*alpha) * dot_xy;
}
void bl1_sdots | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy, | ||
float * | beta, | ||
float * | rho | ||
) |
References bl1_sdot().
Referenced by FLA_Chol_l_ops_var1(), FLA_Chol_l_ops_var2(), FLA_Chol_u_ops_var1(), FLA_Chol_u_ops_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Hess_UT_step_ops_var5(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_Ttmm_l_ops_var2(), FLA_Ttmm_l_ops_var3(), FLA_Ttmm_u_ops_var2(), and FLA_Ttmm_u_ops_var3().
{
    // Accumulate a scaled dot product: rho := beta * rho + alpha * (x'y).
    float xy;

    bl1_sdot( conj,
              n,
              x, incx,
              y, incy,
              &xy );

    *rho = (*beta) * (*rho) + (*alpha) * xy;
}
void bl1_sfnorm | ( | int | m, |
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | norm | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Norm_frob().
{
    // Compute the Frobenius norm of an m x n float matrix A with generic
    // row/column strides and store the result in *norm.
    //
    // Fix: the sum of squares is now accumulated in double precision.
    // A float accumulator overflows once sum exceeds FLT_MAX (~3.4e38),
    // which happens for element magnitudes around 1.8e19, and loses
    // precision when many terms are summed. The final result is still
    // returned as a float, so the interface is unchanged.
    float*  a_j;
    float*  a_ij;
    double  sum;
    int     lda, inca;
    int     n_iter;
    int     n_elem;
    int     i, j;

    // Return early if possible. (*norm is intentionally left untouched,
    // matching the original behavior for empty matrices.)
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0;

    for ( j = 0; j < n_iter; j++ )
    {
        // Hoist the loop-invariant column offset out of the inner loop.
        a_j = a + j*lda;

        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a_j + i*inca;
            sum += ( double )(*a_ij) * ( double )(*a_ij);
        }
    }

    // Compute the norm and store the result.
    *norm = ( float ) sqrt( sum );
}
void bl1_sinvscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().
{
    // Scale matrix A by the reciprocal of alpha: A := (1/alpha) * A.
    // conj is forwarded to bl1_sinvert2s (a no-op for real data).
    float  alpha_inv;
    float* a_begin;
    int    lda, inca;
    int    n_iter;
    int    n_elem;
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    // Dividing by one would leave A unchanged.
    if ( bl1_seq1( alpha ) ) return;

    // Handle cases where A is a vector to ensure that the underlying scal
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Invert alpha once up front so each column is scaled by a multiply.
    bl1_sinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_sscal( n_elem,
                   &alpha_inv,
                   a_begin, inca );
    }
}
void bl1_sinvscalv | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
float * | x, | ||
int | incx | ||
) |
References bl1_sscal().
Referenced by bl1_srandmr(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Chol_l_ops_var2(), FLA_Chol_l_ops_var3(), FLA_Chol_u_ops_var2(), FLA_Chol_u_ops_var3(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Househ2_UT_l_ops(), FLA_Househ3UD_UT_ops(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_nopiv_ops_var5(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), FLA_Trinv_ln_ops_var1(), FLA_Trinv_ln_ops_var2(), FLA_Trinv_ln_ops_var3(), FLA_Trinv_un_ops_var1(), FLA_Trinv_un_ops_var2(), and FLA_Trinv_un_ops_var3().
{
    // Scale vector x by the reciprocal of alpha: x := (1/alpha) * x.
    // conj is ignored for real data.
    float recip;

    // alpha == 1 means x would be unchanged; skip the work.
    if ( bl1_seq1( alpha ) ) return;

    recip = 1.0F / *alpha;

    bl1_sscal( n,
               &recip,
               x, incx );
}
void bl1_snrm2 | ( | int | n, |
float * | x, | ||
int | incx, | ||
float * | norm | ||
) |
References cblas_snrm2(), and F77_snrm2().
Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().
{
    // Compute the 2-norm of vector x and store it in *norm, deferring to
    // an external BLAS implementation.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: scalar arguments are passed by value.
    *norm = cblas_snrm2( n,
                         x, incx );
#else
    // Fortran-77 interface: scalar arguments are passed by reference.
    *norm = F77_snrm2( &n,
                       x, &incx );
#endif
}
void bl1_sscal | ( | int | n, |
float * | alpha, | ||
float * | x, | ||
int | incx | ||
) |
References cblas_sscal(), and F77_sscal().
Referenced by bl1_cconjm(), bl1_cconjmr(), bl1_cconjv(), bl1_saxpysmt(), bl1_saxpysv(), bl1_sinvscalm(), bl1_sinvscalv(), bl1_sscalm(), bl1_sscalmr(), bl1_sscalv(), and FLA_SA_LU_unb().
{
    // Scale vector x in place: x := alpha * x, deferring to an external
    // BLAS implementation.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: alpha is dereferenced and passed by value.
    cblas_sscal( n,
                 *alpha,
                 x, incx );
#else
    // Fortran-77 interface: all scalars are passed by reference.
    F77_sscal( &n,
               alpha,
               x, &incx );
#endif
}
void bl1_sscalm | ( | conj1_t | conj, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_vector(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().
{
    // Scale matrix A in place: A := alpha * A. conj is applied to alpha
    // via bl1_scopys (a no-op for real data).
    float  alpha_conj;
    float* a_begin;
    int    lda, inca;
    int    n_iter;
    int    n_elem;
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    // Scaling by one would leave A unchanged.
    if ( bl1_seq1( alpha ) ) return;

    // Handle cases where A is a vector to ensure that the underlying scal
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Conjugate alpha once up front (identity for real alpha).
    bl1_scopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_sscal( n_elem,
                   &alpha_conj,
                   a_begin, inca );
    }
}
void bl1_sscalmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_sscal(), and bl1_zero_dim2().
Referenced by FLA_Scalr_external().
{
    // Scale the uplo-specified triangle (including the diagonal) of
    // matrix A in place: uplo(A) := alpha * uplo(A).
    float* a_begin;
    int    lda, inca;
    int    n_iter;
    int    n_elem_max;
    int    n_elem;
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    // Scaling by one would leave A unchanged.
    if ( bl1_seq1( alpha ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        // Traversing by rows flips which triangle we walk through.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;

            bl1_sscal( n_elem,
                       alpha,
                       a_begin, inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;

            // Once the diagonal runs out, no later column has any elements.
            if ( n_elem <= 0 ) break;

            bl1_sscal( n_elem,
                       alpha,
                       a_begin, inca );
        }
    }
}
void bl1_sscalv | ( | conj1_t | conj, |
int | n, | ||
float * | alpha, | ||
float * | x, | ||
int | incx | ||
) |
References bl1_sscal(), and bl1_zero_dim1().
Referenced by bl1_sapdiagmv(), bl1_sgemv(), bl1_strmvsx(), bl1_strsvsx(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_ops_var1(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_QR_UT_form_Q_ops_var1(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Trinv_ln_ops_var4(), FLA_Trinv_lu_ops_var1(), FLA_Trinv_lu_ops_var2(), FLA_Trinv_lu_ops_var3(), FLA_Trinv_lu_ops_var4(), FLA_Trinv_un_ops_var4(), FLA_Trinv_uu_ops_var1(), FLA_Trinv_uu_ops_var2(), FLA_Trinv_uu_ops_var3(), FLA_Trinv_uu_ops_var4(), FLA_Ttmm_l_ops_var1(), FLA_Ttmm_l_ops_var2(), FLA_Ttmm_u_ops_var1(), and FLA_Ttmm_u_ops_var2().
{
    // Scale vector x in place: x := alpha * x. conj is ignored for real
    // data. Skip the work when the vector is empty or when alpha == 1
    // (either way x would be unchanged).
    if ( bl1_zero_dim1( n ) || bl1_seq1( alpha ) ) return;

    bl1_sscal( n,
               alpha,
               x, incx );
}
void bl1_sscopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_scopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
{
    // Copy the uplo-specified triangle (including the diagonal) of float
    // matrix A into float matrix B: uplo(B) := uplo(A).
    float* a_begin;
    float* b_begin;
    int    lda, inca;
    int    ldb, incb;
    int    n_iter;
    int    n_elem_max;
    int    n_elem;
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversing by rows flips which triangle we walk through.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_scopyv( BLIS1_NO_CONJUGATE,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal runs out, no later column has any elements.
            if ( n_elem <= 0 ) break;

            bl1_scopyv( BLIS1_NO_CONJUGATE,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bl1_sscopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().
{
    // Copy the uplo triangle of op(A) into float matrix B:
    // B := uplo( op(A) ).
    float*  a_begin;
    float*  b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     n_elem_max;
    int     n_elem_is_descending; // TRUE when column length shrinks with j
    int     j;
    conj1_t conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Each partial column begins at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_scopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Each partial column begins at the top and grows until capped.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_scopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bl1_sscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_scopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{
    // Copy op(A) into matrix B, both float: B := op(A). m x n are the
    // dimensions of B.
    float*  a_begin;
    float*  b_begin;
    int     lda, inca;
    int     ldb, incb;
    int     n_iter;
    int     n_elem;
    int     j;
    conj1_t conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_scopyv( conj,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
void bl1_sswap | ( | int | n, |
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References cblas_sswap(), and F77_sswap().
Referenced by bl1_sswapmt(), bl1_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{
    // Exchange the contents of vectors x and y, deferring to an external
    // BLAS implementation.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: scalar arguments are passed by value.
    cblas_sswap( n,
                 x, incx,
                 y, incy );
#else
    // Fortran-77 interface: scalar arguments are passed by reference.
    F77_sswap( &n,
               x, &incx,
               y, &incy );
#endif
}
void bl1_sswapmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_sswap(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Swap_external(), and FLA_Swapt_external().
{
    // Exchange the contents of matrix B with op(A): B <-> op(A).
    // m x n are the dimensions of B.
    float* a_begin;
    float* b_begin;
    int    lda, inca;
    int    ldb, incb;
    int    n_iter;
    int    n_elem;
    int    j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Swap one column (or row) pair per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_sswap( n_elem,
                   a_begin, inca,
                   b_begin, incb );
    }
}
void bl1_sswapv | ( | int | n, |
float * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References bl1_sswap(), and bl1_zero_dim1().
Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_f_ops().
{
    // Exchange the contents of vectors x and y. An empty vector needs
    // no swapping, so bail out immediately in that case.
    if ( bl1_zero_dim1( n ) ) return;

    // Defer to the level-1 swap kernel.
    bl1_sswap( n,
               x, incx,
               y, incy );
}
void bl1_szcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_szcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    // Copy the uplo-specified triangle (including the diagonal) of float
    // matrix A into double-complex matrix B: uplo(B) := uplo(A).
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        // Traversing by rows flips which triangle we walk through.
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_szcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal runs out, no later column has any elements.
            if ( n_elem <= 0 ) break;

            bl1_szcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_szcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    // Copy the uplo triangle of op(A) (float) into double-complex matrix
    // B: B := uplo( op(A) ).
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending; // TRUE when column length shrinks with j
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda  = a_cs; inca = a_rs;
            ldb  = b_cs; incb = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda  = a_rs; inca = a_cs;
            ldb  = b_rs; incb = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Each partial column begins at the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_szcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Each partial column begins at the top and grows until capped.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_szcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_szcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    float*    a_start;
    dcomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: perform the whole copy with a single kernel call.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1;
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: start from column-major-friendly strides...
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs; a_inc = a_rs;
        b_ld   = b_cs; b_inc = b_rs;

        // ...fold any transposition of A into its strides...
        if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

        // ...and traverse by rows when B is row-major for spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_start = a + j*a_ld;
        b_start = b + j*b_ld;

        bl1_szcopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
    }
}
void bl1_szcopyv | ( | conj1_t | conj, |
int | m, | ||
float * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_szcopymr(), bl1_szcopymrt(), and bl1_szcopymt().
{
    float*    chi;
    dcomplex* psi;
    int       i;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    chi = x;
    psi = y;

    // Promote each real element to a double-complex value with a zero
    // imaginary part; conjugation is a no-op for real input, so conj is
    // intentionally ignored.
    for ( i = 0; i < m; ++i )
    {
        psi->real = *chi;
        psi->imag = 0.0;

        chi += incx;
        psi += incy;
    }
}
References cblas_izamax(), and F77_izamax().
Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // The CBLAS interface already returns a zero-based index.
    *index = cblas_izamax( n, x, incx );
#else
    // The Fortran interface returns a one-based index; convert to zero-based.
    *index = F77_izamax( &n, x, &incx ) - 1;
#endif
}
References cblas_dzasum(), and F77_dzasum().
Referenced by FLA_Asum_external().
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // Sum of absolute values of real and imaginary parts (1-norm style).
    *norm = cblas_dzasum( n, x, incx );
#else
    *norm = F77_dzasum( &n, x, &incx );
#endif
}
References cblas_zaxpy(), and F77_zaxpy().
Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpysv(), and bl1_zaxpyv().
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // y := alpha*x + y via the external BLAS.
    cblas_zaxpy( n, alpha, x, incx, y, incy );
#else
    F77_zaxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
void bl1_zaxpymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zaxpyv(), and bl1_zero_dim2().
Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Axpyrt_external().
{
    dcomplex* a_start;
    dcomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       descending;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Select the iteration geometry from B's storage and the triangle chosen
    // by uplo. "descending" records whether each successive panel of the
    // triangle is one element shorter than the previous one.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs; a_inc = a_rs;
        b_ld  = b_cs; b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            descending = TRUE;
        }
        else // upper
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
    }
    else // row storage of B
    {
        a_ld  = a_rs; a_inc = a_cs;
        b_ld  = b_rs; b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
        else // upper
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            descending = TRUE;
        }
    }

    // Transposing A amounts to exchanging its leading dimension and increment.
    if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( descending )
    {
        // Panels start on the diagonal and shrink by one each iteration.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zaxpyv( conj, n_elem, alpha, a_start, a_inc, b_start, b_inc );
        }
    }
    else
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zaxpyv( conj, n_elem, alpha, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zaxpymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
{
    dcomplex* a_start;
    dcomplex* b_start;
    dcomplex* a_conj;
    int       a_conj_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: perform the whole update with a single axpy call.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1;
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: start from column-major-friendly strides.
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs; a_inc = a_rs;
        b_ld   = b_cs; b_inc = b_rs;

        // Fold any transposition of A into its strides.
        if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

        // Traverse by rows only if B is row-major AND A, after any
        // transposition, is effectively row-major as well.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( a_ld, a_inc );
                bl1_swap_ints( b_ld, b_inc );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        // The raw axpy cannot conjugate, so stage each panel of A in a
        // unit-stride temporary buffer first.
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        a_conj     = bl1_zallocv( n_elem );
        a_conj_inc = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zcopyv( conj, n_elem, a_start, a_inc, a_conj, a_conj_inc );
            bl1_zaxpy( n_elem, alpha, a_conj, a_conj_inc, b_start, b_inc );
        }

        bl1_zfree( a_conj );
    }
    else
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zaxpy( n_elem, alpha, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zaxpysmt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | alpha0, | ||
dcomplex * | alpha1, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | beta, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.
Referenced by FLA_Axpys_external().
{
    dcomplex* a_start;
    dcomplex* b_start;
    dcomplex* a_conj;
    dcomplex  alpha_prod;
    int       a_conj_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // alpha_prod = alpha0 * alpha1, expanded as a complex multiply.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: perform the whole update with a single pass.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1;
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: start from column-major-friendly strides.
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs; a_inc = a_rs;
        b_ld   = b_cs; b_inc = b_rs;

        // Fold any transposition of A into its strides.
        if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

        // Traverse by rows only if B is row-major AND A, after any
        // transposition, is effectively row-major as well.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( a_ld, a_inc );
                bl1_swap_ints( b_ld, b_inc );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        // The raw axpy cannot conjugate, so stage each panel of A in a
        // unit-stride temporary buffer first.
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        a_conj     = bl1_zallocv( n_elem );
        a_conj_inc = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            // B := beta * B + (alpha0*alpha1) * conj(A), one panel at a time.
            bl1_zcopyv( conj, n_elem, a_start, a_inc, a_conj, a_conj_inc );
            bl1_zscal( n_elem, beta, b_start, b_inc );
            bl1_zaxpy( n_elem, &alpha_prod, a_conj, a_conj_inc, b_start, b_inc );
        }

        bl1_zfree( a_conj );
    }
    else
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            // B := beta * B + (alpha0*alpha1) * A, one panel at a time.
            bl1_zscal( n_elem, beta, b_start, b_inc );
            bl1_zaxpy( n_elem, &alpha_prod, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zaxpysv | ( | int | n, |
dcomplex * | alpha0, | ||
dcomplex * | alpha1, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | beta, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_zaxpy(), bl1_zero_dim1(), bl1_zscal(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
{
    dcomplex alpha_prod;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( n ) ) return;

    // alpha_prod = alpha0 * alpha1, expanded as a complex multiply.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // y := beta * y + (alpha0*alpha1) * x.
    bl1_zscal( n, beta, y, incy );
    bl1_zaxpy( n, &alpha_prod, x, incx, y, incy );
}
void bl1_zaxpyv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_is_conj(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim1(), and bl1_zfree().
Referenced by bl1_zaxpymrt(), bl1_zgemv(), bl1_zhemv(), bl1_ztrmvsx(), bl1_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().
{
    dcomplex* x_use;
    int       incx_use;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( n ) ) return;

    x_use    = x;
    incx_use = incx;

    // The raw axpy cannot conjugate, so stage a conjugated, unit-stride
    // copy of x when conjugation is requested.
    if ( bl1_is_conj( conj ) )
    {
        x_use    = bl1_zallocv( n );
        incx_use = 1;

        bl1_zcopyv( conj, n, x, incx, x_use, incx_use );
    }

    bl1_zaxpy( n, alpha, x_use, incx_use, y, incy );

    if ( bl1_is_conj( conj ) )
        bl1_zfree( x_use );
}
void bl1_zccopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    dcomplex* a_start;
    scomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_iter     = n;
    n_elem_max = m;
    a_ld = a_cs; a_inc = a_rs;
    b_ld = b_cs; b_inc = b_rs;

    // If B is stored by rows, traverse by rows instead for locality; this
    // flips the triangle we walk, so toggle uplo to match.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zccopyv( BLIS1_NO_CONJUGATE, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else // lower
    {
        // Panels start on the diagonal and shrink; stop once they empty out.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bl1_max( 0, n_elem_max - j );
            if ( n_elem <= 0 ) break;

            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zccopyv( BLIS1_NO_CONJUGATE, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zccopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zccopyv(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    dcomplex* a_start;
    scomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       descending;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Select the iteration geometry from B's storage and the triangle chosen
    // by uplo. "descending" records whether each successive panel of the
    // triangle is one element shorter than the previous one.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs; a_inc = a_rs;
        b_ld  = b_cs; b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            descending = TRUE;
        }
        else // upper
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
    }
    else // row storage of B
    {
        a_ld  = a_rs; a_inc = a_cs;
        b_ld  = b_rs; b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
        else // upper
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            descending = TRUE;
        }
    }

    // Transposing A amounts to exchanging its leading dimension and increment.
    if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( descending )
    {
        // Panels start on the diagonal and shrink by one each iteration.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zccopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zccopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    dcomplex* a_start;
    scomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: perform the whole copy with a single kernel call.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1;
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: start from column-major-friendly strides...
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs; a_inc = a_rs;
        b_ld   = b_cs; b_inc = b_rs;

        // ...fold any transposition of A into its strides...
        if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

        // ...and traverse by rows when B is row-major for spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_start = a + j*a_ld;
        b_start = b + j*b_ld;

        bl1_zccopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
    }
}
void bl1_zccopyv | ( | conj1_t | conj, |
int | m, | ||
dcomplex * | x, | ||
int | incx, | ||
scomplex * | y, | ||
int | incy | ||
) |
References bl1_cconjv(), bl1_is_conj(), bl1_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by bl1_zccopymr(), bl1_zccopymrt(), and bl1_zccopymt().
{
    dcomplex* chi;
    scomplex* psi;
    int       i;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    chi = x;
    psi = y;

    // Demote each double-complex element to single-complex.
    for ( i = 0; i < m; ++i )
    {
        psi->real = chi->real;
        psi->imag = chi->imag;

        chi += incx;
        psi += incy;
    }

    // Conjugate the destination in place afterwards, if requested.
    if ( bl1_is_conj( conj ) )
        bl1_cconjv( m, y, incy );
}
void bl1_zconjm | ( | int | m, |
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zgemm(), and FLA_Conjugate().
{
    double  m1 = bl1_dm1();
    double* a_imag;
    int     a_ld, a_inc;
    int     n_iter;
    int     n_elem;
    int     j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: conjugate everything with a single scal call.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // Matrix case: column-major defaults, flipped to row traversal
        // when A is row-major for spatial locality.
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs;
        a_inc  = a_rs;

        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( a_ld, a_inc );
        }
    }

    // Negate the imaginary parts in place: view each panel as doubles with
    // stride 2*a_inc, offset by one to land on the imaginary components.
    for ( j = 0; j < n_iter; ++j )
    {
        a_imag = ( double* )( a + j*a_ld ) + 1;

        bl1_dscal( n_elem, &m1, a_imag, 2*a_inc );
    }
}
void bl1_zconjmr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs | ||
) |
References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), and FLA_Conjugate_r().
{
    double  m1 = bl1_dm1();
    double* a_imag;
    int     a_ld, a_inc;
    int     n_iter;
    int     n_elem_max;
    int     n_elem;
    int     j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_iter     = n;
    n_elem_max = m;
    a_ld       = a_cs;
    a_inc      = a_rs;

    // If A is stored by rows, traverse by rows instead for locality; this
    // flips the triangle we walk, so toggle uplo to match.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_toggle_uplo( uplo );
    }

    // Negate the imaginary parts in place: view each panel as doubles with
    // stride 2*a_inc, offset by one to land on the imaginary components.
    if ( bl1_is_upper( uplo ) )
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; ++j )
        {
            n_elem = bl1_min( j + 1, n_elem_max );
            a_imag = ( double* )( a + j*a_ld ) + 1;

            bl1_dscal( n_elem, &m1, a_imag, 2*a_inc );
        }
    }
    else // lower
    {
        // Panels start on the diagonal and shrink; stop once they empty out.
        for ( j = 0; j < n_iter; ++j )
        {
            n_elem = bl1_max( 0, n_elem_max - j );
            if ( n_elem <= 0 ) break;

            a_imag = ( double* )( a + j*a_ld + j*a_inc ) + 1;

            bl1_dscal( n_elem, &m1, a_imag, 2*a_inc );
        }
    }
}
void bl1_zconjv | ( | int | m, |
dcomplex * | x, | ||
int | incx | ||
) |
References cblas_zcopy(), and F77_zcopy().
Referenced by bl1_zcopymr(), bl1_zcopymt(), bl1_zcopyv(), and FLA_SA_LU_unb().
{
// NOTE(review): the Doxygen signature table above this body belongs to
// bl1_zconjv, but this body uses y/incy and wraps zcopy — it is bl1_zcopy's
// body, as the "Referenced by" list confirms.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // y := x via the external BLAS.
    cblas_zcopy( m, x, incx, y, incy );
#else
    F77_zcopy( &m, x, &incx, y, &incy );
#endif
}
void bl1_zcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopy(), and bl1_zero_dim2().
Referenced by bl1_zcreate_contigmr(), bl1_zfree_saved_contigmr(), bl1_zfree_saved_contigmsr(), and FLA_Copyr_external().
{
    dcomplex* a_start;
    dcomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_iter     = n;
    n_elem_max = m;
    a_ld = a_cs; a_inc = a_rs;
    b_ld = b_cs; b_inc = b_rs;

    // If both A and B are stored by rows, traverse by rows instead for
    // locality; this flips the triangle we walk, so toggle uplo to match.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zcopy( n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else // lower
    {
        // Panels start on the diagonal and shrink; stop once they empty out.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bl1_max( 0, n_elem_max - j );
            if ( n_elem <= 0 ) break;

            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zcopy( n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().
Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
{
    dcomplex* a_start;
    dcomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       descending;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Select the iteration geometry from B's storage and the triangle chosen
    // by uplo. "descending" records whether each successive panel of the
    // triangle is one element shorter than the previous one.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs; a_inc = a_rs;
        b_ld  = b_cs; b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            descending = TRUE;
        }
        else // upper
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
    }
    else // row storage of B
    {
        a_ld  = a_rs; a_inc = a_cs;
        b_ld  = b_rs; b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
        else // upper
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            descending = TRUE;
        }
    }

    // Transposing A amounts to exchanging its leading dimension and increment.
    if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( descending )
    {
        // Panels start on the diagonal and shrink by one each iteration.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zcopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zcopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zcopy(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zfree_saved_contigm(), bl1_zgemm(), bl1_zhemm(), bl1_zher2k(), bl1_zsymm(), bl1_zsyr2k(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opz_var2().
{
    dcomplex* a_start;
    dcomplex* b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: perform the whole copy with a single kernel call.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        a_ld   = 1; // only ever multiplied by j == 0 when n_iter == 1
        a_inc  = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld   = 1;
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: start from column-major-friendly strides.
        n_iter = n;
        n_elem = m;
        a_ld   = a_cs; a_inc = a_rs;
        b_ld   = b_cs; b_inc = b_rs;

        // Fold any transposition of A into its strides.
        if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

        // Traverse by rows only if B is row-major AND A, after any
        // transposition, is effectively row-major as well.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( a_ld, a_inc );
                bl1_swap_ints( b_ld, b_inc );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_start = a + j*a_ld;
        b_start = b + j*b_ld;

        bl1_zcopy( n_elem, a_start, a_inc, b_start, b_inc );

        // Conjugate the destination panel in place when trans requests it.
        if ( bl1_does_conj( trans ) )
            bl1_zconjv( n_elem, b_start, b_inc );
    }
}
void bl1_zcopyv | ( | conj1_t | conj, |
int | m, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_is_conj(), bl1_zconjv(), bl1_zcopy(), and bl1_zero_dim1().
Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpyv(), bl1_zcopymrt(), bl1_zgemv(), bl1_zger(), bl1_zhemv(), bl1_zher(), bl1_zher2(), bl1_zsymmize(), bl1_zsymv_blas(), bl1_zsyr2_blas(), bl1_zsyr_blas(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsv(), bl1_ztrsvsx(), bl1_zzcopymr(), bl1_zzcopymrt(), bl1_zzcopymt(), FLA_Accum_T_UT_fc_opz_var1(), FLA_Accum_T_UT_fr_opz_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_CAQR2_UT_opz_var1(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var3(), FLA_Fused_UYx_ZVx_opz_var1(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_LQ_UT_opz_var2(), FLA_QR_UT_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), and FLA_Tridiag_UT_shift_U_l_opz().
{
    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    // Plain copy first, then an in-place conjugation of y if requested.
    bl1_zcopy( m, x, incx, y, incy );

    if ( bl1_is_conj( conj ) )
        bl1_zconjv( m, y, incy );
}
void bl1_zdcopymr | ( | uplo1_t | uplo, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_is_row_storage(), bl1_is_upper(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
Referenced by FLA_Copyr_external().
{
    dcomplex* a_start;
    double*   b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Column-major defaults.
    n_iter     = n;
    n_elem_max = m;
    a_ld = a_cs; a_inc = a_rs;
    b_ld = b_cs; b_inc = b_rs;

    // If B is stored by rows, traverse by rows instead for locality; this
    // flips the triangle we walk, so toggle uplo to match.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zdcopyv( BLIS1_NO_CONJUGATE, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else // lower
    {
        // Panels start on the diagonal and shrink; stop once they empty out.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem = bl1_max( 0, n_elem_max - j );
            if ( n_elem <= 0 ) break;

            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zdcopyv( BLIS1_NO_CONJUGATE, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
void bl1_zdcopymrt | ( | uplo1_t | uplo, |
trans1_t | trans, | ||
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zdcopyv(), and bl1_zero_dim2().
Referenced by FLA_Copyrt_external().
{
    dcomplex* a_start;
    double*   b_start;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       descending;
    int       j;
    conj1_t   conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Select the iteration geometry from B's storage and the triangle chosen
    // by uplo. "descending" records whether each successive panel of the
    // triangle is one element shorter than the previous one.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs; a_inc = a_rs;
        b_ld  = b_cs; b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            descending = TRUE;
        }
        else // upper
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
    }
    else // row storage of B
    {
        a_ld  = a_rs; a_inc = a_cs;
        b_ld  = b_rs; b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            descending = FALSE;
        }
        else // upper
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            descending = TRUE;
        }
    }

    // Transposing A amounts to exchanging its leading dimension and increment.
    if ( bl1_does_trans( trans ) ) bl1_swap_ints( a_ld, a_inc );

    // Only the conjugation component of trans is forwarded to the kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( descending )
    {
        // Panels start on the diagonal and shrink by one each iteration.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_start = a + j*a_ld + j*a_inc;
            b_start = b + j*b_ld + j*b_inc;

            bl1_zdcopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
    else
    {
        // Panels grow until they reach n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_start = a + j*a_ld;
            b_start = b + j*b_ld;

            bl1_zdcopyv( conj, n_elem, a_start, a_inc, b_start, b_inc );
        }
    }
}
// bl1_zdcopymt: Copy matrix A (dcomplex), optionally transposed per 'trans',
// into matrix B (double). Only the real parts survive, since the underlying
// kernel bl1_zdcopyv() copies chi->real element-wise.
// m, n:        dimensions of B (A is m-by-n after the optional transpose).
// a_rs, a_cs:  row and column strides of A; likewise b_rs, b_cs for B.
// Called from FLA_Copy_external() and FLA_Copyt_external().
void bl1_zdcopymt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    // (Conjugation is a no-op here in effect, as only real parts are copied.)
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zdcopyv( conj, n_elem, a_begin, inca, b_begin, incb );
    }
}
void bl1_zdcopyv | ( | conj1_t | conj, |
int | m, | ||
dcomplex * | x, | ||
int | incx, | ||
double * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), and dcomplex::real.
Referenced by bl1_zdcopymr(), bl1_zdcopymrt(), and bl1_zdcopymt().
{ dcomplex* chi; double* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { *psi = chi->real; chi += incx; psi += incy; } }
// bl1_zdinvscalm: Scale the m-by-n dcomplex matrix A by 1/alpha, where alpha
// is a real (double) scalar. conj is forwarded to bl1_dinvert2s(), though
// conjugating a real scalar is a no-op.
// a_rs, a_cs: row and column strides of A.
// Called from FLA_Inv_scal_external() and FLA_Inv_scalc_external().
void bl1_zdinvscalm( conj1_t conj, int m, int n, double* alpha, dcomplex* a, int a_rs, int a_cs )
{
    double    alpha_inv;
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return; // dividing by one is a no-op.

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Invert alpha once, up front, so each column/row scaling reuses it.
    bl1_dinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zdscal( n_elem, &alpha_inv, a_begin, inca );
    }
}
void bl1_zdinvscalv | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_zdscal().
{ double alpha_inv; if ( bl1_deq1( alpha ) ) return; alpha_inv = 1.0 / *alpha; bl1_zdscal( n, &alpha_inv, x, incx ); }
// bl1_zdot: Dot product of dcomplex vectors x and y, rho := x^T y or
// rho := conj(x)^T y depending on conj. Dispatches to CBLAS (zdotc/zdotu)
// when available, otherwise to the in-house kernel bl1_zdot_in().
// Used throughout the FLA_* operational variants (Hess, Bidiag, Sylv, ...).
void bl1_zdot( conj1_t conj, int n, dcomplex* x, int incx, dcomplex* y, int incy, dcomplex* rho )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    if ( bl1_is_conj( conj ) )
    {
        // Conjugated dot product: rho := conj(x)^T y.
        cblas_zdotc_sub( n, x, incx, y, incy, rho );
    }
    else // if ( !bl1_is_conj( conj ) )
    {
        // Unconjugated dot product: rho := x^T y.
        cblas_zdotu_sub( n, x, incx, y, incy, rho );
    }
#else
    bl1_zdot_in( conj, n, x, incx, y, incy, rho );
#endif
}
// bl1_zdot2s: Symmetrized pair of dot products with scaling:
//   rho := beta * rho + alpha * dot(x,y) + conj(alpha) * dot(y,x)
// where each dot() applies conj per the conj argument. All complex products
// are expanded into explicit real/imaginary arithmetic.
// Called from FLA_Dot2s/Dot2cs, Eig_gest and Lyap operational variants.
void bl1_zdot2s( conj1_t conj, int n, dcomplex* alpha, dcomplex* x, int incx, dcomplex* y, int incy, dcomplex* beta, dcomplex* rho )
{
    dcomplex dotxy;
    dcomplex dotyx;
    // Local copies allow alpha/beta/rho to alias without affecting results.
    dcomplex alpha_d  = *alpha;
    dcomplex alphac_d = *alpha;
    dcomplex beta_d   = *beta;
    dcomplex rho_d    = *rho;

    // alphac_d := conj(alpha).
    alphac_d.imag *= -1.0;

    bl1_zdot( conj, n, x, incx, y, incy, &dotxy );
    bl1_zdot( conj, n, y, incy, x, incx, &dotyx );

    // rho := beta*rho + alpha*dotxy + conj(alpha)*dotyx, expanded.
    rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag +
                alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag +
                alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag;
    rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real +
                alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real +
                alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real;
}
void bl1_zdot_in | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy, | ||
dcomplex * | rho | ||
) |
References bl1_is_conj(), dcomplex::imag, and dcomplex::real.
Referenced by bl1_zdot().
{ dcomplex* xip; dcomplex* yip; dcomplex xi; dcomplex yi; dcomplex rho_temp; int i; rho_temp.real = 0.0; rho_temp.imag = 0.0; xip = x; yip = y; if ( bl1_is_conj( conj ) ) { for ( i = 0; i < n; ++i ) { xi.real = xip->real; xi.imag = xip->imag; yi.real = yip->real; yi.imag = yip->imag; rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag; rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real; xip += incx; yip += incy; } } else // if ( !bl1_is_conj( conj ) ) { for ( i = 0; i < n; ++i ) { xi.real = xip->real; xi.imag = xip->imag; yi.real = yip->real; yi.imag = yip->imag; rho_temp.real += xi.real * yi.real - xi.imag * yi.imag; rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real; xip += incx; yip += incy; } } rho->real = rho_temp.real; rho->imag = rho_temp.imag; }
void bl1_zdots | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy, | ||
dcomplex * | beta, | ||
dcomplex * | rho | ||
) |
References bl1_zdot(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Chol_l_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var1(), FLA_Chol_u_opz_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opz_var5(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_l_opz_var3(), FLA_Ttmm_u_opz_var2(), and FLA_Ttmm_u_opz_var3().
{ dcomplex rho_orig = *rho; dcomplex dot_prod; bl1_zdot( conj, n, x, incx, y, incy, &dot_prod ); rho->real = beta->real * rho_orig.real - beta->imag * rho_orig.imag + alpha->real * dot_prod.real - alpha->imag * dot_prod.imag; rho->imag = beta->real * rho_orig.imag + beta->imag * rho_orig.real + alpha->real * dot_prod.imag + alpha->imag * dot_prod.real; }
// bl1_zdscal: Scale the n-length dcomplex vector x by the real (double)
// scalar alpha. Thin wrapper that dispatches to CBLAS or the Fortran-77
// BLAS interface depending on build configuration.
// Called from the zdscal/zdinvscal matrix and vector routines.
void bl1_zdscal( int n, double* alpha, dcomplex* x, int incx )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    cblas_zdscal( n, *alpha, x, incx );
#else
    F77_zdscal( &n, alpha, x, &incx );
#endif
}
// bl1_zdscalm: Scale the m-by-n dcomplex matrix A by the real (double)
// scalar alpha. conj is forwarded to bl1_dcopys(); conjugating a real scalar
// is a no-op. a_rs, a_cs: row and column strides of A.
// Called from FLA_Scal_external() and FLA_Scalc_external().
void bl1_zdscalm( conj1_t conj, int m, int n, double* alpha, dcomplex* a, int a_rs, int a_cs )
{
    double    alpha_conj;
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return; // scaling by one is a no-op.

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Apply conj to alpha once, up front.
    bl1_dcopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zdscal( n_elem, &alpha_conj, a_begin, inca );
    }
}
// bl1_zdscalmr: Scale only the triangular part (per uplo) of the m-by-n
// dcomplex matrix A by the real (double) scalar alpha.
// Note uplo is toggled when switching to row-wise access, since accessing
// a lower (upper) triangle by rows mirrors accessing an upper (lower) one
// by columns. Called from bl1_zher2k(), bl1_zherk(), FLA_Scalr_external().
void bl1_zdscalmr( uplo1_t uplo, int m, int n, double* alpha, dcomplex* a, int a_rs, int a_cs )
{
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return; // scaling by one is a no-op.

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements
        // starting at the top of the column.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;

            bl1_zdscal( n_elem, alpha, a_begin, inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j holds n_elem_max - j elements starting
        // at the diagonal; once the count hits zero, no later column has
        // any elements either.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;

            if ( n_elem <= 0 ) break;

            bl1_zdscal( n_elem, alpha, a_begin, inca );
        }
    }
}
void bl1_zdscalv | ( | conj1_t | conj, |
int | n, | ||
double * | alpha, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_zdscal(), and bl1_zero_dim1().
Referenced by bl1_zdapdiagmv(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().
{ // Return early if possible. if ( bl1_zero_dim1( n ) ) return; if ( bl1_deq1( alpha ) ) return; bl1_zdscal( n, alpha, x, incx ); }
// bl1_zfnorm: Compute the Frobenius norm of the m-by-n dcomplex matrix A
// and store it in *norm. The sum of |a_ij|^2 is accumulated in a plain
// double (no overflow/underflow guarding, unlike LAPACK's zlange).
// NOTE: when both m and n are zero this returns without writing *norm.
// Called from FLA_Norm_frob().
void bl1_zfnorm( int m, int n, dcomplex* a, int a_rs, int a_cs, double* norm )
{
    dcomplex* a_ij;
    double    sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0;

    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;

            // |a_ij|^2 = re^2 + im^2.
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }

    // Compute the norm and store the result.
    *norm = sqrt( sum );
}
// bl1_zinvscalm: Scale the m-by-n dcomplex matrix A by 1/alpha (dcomplex),
// conjugating alpha first if conj requests it (via bl1_zinvert2s()).
// a_rs, a_cs: row and column strides of A.
// Called from FLA_Inv_scal_external() and FLA_Inv_scalc_external().
void bl1_zinvscalm( conj1_t conj, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs )
{
    dcomplex  alpha_inv;
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return; // dividing by one is a no-op.

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Invert (and optionally conjugate) alpha once, up front.
    bl1_zinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zscal( n_elem, &alpha_inv, a_begin, inca );
    }
}
void bl1_zinvscalv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_zinvert2s(), and bl1_zscal().
Referenced by bl1_zrandmr(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Chol_l_opz_var2(), FLA_Chol_l_opz_var3(), FLA_Chol_u_opz_var2(), FLA_Chol_u_opz_var3(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Househ2_UT_l_opz(), FLA_Househ3UD_UT_opz(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_nopiv_opz_var5(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), FLA_Trinv_ln_opz_var1(), FLA_Trinv_ln_opz_var2(), FLA_Trinv_ln_opz_var3(), FLA_Trinv_un_opz_var1(), FLA_Trinv_un_opz_var2(), and FLA_Trinv_un_opz_var3().
{ dcomplex alpha_inv; if ( bl1_zeq1( alpha ) ) return; bl1_zinvert2s( conj, alpha, &alpha_inv ); bl1_zscal( n, &alpha_inv, x, incx ); }
References cblas_dznrm2(), and F77_dznrm2().
Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES *norm = cblas_dznrm2( n, x, incx ); #else *norm = F77_dznrm2( &n, x, &incx ); #endif }
References cblas_zscal(), and F77_zscal().
Referenced by bl1_zaxpysmt(), bl1_zaxpysv(), bl1_zinvscalm(), bl1_zinvscalv(), bl1_zscalm(), bl1_zscalmr(), bl1_zscalv(), and FLA_SA_LU_unb().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_zscal( n, alpha, x, incx ); #else F77_zscal( &n, alpha, x, &incx ); #endif }
// bl1_zscalm: Scale the m-by-n dcomplex matrix A by the dcomplex scalar
// alpha, conjugating alpha first if conj requests it (via bl1_zcopys()).
// a_rs, a_cs: row and column strides of A.
// Called from the level-3 wrappers (gemm/hemm/symm/trmmsx/trsmsx), the
// FLA_Lyap_* variants, FLA_Scal_external(), and FLA_Scalc_external().
void bl1_zscalm( conj1_t conj, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs )
{
    dcomplex  alpha_conj;
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return; // scaling by one is a no-op.

    // Handle cases where A is a vector to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Apply conj to alpha once, up front.
    bl1_zcopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;

        bl1_zscal( n_elem, &alpha_conj, a_begin, inca );
    }
}
// bl1_zscalmr: Scale only the triangular part (per uplo) of the m-by-n
// dcomplex matrix A by the dcomplex scalar alpha.
// uplo is toggled when switching to row-wise access, since accessing a
// lower (upper) triangle by rows mirrors accessing an upper (lower) one
// by columns. Called from FLA_Scalr_external().
void bl1_zscalmr( uplo1_t uplo, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs )
{
    dcomplex* a_begin;
    int       lda, inca;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return; // scaling by one is a no-op.

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements
        // starting at the top of the column.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;

            bl1_zscal( n_elem, alpha, a_begin, inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j holds n_elem_max - j elements starting
        // at the diagonal; once the count hits zero, later columns are
        // empty too.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;

            if ( n_elem <= 0 ) break;

            bl1_zscal( n_elem, alpha, a_begin, inca );
        }
    }
}
void bl1_zscalv | ( | conj1_t | conj, |
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | x, | ||
int | incx | ||
) |
References bl1_zero_dim1(), and bl1_zscal().
Referenced by bl1_zapdiagmv(), bl1_zgemv(), bl1_zhemv(), bl1_ztrmvsx(), bl1_ztrsvsx(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_QR_UT_form_Q_opz_var1(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), FLA_Trinv_ln_opz_var4(), FLA_Trinv_lu_opz_var1(), FLA_Trinv_lu_opz_var2(), FLA_Trinv_lu_opz_var3(), FLA_Trinv_lu_opz_var4(), FLA_Trinv_un_opz_var4(), FLA_Trinv_uu_opz_var1(), FLA_Trinv_uu_opz_var2(), FLA_Trinv_uu_opz_var3(), FLA_Trinv_uu_opz_var4(), FLA_Ttmm_l_opz_var1(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_u_opz_var1(), and FLA_Ttmm_u_opz_var2().
{ dcomplex alpha_conj; // Return early if possible. if ( bl1_zero_dim1( n ) ) return; if ( bl1_zeq1( alpha ) ) return; bl1_zcopys( conj, alpha, &alpha_conj ); bl1_zscal( n, &alpha_conj, x, incx ); }
// bl1_zscopymr: Copy the triangular part (per uplo) of the m-by-n dcomplex
// matrix A into the float matrix B. Only real parts survive, since the
// kernel bl1_zscopyv() copies chi->real and demotes to float.
// uplo is toggled when switching to row-wise access (see triangular access
// note in the scalmr routines). Called from FLA_Copyr_external().
void bl1_zscopymr( uplo1_t uplo, int m, int n, dcomplex* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zscopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal and holds
        // n_elem_max - j elements; stop once columns become empty.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_zscopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
}
// bl1_zscopymrt: Copy the triangular part (per uplo) of dcomplex matrix A,
// optionally transposed per trans, into the float matrix B (real parts only,
// via bl1_zscopyv()). The four storage/uplo combinations each get their own
// iteration geometry; n_elem_is_descending records whether successive
// columns/rows shrink (diagonal-anchored) or grow (edge-anchored).
// Called from FLA_Copyrt_external().
void bl1_zscopymrt( uplo1_t uplo, trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter               = bl1_min( m, n );
            n_elem_max           = m;
            lda                  = a_cs;
            inca                 = a_rs;
            ldb                  = b_cs;
            incb                 = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter               = n;
            n_elem_max           = bl1_min( m, n );
            lda                  = a_cs;
            inca                 = a_rs;
            ldb                  = b_cs;
            incb                 = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bl1_min( m, n );
            lda                  = a_rs;
            inca                 = a_cs;
            ldb                  = b_rs;
            incb                 = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter               = bl1_min( m, n );
            n_elem_max           = n;
            lda                  = a_rs;
            inca                 = a_cs;
            ldb                  = b_rs;
            incb                 = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Diagonal-anchored: each successive copy starts on the diagonal
        // and is one element shorter.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_zscopyv( conj, n_elem, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Edge-anchored: copies grow until capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zscopyv( conj, n_elem, a_begin, inca, b_begin, incb );
        }
    }
}
// bl1_zscopymt: Copy matrix A (dcomplex), optionally transposed per trans,
// into matrix B (float). Only real parts survive (bl1_zscopyv() copies
// chi->real, demoted to float). m, n: dimensions of B.
// Called from FLA_Copy_external() and FLA_Copyt_external().
void bl1_zscopymt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zscopyv( conj, n_elem, a_begin, inca, b_begin, incb );
    }
}
void bl1_zscopyv | ( | conj1_t | conj, |
int | m, | ||
dcomplex * | x, | ||
int | incx, | ||
float * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), and dcomplex::real.
Referenced by bl1_zscopymr(), bl1_zscopymrt(), and bl1_zscopymt().
{ dcomplex* chi; float* psi; int i; // Return early if possible. if ( bl1_zero_dim1( m ) ) return; // Initialize pointers. chi = x; psi = y; for ( i = 0; i < m; ++i ) { *psi = chi->real; chi += incx; psi += incy; } }
References cblas_zswap(), and F77_zswap().
Referenced by bl1_zswapmt(), bl1_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().
{ #ifdef BLIS1_ENABLE_CBLAS_INTERFACES cblas_zswap( n, x, incx, y, incy ); #else F77_zswap( &n, x, &incx, y, &incy ); #endif }
// bl1_zswapmt: Swap the contents of dcomplex matrices A and B, where A is
// read as if transposed/conjugated per trans. After each column/row swap,
// if trans includes conjugation, BOTH swapped vectors are conjugated in
// place so each matrix ends up holding the (conjugated) data of the other.
// Called from FLA_Swap_external() and FLA_Swapt_external().
void bl1_zswapmt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zswap( n_elem, a_begin, inca, b_begin, incb );

        // If trans carries a conjugation component, conjugate both vectors
        // in place after swapping.
        if ( bl1_does_conj( trans ) )
            bl1_zconjv( n_elem, a_begin, inca );
        if ( bl1_does_conj( trans ) )
            bl1_zconjv( n_elem, b_begin, incb );
    }
}
void bl1_zswapv | ( | int | n, |
dcomplex * | x, | ||
int | incx, | ||
dcomplex * | y, | ||
int | incy | ||
) |
References bl1_zero_dim1(), and bl1_zswap().
Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opz(), FLA_Sort_bsvd_ext_f_opz(), FLA_Sort_evd_b_opz(), FLA_Sort_evd_f_opz(), FLA_Sort_svd_b_opz(), and FLA_Sort_svd_f_opz().
{ // Return early if possible. if ( bl1_zero_dim1( n ) ) return; bl1_zswap( n, x, incx, y, incy ); }
// bl1_zzcopymr: Copy the triangular part (per uplo) of the m-by-n dcomplex
// matrix A into the dcomplex matrix B, without conjugation.
// uplo is toggled when switching to row-wise access, since accessing a
// lower (upper) triangle by rows mirrors accessing an upper (lower) one
// by columns.
void bl1_zzcopymr( uplo1_t uplo, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Upper triangle: column j holds min(j+1, n_elem_max) elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Lower triangle: column j starts at the diagonal and holds
        // n_elem_max - j elements; stop once columns become empty.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_zcopyv( BLIS1_NO_CONJUGATE, n_elem, a_begin, inca, b_begin, incb );
        }
    }
}
// bl1_zzcopymrt: Copy the triangular part (per uplo) of dcomplex matrix A,
// optionally transposed/conjugated per trans, into dcomplex matrix B.
// The four storage/uplo combinations each get their own iteration geometry;
// n_elem_is_descending records whether successive columns/rows shrink
// (diagonal-anchored) or grow (edge-anchored).
void bl1_zzcopymrt( uplo1_t uplo, trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter               = bl1_min( m, n );
            n_elem_max           = m;
            lda                  = a_cs;
            inca                 = a_rs;
            ldb                  = b_cs;
            incb                 = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter               = n;
            n_elem_max           = bl1_min( m, n );
            lda                  = a_cs;
            inca                 = a_rs;
            ldb                  = b_cs;
            incb                 = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bl1_min( m, n );
            lda                  = a_rs;
            inca                 = a_cs;
            ldb                  = b_rs;
            incb                 = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter               = bl1_min( m, n );
            n_elem_max           = n;
            lda                  = a_rs;
            inca                 = a_cs;
            ldb                  = b_rs;
            incb                 = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Diagonal-anchored: each successive copy starts on the diagonal
        // and is one element shorter.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_zcopyv( conj, n_elem, a_begin, inca, b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Edge-anchored: copies grow until capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zcopyv( conj, n_elem, a_begin, inca, b_begin, incb );
        }
    }
}
// bl1_zzcopymt: Copy dcomplex matrix A, optionally transposed/conjugated
// per trans, into dcomplex matrix B. m, n: dimensions of B.
void bl1_zzcopymt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zcopyv( conj, n_elem, a_begin, inca, b_begin, incb );
    }
}