libflame  12600
Functions
blis_prototypes_level1.h File Reference

(r12600)

Go to the source code of this file.

Functions

void bl1_samax (int n, float *x, int incx, int *index)
void bl1_damax (int n, double *x, int incx, int *index)
void bl1_camax (int n, scomplex *x, int incx, int *index)
void bl1_zamax (int n, dcomplex *x, int incx, int *index)
void bl1_sasum (int n, float *x, int incx, float *norm)
void bl1_dasum (int n, double *x, int incx, double *norm)
void bl1_casum (int n, scomplex *x, int incx, float *norm)
void bl1_zasum (int n, dcomplex *x, int incx, double *norm)
void bl1_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy)
void bl1_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy)
void bl1_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_saxpyv (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
void bl1_daxpyv (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
void bl1_caxpyv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zaxpyv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_saxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_daxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_caxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zaxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy)
void bl1_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy)
void bl1_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
void bl1_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
void bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
void bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
void bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
void bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
void bl1_sconjv (int m, float *x, int incx)
void bl1_dconjv (int m, double *x, int incx)
void bl1_cconjv (int m, scomplex *x, int incx)
void bl1_zconjv (int m, dcomplex *x, int incx)
void bl1_sconjm (int m, int n, float *a, int a_rs, int a_cs)
void bl1_dconjm (int m, int n, double *a, int a_rs, int a_cs)
void bl1_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs)
void bl1_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs)
void bl1_sconjmr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs)
void bl1_dconjmr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs)
void bl1_cconjmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
void bl1_zconjmr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
void bl1_scopy (int m, float *x, int incx, float *y, int incy)
void bl1_dcopy (int m, double *x, int incx, double *y, int incy)
void bl1_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_icopyv (conj1_t conj, int m, int *x, int incx, int *y, int incy)
void bl1_scopyv (conj1_t conj, int m, float *x, int incx, float *y, int incy)
void bl1_dcopyv (conj1_t conj, int m, double *x, int incx, double *y, int incy)
void bl1_ccopyv (conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zcopyv (conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_sdcopyv (conj1_t conj, int m, float *x, int incx, double *y, int incy)
void bl1_dscopyv (conj1_t conj, int m, double *x, int incx, float *y, int incy)
void bl1_sccopyv (conj1_t conj, int m, float *x, int incx, scomplex *y, int incy)
void bl1_cscopyv (conj1_t conj, int m, scomplex *x, int incx, float *y, int incy)
void bl1_szcopyv (conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy)
void bl1_zscopyv (conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy)
void bl1_dccopyv (conj1_t conj, int m, double *x, int incx, scomplex *y, int incy)
void bl1_cdcopyv (conj1_t conj, int m, scomplex *x, int incx, double *y, int incy)
void bl1_dzcopyv (conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy)
void bl1_zdcopyv (conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy)
void bl1_czcopyv (conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
void bl1_zccopyv (conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
void bl1_scopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_ccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_sscopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_sdcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dscopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_sccopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_cscopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_szcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zscopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_ddcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dccopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_cdcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dzcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zdcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_cccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_czcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zccopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zzcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_scopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_ccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_sscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_sdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_sccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_szcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_dscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_ddcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_dzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_cscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_cdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_cccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_czcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_zdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_zccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_icopymt (trans1_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs)
void bl1_scopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_ccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_sscopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_sdcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dscopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_sccopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_cscopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_szcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zscopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_ddcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dccopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_cdcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_dzcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zdcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_cccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_czcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_zccopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zzcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_cdot_in (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
void bl1_zdot_in (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
void bl1_sdot (conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
void bl1_ddot (conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
void bl1_cdot (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
void bl1_zdot (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
void bl1_sdots (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
void bl1_ddots (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
void bl1_cdots (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
void bl1_zdots (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
void bl1_sdot2s (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
void bl1_ddot2s (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
void bl1_cdot2s (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
void bl1_zdot2s (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
void bl1_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm)
void bl1_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm)
void bl1_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm)
void bl1_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm)
void bl1_sinvscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
void bl1_dinvscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
void bl1_csinvscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
void bl1_cinvscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
void bl1_zdinvscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
void bl1_zinvscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
void bl1_sinvscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bl1_dinvscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bl1_csinvscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_cinvscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_zdinvscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_zinvscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_snrm2 (int n, float *x, int incx, float *norm)
void bl1_dnrm2 (int n, double *x, int incx, double *norm)
void bl1_cnrm2 (int n, scomplex *x, int incx, float *norm)
void bl1_znrm2 (int n, dcomplex *x, int incx, double *norm)
void bl1_sscal (int n, float *alpha, float *x, int incx)
void bl1_dscal (int n, double *alpha, double *x, int incx)
void bl1_csscal (int n, float *alpha, scomplex *x, int incx)
void bl1_cscal (int n, scomplex *alpha, scomplex *x, int incx)
void bl1_zdscal (int n, double *alpha, dcomplex *x, int incx)
void bl1_zscal (int n, dcomplex *alpha, dcomplex *x, int incx)
void bl1_sscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
void bl1_dscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
void bl1_csscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
void bl1_cscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
void bl1_zdscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
void bl1_zscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
void bl1_sscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bl1_dscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bl1_csscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_cscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_zdscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_zscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_sscalmr (uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bl1_dscalmr (uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bl1_csscalmr (uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_cscalmr (uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bl1_zdscalmr (uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_zscalmr (uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bl1_sswap (int n, float *x, int incx, float *y, int incy)
void bl1_dswap (int n, double *x, int incx, double *y, int incy)
void bl1_cswap (int n, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_sswapv (int n, float *x, int incx, float *y, int incy)
void bl1_dswapv (int n, double *x, int incx, double *y, int incy)
void bl1_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy)
void bl1_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy)
void bl1_sswapmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dswapmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_cswapmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_zswapmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)

Function Documentation

void bl1_camax ( int  n,
scomplex *  x,
int  incx,
int *  index 
)

References cblas_icamax(), and F77_icamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().

{
    // Stores in *index the value returned by the BLAS "icamax" routine called
    // with n, x and incx. Which binding is used is fixed at compile time by
    // BLIS1_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: n and incx are passed by value and the result is stored
    // unmodified.
    *index = cblas_icamax( n,
                           x, incx );
#else
    // Fortran-77 binding: n and incx are passed by address, and 1 is
    // subtracted from the result.
    // NOTE(review): the "- 1" presumably converts a 1-based Fortran index to
    // the 0-based value the CBLAS path yields -- confirm.
    *index = F77_icamax( &n,
                         x, &incx ) - 1;
#endif
}
void bl1_casum ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)

References cblas_scasum(), and F77_scasum().

Referenced by FLA_Asum_external().

{
    // Stores in *norm the value returned by the BLAS "scasum" routine called
    // with n, x and incx. Which binding is used is fixed at compile time by
    // BLIS1_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: n and incx are passed by value.
    *norm = cblas_scasum( n,
                          x, incx );
#else
    // Fortran-77 binding: n and incx are passed by address.
    *norm = F77_scasum( &n,
                        x, &incx );
#endif
}
void bl1_caxpy ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_caxpy(), and F77_caxpy().

Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpysv(), and bl1_caxpyv().

{
    // Forwards all arguments to the BLAS "caxpy" routine; nothing else is
    // done here. Which binding is used is fixed at compile time by
    // BLIS1_ENABLE_CBLAS_INTERFACES.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: n, incx and incy are passed by value; alpha, x and y
    // are passed as the pointers received.
    cblas_caxpy( n,
                 alpha,
                 x, incx,
                 y, incy );
#else
    // Fortran-77 binding: n, incx and incy are passed by address.
    F77_caxpy( &n,
               alpha,
               x, &incx,
               y, &incy );
#endif
}
void bl1_caxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_caxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Axpyrt_external().

{
    // Calls bl1_caxpyv() once per iteration on a slice of a and the matching
    // slice of b. The slice length changes with the iteration index j, so only
    // part of each matrix is visited; which part depends on uplo and on the
    // storage format of b. The trans argument controls how a is strided and
    // supplies the conj value handed to bl1_caxpyv().
    //
    // Local bookkeeping:
    //   n_iter               number of bl1_caxpyv() calls
    //   n_elem_max           upper bound on the length of any one slice
    //   lda / ldb            pointer step between consecutive slices of a / b
    //   inca / incb          element stride inside a slice of a / b
    //   n_elem_is_descending selects which of the two loops below runs
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: slices step by the column stride and run along the
        // row stride.
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        // Row storage: the roles of the row and column strides are exchanged
        // relative to the branch above, and so is the descending flag.
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    // Only the strides of a are exchanged; ldb and incb are left as set above.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Slice j is one element shorter than slice j-1 and starts j
            // steps along both the lda/ldb and inca/incb directions.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_caxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Slice j starts at the head of the j-th lda/ldb step and covers
            // j + 1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_caxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bl1_caxpymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    // Calls bl1_caxpy() n_iter times, each time on n_elem elements of a and b
    // using the same alpha. When trans requests conjugation, each slice of a
    // is first copied through bl1_ccopyv() into a temporary buffer and the
    // axpy reads from that buffer instead of from a.
    //
    // Local bookkeeping:
    //   n_iter / n_elem  number of bl1_caxpy() calls / elements per call
    //   lda / ldb        pointer step between consecutive slices of a / b
    //   inca / incb      element stride inside a slice of a / b
    //   a_temp           temporary buffer used only on the conjugating path
    scomplex* a_begin;
    scomplex* b_begin;
    scomplex* a_temp;
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // The increment of a is computed with trans, the increment of b with
        // BLIS1_NO_TRANSPOSE.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                // Exchange the iteration count with the slice length and the
                // step with the stride for both operands.
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // One temporary buffer is obtained before the loop, reused for every
        // slice, and released with bl1_cfree() after the loop.
        // NOTE(review): presumably bl1_callocv( n_elem ) provides room for
        // n_elem scomplex values -- confirm against its definition.
        a_temp = bl1_callocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Copy the slice of a into a_temp (unit stride) with conj applied
            // by bl1_ccopyv(), then run the axpy from a_temp into b.
            bl1_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        a_temp,  inca_temp );

            bl1_caxpy( n_elem,
                       alpha,
                       a_temp,  inca_temp, 
                       b_begin, incb );
        }

        bl1_cfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        // No conjugation requested: the axpy reads a directly.
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_caxpy( n_elem,
                       alpha,
                       a_begin, inca, 
                       b_begin, incb );
        }
    
    }
}
void bl1_caxpysmt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

{
    // For each of n_iter slices: scales the slice of b by beta via
    // bl1_cscal(), then calls bl1_caxpy() with the complex product
    // (*alpha0) * (*alpha1) as the scalar and the matching slice of a as the
    // source. When trans requests conjugation, the slice of a is first copied
    // through bl1_ccopyv() into a temporary buffer that the axpy reads from.
    //
    // Local bookkeeping:
    //   alpha_prod       complex product of *alpha0 and *alpha1
    //   n_iter / n_elem  number of slices / elements per slice
    //   lda / ldb        pointer step between consecutive slices of a / b
    //   inca / incb      element stride inside a slice of a / b
    //   a_temp           temporary buffer used only on the conjugating path
    scomplex* a_begin;
    scomplex* b_begin;
    scomplex* a_temp;
    scomplex  alpha_prod;
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Complex multiplication: (a + bi)(c + di) = (ac - bd) + (ad + bc)i.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // The increment of a is computed with trans, the increment of b with
        // BLIS1_NO_TRANSPOSE.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                // Exchange the iteration count with the slice length and the
                // step with the stride for both operands.
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // One temporary buffer is obtained before the loop, reused for every
        // slice, and released with bl1_cfree() after the loop.
        // NOTE(review): presumably bl1_callocv( n_elem ) provides room for
        // n_elem scomplex values -- confirm against its definition.
        a_temp = bl1_callocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Copy the slice of a into a_temp (unit stride) with conj applied
            // by bl1_ccopyv().
            bl1_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        a_temp,  inca_temp );

            // Scale b by beta before accumulating into it.
            bl1_cscal( n_elem,
                       beta,
                       b_begin, incb );

            bl1_caxpy( n_elem,
                       &alpha_prod,
                       a_temp,  inca_temp, 
                       b_begin, incb );
        }
    
        bl1_cfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        // No conjugation requested: the axpy reads a directly.
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Scale b by beta before accumulating into it.
            bl1_cscal( n_elem,
                       beta,
                       b_begin, incb );

            bl1_caxpy( n_elem,
                       &alpha_prod,
                       a_begin, inca, 
                       b_begin, incb );
        }
    }
}
void bl1_caxpysv ( int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  x,
int  incx,
scomplex *  beta,
scomplex *  y,
int  incy 
)

References bl1_caxpy(), bl1_cscal(), bl1_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

{
    // Scales y by beta via bl1_cscal(), then calls bl1_caxpy() on x and y
    // with the complex product (*alpha0) * (*alpha1) as the scalar.
    scomplex alpha_prod;

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;

    // Complex multiplication: (a + bi)(c + di) = (ac - bd) + (ad + bc)i.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // y is scaled first so that the axpy below accumulates into the
    // already-scaled values.
    bl1_cscal( n,
               beta,
               y, incy );

    bl1_caxpy( n,
               &alpha_prod,
               x, incx,
               y, incy );
}
void bl1_caxpyv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_is_conj(), and bl1_zero_dim1().

Referenced by bl1_caxpymrt(), bl1_cgemv(), bl1_chemv(), bl1_ctrmvsx(), bl1_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

{
    // axpy with an optional conjugation of x. bl1_caxpy takes no conj
    // argument, so when conjugation is requested x is first passed through
    // bl1_ccopyv (with conj) into a unit-stride temporary.
    scomplex* x_eff;
    int       incx_eff;
    int       use_conj;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( n ) )
    {
        return;
    }

    use_conj = bl1_is_conj( conj );

    if ( use_conj )
    {
        // Temporary of n elements, released after the axpy below.
        incx_eff = 1;
        x_eff    = bl1_callocv( n );

        bl1_ccopyv( conj, n, x, incx, x_eff, incx_eff );
    }
    else
    {
        // No conjugation: operate on the caller's x directly.
        x_eff    = x;
        incx_eff = incx;
    }

    bl1_caxpy( n, alpha, x_eff, incx_eff, y, incy );

    if ( use_conj )
    {
        bl1_cfree( x_eff );
    }
}
void bl1_cccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_ccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

{
    // Copies the part of A selected by uplo into B, one vector at a time,
    // using bl1_ccopyv with BLIS1_NO_CONJUGATE.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       len;
    int       k;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // Default traversal is column by column: the column stride steps between
    // vectors and the row stride steps between elements of a vector.
    n_vecs  = n;
    len_max = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is row-major, traverse by rows instead for better locality in B.
    // Swapping the roles of rows and columns also flips which triangle is
    // being walked, hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_toggle_uplo( uplo );
        bl1_swap_ints( b_ld, b_inc );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( n_vecs, len_max );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_ccopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Lower case: vector k starts at its diagonal element and holds
        // len_max - k elements. Iteration stops as soon as that count would
        // drop to zero.
        for ( k = 0; k < n_vecs && k < len_max; ++k )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_ccopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_cccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

{
    // Copies the part of A (optionally transposed and/or conjugated, per
    // trans) selected by uplo into B, one vector at a time via bl1_ccopyv.
    // m, n and uplo describe B; A's strides are swapped when transposing.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       shrinking;
    int       min_mn;
    int       lower;
    int       k;
    conj1_t   conj_a;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    min_mn = bl1_min( m, n );
    lower  = bl1_is_lower( uplo );

    // Pick the traversal direction from B's storage, and the vector count,
    // maximum vector length and growth direction from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( lower )
        {
            n_vecs    = min_mn;
            len_max   = m;
            shrinking = TRUE;
        }
        else
        {
            n_vecs    = n;
            len_max   = min_mn;
            shrinking = FALSE;
        }
    }
    else
    {
        // Walk rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( lower )
        {
            n_vecs    = m;
            len_max   = min_mn;
            shrinking = FALSE;
        }
        else
        {
            n_vecs    = min_mn;
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transpose is realized by exchanging A's two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    if ( shrinking )
    {
        // Vector k starts on the diagonal and holds len_max - k elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_ccopyv( conj_a, len_max - k, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_ccopyv( conj_a, bl1_min( k + 1, len_max ), a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_cccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_ccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

{
    // Copies A (optionally transposed and/or conjugated, per trans) into the
    // m x n matrix B, one vector at a time via bl1_ccopyv.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       k;
    conj1_t   conj_a;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: arrange for exactly one call to the vector copy.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs = 1;
        len    = bl1_vector_dim( m, n );
        a_ld   = 1;
        b_ld   = 1;
        a_inc  = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transpose is realized by exchanging A's two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( b_ld, b_inc );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( n_vecs, len );
        }
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_ccopyv( conj_a, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_cconjm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_sm1(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), and FLA_Conjugate().

{
    // Conjugates the m x n matrix A in place by scaling the imaginary parts
    // of its elements with bl1_sm1() (presumably -1.0f; confirm against its
    // definition), one vector at a time via bl1_sscal.
    float   neg_one = bl1_sm1();
    float*  imag_ptr;
    int     a_ld, a_inc;
    int     n_vecs;
    int     len;
    int     k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operand: arrange for exactly one call to the scal routine.
        // The leading dimension is only ever multiplied by k == 0.
        n_vecs = 1;
        len    = bl1_vector_dim( m, n );
        a_ld   = 1;
        a_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;

        // If A is row-major, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( n_vecs, len );
        }
    }

    for ( k = 0; k < n_vecs; ++k )
    {
        // View vector k as an array of floats and step one float past the
        // start, i.e. to the slot after the first element's leading float.
        imag_ptr = ( float* )( a + k*a_ld ) + 1;

        // The stride is counted in floats, so it is twice the complex one.
        bl1_sscal( len, &neg_one, imag_ptr, 2*a_inc );
    }
}
void bl1_cconjmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_sm1(), bl1_sscal(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), and FLA_Conjugate_r().

{
    // Conjugates, in place, the part of A selected by uplo by scaling the
    // imaginary parts of its elements with bl1_sm1() (presumably -1.0f;
    // confirm against its definition), one vector at a time via bl1_sscal.
    float   neg_one = bl1_sm1();
    float*  imag_ptr;
    int     a_ld, a_inc;
    int     n_vecs;
    int     len_max;
    int     len;
    int     k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // Default traversal is column by column.
    n_vecs  = n;
    len_max = m;
    a_ld    = a_cs;
    a_inc   = a_rs;

    // If A is row-major, traverse by rows instead for better locality.
    // Swapping the roles of rows and columns also flips which triangle is
    // being walked, hence the uplo toggle.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_toggle_uplo( uplo );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( n_vecs, len_max );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            len      = bl1_min( k + 1, len_max );
            imag_ptr = ( float* )( a + k*a_ld ) + 1;

            // Stride is counted in floats, hence twice the complex stride.
            bl1_sscal( len, &neg_one, imag_ptr, 2*a_inc );
        }
    }
    else
    {
        // Lower case: vector k starts at its diagonal element and holds
        // len_max - k elements. Iteration stops as soon as that count would
        // drop to zero.
        for ( k = 0; k < n_vecs && k < len_max; ++k )
        {
            len      = len_max - k;
            imag_ptr = ( float* )( a + k*a_ld + k*a_inc ) + 1;

            bl1_sscal( len, &neg_one, imag_ptr, 2*a_inc );
        }
    }
}
void bl1_cconjv ( int  m,
scomplex *  x,
int  incx 
)

References bl1_sm1(), and bl1_sscal().

Referenced by bl1_ccopymt(), bl1_ccopyv(), bl1_cgemv(), bl1_cswapmt(), bl1_zccopyv(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), and FLA_Househ2_UT_r_opc().

{
    // Conjugates x in place: x is viewed as an array of floats, and every
    // second float (starting one float in) is scaled by bl1_sm1()
    // (presumably -1.0f; confirm against its definition) via bl1_sscal.
    float  neg_one = bl1_sm1();

    // Offset of one float and a doubled stride, both counted in floats.
    bl1_sscal( m, &neg_one, ( float* ) x + 1, 2 * incx );
}
void bl1_ccopy ( int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_ccopy(), and F77_ccopy().

Referenced by bl1_ccopymr(), bl1_ccopymt(), bl1_ccopyv(), and FLA_SA_LU_unb().

{
    // Thin dispatch to the underlying BLAS ccopy; which binding is used is
    // fixed at compile time by BLIS1_ENABLE_CBLAS_INTERFACES.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: the integer arguments are passed by address.
    F77_ccopy( &m, x, &incx, y, &incy );
#else
    // CBLAS binding: the integer arguments are passed by value.
    cblas_ccopy( m, x, incx, y, incy );
#endif
}
void bl1_ccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_ccopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_ccreate_contigmr(), bl1_cfree_saved_contigmr(), and FLA_Copyr_external().

{
    // Copies the part of A selected by uplo into B, one vector at a time,
    // using bl1_ccopy.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       len;
    int       k;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // Default traversal is column by column: the column stride steps between
    // vectors and the row stride steps between elements of a vector.
    n_vecs  = n;
    len_max = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // Only when both A and B are row-major is it worth traversing by rows.
    // Swapping the roles of rows and columns also flips which triangle is
    // being walked, hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_toggle_uplo( uplo );
        bl1_swap_ints( b_ld, b_inc );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( n_vecs, len_max );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_ccopy( len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Lower case: vector k starts at its diagonal element and holds
        // len_max - k elements. Iteration stops as soon as that count would
        // drop to zero.
        for ( k = 0; k < n_vecs && k < len_max; ++k )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_ccopy( len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_ccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

{
    // Copies the part of A (optionally transposed and/or conjugated, per
    // trans) selected by uplo into B, one vector at a time via bl1_ccopyv.
    // m, n and uplo describe B; A's strides are swapped when transposing.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       shrinking;
    int       min_mn;
    int       lower;
    int       k;
    conj1_t   conj_a;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    min_mn = bl1_min( m, n );
    lower  = bl1_is_lower( uplo );

    // Pick the traversal direction from B's storage, and the vector count,
    // maximum vector length and growth direction from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( lower )
        {
            n_vecs    = min_mn;
            len_max   = m;
            shrinking = TRUE;
        }
        else
        {
            n_vecs    = n;
            len_max   = min_mn;
            shrinking = FALSE;
        }
    }
    else
    {
        // Walk rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( lower )
        {
            n_vecs    = m;
            len_max   = min_mn;
            shrinking = FALSE;
        }
        else
        {
            n_vecs    = min_mn;
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transpose is realized by exchanging A's two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    if ( shrinking )
    {
        // Vector k starts on the diagonal and holds len_max - k elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_ccopyv( conj_a, len_max - k, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_ccopyv( conj_a, bl1_min( k + 1, len_max ), a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_ccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_cconjv(), bl1_ccopy(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_chemm(), bl1_cher2k(), bl1_csymm(), bl1_csyr2k(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies A (optionally transposed, per trans) into the m x n matrix B
    // one vector at a time via bl1_ccopy; if trans also requests
    // conjugation, each freshly written vector of B is then conjugated in
    // place with bl1_cconjv.
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       conj_b;
    int       k;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: arrange for exactly one call to the copy routine.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs = 1;
        len    = bl1_vector_dim( m, n );
        a_ld   = 1;
        b_ld   = 1;
        a_inc  = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transpose is realized by exchanging A's two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Traverse by rows only if B is row-major AND A, after the possible
        // transposition, is effectively row-major as well.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( b_ld, b_inc );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( n_vecs, len );
        }
    }

    // trans does not change inside the loop, so test it once.
    conj_b = bl1_does_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_ccopy( len, a_vec, a_inc, b_vec, b_inc );

        if ( conj_b )
        {
            bl1_cconjv( len, b_vec, b_inc );
        }
    }
}
void bl1_ccopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
void bl1_cdcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_cdcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copies the part of the single-precision complex matrix A selected by
    // uplo into the double-precision real matrix B, one vector at a time,
    // using bl1_cdcopyv with BLIS1_NO_CONJUGATE.
    scomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       len;
    int       k;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    // Default traversal is column by column: the column stride steps between
    // vectors and the row stride steps between elements of a vector.
    n_vecs  = n;
    len_max = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is row-major, traverse by rows instead for better locality in B.
    // Swapping the roles of rows and columns also flips which triangle is
    // being walked, hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_toggle_uplo( uplo );
        bl1_swap_ints( b_ld, b_inc );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( n_vecs, len_max );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_cdcopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Lower case: vector k starts at its diagonal element and holds
        // len_max - k elements. Iteration stops as soon as that count would
        // drop to zero.
        for ( k = 0; k < n_vecs && k < len_max; ++k )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_cdcopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_cdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies the part of the single-precision complex matrix A (optionally
    // transposed, per trans) selected by uplo into the double-precision real
    // matrix B, one vector at a time via bl1_cdcopyv. m, n and uplo describe
    // B; A's strides are swapped when transposing.
    scomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_max;
    int       shrinking;
    int       min_mn;
    int       lower;
    int       k;
    conj1_t   conj_a;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    min_mn = bl1_min( m, n );
    lower  = bl1_is_lower( uplo );

    // Pick the traversal direction from B's storage, and the vector count,
    // maximum vector length and growth direction from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( lower )
        {
            n_vecs    = min_mn;
            len_max   = m;
            shrinking = TRUE;
        }
        else
        {
            n_vecs    = n;
            len_max   = min_mn;
            shrinking = FALSE;
        }
    }
    else
    {
        // Walk rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( lower )
        {
            n_vecs    = m;
            len_max   = min_mn;
            shrinking = FALSE;
        }
        else
        {
            n_vecs    = min_mn;
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transpose is realized by exchanging A's two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    if ( shrinking )
    {
        // Vector k starts on the diagonal and holds len_max - k elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bl1_cdcopyv( conj_a, len_max - k, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else
    {
        // Vector k contributes its leading min( k+1, len_max ) elements.
        for ( k = 0; k < n_vecs; ++k )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bl1_cdcopyv( conj_a, bl1_min( k + 1, len_max ), a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bl1_cdcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the single-precision complex matrix A (optionally transposed,
    // per trans) into the m x n double-precision real matrix B, one vector
    // at a time via bl1_cdcopyv.
    scomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       k;
    conj1_t   conj_a;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector operands: arrange for exactly one call to the vector copy.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs = 1;
        len    = bl1_vector_dim( m, n );
        a_ld   = 1;
        b_ld   = 1;
        a_inc  = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc  = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal of B.
        n_vecs = n;
        len    = m;
        a_ld   = a_cs;
        a_inc  = a_rs;
        b_ld   = b_cs;
        b_inc  = b_rs;

        // A transpose is realized by exchanging A's two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( b_ld, b_inc );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( n_vecs, len );
        }
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_cdcopyv( conj_a, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_cdcopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
double *  y,
int  incy 
)

References bl1_zero_dim1(), and scomplex::real.

Referenced by bl1_cdcopymr(), bl1_cdcopymrt(), and bl1_cdcopymt().

{
    // Copies the real part of each of the m elements of the complex vector
    // x into the double-precision real vector y. The conj argument is not
    // consulted: only the real component is read, and the imaginary part is
    // dropped.
    int  i;

    // Nothing to copy for an empty vector.
    if ( bl1_zero_dim1( m ) )
    {
        return;
    }

    for ( i = 0; i < m; ++i )
    {
        // float -> double widening happens implicitly on assignment.
        y[ i*incy ] = x[ i*incx ].real;
    }
}
void bl1_cdot ( conj1_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)
void bl1_cdot2s ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)

References bl1_cdot(), scomplex::imag, and scomplex::real.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().

{
    // Updates rho as
    //   rho := beta * rho + alpha * dot( x, y ) + conj( alpha ) * dot( y, x )
    // where both dot products are computed by bl1_cdot with the given conj.
    scomplex xy;               // bl1_cdot result for ( x, y )
    scomplex yx;               // bl1_cdot result for ( y, x )
    scomplex a     = *alpha;
    scomplex a_bar = *alpha;   // becomes conj( alpha ) below
    scomplex b     = *beta;
    scomplex r     = *rho;     // rho's value on entry, saved before the update

    a_bar.imag *= -1.0F;

    bl1_cdot( conj, n, x, incx, y, incy, &xy );
    bl1_cdot( conj, n, y, incy, x, incx, &yx );

    // Real and imaginary parts of the three complex products, summed in the
    // order beta*rho, alpha*xy, conj(alpha)*yx.
    rho->real = b.real     * r.real  - b.imag     * r.imag  +
                a.real     * xy.real - a.imag     * xy.imag +
                a_bar.real * yx.real - a_bar.imag * yx.imag;
    rho->imag = b.real     * r.imag  + b.imag     * r.real  +
                a.real     * xy.imag + a.imag     * xy.real +
                a_bar.real * yx.imag + a_bar.imag * yx.real;
}
void bl1_cdot_in ( conj1_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)

References bl1_is_conj(), scomplex::imag, and scomplex::real.

Referenced by bl1_cdot().

{
    // Hand-written complex dot product: rho := sum over k of x_k * y_k, with
    // x_k conjugated first when conj requests it. For n <= 0 the loops do
    // not run and rho is set to zero.
    scomplex  acc;
    scomplex  xk;
    scomplex  yk;
    int       k;

    // Accumulate in a local so that rho is written only once, at the end.
    acc.real = 0.0F;
    acc.imag = 0.0F;

    if ( bl1_is_conj( conj ) )
    {
        // conj( x_k ) * y_k: the imaginary part of x_k enters negated.
        for ( k = 0; k < n; ++k )
        {
            xk = x[ k*incx ];
            yk = y[ k*incy ];

            acc.real += xk.real * yk.real - -xk.imag * yk.imag;
            acc.imag += xk.real * yk.imag + -xk.imag * yk.real;
        }
    }
    else
    {
        // Plain product x_k * y_k.
        for ( k = 0; k < n; ++k )
        {
            xk = x[ k*incx ];
            yk = y[ k*incy ];

            acc.real += xk.real * yk.real - xk.imag * yk.imag;
            acc.imag += xk.real * yk.imag + xk.imag * yk.real;
        }
    }

    rho->real = acc.real;
    rho->imag = acc.imag;
}
void bl1_cdots ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)
void bl1_cfnorm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  norm 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Norm_frob().

{
    // Computes the Frobenius norm of the m-by-n single-precision complex
    // matrix A (row stride a_rs, column stride a_cs), i.e. the square root
    // of the sum of |a_ij|^2 over all elements, and stores it in *norm.
    scomplex* a_ij;
    float     sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix has norm zero. Store that value before returning so
    // the caller never observes an unset *norm.
    if ( bl1_zero_dim2( m, n ) )
    {
        *norm = 0.0F;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        
        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0F;

    // Accumulate the squared magnitude of every element.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }
    
    // Compute the norm and store the result.
    *norm = ( float ) sqrt( sum );
}
void bl1_cinvscalm ( conj1_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_cinvert2s(), bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scales the m-by-n complex matrix A by the inverse of alpha (with
    // alpha optionally conjugated, as selected by conj), one column or row
    // at a time via bl1_cscal().
    scomplex  alpha_recip;
    scomplex* a_sweep;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_ceq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;

        // For row-major A, traverse row-by-row instead so that each scal
        // call touches contiguous memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector: make sure the underlying scal is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Form the (possibly conjugated) reciprocal of alpha once, up front.
    bl1_cinvert2s( conj, alpha, &alpha_recip );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;

        bl1_cscal( sweep_len,
                   &alpha_recip,
                   a_sweep, a_inc );

        k++;
    }
}
void bl1_cinvscalv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)
void bl1_cnrm2 ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)

References cblas_scnrm2(), and F77_scnrm2().

Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().

{
    // Thin wrapper around the BLAS scnrm2 routine: the result for the
    // n-element complex vector x (stride incx) is stored in *norm. Which
    // BLAS binding is called is decided at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integer arguments are passed by value.
    *norm = cblas_scnrm2( n,
                          x, incx );
#else
    // Fortran-77 binding: integer arguments are passed by address.
    *norm = F77_scnrm2( &n,
                        x, &incx );
#endif
}
void bl1_cscal ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)

References cblas_cscal(), and F77_cscal().

Referenced by bl1_caxpysmt(), bl1_caxpysv(), bl1_cinvscalm(), bl1_cinvscalv(), bl1_cscalm(), bl1_cscalmr(), bl1_cscalv(), and FLA_SA_LU_unb().

{
    // Thin wrapper around the BLAS cscal routine, applied to the n-element
    // complex vector x (stride incx) with the complex scalar alpha. Which
    // BLAS binding is called is decided at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers by value; alpha is forwarded as a pointer.
    cblas_cscal( n,
                 alpha,
                 x, incx );
#else
    // Fortran-77 binding: every argument is passed by address.
    F77_cscal( &n,
               alpha,
               x, &incx );
#endif
}
void bl1_cscalm ( conj1_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales the m-by-n complex matrix A by alpha (optionally conjugated,
    // as selected by conj), one column or row at a time via bl1_cscal().
    scomplex  alpha_used;
    scomplex* a_sweep;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_ceq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;

        // For row-major A, traverse row-by-row instead so that each scal
        // call touches contiguous memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector: make sure the underlying scal is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Take a local copy of alpha with conj applied.
    bl1_ccopys( conj, alpha, &alpha_used );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;

        bl1_cscal( sweep_len,
                   &alpha_used,
                   a_sweep, a_inc );

        k++;
    }
}
void bl1_cscalmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_cscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

{
    // Scales only the triangle of the m-by-n complex matrix A selected by
    // uplo (diagonal included) by the complex scalar alpha.
    scomplex* a_seg;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       len_cap;
    int       seg_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_ceq1( alpha ) ) return;

    // Default traversal: column by column.
    n_sweeps = n;
    len_cap  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;

    // For row-major A, traverse by rows for contiguous access. Swapping the
    // roles of rows and columns also swaps which triangle is being walked.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_sweeps, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: sweep k starts on the diagonal and shrinks by one
        // element each time; stop once nothing is left below the diagonal.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = len_cap - k;
            if ( seg_len <= 0 ) break;

            a_seg = a + k*a_ld + k*a_inc;

            bl1_cscal( seg_len,
                       alpha,
                       a_seg, a_inc );
        }
    }
    else
    {
        // Upper triangle: sweep k starts at the top and covers k+1
        // elements, capped at the sweep length.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = bl1_min( k + 1, len_cap );
            a_seg   = a + k*a_ld;

            bl1_cscal( seg_len,
                       alpha,
                       a_seg, a_inc );
        }
    }
}
void bl1_cscalv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)
void bl1_cscopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_cscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangle selected by uplo (diagonal included) from the
    // complex matrix A into the real matrix B. The element-wise work is
    // delegated to bl1_cscopyv().
    scomplex* a_seg;
    float*    b_seg;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_sweeps;
    int       len_cap;
    int       seg_len;
    int       k;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Default traversal: column by column.
    n_sweeps = n;
    len_cap  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // For row-major B, traverse by rows for contiguous writes. Swapping the
    // roles of rows and columns also swaps which triangle is being walked.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_sweeps, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: sweep k starts on the diagonal and shrinks by one
        // element each time; stop once nothing is left below the diagonal.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = len_cap - k;
            if ( seg_len <= 0 ) break;

            a_seg = a + k*a_ld + k*a_inc;
            b_seg = b + k*b_ld + k*b_inc;

            bl1_cscopyv( BLIS1_NO_CONJUGATE,
                         seg_len,
                         a_seg, a_inc,
                         b_seg, b_inc );
        }
    }
    else
    {
        // Upper triangle: sweep k starts at the top and covers k+1
        // elements, capped at the sweep length.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = bl1_min( k + 1, len_cap );
            a_seg   = a + k*a_ld;
            b_seg   = b + k*b_ld;

            bl1_cscopyv( BLIS1_NO_CONJUGATE,
                         seg_len,
                         a_seg, a_inc,
                         b_seg, b_inc );
        }
    }
}
void bl1_cscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_cscopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies a triangular region from the complex matrix A (accessed
    // through the transposition given by trans) into the real matrix B.
    // The region is walked one column or row of B at a time, and each
    // segment is handed to bl1_cscopyv().
    scomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Each of the four cases fixes: how many segments are visited (n_iter),
    // the longest segment (n_elem_max), the strides used for A and B, and
    // whether segments shrink (start on the diagonal) or grow (start at the
    // first element) from one iteration to the next.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Column-wise, lower: columns start on the diagonal and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Column-wise, upper: columns start at row 0 and grow.
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Row-wise, lower: rows start at column 0 and grow.
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Row-wise, upper: rows start on the diagonal and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segment j begins at diagonal element (j,j) and is j elements
            // shorter than the longest segment.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_cscopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segment j begins at the first element of its column/row and
            // covers j+1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_cscopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_cscopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_cscopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the complex matrix A (accessed through the transposition
    // given by trans) into the m-by-n real matrix B, one column or row of
    // B at a time via bl1_cscopyv().
    scomplex* a_sweep;
    float*    b_sweep;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;
    conj1_t    conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal of B.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;
        b_ld      = b_cs;
        b_inc     = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // For row-major B, traverse row-by-row instead so that the writes
        // are contiguous.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vectors: make sure the underlying copy is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld      = 1; // never contributes, since only sweep 0 is executed.
        b_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;
        b_sweep = b + k*b_ld;

        bl1_cscopyv( conj,
                     sweep_len,
                     a_sweep, a_inc,
                     b_sweep, b_inc );

        ++k;
    }
}
void bl1_cscopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
float *  y,
int  incy 
)

References bl1_zero_dim1(), and scomplex::real.

Referenced by bl1_cscopymr(), bl1_cscopymrt(), and bl1_cscopymt().

{
    // Copies the real component of each of the m elements of the complex
    // vector x (stride incx) into the real vector y (stride incy). The
    // imaginary components are dropped, and conj is not consulted.
    scomplex* src;
    float*    dst;
    int       k;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        *dst = src->real;
    }
}
void bl1_csinvscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scales the m-by-n complex matrix A by the inverse of the real scalar
    // alpha, one column or row at a time via bl1_csscal().
    float     alpha_recip;
    scomplex* a_sweep;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;

        // For row-major A, traverse row-by-row instead so that each scal
        // call touches contiguous memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector: make sure the underlying scal is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Form the reciprocal of alpha once, up front.
    bl1_sinvert2s( conj, alpha, &alpha_recip );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;

        bl1_csscal( sweep_len,
                    &alpha_recip,
                    a_sweep, a_inc );

        k++;
    }
}
void bl1_csinvscalv ( conj1_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References bl1_csscal().

{
    // Scales the n-element complex vector x (stride incx) by 1 / alpha,
    // where alpha is real. The conj argument is not consulted.
    float alpha_recip;

    // Dividing by one changes nothing.
    if ( bl1_seq1( alpha ) ) return;

    alpha_recip = 1.0F / *alpha;

    bl1_csscal( n, &alpha_recip, x, incx );
}
void bl1_csscal ( int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References cblas_csscal(), and F77_csscal().

Referenced by bl1_csinvscalm(), bl1_csinvscalv(), bl1_csscalm(), bl1_csscalmr(), and bl1_csscalv().

{
    // Thin wrapper around the BLAS csscal routine, applied to the
    // n-element complex vector x (stride incx) with the real scalar alpha.
    // Which BLAS binding is called is decided at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integers and the real scalar are passed by value.
    cblas_csscal( n,
                  *alpha,
                  x, incx );
#else
    // Fortran-77 binding: every argument is passed by address.
    F77_csscal( &n,
                alpha,
                x, &incx );
#endif
}
void bl1_csscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales the m-by-n complex matrix A by the real scalar alpha, one
    // column or row at a time via bl1_csscal().
    float     alpha_used;
    scomplex* a_sweep;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;

        // For row-major A, traverse row-by-row instead so that each scal
        // call touches contiguous memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector: make sure the underlying scal is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Take a local copy of alpha via the conj-aware scalar copy.
    bl1_scopys( conj, alpha, &alpha_used );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;

        bl1_csscal( sweep_len,
                    &alpha_used,
                    a_sweep, a_inc );

        k++;
    }
}
void bl1_csscalmr ( uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_csscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Scalr_external().

{
    // Scales only the triangle of the m-by-n complex matrix A selected by
    // uplo (diagonal included) by the real scalar alpha.
    scomplex* a_seg;
    int       a_ld, a_inc;
    int       n_sweeps;
    int       len_cap;
    int       seg_len;
    int       k;

    // Nothing to do for an empty matrix, or when alpha is one.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_seq1( alpha ) ) return;

    // Default traversal: column by column.
    n_sweeps = n;
    len_cap  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;

    // For row-major A, traverse by rows for contiguous access. Swapping the
    // roles of rows and columns also swaps which triangle is being walked.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_sweeps, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: sweep k starts on the diagonal and shrinks by one
        // element each time; stop once nothing is left below the diagonal.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = len_cap - k;
            if ( seg_len <= 0 ) break;

            a_seg = a + k*a_ld + k*a_inc;

            bl1_csscal( seg_len,
                        alpha,
                        a_seg, a_inc );
        }
    }
    else
    {
        // Upper triangle: sweep k starts at the top and covers k+1
        // elements, capped at the sweep length.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = bl1_min( k + 1, len_cap );
            a_seg   = a + k*a_ld;

            bl1_csscal( seg_len,
                        alpha,
                        a_seg, a_inc );
        }
    }
}
void bl1_csscalv ( conj1_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References bl1_csscal(), and bl1_zero_dim1().

Referenced by bl1_csapdiagmv(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().

{
    // Scales the n-element complex vector x (stride incx) by the real
    // scalar alpha. Skipped entirely for an empty vector or when alpha is
    // one. The conj argument is not consulted.
    if ( bl1_zero_dim1( n ) || bl1_seq1( alpha ) ) return;

    bl1_csscal( n, alpha, x, incx );
}
void bl1_cswap ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_cswap(), and F77_cswap().

Referenced by bl1_cswapmt(), bl1_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Thin wrapper around the BLAS cswap routine, applied to the n-element
    // complex vectors x (stride incx) and y (stride incy). Which BLAS
    // binding is called is decided at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: integer arguments are passed by value.
    cblas_cswap( n,
                 x, incx, 
                 y, incy );
#else
    // Fortran-77 binding: integer arguments are passed by address.
    F77_cswap( &n,
               x, &incx, 
               y, &incy );
#endif
}
void bl1_cswapmt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_cconjv(), bl1_cswap(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    // Exchanges the contents of the complex matrices A and B, where A is
    // accessed through the transposition given by trans and B is m-by-n.
    // When trans carries a conjugation, both matrices are conjugated after
    // the exchange.
    scomplex* a_sweep;
    scomplex* b_sweep;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal of B.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;
        b_ld      = b_cs;
        b_inc     = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-by-row traversal only when B is row-major AND A
        // is effectively row-major once the transposition is accounted for,
        // so that both operands are then walked contiguously.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vectors: make sure the underlying swap is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld      = 1; // never contributes, since only sweep 0 is executed.
        b_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < n_sweeps; k++ )
    {
        a_sweep = a + k*a_ld;
        b_sweep = b + k*b_ld;

        bl1_cswap( sweep_len,
                   a_sweep, a_inc,
                   b_sweep, b_inc );

        // Conjugate what was just exchanged, first in A and then in B.
        if ( bl1_does_conj( trans ) )
        {
            bl1_cconjv( sweep_len,
                        a_sweep, a_inc );
            bl1_cconjv( sweep_len,
                        b_sweep, b_inc );
        }
    }
}
void bl1_cswapv ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bl1_cswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opc(), and FLA_Sort_bsvd_ext_f_opc().

{
    // Exchanges the n-element complex vectors x (stride incx) and y
    // (stride incy); an empty vector is left untouched.
    if ( !bl1_zero_dim1( n ) )
    {
        bl1_cswap( n, x, incx, y, incy );
    }
}
void bl1_czcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_czcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangle selected by uplo (diagonal included) from the
    // single-precision complex matrix A into the double-precision complex
    // matrix B. The element-wise work is delegated to bl1_czcopyv().
    scomplex* a_seg;
    dcomplex* b_seg;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_sweeps;
    int       len_cap;
    int       seg_len;
    int       k;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Default traversal: column by column.
    n_sweeps = n;
    len_cap  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // For row-major B, traverse by rows for contiguous writes. Swapping the
    // roles of rows and columns also swaps which triangle is being walked.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_sweeps, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: sweep k starts on the diagonal and shrinks by one
        // element each time; stop once nothing is left below the diagonal.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = len_cap - k;
            if ( seg_len <= 0 ) break;

            a_seg = a + k*a_ld + k*a_inc;
            b_seg = b + k*b_ld + k*b_inc;

            bl1_czcopyv( BLIS1_NO_CONJUGATE,
                         seg_len,
                         a_seg, a_inc,
                         b_seg, b_inc );
        }
    }
    else
    {
        // Upper triangle: sweep k starts at the top and covers k+1
        // elements, capped at the sweep length.
        for ( k = 0; k < n_sweeps; k++ )
        {
            seg_len = bl1_min( k + 1, len_cap );
            a_seg   = a + k*a_ld;
            b_seg   = b + k*b_ld;

            bl1_czcopyv( BLIS1_NO_CONJUGATE,
                         seg_len,
                         a_seg, a_inc,
                         b_seg, b_inc );
        }
    }
}
void bl1_czcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_czcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies a triangular region from the single-precision complex matrix
    // A (accessed through the transposition given by trans) into the
    // double-precision complex matrix B. The region is walked one column or
    // row of B at a time, and each segment is handed to bl1_czcopyv().
    scomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Each of the four cases fixes: how many segments are visited (n_iter),
    // the longest segment (n_elem_max), the strides used for A and B, and
    // whether segments shrink (start on the diagonal) or grow (start at the
    // first element) from one iteration to the next.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Column-wise, lower: columns start on the diagonal and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Column-wise, upper: columns start at row 0 and grow.
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            // Row-wise, lower: rows start at column 0 and grow.
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Row-wise, upper: rows start on the diagonal and shrink.
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segment j begins at diagonal element (j,j) and is j elements
            // shorter than the longest segment.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_czcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segment j begins at the first element of its column/row and
            // covers j+1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_czcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_czcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_czcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the single-precision complex matrix A (accessed through the
    // transposition given by trans) into the m-by-n double-precision
    // complex matrix B, one column or row of B at a time via bl1_czcopyv().
    scomplex* a_sweep;
    dcomplex* b_sweep;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_sweeps;
    int       sweep_len;
    int       k;
    conj1_t    conj;

    // Nothing to do for an empty matrix.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal of B.
        n_sweeps  = n;
        sweep_len = m;
        a_ld      = a_cs;
        a_inc     = a_rs;
        b_ld      = b_cs;
        b_inc     = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // For row-major B, traverse row-by-row instead so that the writes
        // are contiguous.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_sweeps, sweep_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vectors: make sure the underlying copy is invoked only once.
        n_sweeps  = 1;
        sweep_len = bl1_vector_dim( m, n );
        a_ld      = 1; // never contributes, since only sweep 0 is executed.
        a_inc     = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld      = 1; // never contributes, since only sweep 0 is executed.
        b_inc     = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // The conjugation part of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    k = 0;
    while ( k < n_sweeps )
    {
        a_sweep = a + k*a_ld;
        b_sweep = b + k*b_ld;

        bl1_czcopyv( conj,
                     sweep_len,
                     a_sweep, a_inc,
                     b_sweep, b_inc );

        ++k;
    }
}
void bl1_czcopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bl1_is_conj(), bl1_zconjv(), bl1_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bl1_czcopymr(), bl1_czcopymrt(), and bl1_czcopymt().

{
    scomplex* src;
    dcomplex* dst;
    int       k;

    // An empty vector leaves y untouched.
    if ( bl1_zero_dim1( m ) ) return;

    // Walk x and y in lock step, each cursor advancing by its own stride,
    // and assign the real and imaginary components of every element of x
    // to the corresponding components of y.
    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        dst->real = src->real;
        dst->imag = src->imag;
    }

    // If conj requests conjugation, it is applied afterwards to the copied
    // values in y (via bl1_zconjv); x itself is never modified.
    if ( bl1_is_conj( conj ) )
    {
        bl1_zconjv( m, y, incy );
    }
}
void bl1_damax ( int  n,
double *  x,
int  incx,
int *  index 
)

References cblas_idamax(), and F77_idamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().

{
    int pos;

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // The CBLAS result is stored unchanged.
    pos = cblas_idamax( n, x, incx );
#else
    // The Fortran-77 routine takes n and incx by address. Its result is
    // reduced by one before being stored (presumably converting a 1-based
    // index to a 0-based one -- confirm against the BLAS reference).
    pos = F77_idamax( &n, x, &incx ) - 1;
#endif

    *index = pos;
}
void bl1_dasum ( int  n,
double *  x,
int  incx,
double *  norm 
)

References cblas_dasum(), and F77_dasum().

Referenced by FLA_Asum_external().

{
    double total;

    // Delegate to whichever BLAS interface the build selected; the Fortran-77
    // routine takes n and incx by address, the CBLAS one by value.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    total = cblas_dasum( n, x, incx );
#else
    total = F77_dasum( &n, x, &incx );
#endif

    // Hand the result back through the output pointer.
    *norm = total;
}
void bl1_daxpy ( int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_daxpy(), and F77_daxpy().

Referenced by bl1_daxpymt(), bl1_daxpysmt(), bl1_daxpysv(), and bl1_daxpyv().

{
    // Thin wrapper: forward the arguments to the BLAS axpy routine of the
    // interface selected at build time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the scalar and the integer arguments by value.
    cblas_daxpy( n, *alpha, x, incx, y, incy );
#else
    // Fortran-77 takes the scalar and the integer arguments by address.
    F77_daxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
void bl1_daxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_daxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       shrinking;
    int       k;
    conj1_t   conj;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Pick the traversal direction from the storage format of B, then the
    // iteration count and segment-length behaviour from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage of B: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = bl1_min( m, n );
            len_max   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_iter  = n;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage of B: step from row to row.
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = m;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_iter  = bl1_min( m, n );
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conj component of trans is passed on to the vector kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    // One bl1_daxpyv call per iteration. A shrinking segment starts at
    // offset k along both strides and loses one element per iteration; a
    // growing segment starts at offset k along the leading stride only and
    // gains one element per iteration, capped at len_max.
    for ( k = 0; k < num_iter; k++ )
    {
        if ( shrinking )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bl1_daxpyv( conj, len, alpha, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_daxpymt ( trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Vector-shaped operands: issue exactly one axpy over the whole vector
    // and finish. The stride of A is chosen with trans taken into account;
    // the stride of B is chosen as for a non-transposed operand.
    if ( bl1_is_vector( m, n ) )
    {
        len   = bl1_vector_dim( m, n );
        a_inc = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        bl1_daxpy( len, alpha, a, a_inc, b, b_inc );
        return;
    }

    // Matrix case: default to walking column by column.
    num_iter = n;
    len      = m;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        a_ld  = a_rs;
        a_inc = a_cs;
    }
    else
    {
        a_ld  = a_cs;
        a_inc = a_rs;
    }

    // Locality optimization: when B is row-major and A is effectively
    // row-major once the transposition is applied, walk row by row instead.
    if ( bl1_is_row_storage( b_rs, b_cs ) &&
         ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
           ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
    {
        bl1_swap_ints( num_iter, len );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
    }

    // One axpy per column (or row) of the operands.
    for ( k = 0; k < num_iter; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_daxpy( len, alpha, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_daxpysmt ( trans1_t  trans,
int  m,
int  n,
double *  alpha0,
double *  alpha1,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

{
    double*   a_vec;
    double*   b_vec;
    double    scale;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // The two alpha scalars are folded into a single multiplier up front.
    scale = (*alpha0) * (*alpha1);

    // Vector-shaped operands: scale B by beta and apply one axpy over the
    // whole vector, then finish. The stride of A is chosen with trans taken
    // into account; the stride of B as for a non-transposed operand.
    if ( bl1_is_vector( m, n ) )
    {
        len   = bl1_vector_dim( m, n );
        a_inc = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        bl1_dscal( len, beta, b, b_inc );
        bl1_daxpy( len, &scale, a, a_inc, b, b_inc );
        return;
    }

    // Matrix case: default to walking column by column.
    num_iter = n;
    len      = m;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        a_ld  = a_rs;
        a_inc = a_cs;
    }
    else
    {
        a_ld  = a_cs;
        a_inc = a_rs;
    }

    // Locality optimization: when B is row-major and A is effectively
    // row-major once the transposition is applied, walk row by row instead.
    if ( bl1_is_row_storage( b_rs, b_cs ) &&
         ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
           ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
    {
        bl1_swap_ints( num_iter, len );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
    }

    // Per column (or row): first scale the B segment by beta, then add the
    // A segment multiplied by the combined alpha.
    for ( k = 0; k < num_iter; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_dscal( len, beta, b_vec, b_inc );
        bl1_daxpy( len, &scale, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_daxpysv ( int  n,
double *  alpha0,
double *  alpha1,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bl1_daxpy(), bl1_dscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

{
    double scale;

    // An empty vector needs no work.
    if ( bl1_zero_dim1( n ) ) return;

    // Fold the two alpha scalars into one multiplier.
    scale = (*alpha0) * (*alpha1);

    // Scale y by beta first, then add x multiplied by the combined alpha.
    bl1_dscal( n, beta, y, incy );
    bl1_daxpy( n, &scale, x, incx, y, incy );
}
void bl1_daxpyv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)
void bl1_dccopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_dccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    double*   a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len_max;
    int       len;
    int       upper;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Walk in the direction B is stored in. Going by rows instead of by
    // columns exchanges the roles of the two dimensions and of the two
    // strides of each matrix, and flips which triangle is meant by uplo.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        num_iter = m;
        len_max  = n;
        a_ld     = a_rs;
        a_inc    = a_cs;
        b_ld     = b_rs;
        b_inc    = b_cs;
        bl1_toggle_uplo( uplo );
    }
    else
    {
        num_iter = n;
        len_max  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;
    }

    // Decide once (after any toggle above) which triangle is being copied.
    upper = bl1_is_upper( uplo );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( upper )
        {
            // Segment starts at the head of line k and grows by one element
            // per iteration, capped at len_max.
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }
        else // lower
        {
            // Segment starts at offset k along both strides and shrinks by
            // one element per iteration; stop once nothing is left.
            len = bl1_max( 0, len_max - k );
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }

        bl1_dccopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_dccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    double*   a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       shrinking;
    int       k;
    conj1_t   conj;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Pick the traversal direction from the storage format of B, then the
    // iteration count and segment-length behaviour from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage of B: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = bl1_min( m, n );
            len_max   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_iter  = n;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage of B: step from row to row.
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = m;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_iter  = bl1_min( m, n );
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conj component of trans is passed on to the vector kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    // One bl1_dccopyv call per iteration. A shrinking segment starts at
    // offset k along both strides and loses one element per iteration; a
    // growing segment starts at offset k along the leading stride only and
    // gains one element per iteration, capped at len_max.
    for ( k = 0; k < num_iter; k++ )
    {
        if ( shrinking )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bl1_dccopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dccopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_dccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    double*   a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;
    conj1_t   conj;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Vector-shaped operands: issue exactly one copy over the whole vector
    // and finish. The stride of A is chosen with trans taken into account;
    // the stride of B is chosen as for a non-transposed operand.
    if ( bl1_is_vector( m, n ) )
    {
        len   = bl1_vector_dim( m, n );
        a_inc = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
        conj  = bl1_proj_trans1_to_conj( trans );

        bl1_dccopyv( conj, len, a, a_inc, b, b_inc );
        return;
    }

    // Matrix case: default to walking B column by column.
    num_iter = n;
    len      = m;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        a_ld  = a_rs;
        a_inc = a_cs;
    }
    else
    {
        a_ld  = a_cs;
        a_inc = a_rs;
    }

    // Locality optimization: a row-major B is walked row by row instead.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( num_iter, len );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
    }

    // Only the conj component of trans is passed on to the vector kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    // One vector copy per column (or row).
    for ( k = 0; k < num_iter; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_dccopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dccopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bl1_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by bl1_dccopymr(), bl1_dccopymrt(), and bl1_dccopymt().

{
    double*   src;
    scomplex* dst;
    int       k;

    // An empty vector leaves y untouched.
    if ( bl1_zero_dim1( m ) ) return;

    // Walk x and y in lock step, each cursor advancing by its own stride.
    // Every real input value becomes the real component of the matching
    // element of y, whose imaginary component is set to zero. The conj
    // argument is not consulted in this body.
    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        dst->real = *src;
        dst->imag = 0.0F;
    }
}
void bl1_dconjm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty: this routine performs no work on its arguments
    // and leaves the matrix untouched.
}
void bl1_dconjmr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty: this routine performs no work on its arguments
    // and leaves the matrix untouched.
}
void bl1_dconjv ( int  m,
double *  x,
int  incx 
)
void bl1_dcopy ( int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_dcopy(), and F77_dcopy().

Referenced by bl1_dcopymr(), bl1_dcopymt(), bl1_dcopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

{
    // Thin wrapper: forward the arguments to the BLAS copy routine of the
    // interface selected at build time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the integer arguments by value.
    cblas_dcopy( m, x, incx, y, incy );
#else
    // Fortran-77 takes the integer arguments by address.
    F77_dcopy( &m, x, &incx, y, &incy );
#endif
}
void bl1_dcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_dcreate_contigmr(), bl1_dfree_saved_contigmr(), and FLA_Copyr_external().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len_max;
    int       len;
    int       upper;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Locality optimization: only when B and A are both row-major is the
    // walk done by rows. That exchanges the roles of the two dimensions and
    // of the two strides of each matrix, and flips which triangle is meant
    // by uplo. Otherwise the walk is column by column.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        num_iter = m;
        len_max  = n;
        a_ld     = a_rs;
        a_inc    = a_cs;
        b_ld     = b_rs;
        b_inc    = b_cs;
        bl1_toggle_uplo( uplo );
    }
    else
    {
        num_iter = n;
        len_max  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;
    }

    // Decide once (after any toggle above) which triangle is being copied.
    upper = bl1_is_upper( uplo );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( upper )
        {
            // Segment starts at the head of line k and grows by one element
            // per iteration, capped at len_max.
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }
        else // lower
        {
            // Segment starts at offset k along both strides and shrinks by
            // one element per iteration; stop once nothing is left.
            len = bl1_max( 0, len_max - k );
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }

        bl1_dcopy( len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       shrinking;
    int       k;
    conj1_t   conj;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Pick the traversal direction from the storage format of B, then the
    // iteration count and segment-length behaviour from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage of B: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = bl1_min( m, n );
            len_max   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_iter  = n;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage of B: step from row to row.
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = m;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_iter  = bl1_min( m, n );
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conj component of trans is passed on to the vector kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    // One bl1_dcopyv call per iteration. A shrinking segment starts at
    // offset k along both strides and loses one element per iteration; a
    // growing segment starts at offset k along the leading stride only and
    // gains one element per iteration, capped at len_max.
    for ( k = 0; k < num_iter; k++ )
    {
        if ( shrinking )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bl1_dcopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmsr(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opd_var2().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Vector-shaped operands: issue exactly one copy over the whole vector
    // and finish. The stride of A is chosen with trans taken into account;
    // the stride of B is chosen as for a non-transposed operand.
    if ( bl1_is_vector( m, n ) )
    {
        len   = bl1_vector_dim( m, n );
        a_inc = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_inc = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );

        bl1_dcopy( len, a, a_inc, b, b_inc );
        return;
    }

    // Matrix case: default to walking column by column.
    num_iter = n;
    len      = m;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        a_ld  = a_rs;
        a_inc = a_cs;
    }
    else
    {
        a_ld  = a_cs;
        a_inc = a_rs;
    }

    // Locality optimization: when B is row-major and A is effectively
    // row-major once the transposition is applied, walk row by row instead.
    if ( bl1_is_row_storage( b_rs, b_cs ) &&
         ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
           ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
    {
        bl1_swap_ints( num_iter, len );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
    }

    // One copy per column (or row) of the operands.
    for ( k = 0; k < num_iter; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_dcopy( len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dcopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)
void bl1_ddcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len_max;
    int       len;
    int       upper;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Walk in the direction B is stored in. Going by rows instead of by
    // columns exchanges the roles of the two dimensions and of the two
    // strides of each matrix, and flips which triangle is meant by uplo.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        num_iter = m;
        len_max  = n;
        a_ld     = a_rs;
        a_inc    = a_cs;
        b_ld     = b_rs;
        b_inc    = b_cs;
        bl1_toggle_uplo( uplo );
    }
    else
    {
        num_iter = n;
        len_max  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;
    }

    // Decide once (after any toggle above) which triangle is being copied.
    upper = bl1_is_upper( uplo );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( upper )
        {
            // Segment starts at the head of line k and grows by one element
            // per iteration, capped at len_max.
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }
        else // lower
        {
            // Segment starts at offset k along both strides and shrinks by
            // one element per iteration; stop once nothing is left.
            len = bl1_max( 0, len_max - k );
            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }

        bl1_dcopyv( BLIS1_NO_CONJUGATE, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_ddcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

{
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       shrinking;
    int       k;
    conj1_t   conj;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Pick the traversal direction from the storage format of B, then the
    // iteration count and segment-length behaviour from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage of B: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = bl1_min( m, n );
            len_max   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_iter  = n;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage of B: step from row to row.
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter  = m;
            len_max   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            num_iter  = bl1_min( m, n );
            len_max   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conj component of trans is passed on to the vector kernel.
    conj = bl1_proj_trans1_to_conj( trans );

    // One bl1_dcopyv call per iteration. A shrinking segment starts at
    // offset k along both strides and loses one element per iteration; a
    // growing segment starts at offset k along the leading stride only and
    // gains one element per iteration, capped at len_max.
    for ( k = 0; k < num_iter; k++ )
    {
        if ( shrinking )
        {
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bl1_dcopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_ddcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

{
    // Matrix copy with optional transposition of the source: B is walked as
    // an m-by-n matrix with strides (b_rs, b_cs), A with strides (a_rs, a_cs)
    // that are exchanged when trans requests a transpose. The work is done by
    // one bl1_dcopyv() call per column of B, or per row when B is row-major.
    double*   src;
    double*   dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;
    conj1_t   conjx;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start out walking B column by column.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed source is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // When B is row-major, walk by rows instead so that the inner copy
        // touches B with its smaller stride.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single bl1_dcopyv() call covers everything, so
        // the leading dimensions are only ever multiplied by zero.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conjx = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        src = a + k*a_ld;
        dst = b + k*b_ld;

        bl1_dcopyv( conjx, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_ddot ( conj1_t  conj,
int  n,
double *  x,
int  incx,
double *  y,
int  incy,
double *  rho 
)
void bl1_ddot2s ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)
void bl1_ddots ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)
void bl1_dfnorm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  norm 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Norm_frob().

{
    // Computes the square root of the sum of squares of every element of the
    // m-by-n matrix A (row stride a_rs, column stride a_cs) and stores it in
    // *norm. When either dimension is zero the sum is empty, so *norm is set
    // to 0.0 rather than being left unwritten.
    double*   a_ij;
    double    sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix has norm zero; always give the caller a defined result.
    if ( bl1_zero_dim2( m, n ) )
    {
        *norm = 0.0;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0;

    // Plain sum of squares; no scaling is applied to the elements.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += (*a_ij) * (*a_ij);
        }
    }

    // Compute the norm and store the result.
    *norm = sqrt( sum );
}
void bl1_dinvscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bl1_dinvert2s(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Inverse-scales the m-by-n matrix A in place: the value produced by
    // bl1_dinvert2s( conj, alpha, ... ) is applied with one bl1_dscal() call
    // per column of A (or per row when A is row-major).
    double    scale;
    double*   vec;
    int       a_ld, a_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when the bl1_deq1() test on
    // alpha succeeds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start out walking A column by column.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;

        // Row-major A is walked by rows instead, for better locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector operand: make sure the underlying scal runs exactly once.
        // The leading dimension is only ever multiplied by zero.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Obtain the scaling factor once, up front.
    bl1_dinvert2s( conj, alpha, &scale );

    for ( k = 0; k < n_vecs; k++ )
    {
        vec = a + k*a_ld;

        bl1_dscal( vec_len, &scale, vec, a_inc );
    }
}
void bl1_dinvscalv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)
void bl1_dnrm2 ( int  n,
double *  x,
int  incx,
double *  norm 
)

References cblas_dnrm2(), and F77_dnrm2().

Referenced by FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_l_opd(), FLA_Househ3UD_UT_opd(), and FLA_Nrm2_external().

{
    // Thin wrapper: forwards (n, x, incx) to the dnrm2 routine of whichever
    // BLAS interface was selected at build time and stores the returned value
    // in *norm.
    double    result;

#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: scalar arguments are passed by address.
    result = F77_dnrm2( &n, x, &incx );
#else
    // CBLAS interface: scalar arguments are passed by value.
    result = cblas_dnrm2( n, x, incx );
#endif

    *norm = result;
}
void bl1_dscal ( int  n,
double *  alpha,
double *  x,
int  incx 
)

References cblas_dscal(), and F77_dscal().

Referenced by bl1_daxpysmt(), bl1_daxpysv(), bl1_dinvscalm(), bl1_dinvscalv(), bl1_dscalm(), bl1_dscalmr(), bl1_dscalv(), bl1_zconjm(), bl1_zconjmr(), bl1_zconjv(), and FLA_SA_LU_unb().

{
    // Thin wrapper: forwards (n, alpha, x, incx) to the dscal routine of
    // whichever BLAS interface was selected at build time.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: every scalar goes by address, so alpha is handed
    // over as the pointer we received.
    F77_dscal( &n, alpha, x, &incx );
#else
    // CBLAS interface: scalars go by value, so alpha is dereferenced here.
    cblas_dscal( n, *alpha, x, incx );
#endif
}
void bl1_dscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales the m-by-n matrix A in place: the value produced by
    // bl1_dcopys( conj, alpha, ... ) is applied with one bl1_dscal() call per
    // column of A (or per row when A is row-major).
    double    scale;
    double*   vec;
    int       a_ld, a_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when the bl1_deq1() test on
    // alpha succeeds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start out walking A column by column.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;

        // Row-major A is walked by rows instead, for better locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector operand: make sure the underlying scal runs exactly once.
        // The leading dimension is only ever multiplied by zero.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Obtain the scaling factor once, up front.
    bl1_dcopys( conj, alpha, &scale );

    for ( k = 0; k < n_vecs; k++ )
    {
        vec = a + k*a_ld;

        bl1_dscal( vec_len, &scale, vec, a_inc );
    }
}
void bl1_dscalmr ( uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

{
    // Scales, in place, the triangular/trapezoidal part of the m-by-n matrix
    // A selected by uplo, using one bl1_dscal() call per column (or per row
    // when A is row-major) restricted to the stored part of that vector.
    double*   vec;
    int       a_ld, a_inc;
    int       n_vecs;
    int       max_len;
    int       len;
    int       k;

    // Nothing to do for an empty matrix, or when the bl1_deq1() test on
    // alpha succeeds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        // Row-major A: walk rows. Seen row by row, the stored triangle is
        // the opposite one, hence the uplo toggle.
        n_vecs  = m;
        max_len = n;
        a_ld    = a_rs;
        a_inc   = a_cs;
        bl1_toggle_uplo( uplo );
    }
    else
    {
        // Column-major (default): walk columns.
        n_vecs  = n;
        max_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k starts at the edge of the matrix and holds k+1 stored
        // elements, capped at the full vector length.
        for ( k = 0; k < n_vecs; k++ )
        {
            len = bl1_min( k + 1, max_len );
            vec = a + k*a_ld;

            bl1_dscal( len, alpha, vec, a_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and holds max_len-k elements; stop
        // as soon as that count would no longer be positive.
        for ( k = 0; k < n_vecs && k < max_len; k++ )
        {
            len = max_len - k;
            vec = a + k*a_ld + k*a_inc;

            bl1_dscal( len, alpha, vec, a_inc );
        }
    }
}
void bl1_dscalv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)
void bl1_dscopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_dscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangular/trapezoidal part (selected by uplo) of the
    // double-precision m-by-n matrix A into the single-precision matrix B,
    // using one bl1_dscopyv() call per column of B (or per row when B is
    // row-major) restricted to the stored part of that vector.
    double*   src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       max_len;
    int       len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        // Row-major B: walk rows. Seen row by row, the stored triangle is
        // the opposite one, hence the uplo toggle.
        n_vecs  = m;
        max_len = n;
        a_ld    = a_rs;
        a_inc   = a_cs;
        b_ld    = b_rs;
        b_inc   = b_cs;
        bl1_toggle_uplo( uplo );
    }
    else
    {
        // Column-major (default): walk columns.
        n_vecs  = n;
        max_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector k starts at the edge of the matrix and holds k+1 stored
        // elements, capped at the full vector length.
        for ( k = 0; k < n_vecs; k++ )
        {
            len = bl1_min( k + 1, max_len );
            src = a + k*a_ld;
            dst = b + k*b_ld;

            bl1_dscopyv( BLIS1_NO_CONJUGATE, len, src, a_inc, dst, b_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and holds max_len-k elements; stop
        // as soon as that count would no longer be positive.
        for ( k = 0; k < n_vecs && k < max_len; k++ )
        {
            len = max_len - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;

            bl1_dscopyv( BLIS1_NO_CONJUGATE, len, src, a_inc, dst, b_inc );
        }
    }
}
void bl1_dscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_dscopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies the triangular/trapezoidal part (selected by uplo) of the
    // double-precision matrix A, optionally transposed per trans, into the
    // single-precision m-by-n matrix B. One bl1_dscopyv() call is issued per
    // column of B when B is column-major, otherwise per row of B.
    double*   src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       max_len;
    int       shrinking;
    int       k;
    conj1_t   conjx;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Pick traversal parameters from the storage of B and from uplo.
    // "shrinking" records whether successive vectors start on the diagonal
    // and get shorter, or start at the matrix edge and get longer.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns of B.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_vecs    = bl1_min( m, n );
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            max_len   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        // Walk rows of B.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_vecs    = m;
            max_len   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = bl1_min( m, n );
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed source is read with its two strides exchanged.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conjx = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal element (k,k); one fewer element each
            // time.
            len = max_len - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the matrix edge; one more element each time until the
            // cap is reached.
            len = bl1_min( k + 1, max_len );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bl1_dscopyv( conjx, len, src, a_inc, dst, b_inc );
    }
}
void bl1_dscopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_dscopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Mixed-precision matrix copy with optional transposition of the source:
    // the double-precision matrix A is copied into the single-precision
    // m-by-n matrix B. A's strides are exchanged when trans requests a
    // transpose. The work is done by one bl1_dscopyv() call per column of B,
    // or per row when B is row-major.
    double*   src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;
    conj1_t   conjx;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start out walking B column by column.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed source is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // When B is row-major, walk by rows instead so that the inner copy
        // touches B with its smaller stride.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single bl1_dscopyv() call covers everything, so
        // the leading dimensions are only ever multiplied by zero.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conjx = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        src = a + k*a_ld;
        dst = b + k*b_ld;

        bl1_dscopyv( conjx, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_dscopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
float *  y,
int  incy 
)

References bl1_zero_dim1().

Referenced by bl1_dscopymr(), bl1_dscopymrt(), and bl1_dscopymt().

{
    // Copies m elements from the double-precision vector x (stride incx)
    // into the single-precision vector y (stride incy). Each value is
    // narrowed to float by the assignment. The conj argument is not used
    // by this routine.
    double*   src;
    float*    dst;
    int       remaining;

    // Nothing to do for a zero-length vector.
    if ( bl1_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    // Step both cursors by their own stride after every element.
    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
void bl1_dswap ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_dswap(), and F77_dswap().

Referenced by bl1_dswapmt(), bl1_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Thin wrapper: forwards (n, x, incx, y, incy) to the dswap routine of
    // whichever BLAS interface was selected at build time.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: scalar arguments are passed by address.
    F77_dswap( &n, x, &incx, y, &incy );
#else
    // CBLAS interface: scalar arguments are passed by value.
    cblas_dswap( n, x, incx, y, incy );
#endif
}
void bl1_dswapmt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_dswap(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    // Exchanges the contents of the matrices A and B, where A is optionally
    // transposed per trans and B is walked as an m-by-n matrix. The work is
    // done by one bl1_dswap() call per column, or per row when both operands
    // are effectively row-major.
    double*   a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start out walking column by column.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is accessed with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Walk by rows only when B is row-major AND A is effectively
        // row-major once the requested transposition is accounted for.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( n_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single bl1_dswap() call covers everything, so
        // the leading dimensions are only ever multiplied by zero.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans,              m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_dswap( vec_len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bl1_dswapv ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)
void bl1_dzcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangular/trapezoidal part (selected by uplo) of the real
    // double-precision m-by-n matrix A into the double-complex matrix B,
    // using one bl1_dzcopyv() call per column of B (or per row when B is
    // row-major) restricted to the stored part of that vector.
    //
    // b is a pointer to the first element of B: it is offset with pointer
    // arithmetic below and handed to bl1_dzcopyv() as a dcomplex*.
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Seen row by row, the stored triangle is the opposite one, hence the
    // uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }


    if ( bl1_is_upper( uplo ) )
    {
        // Vector j starts at the edge of the matrix and holds j+1 stored
        // elements, capped at the full vector length.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_dzcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Vector j starts on the diagonal and holds n_elem_max-j elements;
        // stop as soon as nothing is left to copy.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_dzcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
}
void bl1_dzcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies the triangular/trapezoidal part (selected by uplo) of the real
    // double-precision matrix A, optionally transposed per trans, into the
    // double-complex m-by-n matrix B. One bl1_dzcopyv() call is issued per
    // column of B when B is column-major, otherwise per row of B.
    //
    // b is a pointer to the first element of B: it is offset with pointer
    // arithmetic below and handed to bl1_dzcopyv() as a dcomplex*.
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // n_elem_is_descending records whether successive vectors start on the
    // diagonal and get shorter, or start at the matrix edge and get longer.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the diagonal element (j,j).
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_dzcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the matrix edge; length is capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_dzcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_dzcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Real-to-complex matrix copy with optional transposition of the source:
    // the real double-precision matrix A is copied into the double-complex
    // m-by-n matrix B. A's strides are exchanged when trans requests a
    // transpose. The work is done by one bl1_dzcopyv() call per column of B,
    // or per row when B is row-major.
    //
    // b is a pointer to the first element of B: it is offset with pointer
    // arithmetic below and handed to bl1_dzcopyv() as a dcomplex*.
    double*   a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_dzcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_dzcopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bl1_zero_dim1(), dcomplex::imag, and dcomplex::real.

Referenced by bl1_dzcopymr(), bl1_dzcopymrt(), and bl1_dzcopymt().

{
    // Copies m elements from the real double-precision vector x (stride incx)
    // into the double-complex vector y (stride incy): each destination gets
    // the source value as its real part and 0.0 as its imaginary part.
    // The conj argument is not used by this routine.
    //
    // y is a pointer to the first destination element; it is stepped by incy
    // after every element.
    double*   chi;
    dcomplex* psi;
    int       i;

    // Return early if possible.
    if ( bl1_zero_dim1( m ) ) return;

    // Initialize pointers.
    chi = x;
    psi = y;

    for ( i = 0; i < m; ++i )
    {
        psi->real = *chi;
        psi->imag = 0.0;

        chi += incx;
        psi += incy;
    }
}
void bl1_icopymt ( trans1_t  trans,
int  m,
int  n,
int *  a,
int  a_rs,
int  a_cs,
int *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_icopyv(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Integer matrix copy with optional transposition of the source: the
    // matrix A is copied into the m-by-n matrix B. A's strides are exchanged
    // when trans requests a transpose. The work is done by one bl1_icopyv()
    // call per column, or per row when both operands are effectively
    // row-major.
    int*      a_begin;
    int*      b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    // Extract conj component from trans parameter. It does not change from
    // one iteration to the next, so compute it once rather than per call.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_icopyv( conj,
                    n_elem,
                    a_begin, inca, 
                    b_begin, incb );
    }
}
void bl1_icopyv ( conj1_t  conj,
int  m,
int *  x,
int  incx,
int *  y,
int  incy 
)

References bl1_zero_dim1().

Referenced by bl1_icopymt().

{
    // Copies m elements from the integer vector x (stride incx) into the
    // integer vector y (stride incy). The conj argument is not used by this
    // routine.
    int*      src;
    int*      dst;
    int       remaining;

    // Nothing to do for a zero-length vector.
    if ( bl1_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    // Step both cursors by their own stride after every element.
    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
void bl1_samax ( int  n,
float *  x,
int  incx,
int *  index 
)

References cblas_isamax(), and F77_isamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), and FLA_SA_LU_unb().

{
    // Thin wrapper: forwards (n, x, incx) to the isamax routine of whichever
    // BLAS interface was selected at build time and stores the resulting
    // position in *index.
    int       pos;

#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: scalars go by address, and one is subtracted
    // from the returned position before it is stored.
    pos = F77_isamax( &n, x, &incx ) - 1;
#else
    // CBLAS interface: scalars go by value and the result is stored as-is.
    pos = cblas_isamax( n, x, incx );
#endif

    *index = pos;
}
// Stores in *norm the value returned by the BLAS sasum routine for the
// n-element single-precision vector x with stride incx.
void bl1_sasum ( int  n,
float *  x,
int  incx,
float *  norm 
)

References cblas_sasum(), and F77_sasum().

Referenced by FLA_Asum_external().

{
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 path: scalar arguments are passed by address.
    *norm = F77_sasum( &n, x, &incx );
#else
    // CBLAS path: scalar arguments are passed by value.
    *norm = cblas_sasum( n, x, incx );
#endif
}
// Forwards a single-precision axpy request (n, *alpha, x/incx, y/incy) to
// whichever BLAS interface the build selected.
void bl1_saxpy ( int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_saxpy(), and F77_saxpy().

Referenced by bl1_saxpymt(), bl1_saxpysmt(), bl1_saxpysv(), and bl1_saxpyv().

{
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 path: every scalar, including alpha, is passed by address.
    F77_saxpy( &n, alpha, x, &incx, y, &incy );
#else
    // CBLAS path: alpha is dereferenced and scalars are passed by value.
    cblas_saxpy( n, *alpha, x, incx, y, incy );
#endif
}
// Applies bl1_saxpyv() to a sequence of columns (or rows) of A and B whose
// lengths change by one per iteration, so that only the part of the m-by-n
// operands selected by uplo is touched.
//   uplo        - selects the lower or upper part.
//   trans       - if it requests a transpose, A's two strides are exchanged;
//                 its conjugation component is forwarded to bl1_saxpyv().
//   alpha       - scalar forwarded unchanged to bl1_saxpyv().
//   a, a_rs/cs  - matrix A with its row and column strides.
//   b, b_rs/cs  - matrix B with its row and column strides; B's storage
//                 format decides whether traversal is by columns or by rows.
void bl1_saxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_saxpyv(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

{
    float*    a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       len_shrinks;
    int       k;
    conj1_t   conj_a;

    // Empty operands: nothing to update.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // B is column-stored: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = bl1_min( m, n );
            len_max     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_iter    = n;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else
    {
        // B is row-stored: step from row to row.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = m;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_iter    = bl1_min( m, n );
            len_max     = n;
            len_shrinks = TRUE;
        }
    }

    // A transpose of A is realized by exchanging its two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is passed to the vector kernel.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( len_shrinks )
        {
            // Start on the diagonal element (k,k); one element fewer each pass.
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the head of vector k; length grows until it hits len_max.
            len   = bl1_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bl1_saxpyv( conj_a, len, alpha, a_vec, a_inc, b_vec, b_inc );
    }
}
// Performs an axpy over the m-by-n operands A and B by calling bl1_saxpy()
// once per column (or once per row when both operands are effectively
// row-stored). Vector-shaped operands are handled with a single call.
//   trans       - if it requests a transpose, A's two strides are exchanged.
//   alpha       - scalar forwarded unchanged to bl1_saxpy().
//   a, a_rs/cs  - matrix A with its row and column strides.
//   b, b_rs/cs  - matrix B with its row and column strides.
void bl1_saxpymt ( trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    float*    a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Empty operands: nothing to update.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        num_iter = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transpose of A is realized by exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // When B is row-stored and A, after any transposition, is effectively
        // row-stored as well, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_iter, len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector-shaped operands: one axpy call covers everything.
        num_iter = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1; // only ever multiplied by k == 0
        a_inc    = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1; // only ever multiplied by k == 0
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_iter; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bl1_saxpy( len, alpha, a_vec, a_inc, b_vec, b_inc );
    }
}
// For each column (or row) of the m-by-n operands, first scales B's vector by
// beta via bl1_sscal() and then calls bl1_saxpy() with the coefficient
// (*alpha0) * (*alpha1) on the matching vector of A.
//   trans          - if it requests a transpose, A's two strides are exchanged.
//   alpha0, alpha1 - scalars whose product is the axpy coefficient.
//   a, a_rs/cs     - matrix A with its row and column strides.
//   beta           - scalar forwarded unchanged to bl1_sscal().
//   b, b_rs/cs     - matrix B with its row and column strides.
void bl1_saxpysmt ( trans1_t  trans,
int  m,
int  n,
float *  alpha0,
float *  alpha1,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

{
    float*    a_vec;
    float*    b_vec;
    float     coeff;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Empty operands: nothing to update.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Combine the two alpha scalars once, before any data is modified.
    coeff = (*alpha0) * (*alpha1);

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        num_iter = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transpose of A is realized by exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // When B is row-stored and A, after any transposition, is effectively
        // row-stored as well, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_iter, len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector-shaped operands: one scal/axpy pair covers everything.
        num_iter = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1; // only ever multiplied by k == 0
        a_inc    = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1; // only ever multiplied by k == 0
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_iter; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        // Scale B's vector first, then accumulate the scaled A vector into it.
        bl1_sscal( len, beta, b_vec, b_inc );
        bl1_saxpy( len, &coeff, a_vec, a_inc, b_vec, b_inc );
    }
}
// Scales the n-element vector y by beta via bl1_sscal(), then calls
// bl1_saxpy() on x and y with the coefficient (*alpha0) * (*alpha1).
void bl1_saxpysv ( int  n,
float *  alpha0,
float *  alpha1,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bl1_saxpy(), bl1_sscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

{
    float    coeff;

    // Empty vector: nothing to update.
    if ( bl1_zero_dim1( n ) ) return;

    // Combine the two alpha scalars before y is modified.
    coeff = (*alpha0) * (*alpha1);

    // y is scaled first; the axpy then accumulates into the scaled y.
    bl1_sscal( n, beta, y, incy );
    bl1_saxpy( n, &coeff, x, incx, y, incy );
}
// Single-precision real axpyv: forwards n, alpha, x/incx and y/incy straight
// to bl1_saxpy(). The conj argument is not read anywhere in this body.
void bl1_saxpyv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)

References bl1_saxpy().

Referenced by bl1_saxpymrt(), bl1_strmvsx(), bl1_strsvsx(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().

{
    // Pure delegation; no conjugation handling happens here
    // (presumably because conjugating real data changes nothing).
    bl1_saxpy( n,
               alpha,
               x, incx,
               y, incy );
}
// Copies the part of the m-by-n single-precision real matrix A selected by
// uplo into the single-precision complex matrix B, one column (or row) at a
// time via bl1_sccopyv() with BLIS1_NO_CONJUGATE.
//   uplo        - selects the upper or lower part; toggled locally when the
//                 traversal switches from columns to rows.
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B (pointer to its first element) with
//                 its row and column strides.
void bl1_sccopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_sccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Traversing by rows turns the selected triangle into its opposite,
    // hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Vector j holds j+1 elements, capped at the full vector length.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_sccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Vector j starts on the diagonal and shrinks by one each iteration.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );

            // Once the diagonal runs off the matrix there is nothing left.
            if ( n_elem <= 0 ) break;

            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_sccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
}
// Copies the part of the single-precision real matrix A selected by uplo
// (after an optional transposition of A) into the single-precision complex
// matrix B, one column or row at a time via bl1_sccopyv().
//   uplo        - selects the lower or upper part.
//   trans       - if it requests a transpose, A's two strides are exchanged;
//                 its conjugation component is forwarded to bl1_sccopyv().
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B (pointer to its first element) with
//                 its row and column strides; B's storage format decides
//                 whether traversal is by columns or by rows.
void bl1_sccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Each vector starts on the diagonal element (j,j).
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Each vector starts at its head and holds at most n_elem_max elements.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copies the m-by-n single-precision real matrix A (optionally transposed)
// into the single-precision complex matrix B by calling bl1_sccopyv() once
// per column, or once per row when B is row-stored. Vector-shaped operands
// are handled with a single call.
//   trans       - if it requests a transpose, A's two strides are exchanged;
//                 its conjugation component is forwarded to bl1_sccopyv().
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B (pointer to its first element) with
//                 its row and column strides.
void bl1_sccopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_sccopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copies m single-precision real values from the strided vector x into the
// strided single-precision complex vector y. Each destination element gets
// the source value as its real part and 0.0F as its imaginary part.
//   conj     - accepted for interface uniformity; never read in this body.
//   m        - number of elements to copy.
//   x, incx  - source vector and its element stride.
//   y, incy  - destination vector (pointer to its first element) and its
//              element stride.
void bl1_sccopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bl1_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by bl1_sccopymr(), bl1_sccopymrt(), and bl1_sccopymt().

{
    float*    chi;
    scomplex* psi;
    int       i;

    // Return early if possible.
    if ( bl1_zero_dim1( m ) ) return;

    // Initialize pointers.
    chi = x;
    psi = y;

    for ( i = 0; i < m; ++i )
    {
        // Widen real -> complex: the imaginary part is always zero.
        psi->real = *chi;
        psi->imag = 0.0F;

        chi += incx;
        psi += incy;
    }
}
// Matrix conjugation entry point for single-precision real data. The body
// returns immediately without reading or writing any of its arguments.
void bl1_sconjm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty (presumably because conjugating real data is a no-op).
    return;
}
// Triangular-part matrix conjugation entry point for single-precision real
// data. The body returns immediately without touching any of its arguments.
void bl1_sconjmr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty (presumably because conjugating real data is a no-op).
    return;
}
// Signature only: this listing shows no body for bl1_sconjv.
// NOTE(review): presumably a no-op like bl1_sconjm()/bl1_sconjmr() — confirm
// against the implementation file.
void bl1_sconjv ( int  m,
float *  x,
int  incx 
)
// Forwards a single-precision vector copy (m elements, x/incx -> y/incy) to
// whichever BLAS interface the build selected.
void bl1_scopy ( int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_scopy(), and F77_scopy().

Referenced by bl1_scopymr(), bl1_scopymt(), bl1_scopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

{
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 path: scalar arguments are passed by address.
    F77_scopy( &m, x, &incx, y, &incy );
#else
    // CBLAS path: scalar arguments are passed by value.
    cblas_scopy( m, x, incx, y, incy );
#endif
}
// Copies the part of the m-by-n single-precision matrix A selected by uplo
// into B, one column (or row) at a time via bl1_scopy().
//   uplo        - selects the upper or lower part; toggled locally when the
//                 traversal switches from columns to rows.
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B with its row and column strides.
void bl1_scopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopy(), and bl1_zero_dim2().

Referenced by bl1_screate_contigmr(), bl1_sfree_saved_contigmr(), and FLA_Copyr_external().

{
    float*    src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len_max;
    int       len;
    int       k;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Default to a column-by-column traversal.
    num_iter = n;
    len_max  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // If both operands are row-stored, traverse by rows for better locality.
    // Doing so turns the selected triangle into its opposite, hence the toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( num_iter, len_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower: vector k starts on the diagonal and shrinks by one per pass.
        for ( k = 0; k < num_iter; k++ )
        {
            len = bl1_max( 0, len_max - k );

            // Once the diagonal runs off the matrix there is nothing left.
            if ( len <= 0 ) break;

            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;

            bl1_scopy( len, src, a_inc, dst, b_inc );
        }
    }
    else
    {
        // Upper: vector k holds k+1 elements, capped at the full length.
        for ( k = 0; k < num_iter; k++ )
        {
            len = bl1_min( k + 1, len_max );
            src = a + k*a_ld;
            dst = b + k*b_ld;

            bl1_scopy( len, src, a_inc, dst, b_inc );
        }
    }
}
// Copies the part of the single-precision matrix A selected by uplo (after an
// optional transposition of A) into B, one column or row at a time via
// bl1_scopyv().
//   uplo        - selects the lower or upper part.
//   trans       - if it requests a transpose, A's two strides are exchanged;
//                 its conjugation component is forwarded to bl1_scopyv().
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B with its row and column strides; B's
//                 storage format decides whether traversal is by columns or
//                 by rows.
void bl1_scopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

{
    float*    src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       len_shrinks;
    int       k;
    conj1_t   conj_a;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // B is column-stored: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = bl1_min( m, n );
            len_max     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_iter    = n;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else
    {
        // B is row-stored: step from row to row.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = m;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_iter    = bl1_min( m, n );
            len_max     = n;
            len_shrinks = TRUE;
        }
    }

    // A transpose of A is realized by exchanging its two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is passed to the vector kernel.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( len_shrinks )
        {
            // Start on the diagonal element (k,k); one element fewer each pass.
            len = len_max - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the head of vector k; length grows until it hits len_max.
            len = bl1_min( k + 1, len_max );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bl1_scopyv( conj_a, len, src, a_inc, dst, b_inc );
    }
}
// Copies the m-by-n single-precision matrix A (optionally transposed) into B
// by calling bl1_scopy() once per column, or once per row when both operands
// are effectively row-stored. Vector-shaped operands are handled with a
// single call.
//   trans       - if it requests a transpose, A's two strides are exchanged.
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B with its row and column strides.
void bl1_scopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_scopy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmsr(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    float*    src;
    float*    dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       k;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        num_iter = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transpose of A is realized by exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // When B is row-stored and A, after any transposition, is effectively
        // row-stored as well, traverse by rows instead for better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_iter, len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector-shaped operands: one copy call covers everything.
        num_iter = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1; // only ever multiplied by k == 0
        a_inc    = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1; // only ever multiplied by k == 0
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_iter; k++ )
    {
        src = a + k*a_ld;
        dst = b + k*b_ld;

        bl1_scopy( len, src, a_inc, dst, b_inc );
    }
}
// Signature only: this listing shows no body for bl1_scopyv.
// NOTE(review): bl1_scopy() lists this routine among its callers, so it
// presumably forwards m, x/incx and y/incy to bl1_scopy() — confirm against
// the implementation file.
void bl1_scopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)
// Copies the part of the m-by-n single-precision matrix A selected by uplo
// into the double-precision matrix B, one column (or row) at a time via
// bl1_sdcopyv() with BLIS1_NO_CONJUGATE.
//   uplo        - selects the upper or lower part; toggled locally when the
//                 traversal switches from columns to rows.
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B with its row and column strides.
void bl1_sdcopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_sdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    float*    src;
    double*   dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len_max;
    int       len;
    int       k;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Default to a column-by-column traversal.
    num_iter = n;
    len_max  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // If B is row-stored, traverse by rows for better locality. Doing so
    // turns the selected triangle into its opposite, hence the toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( num_iter, len_max );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower: vector k starts on the diagonal and shrinks by one per pass.
        for ( k = 0; k < num_iter; k++ )
        {
            len = bl1_max( 0, len_max - k );

            // Once the diagonal runs off the matrix there is nothing left.
            if ( len <= 0 ) break;

            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;

            bl1_sdcopyv( BLIS1_NO_CONJUGATE, len, src, a_inc, dst, b_inc );
        }
    }
    else
    {
        // Upper: vector k holds k+1 elements, capped at the full length.
        for ( k = 0; k < num_iter; k++ )
        {
            len = bl1_min( k + 1, len_max );
            src = a + k*a_ld;
            dst = b + k*b_ld;

            bl1_sdcopyv( BLIS1_NO_CONJUGATE, len, src, a_inc, dst, b_inc );
        }
    }
}
// Copies the part of the single-precision matrix A selected by uplo (after an
// optional transposition of A) into the double-precision matrix B, one column
// or row at a time via bl1_sdcopyv().
//   uplo        - selects the lower or upper part.
//   trans       - if it requests a transpose, A's two strides are exchanged;
//                 its conjugation component is forwarded to bl1_sdcopyv().
//   a, a_rs/cs  - source matrix A with its row and column strides.
//   b, b_rs/cs  - destination matrix B with its row and column strides; B's
//                 storage format decides whether traversal is by columns or
//                 by rows.
void bl1_sdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    float*    src;
    double*   dst;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_iter;
    int       len;
    int       len_max;
    int       len_shrinks;
    int       k;
    conj1_t   conj_a;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // B is column-stored: step from column to column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = bl1_min( m, n );
            len_max     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_iter    = n;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else
    {
        // B is row-stored: step from row to row.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_iter    = m;
            len_max     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_iter    = bl1_min( m, n );
            len_max     = n;
            len_shrinks = TRUE;
        }
    }

    // A transpose of A is realized by exchanging its two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is passed to the vector kernel.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_iter; k++ )
    {
        if ( len_shrinks )
        {
            // Start on the diagonal element (k,k); one element fewer each pass.
            len = len_max - k;
            src = a + k*a_ld + k*a_inc;
            dst = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the head of vector k; length grows until it hits len_max.
            len = bl1_min( k + 1, len_max );
            src = a + k*a_ld;
            dst = b + k*b_ld;
        }

        bl1_sdcopyv( conj_a, len, src, a_inc, dst, b_inc );
    }
}
void bl1_sdcopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copy the single-precision matrix A into the double-precision m-by-n
    // matrix B. A is read with its row and column strides exchanged when
    // trans requests a transposition. All element traffic goes through
    // bl1_sdcopyv(), called once per column of B (or once per row when B is
    // row-major).
    int       num_vecs;                     // number of bl1_sdcopyv() calls
    int       vec_len;                      // elements handled per call
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       b_vec_stride, b_elem_stride;  // strides used to walk B
    int       k;
    conj1_t    conj;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: begin by assuming B is traversed column by column.
        num_vecs      = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        // Reading A transposed amounts to exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_vec_stride, a_elem_stride );
        }

        // When B is row-major, traverse by rows rather than by columns so
        // that consecutive accesses to B are close together in memory.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_vec_stride, a_elem_stride );
            bl1_swap_ints( b_vec_stride, b_elem_stride );
        }
    }
    else
    {
        // Vector operands: one bl1_sdcopyv() call covers everything. The
        // per-vector strides are only ever multiplied by k == 0, so their
        // value is irrelevant.
        num_vecs      = 1;
        vec_len       = bl1_vector_dim( m, n );
        a_vec_stride  = 1;
        a_elem_stride = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_vec_stride  = 1;
        b_elem_stride = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_sdcopyv( conj,
                     vec_len,
                     a + k*a_vec_stride, a_elem_stride,
                     b + k*b_vec_stride, b_elem_stride );
    }
}
void bl1_sdcopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
double *  y,
int  incy 
)

References bl1_zero_dim1().

Referenced by bl1_sdcopymr(), bl1_sdcopymrt(), and bl1_sdcopymt().

{
    // Copy m single-precision elements of x (stride incx) into the
    // double-precision vector y (stride incy). The conj argument is accepted
    // but not consulted by this routine.
    float*    src;
    double*   dst;
    int       remaining;

    // Nothing to do for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    // Count down the elements still to be copied, advancing both cursors by
    // their respective strides after each store.
    for ( remaining = m; remaining > 0; --remaining )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
void bl1_sdot ( conj1_t  conj,
int  n,
float *  x,
int  incx,
float *  y,
int  incy,
float *  rho 
)
void bl1_sdot2s ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)
void bl1_sdots ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)
void bl1_sfnorm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  norm 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Norm_frob().

{
    // Compute the Frobenius norm of the m-by-n single-precision matrix A
    // (row stride a_rs, column stride a_cs), i.e. the square root of the sum
    // of the squares of its elements, and store it in *norm.
    float*    a_ij;
    float     sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty operand has norm zero. Write the result explicitly so the
    // caller never reads an unset *norm.
    if ( bl1_zero_dim2( m, n ) )
    {
        *norm = 0.0F;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        
        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0F;

    // Accumulate the squares of all elements, walking the inner dimension
    // with stride inca and the outer dimension with stride lda.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += (*a_ij) * (*a_ij);
        }
    }
    
    // Compute the norm and store the result.
    *norm = ( float ) sqrt( sum );
}
void bl1_sinvscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Inverse-scale the m-by-n matrix A in place: bl1_sinvert2s() derives a
    // scalar from alpha once, and bl1_sscal() then applies it to A one
    // column (or row) at a time.
    float     inv_alpha;
    int       num_vecs;                     // number of bl1_sscal() calls
    int       vec_len;                      // elements scaled per call
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       k;

    // Return early when there is nothing to scale: an empty operand, or an
    // alpha flagged by bl1_seq1() (presumably alpha == 1; confirm against
    // the macro definition).
    if ( bl1_zero_dim2( m, n ) || bl1_seq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs      = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;

        // For row-major A, traverse by rows instead so that consecutive
        // accesses are close together in memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_vec_stride, a_elem_stride );
        }
    }
    else
    {
        // Vector operand: make sure the underlying scal runs exactly once.
        // The per-vector stride is only ever multiplied by k == 0.
        num_vecs      = 1;
        vec_len       = bl1_vector_dim( m, n );
        a_vec_stride  = 1;
        a_elem_stride = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Form the scaling factor once, outside the loop.
    bl1_sinvert2s( conj, alpha, &inv_alpha );

    for ( k = 0; k < num_vecs; k++ )
    {
        bl1_sscal( vec_len,
                   &inv_alpha,
                   a + k*a_vec_stride, a_elem_stride );
    }
}
void bl1_sinvscalv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)
void bl1_snrm2 ( int  n,
float *  x,
int  incx,
float *  norm 
)

References cblas_snrm2(), and F77_snrm2().

Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().

{
    // Store in *norm the value returned by the BLAS snrm2 routine for the
    // n-element vector x with stride incx. The binding (CBLAS or Fortran-77)
    // is selected at compile time.
    float     result;

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the integer arguments by value.
    result = cblas_snrm2( n, x, incx );
#else
    // The Fortran-77 binding takes the integer arguments by address.
    result = F77_snrm2( &n, x, &incx );
#endif

    *norm = result;
}
void bl1_sscal ( int  n,
float *  alpha,
float *  x,
int  incx 
)

References cblas_sscal(), and F77_sscal().

Referenced by bl1_cconjm(), bl1_cconjmr(), bl1_cconjv(), bl1_saxpysmt(), bl1_saxpysv(), bl1_sinvscalm(), bl1_sinvscalv(), bl1_sscalm(), bl1_sscalmr(), bl1_sscalv(), and FLA_SA_LU_unb().

{
    // Thin wrapper over the BLAS sscal routine: forwards n, alpha, x and
    // incx to whichever binding was selected at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the scalar and the integers by value.
    cblas_sscal( n, *alpha, x, incx );
#else
    // The Fortran-77 binding takes every argument by address.
    F77_sscal( &n, alpha, x, &incx );
#endif
}
void bl1_sscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scale the m-by-n matrix A in place. bl1_scopys() derives the scalar to
    // apply from alpha and conj once, and bl1_sscal() then applies it to A
    // one column (or row) at a time.
    float     scale;
    int       num_vecs;                     // number of bl1_sscal() calls
    int       vec_len;                      // elements scaled per call
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       k;

    // Return early when there is nothing to scale: an empty operand, or an
    // alpha flagged by bl1_seq1() (presumably alpha == 1; confirm against
    // the macro definition).
    if ( bl1_zero_dim2( m, n ) || bl1_seq1( alpha ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs      = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;

        // For row-major A, traverse by rows instead so that consecutive
        // accesses are close together in memory.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_vec_stride, a_elem_stride );
        }
    }
    else
    {
        // Vector operand: make sure the underlying scal runs exactly once.
        // The per-vector stride is only ever multiplied by k == 0.
        num_vecs      = 1;
        vec_len       = bl1_vector_dim( m, n );
        a_vec_stride  = 1;
        a_elem_stride = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Form the scalar once, outside the loop.
    bl1_scopys( conj, alpha, &scale );

    for ( k = 0; k < num_vecs; k++ )
    {
        bl1_sscal( vec_len,
                   &scale,
                   a + k*a_vec_stride, a_elem_stride );
    }
}
void bl1_sscalmr ( uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_sscal(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

{
    // Scale, in place and by alpha, only the triangle of the m-by-n matrix A
    // selected by uplo. Each column (or row) contributes the part that lies
    // inside that triangle, scaled with one bl1_sscal() call.
    int       num_vecs;                     // columns/rows to visit
    int       max_len;                      // full length of a column/row
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       len;
    int       k;

    // Return early when there is nothing to scale: an empty operand, or an
    // alpha flagged by bl1_seq1() (presumably alpha == 1; confirm against
    // the macro definition).
    if ( bl1_zero_dim2( m, n ) || bl1_seq1( alpha ) ) return;

    // Start from a column-by-column traversal.
    num_vecs      = n;
    max_len       = m;
    a_vec_stride  = a_cs;
    a_elem_stride = a_rs;

    // For row-major A, traverse by rows instead. Viewed that way the
    // triangle flips, so uplo is toggled to match.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( num_vecs, max_len );
        bl1_swap_ints( a_vec_stride, a_elem_stride );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: vector k starts on the diagonal and runs to the
        // end, so it shrinks by one element per step. Stop as soon as no
        // elements remain.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;

            if ( len <= 0 ) break;

            bl1_sscal( len,
                       alpha,
                       a + k*a_vec_stride + k*a_elem_stride, a_elem_stride );
        }
    }
    else
    {
        // Upper triangle: vector k starts at its first element and holds
        // k + 1 elements, capped at the full vector length.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = bl1_min( k + 1, max_len );

            bl1_sscal( len,
                       alpha,
                       a + k*a_vec_stride, a_elem_stride );
        }
    }
}
void bl1_sscalv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)
void bl1_sscopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

{
    // Copy the triangle selected by uplo from the m-by-n matrix A into the
    // same positions of the m-by-n matrix B. Each column (or row)
    // contributes the part that lies inside the triangle, copied with one
    // bl1_scopyv() call.
    int       num_vecs;                     // columns/rows to visit
    int       max_len;                      // full length of a column/row
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       b_vec_stride, b_elem_stride;  // strides used to walk B
    int       len;
    int       k;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    num_vecs      = n;
    max_len       = m;
    a_vec_stride  = a_cs;
    a_elem_stride = a_rs;
    b_vec_stride  = b_cs;
    b_elem_stride = b_rs;

    // For row-major B, traverse by rows instead. Viewed that way the
    // triangle flips, so uplo is toggled to match.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( num_vecs, max_len );
        bl1_swap_ints( a_vec_stride, a_elem_stride );
        bl1_swap_ints( b_vec_stride, b_elem_stride );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower triangle: vector k starts on the diagonal and runs to the
        // end, so it shrinks by one element per step. Stop as soon as no
        // elements remain.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;

            if ( len <= 0 ) break;

            bl1_scopyv( BLIS1_NO_CONJUGATE,
                        len,
                        a + k*a_vec_stride + k*a_elem_stride, a_elem_stride,
                        b + k*b_vec_stride + k*b_elem_stride, b_elem_stride );
        }
    }
    else
    {
        // Upper triangle: vector k starts at its first element and holds
        // k + 1 elements, capped at the full vector length.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = bl1_min( k + 1, max_len );

            bl1_scopyv( BLIS1_NO_CONJUGATE,
                        len,
                        a + k*a_vec_stride, a_elem_stride,
                        b + k*b_vec_stride, b_elem_stride );
        }
    }
}
void bl1_sscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

{
    // Copy into the triangle of the m-by-n matrix B selected by uplo the
    // corresponding elements of A. A is read with its row and column strides
    // exchanged when trans requests a transposition. Each column (or row) of
    // the triangle is copied with one bl1_scopyv() call.
    float*    a_vec;
    float*    b_vec;
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       b_vec_stride, b_elem_stride;  // strides used to walk B
    int       num_vecs;                     // columns/rows to visit
    int       max_len;                      // longest vector in the triangle
    int       len;
    int       shrinking;                    // TRUE when len decreases with k
    int       min_dim;
    int       k;
    conj1_t    conj;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    min_dim = bl1_min( m, n );

    // Choose the traversal from B's storage format, then the vector count
    // and length profile from uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column-major B: walk columns.
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            num_vecs  = min_dim;
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            num_vecs  = n;
            max_len   = min_dim;
            shrinking = FALSE;
        }
    }
    else
    {
        // Row-major B: walk rows.
        a_vec_stride  = a_rs;
        a_elem_stride = a_cs;
        b_vec_stride  = b_rs;
        b_elem_stride = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            num_vecs  = m;
            max_len   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            num_vecs  = min_dim;
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // Reading A transposed amounts to exchanging its two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_vec_stride, a_elem_stride );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( shrinking )
        {
            // Vector k starts on the diagonal and loses one element per step.
            len   = max_len - k;
            a_vec = a + k*a_vec_stride + k*a_elem_stride;
            b_vec = b + k*b_vec_stride + k*b_elem_stride;
        }
        else
        {
            // Vector k starts at its first element and holds k + 1 elements,
            // capped at max_len.
            len   = bl1_min( k + 1, max_len );
            a_vec = a + k*a_vec_stride;
            b_vec = b + k*b_vec_stride;
        }

        bl1_scopyv( conj,
                    len,
                    a_vec, a_elem_stride,
                    b_vec, b_elem_stride );
    }
}
void bl1_sscopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_scopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

{
    // Copy the single-precision matrix A into the single-precision m-by-n
    // matrix B. A is read with its row and column strides exchanged when
    // trans requests a transposition. All element traffic goes through
    // bl1_scopyv(), called once per column of B (or once per row when B is
    // row-major).
    int       num_vecs;                     // number of bl1_scopyv() calls
    int       vec_len;                      // elements handled per call
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       b_vec_stride, b_elem_stride;  // strides used to walk B
    int       k;
    conj1_t    conj;

    // Empty operands: nothing to copy.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: begin by assuming B is traversed column by column.
        num_vecs      = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        // Reading A transposed amounts to exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_vec_stride, a_elem_stride );
        }

        // When B is row-major, traverse by rows rather than by columns so
        // that consecutive accesses to B are close together in memory.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_vec_stride, a_elem_stride );
            bl1_swap_ints( b_vec_stride, b_elem_stride );
        }
    }
    else
    {
        // Vector operands: one bl1_scopyv() call covers everything. The
        // per-vector strides are only ever multiplied by k == 0, so their
        // value is irrelevant.
        num_vecs      = 1;
        vec_len       = bl1_vector_dim( m, n );
        a_vec_stride  = 1;
        a_elem_stride = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_vec_stride  = 1;
        b_elem_stride = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation part of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; ++k )
    {
        bl1_scopyv( conj,
                    vec_len,
                    a + k*a_vec_stride, a_elem_stride,
                    b + k*b_vec_stride, b_elem_stride );
    }
}
void bl1_sswap ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_sswap(), and F77_sswap().

Referenced by bl1_sswapmt(), bl1_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Thin wrapper over the BLAS sswap routine: forwards n, x, incx, y and
    // incy to whichever binding was selected at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the integer arguments by value.
    cblas_sswap( n, x, incx, y, incy );
#else
    // The Fortran-77 binding takes the integer arguments by address.
    F77_sswap( &n, x, &incx, y, &incy );
#endif
}
void bl1_sswapmt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_sswap(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    // Exchange the contents of the m-by-n matrix B with the matrix A. A is
    // accessed with its row and column strides exchanged when trans requests
    // a transposition. The exchange is done with one bl1_sswap() call per
    // column (or per row, see below).
    int       num_vecs;                     // number of bl1_sswap() calls
    int       vec_len;                      // elements exchanged per call
    int       a_vec_stride, a_elem_stride;  // strides used to walk A
    int       b_vec_stride, b_elem_stride;  // strides used to walk B
    int       k;

    // Empty operands: nothing to exchange.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // General matrix: begin with a column-by-column traversal.
        num_vecs      = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        // Accessing A transposed amounts to exchanging its two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_vec_stride, a_elem_stride );
        }

        // Switch to a row-by-row traversal only when B is row-major and A,
        // after the optional transposition, is effectively row-major too.
        // Then both operands are accessed with better locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_vec_stride, a_elem_stride );
            bl1_swap_ints( b_vec_stride, b_elem_stride );
        }
    }
    else
    {
        // Vector operands: one bl1_sswap() call covers everything. The
        // per-vector strides are only ever multiplied by k == 0, so their
        // value is irrelevant.
        num_vecs      = 1;
        vec_len       = bl1_vector_dim( m, n );
        a_vec_stride  = 1;
        a_elem_stride = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_vec_stride  = 1;
        b_elem_stride = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        bl1_sswap( vec_len,
                   a + k*a_vec_stride, a_elem_stride,
                   b + k*b_vec_stride, b_elem_stride );
    }
}
void bl1_sswapv ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)

References bl1_sswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_f_ops().

{
    // Exchange the n-element vectors x (stride incx) and y (stride incy) by
    // delegating to bl1_sswap(). An empty vector is skipped without calling
    // into the BLAS.
    if ( !bl1_zero_dim1( n ) )
    {
        bl1_sswap( n, x, incx, y, incy );
    }
}
void bl1_szcopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_szcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copy the triangle selected by uplo from the single-precision real
    // m-by-n matrix A into the same positions of the double-precision
    // complex m-by-n matrix B. Each column (or row) contributes the part
    // that lies inside the triangle, copied with one bl1_szcopyv() call.
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Viewed by rows the triangle flips, so uplo is toggled to match.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }
    
    
    if ( bl1_is_upper( uplo ) )
    {
        // Vector j starts at its first element and holds j + 1 elements,
        // capped at the full vector length.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_szcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        // Vector j starts on the diagonal and shrinks by one element per
        // step; stop as soon as no elements remain.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bl1_szcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
}
void bl1_szcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copy into the triangle of the double-precision complex m-by-n matrix B
    // selected by uplo the corresponding elements of the single-precision
    // real matrix A. A is read with its row and column strides exchanged
    // when trans requests a transposition. Each column (or row) of the
    // triangle is copied with one bl1_szcopyv() call.
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Vector j starts on the diagonal and loses one element per step.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_szcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Vector j starts at its first element and holds j + 1 elements,
        // capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_szcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_szcopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copy the single-precision real matrix A into the double-precision
    // complex m-by-n matrix B. A is read with its row and column strides
    // exchanged when trans requests a transposition. All element traffic
    // goes through bl1_szcopyv(), called once per column of B (or once per
    // row when B is row-major).
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_szcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bl1_szcopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bl1_zero_dim1(), dcomplex::imag, and dcomplex::real.

Referenced by bl1_szcopymr(), bl1_szcopymrt(), and bl1_szcopymt().

{
    // Copy m single-precision real elements of x (stride incx) into the
    // double-precision complex vector y (stride incy): each destination
    // element receives the source value as its real part and zero as its
    // imaginary part. The conj argument is accepted but not consulted here.
    float*    chi;
    dcomplex* psi;
    int       i;

    // Return early if possible.
    if ( bl1_zero_dim1( m ) ) return;

    // Initialize pointers.
    chi = x;
    psi = y;

    for ( i = 0; i < m; ++i )
    {
        psi->real = *chi;
        psi->imag = 0.0;

        // Advance both cursors by their respective strides.
        chi += incx;
        psi += incy;
    }
}
void bl1_zamax ( int  n,
dcomplex *  x,
int  incx,
int *  index 
)

References cblas_izamax(), and F77_izamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().

{
    // Store in *index the position reported by the BLAS izamax routine for
    // the n-element double-precision complex vector x with stride incx. The
    // binding (CBLAS or Fortran-77) is selected at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    *index = cblas_izamax( n,
                           x, incx );
#else
    // The Fortran routine reports a 1-based position; subtract one so both
    // bindings yield the same zero-based index.
    *index = F77_izamax( &n,
                         x, &incx ) - 1;
#endif
}
void bl1_zasum ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)

References cblas_dzasum(), and F77_dzasum().

Referenced by FLA_Asum_external().

{
    // Store in *norm the value returned by the BLAS dzasum routine for the
    // n-element double-precision complex vector x with stride incx. The
    // binding (CBLAS or Fortran-77) is selected at compile time.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the integer arguments by value.
    *norm = cblas_dzasum( n,
                          x, incx );
#else
    // The Fortran-77 binding takes the integer arguments by address.
    *norm = F77_dzasum( &n,
                        x, &incx );
#endif
}
void bl1_zaxpy ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References cblas_zaxpy(), and F77_zaxpy().

Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpysv(), and bl1_zaxpyv().

{
    // Thin wrapper over the BLAS zaxpy routine: forwards n, alpha, x, incx,
    // y and incy to whichever binding was selected at compile time. In both
    // bindings the complex scalar alpha is passed by address.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the integer arguments by value.
    cblas_zaxpy( n,
                 alpha,
                 x, incx,
                 y, incy );
#else
    // The Fortran-77 binding takes the integer arguments by address.
    F77_zaxpy( &n,
               alpha,
               x, &incx,
               y, &incy );
#endif
}
void bl1_zaxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zaxpyv(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Axpyrt_external().

{
    // Apply an axpy update, with scalar alpha, to the triangle of the
    // double-precision complex m-by-n matrix B selected by uplo, using the
    // corresponding elements of A. A is read with its row and column strides
    // exchanged when trans requests a transposition. Each column (or row) of
    // the triangle is updated with one bl1_zaxpyv() call.
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        // Vector j starts on the diagonal and loses one element per step.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bl1_zaxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        // Vector j starts at its first element and holds j + 1 elements,
        // capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bl1_zaxpyv( conj,
                        n_elem,
                        alpha,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
// bl1_zaxpymt
//
// Applies bl1_zaxpy() to the m-by-n matrix B one column (or row) at a time,
// with the matching vector of A as the x operand, the vector of B as the y
// operand, and alpha as the scalar. When trans requests a transposition the
// strides of A are swapped; when it requests conjugation each vector of A is
// first copied through bl1_zcopyv() into a unit-stride temporary.
//
//   trans       transposition/conjugation selector for A.
//   m, n        dimensions of B; returns immediately if bl1_zero_dim2( m, n ).
//   alpha       pointer to the scalar, forwarded unchanged to bl1_zaxpy().
//   a           first element of A; a_rs / a_cs are its row / column strides.
//   b           first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.
//
// Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().
void bl1_zaxpymt( trans1_t  trans,
                  int       m,
                  int       n,
                  dcomplex* alpha,
                  dcomplex* a,
                  int       a_rs,
                  int       a_cs,
                  dcomplex* b,
                  int       b_rs,
                  int       b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    dcomplex* a_temp;
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // One scratch vector of n_elem entries is reused for every iteration.
        a_temp = bl1_zallocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Stage the current vector of A in the temporary, applying conj.
            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        a_temp,  inca_temp );

            bl1_zaxpy( n_elem,
                       alpha,
                       a_temp,  inca_temp,
                       b_begin, incb );
        }

        bl1_zfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zaxpy( n_elem,
                       alpha,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
}
// bl1_zaxpysmt
//
// For each column (or row) of the m-by-n matrix B: scales that vector with
// bl1_zscal() using beta, then calls bl1_zaxpy() with the scalar
// alpha_prod = (*alpha0) * (*alpha1) and the matching vector of A as the x
// operand. When trans requests a transposition the strides of A are swapped;
// when it requests conjugation each vector of A is first copied through
// bl1_zcopyv() into a unit-stride temporary.
//
//   trans           transposition/conjugation selector for A.
//   m, n            dimensions of B; returns immediately if
//                   bl1_zero_dim2( m, n ).
//   alpha0, alpha1  pointers to the two scalars whose complex product is
//                   used as the axpy scalar.
//   a               first element of A; a_rs / a_cs are its row / column
//                   strides.
//   beta            pointer to the scalar forwarded to bl1_zscal().
//   b               first element of B; b_rs / b_cs are its row / column
//                   strides.
//
// References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.
//
// Referenced by FLA_Axpys_external().
void bl1_zaxpysmt( trans1_t  trans,
                   int       m,
                   int       n,
                   dcomplex* alpha0,
                   dcomplex* alpha1,
                   dcomplex* a,
                   int       a_rs,
                   int       a_cs,
                   dcomplex* beta,
                   dcomplex* b,
                   int       b_rs,
                   int       b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    dcomplex* a_temp;
    dcomplex  alpha_prod;
    int       inca_temp;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Complex product alpha0 * alpha1, computed once up front.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // Handle cases where A and B are vectors to ensure that the underlying axpy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrices by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    if ( bl1_does_conj( trans ) )
    {
        conj1_t conj = bl1_proj_trans1_to_conj( trans );

        // One scratch vector of n_elem entries is reused for every iteration.
        a_temp = bl1_zallocv( n_elem );
        inca_temp = 1;

        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // Stage the current vector of A in the temporary, applying conj.
            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        a_temp,  inca_temp );

            bl1_zscal( n_elem,
                       beta,
                       b_begin, incb );

            bl1_zaxpy( n_elem,
                       &alpha_prod,
                       a_temp,  inca_temp,
                       b_begin, incb );
        }

        bl1_zfree( a_temp );
    }
    else // if ( !bl1_does_conj( trans ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zscal( n_elem,
                       beta,
                       b_begin, incb );

            bl1_zaxpy( n_elem,
                       &alpha_prod,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
}
// bl1_zaxpysv
//
// Scales the n-element vector y with bl1_zscal() using beta, then calls
// bl1_zaxpy() with the scalar alpha_prod = (*alpha0) * (*alpha1), x as the
// x operand and y as the y operand.
//
//   n               vector length; returns immediately if bl1_zero_dim1( n ).
//   alpha0, alpha1  pointers to the two scalars whose complex product is
//                   used as the axpy scalar.
//   x, incx         source vector and its stride.
//   beta            pointer to the scalar forwarded to bl1_zscal().
//   y, incy         destination vector and its stride.
//
// References bl1_zaxpy(), bl1_zero_dim1(), bl1_zscal(), dcomplex::imag, and dcomplex::real.
//
// Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
void bl1_zaxpysv( int       n,
                  dcomplex* alpha0,
                  dcomplex* alpha1,
                  dcomplex* x,
                  int       incx,
                  dcomplex* beta,
                  dcomplex* y,
                  int       incy )
{
    dcomplex alpha_prod;

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;

    // Complex product alpha0 * alpha1.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    bl1_zscal( n,
               beta,
               y, incy );

    bl1_zaxpy( n,
               &alpha_prod,
               x, incx,
               y, incy );
}
// bl1_zaxpyv
//
// Calls bl1_zaxpy() on n-element vectors x and y with scalar alpha. If
// bl1_is_conj( conj ) holds, x is first copied through bl1_zcopyv() (with
// conj) into a freshly allocated unit-stride vector, which is used as the
// x operand instead and freed afterwards; x itself is not passed to
// bl1_zaxpy() in that case.
//
//   conj     conjugation selector for x.
//   n        vector length; returns immediately if bl1_zero_dim1( n ).
//   alpha    pointer to the scalar, forwarded unchanged to bl1_zaxpy().
//   x, incx  source vector and its stride.
//   y, incy  destination vector and its stride.
//
// References bl1_is_conj(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim1(), and bl1_zfree().
//
// Referenced by bl1_zaxpymrt(), bl1_zgemv(), bl1_zhemv(), bl1_ztrmvsx(), bl1_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().
void bl1_zaxpyv( conj1_t   conj,
                 int       n,
                 dcomplex* alpha,
                 dcomplex* x,
                 int       incx,
                 dcomplex* y,
                 int       incy )
{
    dcomplex* x_copy;
    int       incx_copy;

    // Return early if possible.
    if ( bl1_zero_dim1( n ) ) return;

    // By default operate directly on the caller's x.
    x_copy    = x;
    incx_copy = incx;

    if ( bl1_is_conj( conj ) )
    {
        // Work on a temporary copy so that the caller's x is left untouched.
        x_copy    = bl1_zallocv( n );
        incx_copy = 1;

        bl1_zcopyv( conj,
                    n,
                    x,      incx,
                    x_copy, incx_copy );
    }

    bl1_zaxpy( n,
               alpha,
               x_copy, incx_copy,
               y,      incy );

    // Release the temporary only if one was allocated above.
    if ( bl1_is_conj( conj ) )
        bl1_zfree( x_copy );
}
// bl1_zccopymr
//
// Copies the uplo (upper or lower) triangular/trapezoidal part of the
// m-by-n dcomplex matrix A into the scomplex matrix B, one column (or row)
// at a time via bl1_zccopyv() with BLIS1_NO_CONJUGATE.
//
//   uplo   selects which part is copied.
//   m, n   matrix dimensions; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_is_row_storage(), bl1_is_upper(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
//
// Referenced by FLA_Copyr_external().
void bl1_zccopymr( uplo1_t   uplo,
                   int       m,
                   int       n,
                   dcomplex* a,
                   int       a_rs,
                   int       a_cs,
                   scomplex* b,
                   int       b_rs,
                   int       b_cs )
{
    dcomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Walking by rows turns the upper part into the lower one (and vice
    // versa), hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal and shrinks by one each time.
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Nothing is left below the diagonal once n_elem reaches zero.
            if ( n_elem <= 0 ) break;

            bl1_zccopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// bl1_zccopymrt
//
// Copies into the uplo (upper or lower) part of the m-by-n scomplex matrix
// B the corresponding elements of the dcomplex matrix A, one column (or row)
// of B at a time via bl1_zccopyv(). When trans requests a transposition the
// strides of A are swapped; the conjugation component of trans is forwarded
// to bl1_zccopyv().
//
//   uplo   selects which part of B is written.
//   trans  transposition/conjugation selector for A.
//   m, n   dimensions of B; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zccopyv(), and bl1_zero_dim2().
//
// Referenced by FLA_Copyrt_external().
void bl1_zccopymrt( uplo1_t   uplo,
                    trans1_t  trans,
                    int       m,
                    int       n,
                    dcomplex* a,
                    int       a_rs,
                    int       a_cs,
                    scomplex* b,
                    int       b_rs,
                    int       b_cs )
{
    dcomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the diagonal; one fewer element per iteration.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_zccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the vector's first element; one more element per
            // iteration, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// bl1_zccopymt
//
// Copies the dcomplex matrix A into the m-by-n scomplex matrix B, one column
// (or row) at a time via bl1_zccopyv(). When trans requests a transposition
// the strides of A are swapped; the conjugation component of trans is
// forwarded to bl1_zccopyv().
//
//   trans  transposition/conjugation selector for A.
//   m, n   dimensions of B; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
//
// Referenced by FLA_Copy_external(), and FLA_Copyt_external().
void bl1_zccopymt( trans1_t  trans,
                   int       m,
                   int       n,
                   dcomplex* a,
                   int       a_rs,
                   int       a_cs,
                   scomplex* b,
                   int       b_rs,
                   int       b_cs )
{
    dcomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zccopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// bl1_zccopyv
//
// Copies m elements from the dcomplex vector x into the scomplex vector y by
// assigning the real and imaginary members element by element, then, if
// bl1_is_conj( conj ) holds, calls bl1_cconjv() on y.
// NOTE(review): the assignment presumably narrows double to float, since the
// destination is the single-precision complex type -- confirm against the
// scomplex definition.
//
//   conj     conjugation selector applied to the result in y.
//   m        number of elements; returns immediately if bl1_zero_dim1( m ).
//   x, incx  source vector and its stride (in dcomplex elements).
//   y, incy  destination vector and its stride (in scomplex elements).
//
// References bl1_cconjv(), bl1_is_conj(), bl1_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
//
// Referenced by bl1_zccopymr(), bl1_zccopymrt(), and bl1_zccopymt().
void bl1_zccopyv( conj1_t   conj,
                  int       m,
                  dcomplex* x,
                  int       incx,
                  scomplex* y,
                  int       incy )
{
    dcomplex* chi;
    scomplex* psi;
    int       i;

    // Return early if possible.
    if ( bl1_zero_dim1( m ) ) return;

    // Initialize pointers.
    chi = x;
    psi = y;

    for ( i = 0; i < m; ++i )
    {
        psi->real = chi->real;
        psi->imag = chi->imag;

        chi += incx;
        psi += incy;
    }

    // Conjugation is applied to the destination after the copy.
    if ( bl1_is_conj( conj ) )
        bl1_cconjv( m,
                    y, incy );
}
// bl1_zconjm
//
// Conjugates the m-by-n dcomplex matrix A in place. For each column (or
// row) it calls bl1_dscal() on the imaginary components only -- addressed as
// doubles starting one double past the vector's first element, with twice
// the complex stride -- using the scalar returned by bl1_dm1() (presumably
// -1.0; confirm against its definition).
//
//   m, n   matrix dimensions; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//
// References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
//
// Referenced by bl1_zgemm(), and FLA_Conjugate().
void bl1_zconjm( int       m,
                 int       n,
                 dcomplex* a,
                 int       a_rs,
                 int       a_cs )
{
    double  m1 = bl1_dm1();
    double* a_conj;
    int     lda, inca;
    int     n_iter;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A is a vector to ensure that the underlying scal
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    for ( j = 0; j < n_iter; ++j )
    {
        // Point at the imaginary component of the vector's first element.
        a_conj = ( double* )( a + j*lda ) + 1;

        // Stride is doubled because it is now counted in doubles.
        bl1_dscal( n_elem,
                   &m1,
                   a_conj, 2*inca );
    }
}
// bl1_zconjmr
//
// Conjugates, in place, the uplo (upper or lower) triangular/trapezoidal
// part of the m-by-n dcomplex matrix A. For each column (or row) it calls
// bl1_dscal() on the imaginary components only -- addressed as doubles with
// twice the complex stride -- using the scalar returned by bl1_dm1()
// (presumably -1.0; confirm against its definition).
//
//   uplo   selects which part of A is conjugated.
//   m, n   matrix dimensions; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//
// References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().
//
// Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), and FLA_Conjugate_r().
void bl1_zconjmr( uplo1_t   uplo,
                  int       m,
                  int       n,
                  dcomplex* a,
                  int       a_rs,
                  int       a_cs )
{
    double  m1 = bl1_dm1();
    double* a_conj;
    int     lda, inca;
    int     n_iter;
    int     n_elem_max;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    // Walking by rows turns the upper part into the lower one (and vice
    // versa), hence the uplo toggle.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; ++j )
        {
            // Vector j holds j + 1 elements, capped at n_elem_max.
            n_elem = bl1_min( j + 1, n_elem_max );
            a_conj = ( double* )( a + j*lda ) + 1;

            bl1_dscal( n_elem,
                       &m1,
                       a_conj, 2*inca );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; ++j )
        {
            // Vector j starts on the diagonal and shrinks by one each time.
            n_elem = bl1_max( 0, n_elem_max - j );
            a_conj = ( double* )( a + j*lda + j*inca ) + 1;

            // Nothing is left below the diagonal once n_elem reaches zero.
            if ( n_elem <= 0 ) break;

            bl1_dscal( n_elem,
                       &m1,
                       a_conj, 2*inca );
        }
    }
}
// bl1_zconjv
//
// Conjugates the m-element dcomplex vector x in place by calling bl1_dscal()
// on its imaginary components only: x is viewed as an array of doubles,
// starting one double past the first element and stepping by 2 * incx. The
// scalar is the value returned by bl1_dm1() (presumably -1.0; confirm
// against its definition).
//
//   m        number of elements, forwarded unchanged to bl1_dscal().
//   x, incx  vector and its stride (in dcomplex elements).
//
// References bl1_dm1(), and bl1_dscal().
//
// Referenced by bl1_czcopyv(), bl1_zcopymt(), bl1_zcopyv(), bl1_zgemv(), bl1_zswapmt(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), and FLA_Househ2_UT_r_opz().
void bl1_zconjv( int       m,
                 dcomplex* x,
                 int       incx )
{
    double  m1        = bl1_dm1();
    double* x_conj    = ( double* ) x + 1;
    int     incx_conj = 2 * incx;

    bl1_dscal( m,
               &m1,
               x_conj, incx_conj );
}
// bl1_zcopy
//
// Thin wrapper that forwards to cblas_zcopy() when
// BLIS1_ENABLE_CBLAS_INTERFACES is defined, and to F77_zcopy() otherwise.
// The F77 entry point receives the integer arguments by address.
//
//   m        number of elements.
//   x, incx  source vector and its stride.
//   y, incy  destination vector and its stride.
//
// References cblas_zcopy(), and F77_zcopy().
//
// Referenced by bl1_zcopymr(), bl1_zcopymt(), bl1_zcopyv(), and FLA_SA_LU_unb().
void bl1_zcopy( int       m,
                dcomplex* x,
                int       incx,
                dcomplex* y,
                int       incy )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    cblas_zcopy( m,
                 x, incx,
                 y, incy );
#else
    F77_zcopy( &m,
               x, &incx,
               y, &incy );
#endif
}
// bl1_zcopymr
//
// Copies the uplo (upper or lower) triangular/trapezoidal part of the
// m-by-n dcomplex matrix A into the dcomplex matrix B, one column (or row)
// at a time via bl1_zcopy().
//
//   uplo   selects which part is copied.
//   m, n   matrix dimensions; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopy(), and bl1_zero_dim2().
//
// Referenced by bl1_zcreate_contigmr(), bl1_zfree_saved_contigmr(), bl1_zfree_saved_contigmsr(), and FLA_Copyr_external().
void bl1_zcopymr( uplo1_t   uplo,
                  int       m,
                  int       n,
                  dcomplex* a,
                  int       a_rs,
                  int       a_cs,
                  dcomplex* b,
                  int       b_rs,
                  int       b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if A and B are both row-major, then let's access the
    // matrices by rows instead of by columns for increased spatial locality.
    // Walking by rows turns the upper part into the lower one (and vice
    // versa), hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zcopy( n_elem,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal and shrinks by one each time.
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Nothing is left below the diagonal once n_elem reaches zero.
            if ( n_elem <= 0 ) break;

            bl1_zcopy( n_elem,
                       a_begin, inca,
                       b_begin, incb );
        }
    }
}
// bl1_zcopymrt
//
// Copies into the uplo (upper or lower) part of the m-by-n dcomplex matrix
// B the corresponding elements of the dcomplex matrix A, one column (or row)
// of B at a time via bl1_zcopyv(). When trans requests a transposition the
// strides of A are swapped; the conjugation component of trans is forwarded
// to bl1_zcopyv().
//
//   uplo   selects which part of B is written.
//   trans  transposition/conjugation selector for A.
//   m, n   dimensions of B; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().
//
// Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().
void bl1_zcopymrt( uplo1_t   uplo,
                   trans1_t  trans,
                   int       m,
                   int       n,
                   dcomplex* a,
                   int       a_rs,
                   int       a_cs,
                   dcomplex* b,
                   int       b_rs,
                   int       b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t    conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bl1_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bl1_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            n_iter     = bl1_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the diagonal; one fewer element per iteration.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the vector's first element; one more element per
            // iteration, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
// bl1_zcopymt
//
// Copies the dcomplex matrix A into the m-by-n dcomplex matrix B, one column
// (or row) at a time via bl1_zcopy(). When trans requests a transposition
// the strides of A are swapped; when it requests conjugation, each vector of
// B is conjugated in place with bl1_zconjv() right after it has been copied.
//
//   trans  transposition/conjugation selector for A.
//   m, n   dimensions of B; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zcopy(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
//
// Referenced by bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zfree_saved_contigm(), bl1_zgemm(), bl1_zhemm(), bl1_zher2k(), bl1_zsymm(), bl1_zsyr2k(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opz_var2().
void bl1_zcopymt( trans1_t  trans,
                  int       m,
                  int       n,
                  dcomplex* a,
                  int       a_rs,
                  int       a_cs,
                  dcomplex* b,
                  int       b_rs,
                  int       b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
                 ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
            {
                bl1_swap_ints( n_iter, n_elem );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zcopy( n_elem,
                   a_begin, inca,
                   b_begin, incb );

        // Conjugation is applied to the destination after the copy.
        if ( bl1_does_conj( trans ) )
            bl1_zconjv( n_elem,
                        b_begin, incb );
    }
}
void bl1_zcopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
// bl1_zdcopymr
//
// Copies the uplo (upper or lower) triangular/trapezoidal part of the
// m-by-n dcomplex matrix A into the double matrix B, one column (or row) at
// a time via bl1_zdcopyv() with BLIS1_NO_CONJUGATE.
//
//   uplo   selects which part is copied.
//   m, n   matrix dimensions; returns immediately if bl1_zero_dim2( m, n ).
//   a      first element of A; a_rs / a_cs are its row / column strides.
//   b      first element of B; b_rs / b_cs are its row / column strides.
//
// References bl1_is_row_storage(), bl1_is_upper(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.
//
// Referenced by FLA_Copyr_external().
void bl1_zdcopymr( uplo1_t   uplo,
                   int       m,
                   int       n,
                   dcomplex* a,
                   int       a_rs,
                   int       a_cs,
                   double*   b,
                   int       b_rs,
                   int       b_cs )
{
    dcomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Walking by rows turns the upper part into the lower one (and vice
    // versa), hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, n_elem_max );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 elements, capped at n_elem_max.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zdcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bl1_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal and shrinks by one each time.
            n_elem  = bl1_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Nothing is left below the diagonal once n_elem reaches zero.
            if ( n_elem <= 0 ) break;

            bl1_zdcopyv( BLIS1_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bl1_zdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copy the real parts of one triangle of the dcomplex matrix A into the
    // double matrix B, optionally reading A with its row and column strides
    // exchanged (trans). The traversal direction follows B's storage.
    dcomplex* a_cur;
    double*   b_cur;
    conj1_t   conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_passes;
    int       len_cap;
    int       len;
    int       shrinking;   // TRUE when each pass is shorter than the last
    int       j;

    // An empty matrix requires no work.
    if ( bl1_zero_dim2( m, n ) ) return;

    // The strides depend only on B's storage; the pass count, the length
    // cap and the growth direction also depend on uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_passes  = bl1_min( m, n );
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_passes  = n;
            len_cap   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        // Walk rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_passes  = m;
            len_cap   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_passes  = bl1_min( m, n );
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposition is realized by exchanging A's two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // The conj component of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( shrinking )
    {
        // Pass j starts on the diagonal and is len_cap - j elements long.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = len_cap - j;
            a_cur = a + j*a_ld + j*a_inc;
            b_cur = b + j*b_ld + j*b_inc;

            bl1_zdcopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
    else // growing
    {
        // Pass j starts at the first element and is j+1 elements long,
        // capped at len_cap.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_min( j + 1, len_cap );
            a_cur = a + j*a_ld;
            b_cur = b + j*b_ld;

            bl1_zdcopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
}
void bl1_zdcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copy the real parts of the dcomplex matrix A into the m-by-n double
    // matrix B, optionally reading A with its strides exchanged (trans).
    dcomplex* a_cur;
    double*   b_cur;
    conj1_t   conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_passes;
    int       len;
    int       j;

    // An empty matrix requires no work.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vectors are copied with a single bl1_zdcopyv() call. The leading
        // dimensions are placeholders: with one pass they are only ever
        // multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal of B.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposition is realized by exchanging A's two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // The conj component of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;
        b_cur = b + j*b_ld;

        bl1_zdcopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
    }
}
void bl1_zdcopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
double *  y,
int  incy 
)

References bl1_zero_dim1(), and dcomplex::real.

Referenced by bl1_zdcopymr(), bl1_zdcopymrt(), and bl1_zdcopymt().

{
    // Store the real part of each of the m elements of the dcomplex vector
    // x (stride incx) into the double vector y (stride incy). conj is not
    // consulted; only real parts are read.
    int i;

    // Nothing to copy for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ] = x[ i*incx ].real;
    }
}
void bl1_zdinvscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_dinvert2s(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scale every element of the m-by-n dcomplex matrix A by the real value
    // that bl1_dinvert2s() derives from conj and alpha (presumably the
    // reciprocal of *alpha), one column or row at a time.
    dcomplex* a_cur;
    double    recip;
    int       a_ld, a_inc;
    int       n_passes;
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_deq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled with a single bl1_zdscal() call. The leading
        // dimension is a placeholder: it is only multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }

    bl1_dinvert2s( conj, alpha, &recip );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;

        bl1_zdscal( len, &recip, a_cur, a_inc );
    }
}
void bl1_zdinvscalv ( conj1_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References bl1_zdscal().

{
    // Divide the n-element dcomplex vector x by the real scalar *alpha by
    // scaling it with 1.0 / *alpha. conj is not consulted.
    double recip;

    // Skip the scaling entirely when bl1_deq1( alpha ) holds.
    if ( bl1_deq1( alpha ) ) return;

    recip = 1.0 / *alpha;

    bl1_zdscal( n, &recip, x, incx );
}
void bl1_zdot ( conj1_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)
void bl1_zdot2s ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)

References bl1_zdot(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), and FLA_Lyap_n_opz_var3().

{
    // Update rho := beta * rho + alpha * dot( x, y ) + conj(alpha) * dot( y, x ),
    // where both dot products are computed by bl1_zdot() with the given conj.
    // All scalar inputs are snapshotted first, so the result does not depend
    // on the order in which the two components of *rho are written.
    dcomplex xy;                    // dot( x, y )
    dcomplex yx;                    // dot( y, x )
    dcomplex a      = *alpha;
    dcomplex a_conj = *alpha;       // becomes conj(alpha) below
    dcomplex b      = *beta;
    dcomplex r0     = *rho;         // rho on entry

    a_conj.imag *= -1.0;

    bl1_zdot( conj, n, x, incx, y, incy, &xy );
    bl1_zdot( conj, n, y, incy, x, incx, &yx );

    // Real part: sum of the real parts of the three complex products.
    rho->real = b.real      * r0.real - b.imag      * r0.imag +
                a.real      * xy.real - a.imag      * xy.imag +
                a_conj.real * yx.real - a_conj.imag * yx.imag; 
    // Imaginary part: sum of the imaginary parts of the same products.
    rho->imag = b.real      * r0.imag + b.imag      * r0.real +
                a.real      * xy.imag + a.imag      * xy.real +
                a_conj.real * yx.imag + a_conj.imag * yx.real; 
}
void bl1_zdot_in ( conj1_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)

References bl1_is_conj(), dcomplex::imag, and dcomplex::real.

Referenced by bl1_zdot().

{
    // Accumulate the dot product of the n-element dcomplex vectors x and y
    // and store it in *rho. When bl1_is_conj( conj ) holds, each element of
    // x is conjugated before it is multiplied with the element of y.
    dcomplex* xp = x;
    dcomplex* yp = y;
    dcomplex  xv;
    dcomplex  yv;
    dcomplex  acc;
    int       i;

    acc.real = 0.0;
    acc.imag = 0.0;

    if ( bl1_is_conj( conj ) )
    {
        // acc += conj( x[i] ) * y[i]
        for ( i = 0; i < n; ++i, xp += incx, yp += incy )
        {
            xv.real = xp->real;
            xv.imag = xp->imag;
            yv.real = yp->real;
            yv.imag = yp->imag;

            acc.real += xv.real * yv.real - -xv.imag * yv.imag;
            acc.imag += xv.real * yv.imag + -xv.imag * yv.real;
        }
    }
    else
    {
        // acc += x[i] * y[i]
        for ( i = 0; i < n; ++i, xp += incx, yp += incy )
        {
            xv.real = xp->real;
            xv.imag = xp->imag;
            yv.real = yp->real;
            yv.imag = yp->imag;

            acc.real += xv.real * yv.real - xv.imag * yv.imag;
            acc.imag += xv.real * yv.imag + xv.imag * yv.real;
        }
    }

    // Publish the result only after the whole sum is formed.
    rho->real = acc.real;
    rho->imag = acc.imag;
}
void bl1_zdots ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)

References bl1_zdot(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Chol_l_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var1(), FLA_Chol_u_opz_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opz_var5(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_l_opz_var3(), FLA_Ttmm_u_opz_var2(), and FLA_Ttmm_u_opz_var3().

{
    // Update rho := beta * rho + alpha * dot( x, y ), where the dot product
    // is computed by bl1_zdot() with the given conj.
    //
    // alpha, beta and rho are all snapshotted before *rho is written. The
    // imaginary part is computed after rho->real has already been stored,
    // so reading alpha or beta through their pointers at that point would
    // pick up the new real part if either pointer aliases rho.
    dcomplex alpha_in = *alpha;
    dcomplex beta_in  = *beta;
    dcomplex rho_in   = *rho;
    dcomplex dot_prod;

    bl1_zdot( conj,
              n,
              x, incx,
              y, incy,
              &dot_prod );

    rho->real = beta_in.real  * rho_in.real - beta_in.imag  * rho_in.imag +
                alpha_in.real * dot_prod.real - alpha_in.imag * dot_prod.imag;
    rho->imag = beta_in.real  * rho_in.imag + beta_in.imag  * rho_in.real +
                alpha_in.real * dot_prod.imag + alpha_in.imag * dot_prod.real;
}
void bl1_zdscal ( int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References cblas_zdscal(), and F77_zdscal().

Referenced by bl1_zdinvscalm(), bl1_zdinvscalv(), bl1_zdscalm(), bl1_zdscalmr(), and bl1_zdscalv().

{
    // Forward to the zdscal routine of whichever BLAS interface was selected
    // at build time. The Fortran-77 interface takes every argument by
    // address; the CBLAS interface takes n, alpha and incx by value.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_zdscal( &n, alpha, x, &incx );
#else
    cblas_zdscal( n, *alpha, x, incx );
#endif
}
void bl1_zdscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scale every element of the m-by-n dcomplex matrix A by the real value
    // that bl1_dcopys() derives from conj and alpha, one column or row at a
    // time.
    dcomplex* a_cur;
    double    scale;
    int       a_ld, a_inc;
    int       n_passes;
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_deq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled with a single bl1_zdscal() call. The leading
        // dimension is a placeholder: it is only multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }

    bl1_dcopys( conj, alpha, &scale );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;

        bl1_zdscal( len, &scale, a_cur, a_inc );
    }
}
void bl1_zdscalmr ( uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_zdscal(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Scalr_external().

{
    // Scale one triangle (trapezoid) of the m-by-n dcomplex matrix A by the
    // real scalar *alpha, one column at a time, or one row at a time when A
    // is row-major.
    dcomplex* a_cur;
    int       a_ld     = a_cs;  // stride between consecutive passes
    int       a_inc    = a_rs;  // stride between elements within one pass
    int       n_passes = n;     // column-major view: one pass per column,
    int       len_cap  = m;     // each at most m elements long
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_deq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_deq1( alpha ) ) return;

    // When A is row-major, walk rows instead of columns for better spatial
    // locality, and apply bl1_toggle_uplo() so the triangle selection
    // follows the exchanged roles.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_passes, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Pass j starts at the first element and covers j+1 elements,
        // capped at len_cap.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_min( j + 1, len_cap );
            a_cur = a + j*a_ld;

            bl1_zdscal( len, alpha, a_cur, a_inc );
        }
    }
    else // lower
    {
        // Pass j starts on the diagonal and covers the len_cap - j elements
        // that remain; stop as soon as nothing remains.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_max( 0, len_cap - j );
            a_cur = a + j*a_ld + j*a_inc;

            if ( len <= 0 ) break;

            bl1_zdscal( len, alpha, a_cur, a_inc );
        }
    }
}
void bl1_zdscalv ( conj1_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References bl1_zdscal(), and bl1_zero_dim1().

Referenced by bl1_zdapdiagmv(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

{
    // Scale the n-element dcomplex vector x by the real scalar *alpha.
    // conj is not consulted. Nothing is done for an empty vector or when
    // bl1_deq1( alpha ) holds.
    if ( bl1_zero_dim1( n ) || ( bl1_deq1( alpha ) ) ) return;

    bl1_zdscal( n, alpha, x, incx );
}
void bl1_zfnorm ( int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  norm 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Norm_frob().

{
    // Compute the Frobenius norm of the m-by-n dcomplex matrix A, i.e. the
    // square root of the sum of real^2 + imag^2 over all elements, and store
    // it in *norm.
    //
    // NOTE: the squares are accumulated without scaling, so intermediate
    // overflow or underflow is possible for elements of extreme magnitude.
    dcomplex* a_ij;
    double    sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix has norm zero. Store that explicitly so the caller
    // never reads a *norm that this routine left unwritten.
    if ( bl1_zero_dim2( m, n ) )
    {
        *norm = 0.0;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        
        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0;

    // Accumulate the squared magnitude of every element.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }
    
    // Compute the norm and store the result.
    *norm = sqrt( sum );
}
void bl1_zinvscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zinvert2s(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scale every element of the m-by-n dcomplex matrix A by the complex
    // value that bl1_zinvert2s() derives from conj and alpha (presumably the
    // reciprocal of *alpha), one column or row at a time.
    dcomplex* a_cur;
    dcomplex  recip;
    int       a_ld, a_inc;
    int       n_passes;
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_zeq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled with a single bl1_zscal() call. The leading
        // dimension is a placeholder: it is only multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }

    bl1_zinvert2s( conj, alpha, &recip );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;

        bl1_zscal( len, &recip, a_cur, a_inc );
    }
}
void bl1_zinvscalv ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)
void bl1_znrm2 ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)

References cblas_dznrm2(), and F77_dznrm2().

Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().

{
    // Store in *norm the value returned by the dznrm2 routine of whichever
    // BLAS interface was selected at build time. The Fortran-77 interface
    // takes n and incx by address; the CBLAS interface takes them by value.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    *norm = F77_dznrm2( &n, x, &incx );
#else
    *norm = cblas_dznrm2( n, x, incx );
#endif
}
void bl1_zscal ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)

References cblas_zscal(), and F77_zscal().

Referenced by bl1_zaxpysmt(), bl1_zaxpysv(), bl1_zinvscalm(), bl1_zinvscalv(), bl1_zscalm(), bl1_zscalmr(), bl1_zscalv(), and FLA_SA_LU_unb().

{
    // Forward to the zscal routine of whichever BLAS interface was selected
    // at build time. The Fortran-77 interface takes n and incx by address;
    // the CBLAS interface takes them by value. alpha is passed as a pointer
    // in both cases.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_zscal( &n, alpha, x, &incx );
#else
    cblas_zscal( n, alpha, x, incx );
#endif
}
void bl1_zscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scale every element of the m-by-n dcomplex matrix A by the complex
    // value that bl1_zcopys() derives from conj and alpha, one column or row
    // at a time.
    dcomplex* a_cur;
    dcomplex  scale;
    int       a_ld, a_inc;
    int       n_passes;
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_zeq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // A vector is handled with a single bl1_zscal() call. The leading
        // dimension is a placeholder: it is only multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
        }
    }

    bl1_zcopys( conj, alpha, &scale );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;

        bl1_zscal( len, &scale, a_cur, a_inc );
    }
}
void bl1_zscalmr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and bl1_zscal().

Referenced by FLA_Scalr_external().

{
    // Scale one triangle (trapezoid) of the m-by-n dcomplex matrix A by the
    // complex scalar *alpha, one column at a time, or one row at a time when
    // A is row-major.
    dcomplex* a_cur;
    int       a_ld     = a_cs;  // stride between consecutive passes
    int       a_inc    = a_rs;  // stride between elements within one pass
    int       n_passes = n;     // column-major view: one pass per column,
    int       len_cap  = m;     // each at most m elements long
    int       len;
    int       j;

    // Leave A untouched for an empty matrix or when bl1_zeq1( alpha ) holds.
    if ( bl1_zero_dim2( m, n ) ) return;
    if ( bl1_zeq1( alpha ) ) return;

    // When A is row-major, walk rows instead of columns for better spatial
    // locality, and apply bl1_toggle_uplo() so the triangle selection
    // follows the exchanged roles.
    if ( bl1_is_row_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( n_passes, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Pass j starts at the first element and covers j+1 elements,
        // capped at len_cap.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_min( j + 1, len_cap );
            a_cur = a + j*a_ld;

            bl1_zscal( len, alpha, a_cur, a_inc );
        }
    }
    else // lower
    {
        // Pass j starts on the diagonal and covers the len_cap - j elements
        // that remain; stop as soon as nothing remains.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_max( 0, len_cap - j );
            a_cur = a + j*a_ld + j*a_inc;

            if ( len <= 0 ) break;

            bl1_zscal( len, alpha, a_cur, a_inc );
        }
    }
}
void bl1_zscalv ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)
void bl1_zscopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

{
    // Copy the real parts of one triangle (trapezoid) of the m-by-n dcomplex
    // matrix A into the float matrix B. The work is done one column at a
    // time, or one row at a time when B is row-major, via bl1_zscopyv().
    dcomplex* a_cur;
    float*    b_cur;
    int       a_ld     = a_cs;  // stride between consecutive passes over A
    int       a_inc    = a_rs;  // stride between elements within one pass
    int       b_ld     = b_cs;
    int       b_inc    = b_rs;
    int       n_passes = n;     // column-major view: one pass per column,
    int       len_cap  = m;     // each at most m elements long
    int       len;
    int       j;

    // An empty matrix requires no work.
    if ( bl1_zero_dim2( m, n ) ) return;

    // When B is row-major, walk rows instead of columns for better spatial
    // locality: exchange the roles of the dimensions and strides, and apply
    // bl1_toggle_uplo() so the triangle selection follows the exchange.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_passes, len_cap );
        bl1_swap_ints( a_ld, a_inc );
        bl1_swap_ints( b_ld, b_inc );
        bl1_toggle_uplo( uplo );
    }

    if ( bl1_is_upper( uplo ) )
    {
        // Pass j starts at the first element and covers j+1 elements,
        // capped at len_cap.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_min( j + 1, len_cap );
            a_cur = a + j*a_ld;
            b_cur = b + j*b_ld;

            bl1_zscopyv( BLIS1_NO_CONJUGATE, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
    else // lower
    {
        // Pass j starts on the diagonal and covers the len_cap - j elements
        // that remain; stop as soon as nothing remains.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_max( 0, len_cap - j );
            a_cur = a + j*a_ld + j*a_inc;
            b_cur = b + j*b_ld + j*b_inc;

            if ( len <= 0 ) break;

            bl1_zscopyv( BLIS1_NO_CONJUGATE, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
}
void bl1_zscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zero_dim2(), and bl1_zscopyv().

Referenced by FLA_Copyrt_external().

{
    // Copy the real parts of one triangle of the dcomplex matrix A into the
    // float matrix B, optionally reading A with its row and column strides
    // exchanged (trans). The traversal direction follows B's storage.
    dcomplex* a_cur;
    float*    b_cur;
    conj1_t   conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_passes;
    int       len_cap;
    int       len;
    int       shrinking;   // TRUE when each pass is shorter than the last
    int       j;

    // An empty matrix requires no work.
    if ( bl1_zero_dim2( m, n ) ) return;

    // The strides depend only on B's storage; the pass count, the length
    // cap and the growth direction also depend on uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk columns.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_passes  = bl1_min( m, n );
            len_cap   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_passes  = n;
            len_cap   = bl1_min( m, n );
            shrinking = FALSE;
        }
    }
    else // row storage
    {
        // Walk rows.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_passes  = m;
            len_cap   = bl1_min( m, n );
            shrinking = FALSE;
        }
        else // upper
        {
            n_passes  = bl1_min( m, n );
            len_cap   = n;
            shrinking = TRUE;
        }
    }

    // A transposition is realized by exchanging A's two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( a_ld, a_inc );
    }

    // The conj component of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    if ( shrinking )
    {
        // Pass j starts on the diagonal and is len_cap - j elements long.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = len_cap - j;
            a_cur = a + j*a_ld + j*a_inc;
            b_cur = b + j*b_ld + j*b_inc;

            bl1_zscopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
    else // growing
    {
        // Pass j starts at the first element and is j+1 elements long,
        // capped at len_cap.
        for ( j = 0; j < n_passes; ++j )
        {
            len   = bl1_min( j + 1, len_cap );
            a_cur = a + j*a_ld;
            b_cur = b + j*b_ld;

            bl1_zscopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
        }
    }
}
void bl1_zscopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copy the real parts of the dcomplex matrix A into the m-by-n float
    // matrix B, optionally reading A with its strides exchanged (trans).
    dcomplex* a_cur;
    float*    b_cur;
    conj1_t   conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_passes;
    int       len;
    int       j;

    // An empty matrix requires no work.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vectors are copied with a single bl1_zscopyv() call. The leading
        // dimensions are placeholders: with one pass they are only ever
        // multiplied by j == 0.
        n_passes = 1;
        len      = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: start from a column-by-column traversal of B.
        n_passes = n;
        len      = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposition is realized by exchanging A's two strides.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_passes, len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // The conj component of trans is forwarded to the vector copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_passes; ++j )
    {
        a_cur = a + j*a_ld;
        b_cur = b + j*b_ld;

        bl1_zscopyv( conj, len, a_cur, a_inc, b_cur, b_inc );
    }
}
void bl1_zscopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
float *  y,
int  incy 
)

References bl1_zero_dim1(), and dcomplex::real.

Referenced by bl1_zscopymr(), bl1_zscopymrt(), and bl1_zscopymt().

{
    // Store the real part of each of the m elements of the dcomplex vector
    // x (stride incx) into the float vector y (stride incy), converting each
    // value to float. conj is not consulted; only real parts are read.
    int i;

    // Nothing to copy for an empty vector.
    if ( bl1_zero_dim1( m ) ) return;

    for ( i = 0; i < m; ++i )
    {
        y[ i*incy ] = ( float ) x[ i*incx ].real;
    }
}
void bl1_zswap ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References cblas_zswap(), and F77_zswap().

Referenced by bl1_zswapmt(), bl1_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Forward to the zswap routine of whichever BLAS interface was selected
    // at build time. The Fortran-77 interface takes n, incx and incy by
    // address; the CBLAS interface takes them by value.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    F77_zswap( &n, x, &incx, y, &incy );
#else
    cblas_zswap( n, x, incx, y, incy );
#endif
}
void bl1_zswapmt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zero_dim2(), bl1_zswap(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    dcomplex* a_j;
    dcomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Nothing to exchange when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // Matrix case: start from a column-by-column traversal of B.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // A is walked with its strides exchanged when trans requests a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-stored and A,
        // once trans is taken into account, is effectively row-stored too.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying swap runs only once.
        // lda and ldb are only ever multiplied by j == 0, so any value works.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1;
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1;
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_j = a + j*lda;
        b_j = b + j*ldb;

        // Exchange the j-th pair of slices.
        bl1_zswap( n_elem, a_j, inca, b_j, incb );

        // When trans carries a conjugation, conjugate both slices after the
        // exchange: first the one in A, then the one in B.
        if ( bl1_does_conj( trans ) )
        {
            bl1_zconjv( n_elem, a_j, inca );
            bl1_zconjv( n_elem, b_j, incb );
        }
    }
}
void bl1_zswapv ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
void bl1_zzcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

{
    dcomplex* a_j;
    dcomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       len_cap;
    int       len;
    int       j;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Default traversal: one column of B per iteration.
    n_iter  = n;
    len_cap = m;
    lda     = a_cs;
    inca    = a_rs;
    ldb     = b_cs;
    incb    = b_rs;

    // If B is row-stored, traverse by rows instead for better spatial
    // locality. Exchanging the roles of rows and columns also flips which
    // triangle is being referenced, hence the uplo toggle.
    if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        bl1_swap_ints( n_iter, len_cap );
        bl1_swap_ints( lda, inca );
        bl1_swap_ints( ldb, incb );
        bl1_toggle_uplo( uplo );
    }

    if ( !bl1_is_upper( uplo ) )
    {
        // Lower case: slice j starts on the diagonal and shrinks by one
        // element per iteration; stop as soon as nothing is left.
        for ( j = 0; j < n_iter; j++ )
        {
            len = len_cap - j;
            if ( len <= 0 ) break;

            a_j = a + j*lda + j*inca;
            b_j = b + j*ldb + j*incb;

            bl1_zcopyv( BLIS1_NO_CONJUGATE, len, a_j, inca, b_j, incb );
        }
    }
    else
    {
        // Upper case: slice j starts at the edge and holds j + 1 elements,
        // capped at the slice length.
        for ( j = 0; j < n_iter; j++ )
        {
            len = bl1_min( j + 1, len_cap );
            a_j = a + j*lda;
            b_j = b + j*ldb;

            bl1_zcopyv( BLIS1_NO_CONJUGATE, len, a_j, inca, b_j, incb );
        }
    }
}
void bl1_zzcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().

{
    dcomplex* a_j;
    dcomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       len;
    int       len_cap;
    int       len_shrinks;
    int       j;
    conj1_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    // The storage of B selects the traversal direction; uplo then selects
    // the iteration count, the slice-length cap, and whether slices shrink
    // or grow from one iteration to the next.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter      = bl1_min( m, n );
            len_cap     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            n_iter      = n;
            len_cap     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            n_iter      = m;
            len_cap     = bl1_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            n_iter      = bl1_min( m, n );
            len_cap     = n;
            len_shrinks = TRUE;
        }
    }

    // A transposition of A is realized by exchanging its two strides.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Only the conjugation component of trans is forwarded to the copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; j++ )
    {
        if ( len_shrinks )
        {
            // Slice j starts on the diagonal and is one element shorter
            // than the previous slice.
            len = len_cap - j;
            a_j = a + j*lda + j*inca;
            b_j = b + j*ldb + j*incb;
        }
        else
        {
            // Slice j starts at the edge and holds j + 1 elements, capped
            // at len_cap.
            len = bl1_min( j + 1, len_cap );
            a_j = a + j*lda;
            b_j = b + j*ldb;
        }

        bl1_zcopyv( conj, len, a_j, inca, b_j, incb );
    }
}
void bl1_zzcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

{
    dcomplex* a_j;
    dcomplex* b_j;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t    conj;

    // Nothing to copy when either dimension is zero.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( !bl1_is_vector( m, n ) )
    {
        // Matrix case: start from a column-by-column traversal of B.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // A is walked with its strides exchanged when trans requests a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // If B is row-stored, traverse by rows instead of by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying copy runs only once.
        // lda and ldb are only ever multiplied by j == 0, so any value works.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1;
        inca   = bl1_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1;
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation component of trans is forwarded to the copy.
    conj = bl1_proj_trans1_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        a_j = a + j*lda;
        b_j = b + j*ldb;

        // Copy the j-th slice of A into the j-th slice of B.
        bl1_zcopyv( conj, n_elem, a_j, inca, b_j, incb );
    }
}