libflame
12600
|
Functions | |
void | bl1_icopymt (trans1_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs) |
void | bl1_scopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_dcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_ccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_sscopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sdcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dscopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_sccopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cscopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_szcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zscopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bl1_ddcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dccopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_cdcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_dzcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zdcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bl1_cccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_czcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bl1_zccopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bl1_zzcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void bl1_cccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_ccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{
    // Copy driver for A (scomplex) into the m-by-n matrix B (scomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_ccopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        scomplex* src = a + k*a_ld;
        scomplex* dst = b + k*b_ld;

        bl1_ccopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_ccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cconjv(), bl1_ccopy(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_chemm(), bl1_cher2k(), bl1_csymm(), bl1_csyr2k(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy driver for A (scomplex) into the m-by-n matrix B (scomplex).
    // Each vector is copied with bl1_ccopy(); when trans requests
    // conjugation, the freshly written vector of B is then passed to
    // bl1_cconjv().
    int num_vecs, vec_len;  // number of copy calls / elements per call
    int a_ld, a_inc;        // A: step between vectors / within a vector
    int b_ld, b_inc;        // B: step between vectors / within a vector
    int k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copy call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-by-row walk only when B is row-major AND A is
        // effectively row-major once the requested transposition is taken
        // into account; this improves spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        scomplex* src = a + k*a_ld;
        scomplex* dst = b + k*b_ld;

        bl1_ccopy( vec_len, src, a_inc, dst, b_inc );

        // Conjugation is applied to the destination after the plain copy.
        if ( bl1_does_conj( trans ) )
        {
            bl1_cconjv( vec_len, dst, b_inc );
        }
    }
}
void bl1_cdcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cdcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (scomplex) into the m-by-n matrix B (double).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_cdcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        scomplex* src = a + k*a_ld;
        double*   dst = b + k*b_ld;

        bl1_cdcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_cscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_cscopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (scomplex) into the m-by-n matrix B (float).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_cscopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        scomplex* src = a + k*a_ld;
        float*    dst = b + k*b_ld;

        bl1_cscopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_czcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_czcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (scomplex) into the m-by-n matrix B (dcomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_czcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        scomplex* src = a + k*a_ld;
        dcomplex* dst = b + k*b_ld;

        bl1_czcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_dccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (double) into the m-by-n matrix B (scomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_dccopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        double*   src = a + k*a_ld;
        scomplex* dst = b + k*b_ld;

        bl1_dccopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_dcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmsr(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opd_var2().
{
    // Copy driver for A (double) into the m-by-n matrix B (double).
    // The operands are walked one vector at a time and each vector is
    // copied with bl1_dcopy(); trans decides whether A's strides are
    // exchanged.
    int num_vecs, vec_len;  // number of copy calls / elements per call
    int a_ld, a_inc;        // A: step between vectors / within a vector
    int b_ld, b_inc;        // B: step between vectors / within a vector
    int k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copy call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-by-row walk only when B is row-major AND A is
        // effectively row-major once the requested transposition is taken
        // into account; this improves spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        double* src = a + k*a_ld;
        double* dst = b + k*b_ld;

        bl1_dcopy( vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_ddcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_dcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{
    // Copy driver for A (double) into the m-by-n matrix B (double).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_dcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        double* src = a + k*a_ld;
        double* dst = b + k*b_ld;

        bl1_dcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_dscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_dscopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (double) into the m-by-n matrix B (float).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_dscopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        double* src = a + k*a_ld;
        float*  dst = b + k*b_ld;

        bl1_dscopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_dzcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (double) into the m-by-n matrix B (dcomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_dzcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        double*   src = a + k*a_ld;
        dcomplex* dst = b + k*b_ld;

        bl1_dzcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_icopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
int * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
int * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_icopyv(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (int) into the m-by-n matrix B (int).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_icopyv() together with the conjugation component of trans;
    // trans also decides whether A's strides are exchanged.
    int num_vecs, vec_len;  // number of copyv calls / elements per call
    int a_ld, a_inc;        // A: step between vectors / within a vector
    int b_ld, b_inc;        // B: step between vectors / within a vector
    int k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-by-row walk only when B is row-major AND A is
        // effectively row-major once the requested transposition is taken
        // into account; this improves spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        int* src = a + k*a_ld;
        int* dst = b + k*b_ld;

        bl1_icopyv( bl1_proj_trans1_to_conj( trans ),
                    vec_len,
                    src, a_inc,
                    dst, b_inc );
    }
}
void bl1_sccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (float) into the m-by-n matrix B (scomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_sccopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        float*    src = a + k*a_ld;
        scomplex* dst = b + k*b_ld;

        bl1_sccopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_scopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_scopy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmsr(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copy driver for A (float) into the m-by-n matrix B (float).
    // The operands are walked one vector at a time and each vector is
    // copied with bl1_scopy(); trans decides whether A's strides are
    // exchanged.
    int num_vecs, vec_len;  // number of copy calls / elements per call
    int a_ld, a_inc;        // A: step between vectors / within a vector
    int b_ld, b_inc;        // B: step between vectors / within a vector
    int k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copy call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-by-row walk only when B is row-major AND A is
        // effectively row-major once the requested transposition is taken
        // into account; this improves spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        float* src = a + k*a_ld;
        float* dst = b + k*b_ld;

        bl1_scopy( vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_sdcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (float) into the m-by-n matrix B (double).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_sdcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        float*  src = a + k*a_ld;
        double* dst = b + k*b_ld;

        bl1_sdcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_sscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_scopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{
    // Copy driver for A (float) into the m-by-n matrix B (float).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_scopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        float* src = a + k*a_ld;
        float* dst = b + k*b_ld;

        bl1_scopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_szcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external() and FLA_Copyt_external().
{
    // Copy driver for A (float) into the m-by-n matrix B (dcomplex).
    // The operands are walked one vector at a time and each vector is handed
    // to bl1_szcopyv(); trans decides whether A's strides are exchanged and
    // supplies the conjugation flag that is passed along.
    conj1_t conj_a;
    int     num_vecs, vec_len;  // number of copyv calls / elements per call
    int     a_ld, a_inc;        // A: step between vectors / within a vector
    int     b_ld, b_inc;        // B: step between vectors / within a vector
    int     k;

    // An empty problem (as reported by bl1_zero_dim2) needs no work.
    if ( bl1_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bl1_is_vector( m, n ) )
    {
        // Vector-shaped operands: one copyv call covers everything. The
        // leading dimensions are only ever multiplied by k == 0 below, so
        // their value is irrelevant.
        num_vecs = 1;
        vec_len  = bl1_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: begin with a column-by-column walk of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( a_ld, a_inc );
        }

        // If B is row-major, traverse by rows rather than by columns for
        // better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( num_vecs, vec_len );
            bl1_swap_ints( a_ld, a_inc );
            bl1_swap_ints( b_ld, b_inc );
        }
    }

    // Only the conjugation component of trans is forwarded to the vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        float*    src = a + k*a_ld;
        dcomplex* dst = b + k*b_ld;

        bl1_szcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_zccopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copies the m-by-n matrix op(A) (dcomplex elements, strides a_rs/a_cs)
    // into B (scomplex elements, strides b_rs/b_cs) by issuing one
    // bl1_zccopyv() call per column, or per row, of B.
    int     n_vecs, vec_len;  // number of bl1_zccopyv() calls; elements per call
    int     a_ld, a_inc;      // A: step between vectors; step within a vector
    int     b_ld, b_inc;      // B: step between vectors; step within a vector
    int     held;             // scratch used when exchanging two ints
    int     k;
    conj1_t conj_a;

    // Leave immediately when bl1_zero_dim2() reports there is nothing to do.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: a single bl1_zccopyv() call covers all elements.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: begin with a column-by-column walk over B.
        n_vecs  = n;
        vec_len = m;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A's row and column strides trade roles when trans asks for a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // When B is row-stored, walk it row by row instead; the intent
        // stated by the original code is better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            held = n_vecs; n_vecs = vec_len; vec_len = held;
            held = a_ld;   a_ld   = a_inc;   a_inc   = held;
            held = b_ld;   b_ld   = b_inc;   b_inc   = held;
        }
    }

    // The conj component of trans is forwarded to every vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        dcomplex* src = a + k*a_ld;
        scomplex* dst = b + k*b_ld;

        bl1_zccopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_zcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zcopy(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zfree_saved_contigm(), bl1_zgemm(), bl1_zhemm(), bl1_zher2k(), bl1_zsymm(), bl1_zsyr2k(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opz_var2().
{
    // Copies the m-by-n matrix op(A) into B, both dcomplex, by issuing one
    // bl1_zcopy() call per column, or per row, of B. When trans carries a
    // conjugation, bl1_zconjv() is then run over each vector just written
    // to B.
    int n_vecs, vec_len;  // number of bl1_zcopy() calls; elements per call
    int a_ld, a_inc;      // A: step between vectors; step within a vector
    int b_ld, b_inc;      // B: step between vectors; step within a vector
    int held;             // scratch used when exchanging two ints
    int k;

    // Leave immediately when bl1_zero_dim2() reports there is nothing to do.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: a single bl1_zcopy() call covers all elements.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: begin with a column-by-column walk.
        n_vecs  = n;
        vec_len = m;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A's row and column strides trade roles when trans asks for a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // Walk by rows only when B is row-stored AND A is effectively
        // row-stored once the requested transposition is accounted for;
        // the intent stated by the original code is better spatial
        // locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) &&
             ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
               ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) ) )
        {
            held = n_vecs; n_vecs = vec_len; vec_len = held;
            held = a_ld;   a_ld   = a_inc;   a_inc   = held;
            held = b_ld;   b_ld   = b_inc;   b_inc   = held;
        }
    }

    for ( k = 0; k < n_vecs; k++ )
    {
        dcomplex* src = a + k*a_ld;
        dcomplex* dst = b + k*b_ld;

        bl1_zcopy( vec_len, src, a_inc, dst, b_inc );

        // Conjugation, when requested, is applied to the destination
        // vector after the copy.
        if ( bl1_does_conj( trans ) )
        {
            bl1_zconjv( vec_len, dst, b_inc );
        }
    }
}
void bl1_zdcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copies the m-by-n matrix op(A) (dcomplex elements, strides a_rs/a_cs)
    // into B (double elements, strides b_rs/b_cs) by issuing one
    // bl1_zdcopyv() call per column, or per row, of B.
    int     n_vecs, vec_len;  // number of bl1_zdcopyv() calls; elements per call
    int     a_ld, a_inc;      // A: step between vectors; step within a vector
    int     b_ld, b_inc;      // B: step between vectors; step within a vector
    int     held;             // scratch used when exchanging two ints
    int     k;
    conj1_t conj_a;

    // Leave immediately when bl1_zero_dim2() reports there is nothing to do.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: a single bl1_zdcopyv() call covers all elements.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: begin with a column-by-column walk over B.
        n_vecs  = n;
        vec_len = m;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A's row and column strides trade roles when trans asks for a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // When B is row-stored, walk it row by row instead; the intent
        // stated by the original code is better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            held = n_vecs; n_vecs = vec_len; vec_len = held;
            held = a_ld;   a_ld   = a_inc;   a_inc   = held;
            held = b_ld;   b_ld   = b_inc;   b_inc   = held;
        }
    }

    // The conj component of trans is forwarded to every vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        dcomplex* src = a + k*a_ld;
        double*   dst = b + k*b_ld;

        bl1_zdcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_zscopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_TRANSPOSE.
Referenced by FLA_Copy_external(), and FLA_Copyt_external().
{
    // Copies the m-by-n matrix op(A) (dcomplex elements, strides a_rs/a_cs)
    // into B (float elements, strides b_rs/b_cs) by issuing one
    // bl1_zscopyv() call per column, or per row, of B.
    int     n_vecs, vec_len;  // number of bl1_zscopyv() calls; elements per call
    int     a_ld, a_inc;      // A: step between vectors; step within a vector
    int     b_ld, b_inc;      // B: step between vectors; step within a vector
    int     held;             // scratch used when exchanging two ints
    int     k;
    conj1_t conj_a;

    // Leave immediately when bl1_zero_dim2() reports there is nothing to do.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: a single bl1_zscopyv() call covers all elements.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: begin with a column-by-column walk over B.
        n_vecs  = n;
        vec_len = m;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A's row and column strides trade roles when trans asks for a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // When B is row-stored, walk it row by row instead; the intent
        // stated by the original code is better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            held = n_vecs; n_vecs = vec_len; vec_len = held;
            held = a_ld;   a_ld   = a_inc;   a_inc   = held;
            held = b_ld;   b_ld   = b_inc;   b_inc   = held;
        }
    }

    // The conj component of trans is forwarded to every vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        dcomplex* src = a + k*a_ld;
        float*    dst = b + k*b_ld;

        bl1_zscopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}
void bl1_zzcopymt | ( | trans1_t | trans, |
int | m, | ||
int | n, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
{
    // Copies the m-by-n matrix op(A) into B, both dcomplex, by issuing one
    // bl1_zcopyv() call per column, or per row, of B.
    int     n_vecs, vec_len;  // number of bl1_zcopyv() calls; elements per call
    int     a_ld, a_inc;      // A: step between vectors; step within a vector
    int     b_ld, b_inc;      // B: step between vectors; step within a vector
    int     held;             // scratch used when exchanging two ints
    int     k;
    conj1_t conj_a;

    // Leave immediately when bl1_zero_dim2() reports there is nothing to do.
    if ( bl1_zero_dim2( m, n ) ) return;

    if ( bl1_is_vector( m, n ) )
    {
        // Vector case: a single bl1_zcopyv() call covers all elements.
        // The leading dimensions are only ever multiplied by k == 0, so
        // their value is irrelevant.
        n_vecs  = 1;
        vec_len = bl1_vector_dim( m, n );
        a_ld    = 1;
        a_inc   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        b_ld    = 1;
        b_inc   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix case: begin with a column-by-column walk over B.
        n_vecs  = n;
        vec_len = m;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A's row and column strides trade roles when trans asks for a
        // transposition.
        if ( bl1_does_trans( trans ) )
        {
            a_ld  = a_rs;
            a_inc = a_cs;
        }
        else
        {
            a_ld  = a_cs;
            a_inc = a_rs;
        }

        // When B is row-stored, walk it row by row instead; the intent
        // stated by the original code is better spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            held = n_vecs; n_vecs = vec_len; vec_len = held;
            held = a_ld;   a_ld   = a_inc;   a_inc   = held;
            held = b_ld;   b_ld   = b_inc;   b_inc   = held;
        }
    }

    // The conj component of trans is forwarded to every vector copy.
    conj_a = bl1_proj_trans1_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        dcomplex* src = a + k*a_ld;
        dcomplex* dst = b + k*b_ld;

        bl1_zcopyv( conj_a, vec_len, src, a_inc, dst, b_inc );
    }
}