|
libflame
12600
|
Functions | |
| void | bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs) |
| void | bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs) |
| void | bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs) |
| void | bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs) |
| void | bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc) |
| void | bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc) |
| void bl1_cherk | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.
Referenced by FLA_Herk_external() and FLA_UDdate_UT_opc_var1().
{
// Hermitian rank-k update for single-precision complex matrices (scomplex)
// whose storage is described by general row/column strides.
//
// Outline:
//   1. Obtain contiguous views of A and C via the bl1_ccreate_contig*() helpers.
//   2. Translate the strides into the (lda, ldc) leading dimensions expected
//      by bl1_cherk_blas(), toggling uplo/trans where the storage requires it.
//   3. Either call bl1_cherk_blas() directly on C, or compute the product into
//      a temporary and accumulate its conjugate into C.
//   4. Release the contiguous views, copying the result back to the caller's C.
//
// Arguments:
//   uplo          - triangle selector for C; may be toggled locally below
//   trans         - trans selector for A; may be toggled locally below
//   m, k          - dimensions; C is handled as m x m, A is handed to the
//                   contiguity helper as m x k together with trans
//   alpha, beta   - pointers to the real (float) scalars
//   a, a_rs, a_cs - A and its row/column strides
//   c, c_rs, c_cs - C and its row/column strides
//
// Snapshot of the caller's arguments. The contiguity helpers receive &a, &c
// and the addresses of the strides, and uplo may be toggled, so the originals
// are kept for the clean-up calls at the end.
uplo1_t uplo_save = uplo;
int m_save = m;
scomplex* a_save = a;
scomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used only by the conjugating path: a real zero passed as beta to
// the BLAS call and a complex one passed as the axpy scalar.
float zero_r = bl1_s0();
scomplex one = bl1_c1();
// Temporary that holds the rank-k product when a conjugation is required.
scomplex* c_conj;
// Leading dimension / increment pairs. Note that inca is assigned and swapped
// below but is not passed to any call in this block.
int lda, inca;
int ldc, incc;
int ldc_conj, incc_conj;
// Set to TRUE when the product must be conjugated before it is added to C.
int herk_needs_conj = FALSE;
// Return early if possible.
// NOTE(review): bl1_zero_dim2() is presumably true when either m or k is zero.
// If so, a call with k == 0 returns without applying beta to C, which differs
// from reference BLAS CHERK -- confirm that this is intended.
if ( bl1_zero_dim2( m, k ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// After these calls a/c and their strides refer to whatever the helpers
// selected; the *_save variables still describe the caller's matrices.
bl1_ccreate_contigmt( trans,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bl1_ccreate_contigmr( uplo,
m,
m,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the comments below, X_c / X_r denote X viewed as column- / row-stored
// and ~uplo denotes the toggled triangle.
if ( bl1_is_col_storage( c_rs, c_cs ) )
{
if ( bl1_is_col_storage( a_rs, a_cs ) )
{
// Both column-stored: nothing to adjust.
// requested operation: uplo( C_c ) += A_c * A_c'
// effective operation: uplo( C_c ) += A_c * A_c'
}
else // if ( bl1_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_c ) += A_r * A_r'
// effective operation: uplo( C_c ) += conj( A_c' * A_c )
bl1_swap_ints( lda, inca );
bl1_toggle_conjtrans( trans );
herk_needs_conj = TRUE;
}
}
else // if ( bl1_is_row_storage( c_rs, c_cs ) )
{
if ( bl1_is_col_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_c * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
bl1_swap_ints( ldc, incc );
bl1_toggle_uplo( uplo );
herk_needs_conj = TRUE;
}
else // if ( bl1_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_r * A_r'
// effective operation: ~uplo( C_c ) += A_c' * A_c
bl1_swap_ints( ldc, incc );
bl1_swap_ints( lda, inca );
bl1_toggle_uplo( uplo );
bl1_toggle_conjtrans( trans );
}
}
// There are two cases where we need to perform the rank-k product and
// then axpy the result into C with a conjugation. We handle those two
// cases here.
if ( herk_needs_conj )
{
// We need a temporary matrix for holding the rank-k product.
// It is addressed as column-major: unit increment, leading dimension m.
// NOTE(review): the pointer returned by bl1_callocm() is used without a
// check in this block; presumably the allocator handles failure itself --
// confirm.
c_conj = bl1_callocm( m, m );
ldc_conj = m;
incc_conj = 1;
// Compute the rank-k product.
// zero_r is passed as beta here; the caller's beta is applied to C
// separately in the next step.
bl1_cherk_blas( uplo,
trans,
m,
k,
alpha,
a, lda,
&zero_r,
c_conj, ldc_conj );
// Scale C by beta.
bl1_csscalmr( uplo,
m,
m,
beta,
c, incc, ldc );
// And finally, accumulate the rank-k product in C_conj into C
// with a conjugation.
bl1_caxpymrt( uplo,
BLIS1_CONJ_NO_TRANSPOSE,
m,
m,
&one,
c_conj, incc_conj, ldc_conj,
c, incc, ldc );
// Free the temporary matrix for C.
bl1_cfree( c_conj );
}
else
{
// No conjugation needed: update C directly, with the caller's beta.
bl1_cherk_blas( uplo,
trans,
m,
k,
alpha,
a, lda,
beta,
c, ldc );
}
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved (un-toggled) uplo and the saved strides are used so that the
// copy-back is expressed in terms of the caller's C.
bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bl1_cfree_saved_contigmr( uplo_save,
m_save,
m_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bl1_cherk_blas | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| scomplex * | a, | ||
| int | lda, | ||
| float * | beta, | ||
| scomplex * | c, | ||
| int | ldc | ||
| ) |
References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().
Referenced by bl1_cherk().
{
    // Hands a single-precision complex Hermitian rank-k update to the BLAS
    // binding chosen at build time. The BLIS1 uplo/trans values are first
    // mapped to the representation that binding expects.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: selectors are characters and every argument is
    // passed by address; alpha and beta are already pointers.
    char f77_uplo;
    char f77_trans;

    bl1_param_map_to_netlib_uplo( uplo, &f77_uplo );
    bl1_param_map_to_netlib_trans( trans, &f77_trans );

    F77_cherk( &f77_uplo, &f77_trans,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#else
    // CBLAS binding: selectors are enums, the layout is always column-major,
    // and the real scalars are passed by value.
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );

    cblas_cherk( CblasColMajor, netlib_uplo, netlib_trans,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#endif
}
| void bl1_dherk | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| double * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| double * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bl1_dsyrk().
{
    // Double-precision real entry point: every argument is forwarded,
    // unchanged and in the same order, to bl1_dsyrk().
    bl1_dsyrk( uplo, trans, m, k,
               alpha, a, a_rs, a_cs,
               beta,  c, c_rs, c_cs );
}
| void bl1_sherk | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| float * | alpha, | ||
| float * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| float * | beta, | ||
| float * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bl1_ssyrk().
{
    // Single-precision real entry point: every argument is forwarded,
    // unchanged and in the same order, to bl1_ssyrk().
    bl1_ssyrk( uplo, trans, m, k,
               alpha, a, a_rs, a_cs,
               beta,  c, c_rs, c_cs );
}
| void bl1_zherk | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | a_rs, | ||
| int | a_cs, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | c_rs, | ||
| int | c_cs | ||
| ) |
References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.
Referenced by FLA_Herk_external() and FLA_UDdate_UT_opz_var1().
{
// Hermitian rank-k update for double-precision complex matrices (dcomplex)
// whose storage is described by general row/column strides.
//
// Outline:
//   1. Obtain contiguous views of A and C via the bl1_zcreate_contig*() helpers.
//   2. Translate the strides into the (lda, ldc) leading dimensions expected
//      by bl1_zherk_blas(), toggling uplo/trans where the storage requires it.
//   3. Either call bl1_zherk_blas() directly on C, or compute the product into
//      a temporary and accumulate its conjugate into C.
//   4. Release the contiguous views, copying the result back to the caller's C.
//
// Arguments:
//   uplo          - triangle selector for C; may be toggled locally below
//   trans         - trans selector for A; may be toggled locally below
//   m, k          - dimensions; C is handled as m x m, A is handed to the
//                   contiguity helper as m x k together with trans
//   alpha, beta   - pointers to the real (double) scalars
//   a, a_rs, a_cs - A and its row/column strides
//   c, c_rs, c_cs - C and its row/column strides
//
// Snapshot of the caller's arguments. The contiguity helpers receive &a, &c
// and the addresses of the strides, and uplo may be toggled, so the originals
// are kept for the clean-up calls at the end.
uplo1_t uplo_save = uplo;
int m_save = m;
dcomplex* a_save = a;
dcomplex* c_save = c;
int a_rs_save = a_rs;
int a_cs_save = a_cs;
int c_rs_save = c_rs;
int c_cs_save = c_cs;
// Constants used only by the conjugating path: a real zero passed as beta to
// the BLAS call and a complex one passed as the axpy scalar.
double zero_r = bl1_d0();
dcomplex one = bl1_z1();
// Temporary that holds the rank-k product when a conjugation is required.
dcomplex* c_conj;
// Leading dimension / increment pairs. Note that inca is assigned and swapped
// below but is not passed to any call in this block.
int lda, inca;
int ldc, incc;
int ldc_conj, incc_conj;
// Set to TRUE when the product must be conjugated before it is added to C.
int herk_needs_conj = FALSE;
// Return early if possible.
// NOTE(review): bl1_zero_dim2() is presumably true when either m or k is zero.
// If so, a call with k == 0 returns without applying beta to C, which differs
// from reference BLAS ZHERK -- confirm that this is intended.
if ( bl1_zero_dim2( m, k ) ) return;
// If necessary, allocate, initialize, and use a temporary contiguous
// copy of each matrix rather than the original matrices.
// After these calls a/c and their strides refer to whatever the helpers
// selected; the *_save variables still describe the caller's matrices.
bl1_zcreate_contigmt( trans,
m,
k,
a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bl1_zcreate_contigmr( uplo,
m,
m,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
// Initialize with values assuming column-major storage.
lda = a_cs;
inca = a_rs;
ldc = c_cs;
incc = c_rs;
// Adjust the parameters based on the storage of each matrix.
// In the comments below, X_c / X_r denote X viewed as column- / row-stored
// and ~uplo denotes the toggled triangle.
if ( bl1_is_col_storage( c_rs, c_cs ) )
{
if ( bl1_is_col_storage( a_rs, a_cs ) )
{
// Both column-stored: nothing to adjust.
// requested operation: uplo( C_c ) += A_c * A_c'
// effective operation: uplo( C_c ) += A_c * A_c'
}
else // if ( bl1_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_c ) += A_r * A_r'
// effective operation: uplo( C_c ) += conj( A_c' * A_c )
bl1_swap_ints( lda, inca );
bl1_toggle_conjtrans( trans );
herk_needs_conj = TRUE;
}
}
else // if ( bl1_is_row_storage( c_rs, c_cs ) )
{
if ( bl1_is_col_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_c * A_c'
// effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
bl1_swap_ints( ldc, incc );
bl1_toggle_uplo( uplo );
herk_needs_conj = TRUE;
}
else // if ( bl1_is_row_storage( a_rs, a_cs ) )
{
// requested operation: uplo( C_r ) += A_r * A_r'
// effective operation: ~uplo( C_c ) += A_c' * A_c
bl1_swap_ints( ldc, incc );
bl1_swap_ints( lda, inca );
bl1_toggle_uplo( uplo );
bl1_toggle_conjtrans( trans );
}
}
// There are two cases where we need to perform the rank-k product and
// then axpy the result into C with a conjugation. We handle those two
// cases here.
if ( herk_needs_conj )
{
// We need a temporary matrix for holding the rank-k product.
// It is addressed as column-major: unit increment, leading dimension m.
// NOTE(review): the pointer returned by bl1_zallocm() is used without a
// check in this block; presumably the allocator handles failure itself --
// confirm.
c_conj = bl1_zallocm( m, m );
ldc_conj = m;
incc_conj = 1;
// Compute the rank-k product.
// zero_r is passed as beta here; the caller's beta is applied to C
// separately in the next step.
bl1_zherk_blas( uplo,
trans,
m,
k,
alpha,
a, lda,
&zero_r,
c_conj, ldc_conj );
// Scale C by beta.
bl1_zdscalmr( uplo,
m,
m,
beta,
c, incc, ldc );
// And finally, accumulate the rank-k product in C_conj into C
// with a conjugation.
bl1_zaxpymrt( uplo,
BLIS1_CONJ_NO_TRANSPOSE,
m,
m,
&one,
c_conj, incc_conj, ldc_conj,
c, incc, ldc );
// Free the temporary matrix for C.
bl1_zfree( c_conj );
}
else
{
// No conjugation needed: update C directly, with the caller's beta.
bl1_zherk_blas( uplo,
trans,
m,
k,
alpha,
a, lda,
beta,
c, ldc );
}
// Free any temporary contiguous matrices, copying the result back to
// the original matrix.
// The saved (un-toggled) uplo and the saved strides are used so that the
// copy-back is expressed in terms of the caller's C.
bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
&a, &a_rs, &a_cs );
bl1_zfree_saved_contigmr( uplo_save,
m_save,
m_save,
c_save, c_rs_save, c_cs_save,
&c, &c_rs, &c_cs );
}
| void bl1_zherk_blas | ( | uplo1_t | uplo, |
| trans1_t | trans, | ||
| int | m, | ||
| int | k, | ||
| double * | alpha, | ||
| dcomplex * | a, | ||
| int | lda, | ||
| double * | beta, | ||
| dcomplex * | c, | ||
| int | ldc | ||
| ) |
References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().
Referenced by bl1_zherk().
{
    // Hands a double-precision complex Hermitian rank-k update to the BLAS
    // binding chosen at build time. The BLIS1 uplo/trans values are first
    // mapped to the representation that binding expects.
#ifndef BLIS1_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: selectors are characters and every argument is
    // passed by address; alpha and beta are already pointers.
    char f77_uplo;
    char f77_trans;

    bl1_param_map_to_netlib_uplo( uplo, &f77_uplo );
    bl1_param_map_to_netlib_trans( trans, &f77_trans );

    F77_zherk( &f77_uplo, &f77_trans,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#else
    // CBLAS binding: selectors are enums, the layout is always column-major,
    // and the real scalars are passed by value.
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );

    cblas_zherk( CblasColMajor, netlib_uplo, netlib_trans,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#endif
}
1.7.6.1