libflame  12600
Functions
bl1_herk.c File Reference

(r12600)

Functions

void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)

Function Documentation

void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

{
    // Single-precision complex Hermitian rank-k update on matrices given
    // with general row/column strides. The actual product is computed by
    // bl1_cherk_blas(); this routine arranges the arguments so that call
    // can be made, and adds a conjugation step when the storage of A and C
    // disagree.

    // Caller's view of every argument that may be replaced or toggled
    // below; needed again when the temporaries are released at the end.
    uplo1_t   uplo_orig = uplo;
    int       m_orig    = m;
    scomplex* a_orig    = a;
    scomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;

    // Scalars used only on the conjugating path.
    float     beta_zero = bl1_s0();
    scomplex  unit      = bl1_c1();

    scomplex* prod;               // temporary m-by-m rank-k product
    int       prod_ld, prod_inc;
    int       lda, inca;
    int       ldc, incc;
    int       a_is_row, c_is_row;
    int       needs_conj;

    // Nothing to do when bl1_zero_dim2() reports an empty problem.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, a and c (and their strides) are redirected to
    // temporary contiguous copies of the original matrices.
    bl1_ccreate_contigmt( trans, m, k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );

    bl1_ccreate_contigmr( uplo, m, m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c, &c_rs, &c_cs );

    // Classify the storage of the (possibly substituted) matrices.
    c_is_row = !bl1_is_col_storage( c_rs, c_cs );
    a_is_row = !bl1_is_col_storage( a_rs, a_cs );

    // Leading dimensions and increments, assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Re-express a row-stored matrix as a column-stored one:
    //
    //   C   A    effective operation
    //   col col   uplo( C_c ) += A_c * A_c'
    //   col row   uplo( C_c ) += conj( A_c' * A_c )
    //   row col  ~uplo( C_c ) += conj( A_c * A_c' )
    //   row row  ~uplo( C_c ) += A_c' * A_c
    if ( c_is_row )
    {
        bl1_swap_ints( ldc, incc );
        bl1_toggle_uplo( uplo );
    }

    if ( a_is_row )
    {
        bl1_swap_ints( lda, inca );
        bl1_toggle_conjtrans( trans );
    }

    // A conjugation is required exactly when A and C are stored
    // differently (the two mixed rows of the table above).
    needs_conj = ( a_is_row != c_is_row );

    if ( !needs_conj )
    {
        // The rank-k update can be applied to C directly.
        bl1_cherk_blas( uplo, trans, m, k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }
    else
    {
        // Form the rank-k product in a temporary matrix first ...
        prod     = bl1_callocm( m, m );
        prod_ld  = m;
        prod_inc = 1;

        bl1_cherk_blas( uplo, trans, m, k,
                        alpha,
                        a, lda,
                        &beta_zero,
                        prod, prod_ld );

        // ... scale C by beta ...
        bl1_csscalmr( uplo, m, m,
                      beta,
                      c, incc, ldc );

        // ... and accumulate the conjugated product into C.
        bl1_caxpymrt( uplo, BLIS1_CONJ_NO_TRANSPOSE, m, m,
                      &unit,
                      prod, prod_inc, prod_ld,
                      c,    incc,     ldc );

        bl1_cfree( prod );
    }

    // Release any temporary contiguous matrices, copying the result back
    // to the original C.
    bl1_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );

    bl1_cfree_saved_contigmr( uplo_orig, m_orig, m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c, &c_rs, &c_cs );
}
void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  lda,
float *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

{
    // Thin dispatch layer: map the BLIS1 uplo/trans parameters to the form
    // the configured BLAS binding expects and forward the call to cherk.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bl1_param_map_to_netlib_trans( trans, &trans_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // The CBLAS binding is always invoked with column-major ordering and
    // receives the real scalars alpha and beta by value.
    cblas_cherk( CblasColMajor,
                 uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_trans( trans, &trans_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The Fortran-77 binding receives every argument by address.
    F77_cherk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyrk().

{
    // The double-precision real variant does no work of its own: every
    // argument is forwarded unchanged to the symmetric rank-k routine.
    bl1_dsyrk( uplo, trans, m, k,
               alpha, a, a_rs, a_cs,
               beta,  c, c_rs, c_cs );
}
void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyrk().

{
    // The single-precision real variant does no work of its own: every
    // argument is forwarded unchanged to the symmetric rank-k routine.
    bl1_ssyrk( uplo, trans, m, k,
               alpha, a, a_rs, a_cs,
               beta,  c, c_rs, c_cs );
}
void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

{
    // Double-precision complex Hermitian rank-k update on matrices given
    // with general row/column strides. The actual product is computed by
    // bl1_zherk_blas(); this routine arranges the arguments so that call
    // can be made, and adds a conjugation step when the storage of A and C
    // disagree.

    // Caller's view of every argument that may be replaced or toggled
    // below; needed again when the temporaries are released at the end.
    uplo1_t   uplo_orig = uplo;
    int       m_orig    = m;
    dcomplex* a_orig    = a;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;

    // Scalars used only on the conjugating path.
    double    beta_zero = bl1_d0();
    dcomplex  unit      = bl1_z1();

    dcomplex* prod;               // temporary m-by-m rank-k product
    int       prod_ld, prod_inc;
    int       lda, inca;
    int       ldc, incc;
    int       a_is_row, c_is_row;
    int       needs_conj;

    // Nothing to do when bl1_zero_dim2() reports an empty problem.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, a and c (and their strides) are redirected to
    // temporary contiguous copies of the original matrices.
    bl1_zcreate_contigmt( trans, m, k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a, &a_rs, &a_cs );

    bl1_zcreate_contigmr( uplo, m, m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c, &c_rs, &c_cs );

    // Classify the storage of the (possibly substituted) matrices.
    c_is_row = !bl1_is_col_storage( c_rs, c_cs );
    a_is_row = !bl1_is_col_storage( a_rs, a_cs );

    // Leading dimensions and increments, assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Re-express a row-stored matrix as a column-stored one:
    //
    //   C   A    effective operation
    //   col col   uplo( C_c ) += A_c * A_c'
    //   col row   uplo( C_c ) += conj( A_c' * A_c )
    //   row col  ~uplo( C_c ) += conj( A_c * A_c' )
    //   row row  ~uplo( C_c ) += A_c' * A_c
    if ( c_is_row )
    {
        bl1_swap_ints( ldc, incc );
        bl1_toggle_uplo( uplo );
    }

    if ( a_is_row )
    {
        bl1_swap_ints( lda, inca );
        bl1_toggle_conjtrans( trans );
    }

    // A conjugation is required exactly when A and C are stored
    // differently (the two mixed rows of the table above).
    needs_conj = ( a_is_row != c_is_row );

    if ( !needs_conj )
    {
        // The rank-k update can be applied to C directly.
        bl1_zherk_blas( uplo, trans, m, k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }
    else
    {
        // Form the rank-k product in a temporary matrix first ...
        prod     = bl1_zallocm( m, m );
        prod_ld  = m;
        prod_inc = 1;

        bl1_zherk_blas( uplo, trans, m, k,
                        alpha,
                        a, lda,
                        &beta_zero,
                        prod, prod_ld );

        // ... scale C by beta ...
        bl1_zdscalmr( uplo, m, m,
                      beta,
                      c, incc, ldc );

        // ... and accumulate the conjugated product into C.
        bl1_zaxpymrt( uplo, BLIS1_CONJ_NO_TRANSPOSE, m, m,
                      &unit,
                      prod, prod_inc, prod_ld,
                      c,    incc,     ldc );

        bl1_zfree( prod );
    }

    // Release any temporary contiguous matrices, copying the result back
    // to the original C.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a, &a_rs, &a_cs );

    bl1_zfree_saved_contigmr( uplo_orig, m_orig, m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c, &c_rs, &c_cs );
}
void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  lda,
double *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().

{
    // Thin dispatch layer: map the BLIS1 uplo/trans parameters to the form
    // the configured BLAS binding expects and forward the call to zherk.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bl1_param_map_to_netlib_trans( trans, &trans_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // The CBLAS binding is always invoked with column-major ordering and
    // receives the real scalars alpha and beta by value.
    cblas_zherk( CblasColMajor,
                 uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_trans( trans, &trans_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The Fortran-77 binding receives every argument by address.
    F77_zherk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}