libflame  12600
Functions
blis_prototypes_level3.h File Reference

(r12600)

Go to the source code of this file.

Functions

void bl1_sgemm (trans1_t transa, trans1_t transb, int m, int k, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dgemm (trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_cgemm (trans1_t transa, trans1_t transb, int m, int k, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zgemm (trans1_t transa, trans1_t transb, int m, int k, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_sgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
void bl1_dgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
void bl1_cgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
void bl1_zgemm_blas (trans1_t transa, trans1_t transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
void bl1_shemm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dhemm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_chemm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zhemm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_chemm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
void bl1_zhemm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
void bl1_sher2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dher2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_cher2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zher2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_cher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc)
void bl1_zher2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc)
void bl1_ssymm (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dsymm (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_csymm (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zsymm (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_ssymm_blas (side1_t side, uplo1_t uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
void bl1_dsymm_blas (side1_t side, uplo1_t uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
void bl1_csymm_blas (side1_t side, uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
void bl1_zsymm_blas (side1_t side, uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
void bl1_ssyrk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dsyrk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_csyrk (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zsyrk (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_ssyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc)
void bl1_dsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc)
void bl1_csyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc)
void bl1_zsyrk_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc)
void bl1_ssyr2k (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_csyr2k (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_zsyr2k (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_ssyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc)
void bl1_dsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc)
void bl1_csyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc)
void bl1_zsyr2k_blas (uplo1_t uplo, trans1_t trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc)
void bl1_strmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dtrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_ctrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_ztrmm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_strmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
void bl1_dtrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
void bl1_ctrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
void bl1_ztrmm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
void bl1_strsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bl1_dtrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bl1_ctrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bl1_ztrsm (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bl1_strsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb)
void bl1_dtrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb)
void bl1_ctrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb)
void bl1_ztrsm_blas (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb)
void bl1_strmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dtrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_ctrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_ztrmmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)
void bl1_strsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs)
void bl1_dtrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
void bl1_ctrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs)
void bl1_ztrsmsx (side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs)

Function Documentation

void bl1_cgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjm(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cgemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_zero_dim3(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

{
    // Single-precision complex general matrix-matrix multiply for operands
    // described by arbitrary (row stride, column stride) pairs. The routine
    // reduces every combination of row/column storage of A, B and C to one
    // call of the column-major wrapper bl1_cgemm_blas(), optionally followed
    // by a transposed accumulation into C.
    //
    //   transa, transb : transposition/conjugation requested for A and B.
    //   m, k, n        : problem dimensions; (m, k) is handed to the
    //                    contiguous-copy helper for A, (k, n) for B and
    //                    (m, n) for C.
    //   alpha, beta    : pointers to the scalars forwarded to the BLAS call.
    //   a, b, c        : matrix buffers with strides (x_rs, x_cs).
    //
    // NOTE(review): bl1_swap_ints, bl1_toggle_trans, bl1_swap_trans and
    // bl1_cswap_pointers are invoked on plain variables and the code below
    // relies on those variables having changed afterwards, so they are
    // presumably macros that modify their arguments in place -- confirm
    // against their definitions.

    // Snapshot of the caller's arguments. The contiguous-copy helpers below
    // receive &a/&b/&c and the stride addresses and may redirect them, and
    // m/n may be swapped later; the originals are needed again for cleanup.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    // Constants used by the axpyt path: gemm is run with beta = zero and the
    // product is then accumulated into C with a factor of one.
    scomplex  zero = bl1_c0();
    scomplex  one  = bl1_c1();
    scomplex* a_unswap;
    scomplex* b_unswap;
    scomplex* a_conj;
    scomplex* b_conj;
    scomplex* c_trans;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_conj, incb_conj;
    int       ldc_trans, incc_trans;
    int       m_gemm, n_gemm;
    // Set to TRUE by the two storage cases whose product can only be formed
    // as the transpose of what C needs.
    int       gemm_needs_axpyt = FALSE;
    int       a_was_copied;
    int       b_was_copied;

    // Return early if possible.
    // Even with nothing to multiply, C is still scaled by beta. No
    // temporaries have been created yet, so there is nothing to free.
    if ( bl1_zero_dim3( m, k, n ) )
    {
        bl1_cscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_ccreate_contigmt( transa,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigmt( transb,
                          k,
                          n,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Figure out whether A and/or B was copied to contiguous memory. This
    // is used later to prevent redundant copying.
    a_was_copied = ( a != a_save );
    b_was_copied = ( b != b_save );

    // These are used to track the original values of a and b prior to any
    // operand swapping that might take place. This is necessary for proper
    // freeing of memory when one is a temporary contiguous matrix.
    a_unswap = a;
    b_unswap = b;

    // These are used to track the dimensions of the product of the
    // A and B operands to the BLAS invocation of gemm. These differ
    // from m and n when the operands need to be swapped.
    m_gemm = m;
    n_gemm = n;

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // Each of the eight (C, A, B) storage combinations is rewritten as an
    // equivalent column-major problem. In the comments below the subscripts
    // _c and _r denote column and row storage.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_c )
                // effective operation: C_c += tr( A_c ) * tr( B_c )
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                
                // requested operation: C_c += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( A_c ) * tr( B_c )^T
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( A_r )^T * tr( B_c )
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transa );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=   tr( A_r ) * tr( B_r )
                // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                // Exchange the roles of A and B, together with every piece
                // of per-operand state that travels with them.
                bl1_cswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );

                // m and n keep describing C; only the product's dimensions
                // are exchanged.
                gemm_needs_axpyt = TRUE;
                bl1_swap_ints( m_gemm, n_gemm );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r +=   tr( A_c ) * tr( B_c )
                // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
                bl1_swap_ints( ldc, incc );

                // Here m and n are swapped (C is viewed transposed) while
                // m_gemm and n_gemm keep the original product dimensions.
                bl1_swap_ints( m, n );

                gemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )^T
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                // transa is toggled before the operand swap below, so the
                // toggled value ends up in transb.
                bl1_toggle_trans( transa );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_cswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( B_c )^T * tr( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                // transb is toggled before the operand swap below, so the
                // toggled value ends up in transa.
                bl1_toggle_trans( transb );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_cswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_cswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
    }

    // We need a temporary matrix for the case where A is conjugated.
    // By default a_conj simply aliases a; it is replaced by a fresh buffer
    // only in the first branch below.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // If transa indicates conjugate-no-transpose and A was not already
    // copied, then copy and conjugate it to a temporary matrix. Otherwise,
    // if transa indicates conjugate-no-transpose and A was already copied,
    // just conjugate it.
    if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
    {
        // The temporary is used with unit row increment and a leading
        // dimension equal to its row count.
        a_conj    = bl1_callocm( m_gemm, k );
        lda_conj  = m_gemm;
        inca_conj = 1;

        bl1_ccopymt( BLIS1_CONJ_NO_TRANSPOSE,
                     m_gemm,
                     k,
                     a,      inca,      lda,
                     a_conj, inca_conj, lda_conj );
    }
    else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
    {
        // a_conj aliases the contiguous copy here, so the conjugation is
        // done in place on that copy and the caller's buffer (a_save) is
        // not the one being modified.
        bl1_cconjm( m_gemm,
                    k,
                    a_conj, inca_conj, lda_conj );
    }

    // We need a temporary matrix for the case where B is conjugated.
    // Same scheme as for A: b_conj aliases b unless a buffer is allocated.
    b_conj    = b;
    ldb_conj  = ldb;
    incb_conj = incb;

    // If transb indicates conjugate-no-transpose and B was not already
    // copied, then copy and conjugate it to a temporary matrix. Otherwise,
    // if transb indicates conjugate-no-transpose and B was already copied,
    // just conjugate it.
    if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
    {
        b_conj    = bl1_callocm( k, n_gemm );
        ldb_conj  = k;
        incb_conj = 1;

        bl1_ccopymt( BLIS1_CONJ_NO_TRANSPOSE,
                     k,
                     n_gemm,
                     b,      incb,      ldb,
                     b_conj, incb_conj, ldb_conj );
    }
    else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
    {
        bl1_cconjm( k,
                    n_gemm,
                    b_conj, incb_conj, ldb_conj );
    }

    // There are two cases where we need to perform the gemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( gemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, while m_gemm and n_gemm are the
        // dimensions of the actual product op(A)*op(B), which may be n-by-m
        // since the operands may have been swapped.
        c_trans    = bl1_callocm( m_gemm, n_gemm );
        ldc_trans  = m_gemm;
        incc_trans = 1;

        // Compute tr( A ) * tr( B ), where A and B may have been swapped
        // to reference the other, and store the result in C_trans.
        // beta is replaced by zero here; the caller's beta is applied to C
        // separately in the next step.
        bl1_cgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a_conj,  lda_conj,
                        b_conj,  ldb_conj,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_cscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_caxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_cfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // The product is written straight into C with the caller's beta.
        bl1_cgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a_conj, lda_conj,
                        b_conj, ldb_conj,
                        beta,
                        c,      ldc );
    }

    // These conditions mirror the ones under which a_conj / b_conj were
    // allocated above, so only buffers obtained from bl1_callocm() are
    // released here.
    if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
        bl1_cfree( a_conj );

    if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
        bl1_cfree( b_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    // The un-swapped pointers are used so that each contiguous copy is
    // matched with the caller buffer it was created from.
    bl1_cfree_contigm( a_save,    a_rs_save, a_cs_save,
                       &a_unswap, &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save,    b_rs_save, b_cs_save,
                       &b_unswap, &b_rs,     &b_cs );

    // C is released with the original (pre-swap) dimensions m_save, n_save.
    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bl1_cgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_cgemm(), CblasColMajor, and F77_cgemm().

Referenced by bl1_cgemm().

{
    // Thin dispatch layer over the external single-precision complex gemm.
    // Which backend is called is decided at compile time: cblas_cgemm()
    // when BLIS1_ENABLE_CBLAS_INTERFACES is defined, F77_cgemm() otherwise.
    // All arguments are forwarded unchanged except transa/transb, which are
    // first translated by bl1_param_map_to_netlib_trans().
    //
    // NOTE(review): bl1_param_map_to_netlib_trans() is handed an
    // enum CBLAS_TRANSPOSE* in one branch and a char* in the other, so it
    // presumably takes a generic pointer -- confirm against its definition.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // The CBLAS path always requests column-major ordering.
    enum CBLAS_ORDER     cblas_order = CblasColMajor;
    enum CBLAS_TRANSPOSE cblas_transa;
    enum CBLAS_TRANSPOSE cblas_transb;

    bl1_param_map_to_netlib_trans( transa, &cblas_transa );
    bl1_param_map_to_netlib_trans( transb, &cblas_transb );

    cblas_cgemm( cblas_order,
                 cblas_transa,
                 cblas_transb,
                 m,
                 n,
                 k,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    char blas_transa;
    char blas_transb;

    bl1_param_map_to_netlib_trans( transa, &blas_transa );
    bl1_param_map_to_netlib_trans( transb, &blas_transb );

    // In this path the trans flags, dimensions and leading dimensions are
    // passed by address; alpha, beta and the matrices are already pointers.
    F77_cgemm( &blas_transa,
               &blas_transb,
               &m,
               &n,
               &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
void bl1_chemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_cconjmr(), bl1_ccopymrt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_chemm_blas(), bl1_cscalm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

{
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    scomplex  zero = bl1_c0();
    scomplex  one  = bl1_c1();
    scomplex* a_conj;
    scomplex* b_copy;
    scomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       hemm_needs_conja  = FALSE;
    int       hemm_needs_copyb  = FALSE;
    int       hemm_needs_transb = FALSE;
    int       hemm_needs_axpyt  = FALSE;
    int       a_was_copied;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;
    
    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                hemm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );

                hemm_needs_conja = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                hemm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                hemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );

                hemm_needs_conja = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                hemm_needs_copyb  = TRUE;
                hemm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where A is conjugated.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;
    
    if ( hemm_needs_conja && !a_was_copied )
    {
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );
        
        a_conj    = bl1_callocm( dim_a, dim_a );
        lda_conj  = dim_a;
        inca_conj = 1;

        bl1_ccopymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      dim_a,
                      dim_a,
                      a,      inca,      lda,
                      a_conj, inca_conj, lda_conj );
    }
    else if ( hemm_needs_conja && a_was_copied )
    {
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );

        bl1_cconjmr( uplo,
                     dim_a,
                     dim_a,
                     a_conj, inca_conj, lda_conj );
    }
    
    // We need a temporary matrix for the cases where B needs to be copied.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;
    
    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( hemm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n and have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( hemm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;
        
        b_copy    = bl1_callocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_ccopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the hemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( hemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_callocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the conja/copyb cases, hence the use of a, b, lda, and ldb.
        bl1_chemm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_cscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_caxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_cfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bl1_chemm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a_conj, lda_conj,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    if ( hemm_needs_conja && !a_was_copied )
        bl1_cfree( a_conj );

    if ( hemm_needs_copyb )
        bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bl1_chemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
scomplex *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_chemm(), CblasColMajor, and F77_chemm().

Referenced by bl1_chemm().

{
    // Thin dispatch layer: the multiply itself is performed by whichever
    // BLAS binding was selected at build time. The operands are handed over
    // untouched; only side/uplo are translated for the target interface.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: dimensions and leading dimensions are passed by value,
    // and the storage order is fixed to column-major.
    enum CBLAS_SIDE  side_cblas;
    enum CBLAS_UPLO  uplo_cblas;
    enum CBLAS_ORDER order_cblas = CblasColMajor;

    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    cblas_chemm( order_cblas, side_cblas, uplo_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    // Fortran-77 binding: side/uplo become single characters and every
    // integer argument is passed by address.
    char side_f77;
    char uplo_f77;

    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    F77_chemm( &side_f77, &uplo_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
void bl1_cher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccopymt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cher2k_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_set_dims_with_trans(), bl1_zero_dim2(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

{
    // Rank-2k update of the uplo triangle of the m-by-m matrix C from A and
    // B, for operands given with separate row and column strides. The
    // strides are reduced to a column-major problem that bl1_cher2k_blas()
    // can execute; temporary copies of A/B and a conjugated accumulation
    // into C cover the storage combinations that cannot be expressed by
    // swapping strides and toggling uplo/trans alone.
    //
    //   uplo, trans      - triangle of C to update / transposition applied
    //                      to A and B.
    //   m, k             - C is m-by-m; m and k together with trans give the
    //                      dimensions of A and B.
    //   alpha            - complex scalar, read through the pointer.
    //   a, b, c          - matrix buffers with (row stride, column stride)
    //                      pairs a_rs/a_cs, b_rs/b_cs and c_rs/c_cs.
    //   beta             - real scalar applied to C.

    // Snapshot the caller's arguments: the contiguity helpers and the
    // storage analysis below overwrite the working copies, and the final
    // free/copy-back calls need the original values.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero_r = bl1_s0();
    scomplex  one    = bl1_c1();
    scomplex  alpha_copy;
    scomplex* a_copy;
    scomplex* b_copy;
    scomplex* c_conj;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       ldc_conj, incc_conj;
    // Flags set by the storage analysis and consumed further down.
    int       her2k_needs_copya      = FALSE;
    int       her2k_needs_copyb      = FALSE;
    int       her2k_needs_conj       = FALSE;
    int       her2k_needs_alpha_conj = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote column- / row-stored operands
    // and ~uplo denotes the opposite triangle.
    // NOTE(review): bl1_swap_ints and bl1_toggle_* receive the variables by
    // name and later code relies on the updated values, so they are
    // presumably macros that modify their arguments in place.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_conjtrans( trans );

                her2k_needs_conj       = TRUE;
                her2k_needs_alpha_conj = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_conjtrans( trans );

                her2k_needs_alpha_conj = TRUE;
            }
        }
    }

    // Make a copy of alpha and conjugate if necessary.
    // NOTE(review): alpha_copy is an scomplex, yet the z-prefixed helper is
    // applied here while every other helper in this function is c-prefixed.
    // Presumably a c-prefixed conjugation helper was intended -- confirm.
    alpha_copy = *alpha;
    if ( her2k_needs_alpha_conj )
    {
        bl1_zconjs( &alpha_copy );
    }

    // Default: use A as-is; overridden below when a copy is required.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;
    
    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bl1_callocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }
    
    // Default: use B as-is; overridden below when a copy is required.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bl1_callocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are four storage cases where we need to perform the rank-2k
    // product and then axpy the result into C with a conjugation. We handle
    // those cases here.
    if ( her2k_needs_conj )
    {
        // We need a temporary matrix for holding the rank-2k product.
        c_conj    = bl1_callocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-2k product. A zero beta is passed because the
        // result goes into the temporary; the caller's beta is applied to C
        // separately below.
        bl1_cher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         &zero_r,
                         c_conj, ldc_conj );

        // Scale C by beta.
        bl1_csscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-2k product in C_conj into C
        // with a conjugation.
        bl1_caxpymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bl1_cfree( c_conj );
    }
    else
    {
        // No conjugation needed: update C directly, letting BLAS apply beta.
        bl1_cher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         beta,
                         c, ldc );
    }

    // Release the column-major copies, which exist only when the matching
    // flag was set.
    if ( her2k_needs_copya )
        bl1_cfree( a_copy );

    if ( her2k_needs_copyb )
        bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // The saved uplo and m are used here because the working uplo may have
    // been toggled by the storage analysis above.
    bl1_cfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_cher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  b,
int  ldb,
float *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().

Referenced by bl1_cher2k().

{
    // Forward the column-major rank-2k update to the BLAS binding chosen at
    // build time; this wrapper only translates uplo/trans for the target
    // interface.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_ORDER     order_cblas = CblasColMajor;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    // The CBLAS entry point receives beta by value, hence the dereference;
    // alpha is forwarded as a pointer.
    cblas_cher2k( order_cblas, uplo_cblas, trans_cblas,
                  m, k,
                  alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    // Fortran-77 binding: uplo/trans become single characters and every
    // integer argument is passed by address; beta stays a pointer.
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    F77_cher2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

{
    // Rank-k update of the uplo triangle of the m-by-m matrix C from A, for
    // operands given with separate row and column strides. The strides are
    // reduced to a column-major problem that bl1_cherk_blas() can execute;
    // two storage combinations additionally require the product to be
    // accumulated into C with a conjugation.
    //
    //   uplo, trans      - triangle of C to update / transposition applied
    //                      to A.
    //   m, k             - C is m-by-m; m and k together with trans give the
    //                      dimensions of A.
    //   alpha, beta      - real scalars, passed by pointer.
    //   a, c             - matrix buffers with (row stride, column stride)
    //                      pairs a_rs/a_cs and c_rs/c_cs.

    // Snapshot the caller's arguments: the contiguity helpers and the
    // storage analysis below overwrite the working copies, and the final
    // free/copy-back calls need the original values.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    scomplex* a_save    = a;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero_r = bl1_s0();
    scomplex  one    = bl1_c1();
    scomplex* c_conj;
    int       lda, inca;
    int       ldc, incc;
    int       ldc_conj, incc_conj;
    // Set by the storage analysis when the result must be conjugated.
    int       herk_needs_conj = FALSE;
    
    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote column- / row-stored operands
    // and ~uplo denotes the opposite triangle.
    // NOTE(review): bl1_swap_ints and bl1_toggle_* receive the variables by
    // name and later code relies on the updated values, so they are
    // presumably macros that modify their arguments in place.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_c * A_c'
            // effective operation: uplo( C_c ) += A_c * A_c'
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_r * A_r'
            // effective operation: uplo( C_c ) += conj( A_c' * A_c )
            bl1_swap_ints( lda, inca );

            bl1_toggle_conjtrans( trans );

            herk_needs_conj = TRUE;
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_c * A_c'
            // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
            bl1_swap_ints( ldc, incc );

            bl1_toggle_uplo( uplo );

            herk_needs_conj = TRUE;
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_r * A_r'
            // effective operation: ~uplo( C_c ) += A_c' * A_c
            bl1_swap_ints( ldc, incc );
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_conjtrans( trans );
        }
    }

    // There are two cases where we need to perform the rank-k product and
    // then axpy the result into C with a conjugation. We handle those two
    // cases here.
    if ( herk_needs_conj )
    {
        // We need a temporary matrix for holding the rank-k product.
        c_conj    = bl1_callocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-k product. A zero beta is passed because the
        // result goes into the temporary; the caller's beta is applied to C
        // separately below.
        bl1_cherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        &zero_r,
                        c_conj, ldc_conj );

        // Scale C by beta.
        bl1_csscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-k product in C_conj into C
        // with a conjugation.
        bl1_caxpymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bl1_cfree( c_conj );
    }
    else
    {
        // No conjugation needed: update C directly, letting BLAS apply beta.
        bl1_cherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // The saved uplo and m are used here because the working uplo may have
    // been toggled by the storage analysis above.
    bl1_cfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  lda,
float *  beta,
scomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

{
    // Forward the column-major rank-k update to the BLAS binding chosen at
    // build time; this wrapper only translates uplo/trans for the target
    // interface.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_ORDER     order_cblas = CblasColMajor;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    // The CBLAS entry point receives both real scalars by value, hence the
    // dereferences.
    cblas_cherk( order_cblas, uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    // Fortran-77 binding: uplo/trans become single characters and every
    // integer argument is passed by address; the scalars stay pointers.
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    F77_cherk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
void bl1_csymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c0(), bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_csymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

{
    // Symmetric matrix-matrix multiply updating the m-by-n matrix C from A
    // and B, for operands given with separate row and column strides. The
    // strides are reduced to a column-major problem that bl1_csymm_blas()
    // can execute; a temporary copy of B, or a transposed accumulation into
    // C, covers the storage combinations that cannot be expressed by
    // swapping strides and toggling side/uplo alone.
    //
    //   side, uplo       - side on which A is applied / triangle of A used.
    //   m, n             - dimensions of B and C; A is dim_a-by-dim_a, with
    //                      dim_a derived from side, m and n.
    //   alpha, beta      - complex scalars, passed by pointer.
    //   a, b, c          - matrix buffers with (row stride, column stride)
    //                      pairs a_rs/a_cs, b_rs/b_cs and c_rs/c_cs.

    // Snapshot the caller's arguments: the contiguity helpers and the
    // storage analysis below overwrite the working copies, and the final
    // free/copy-back calls need the original values.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    scomplex  zero = bl1_c0();
    scomplex  one  = bl1_c1();
    scomplex* b_copy;
    scomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    // Flags set by the storage analysis and consumed further down.
    int       symm_needs_copyb  = FALSE;
    int       symm_needs_transb = FALSE;
    int       symm_needs_axpyt  = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;
    
    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote column- / row-stored operands
    // and ~uplo denotes the opposite triangle.
    // NOTE(review): several "effective operation" comments below mention
    // conj( A_c ), but this function never conjugates A; the wording looks
    // inherited from the Hermitian variant -- confirm and simplify.
    // NOTE(review): bl1_swap_ints and bl1_toggle_* receive the variables by
    // name and later code relies on the updated values, so they are
    // presumably macros that modify their arguments in place.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                symm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                symm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_copyb  = TRUE;
                symm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // Default: use B as-is; overridden below when a copy is required.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;
    
    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( symm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;
        
        b_copy    = bl1_callocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_ccopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the symm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( symm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_callocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the copyb cases, hence the use of a, b, lda, and ldb.
        // A zero beta is passed because the result goes into the temporary;
        // the caller's beta is applied to C separately below.
        bl1_csymm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_cscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_caxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_cfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // Update C directly, letting BLAS apply beta; b_copy is either the
        // original B or the temporary copy made above.
        bl1_csymm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a,      lda,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    // Release the copy of B, which exists only when the flag was set.
    if ( symm_needs_copyb )
        bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // The saved m and n are used here because the working values may have
    // been swapped by the storage analysis above.
    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_csymm_blas
//
// Thin dispatch layer that hands a single-precision complex symmetric
// matrix-matrix multiply to the external BLAS.
//
// When BLIS1_ENABLE_CBLAS_INTERFACES is defined the call is made through
// cblas_csymm() with CblasColMajor ordering; otherwise it is made through
// the Fortran-77 binding F77_csymm(), which receives its character and
// integer arguments by address.
//
// Parameters:
//   side, uplo  - BLIS1 selectors, translated to the BLAS/CBLAS encoding by
//                 bl1_param_map_to_netlib_side() / _uplo().
//   m, n        - dimensions, forwarded unchanged.
//   alpha, beta - pointers to the scalars, forwarded unchanged.
//   a, lda      - matrix A and its leading dimension.
//   b, ldb      - matrix B and its leading dimension.
//   c, ldc      - matrix C and its leading dimension.
//
// References: bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(),
//             cblas_csymm(), CblasColMajor, and F77_csymm().
// Referenced by: bl1_csymm().
void bl1_csymm_blas( side1_t   side,
                     uplo1_t   uplo,
                     int       m,
                     int       n,
                     scomplex* alpha,
                     scomplex* a, int lda,
                     scomplex* b, int ldb,
                     scomplex* beta,
                     scomplex* c, int ldc )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER cblas_order = CblasColMajor;
    enum CBLAS_SIDE  cblas_side;
    enum CBLAS_UPLO  cblas_uplo;

    // Translate the BLIS1 selectors into their CBLAS enum equivalents.
    bl1_param_map_to_netlib_side( side, &cblas_side );
    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );

    cblas_csymm( cblas_order,
                 cblas_side,
                 cblas_uplo,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    char blas_side;
    char blas_uplo;

    // Translate the BLIS1 selectors into the character codes that the
    // Fortran interface expects.
    bl1_param_map_to_netlib_side( side, &blas_side );
    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );

    // The Fortran binding takes every scalar argument by reference.
    F77_csymm( &blas_side,
               &blas_uplo,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
// bl1_csyr2k
//
// Single-precision complex symmetric rank-2k update for matrices described
// by general row/column strides. The routine reduces the request to a
// column-major call to bl1_csyr2k_blas():
//
//   1. Return immediately if bl1_zero_dim2( m, k ) reports an empty problem.
//   2. Obtain contiguous versions of A, B and C where necessary
//      (bl1_ccreate_contigmt / bl1_ccreate_contigmr).
//   3. Inspect the storage of C, A and B and, depending on the combination,
//      swap leading dimension/increment, toggle uplo and/or trans, or flag
//      A or B as needing a column-major copy.
//   4. Make the flagged copies, call bl1_csyr2k_blas(), and release all
//      temporaries. C is written back via bl1_cfree_saved_contigmr().
//
// In the case comments below X' denotes the plain transpose; this routine
// performs no conjugation.
//
// Parameters:
//   uplo             - which triangle of C is referenced/updated.
//   trans            - transposition selector applied to A and B.
//   m, k             - problem dimensions (C is m-by-m).
//   alpha, beta      - pointers to the scalars.
//   a, a_rs, a_cs    - matrix A with its row and column strides.
//   b, b_rs, b_cs    - matrix B with its row and column strides.
//   c, c_rs, c_cs    - matrix C with its row and column strides.
//
// References: bl1_callocm(), bl1_ccopymt(), bl1_ccreate_contigmr(),
//             bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(),
//             bl1_cfree_saved_contigmr(), bl1_csyr2k_blas(),
//             bl1_is_col_storage(), bl1_set_dims_with_trans(),
//             bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.
// Referenced by: FLA_Syr2k_external().
void bl1_csyr2k( uplo1_t   uplo,
                 trans1_t  trans,
                 int       m,
                 int       k,
                 scomplex* alpha,
                 scomplex* a, int a_rs, int a_cs,
                 scomplex* b, int b_rs, int b_cs,
                 scomplex* beta,
                 scomplex* c, int c_rs, int c_cs )
{
    // Snapshot the caller's view of every operand; the working copies below
    // may be redirected to temporaries or have their parameters toggled.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    scomplex* a_copy;
    scomplex* b_copy;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       syr2k_needs_copya = FALSE;
    int       syr2k_needs_copyb = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // (B is copied to column-major storage below.)
                syr2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // (A is copied to column-major storage below.)
                syr2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( trans );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += A_c * B_c' + B_c * A_c'
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // (B is copied to column-major storage below.)
                syr2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // (A is copied to column-major storage below.)
                syr2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_trans( trans );
            }
        }
    }

    // By default the BLAS call operates directly on A.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;
    
    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bl1_callocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }
    
    // By default the BLAS call operates directly on B.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bl1_callocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    bl1_csyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_copy, lda_copy,
                     b_copy, ldb_copy,
                     beta,
                     c, ldc );

    // Release the column-major copies, if any were made.
    if ( syr2k_needs_copya )
        bl1_cfree( a_copy );

    if ( syr2k_needs_copyb )
        bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // Use the saved (untoggled) uplo and m so the write-back matches the
    // caller's original description of C.
    bl1_cfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
// bl1_csyr2k_blas
//
// Thin dispatch layer that hands a single-precision complex symmetric
// rank-2k update to the external BLAS.
//
// A conjugate-transpose request is first downgraded to a plain transpose,
// because the BLAS syr2k interface does not accept the conjugate-transpose
// constant. The call then goes through cblas_csyr2k() (CblasColMajor) when
// BLIS1_ENABLE_CBLAS_INTERFACES is defined, or through F77_csyr2k()
// otherwise.
//
// Parameters:
//   uplo, trans - BLIS1 selectors, translated by
//                 bl1_param_map_to_netlib_uplo() / _trans().
//   m, k        - dimensions, forwarded unchanged.
//   alpha, beta - pointers to the scalars, forwarded unchanged.
//   a, lda      - matrix A and its leading dimension.
//   b, ldb      - matrix B and its leading dimension.
//   c, ldc      - matrix C and its leading dimension.
//
// References: bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(),
//             bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE,
//             cblas_csyr2k(), CblasColMajor, and F77_csyr2k().
// Referenced by: bl1_csyr2k().
void bl1_csyr2k_blas( uplo1_t   uplo,
                      trans1_t  trans,
                      int       m,
                      int       k,
                      scomplex* alpha,
                      scomplex* a, int lda,
                      scomplex* b, int ldb,
                      scomplex* beta,
                      scomplex* c, int ldc )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     cblas_order = CblasColMajor;
    enum CBLAS_UPLO      cblas_uplo;
    enum CBLAS_TRANSPOSE cblas_trans;

    // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
    // so we have to map it down to regular transposition.
    if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;

    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
    bl1_param_map_to_netlib_trans( trans, &cblas_trans );

    cblas_csyr2k( cblas_order,
                  cblas_uplo,
                  cblas_trans,
                  m,
                  k,
                  alpha,
                  a, lda,
                  b, ldb,
                  beta,
                  c, ldc );
#else
    char blas_uplo;
    char blas_trans;

    // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
    // so we have to map it down to regular transposition.
    if ( bl1_is_conjtrans( trans ) ) trans = BLIS1_TRANSPOSE;

    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
    bl1_param_map_to_netlib_trans( trans, &blas_trans );

    // The Fortran binding takes every scalar argument by reference.
    F77_csyr2k( &blas_uplo,
                &blas_trans,
                &m,
                &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
// bl1_csyrk
//
// Single-precision complex symmetric rank-k update for matrices described
// by general row/column strides. The request is reduced to a column-major
// call to bl1_csyrk_blas():
//
//   1. Return immediately if bl1_zero_dim2( m, k ) reports an empty problem.
//   2. Obtain contiguous versions of A and C where necessary.
//   3. For row-stored A, swap lda/inca and toggle trans; for row-stored C,
//      swap ldc/incc and toggle uplo.
//   4. Call bl1_csyrk_blas() and release the temporaries. C is written back
//      via bl1_cfree_saved_contigmr().
//
// Parameters:
//   uplo           - which triangle of C is referenced/updated.
//   trans          - transposition selector applied to A.
//   m, k           - problem dimensions (C is m-by-m).
//   alpha, beta    - pointers to the scalars.
//   a, a_rs, a_cs  - matrix A with its row and column strides.
//   c, c_rs, c_cs  - matrix C with its row and column strides.
//
// References: bl1_ccreate_contigmr(), bl1_ccreate_contigmt(),
//             bl1_cfree_contigm(), bl1_cfree_saved_contigmr(),
//             bl1_csyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().
// Referenced by: FLA_Syrk_external().
void bl1_csyrk( uplo1_t   uplo,
                trans1_t  trans,
                int       m,
                int       k,
                scomplex* alpha,
                scomplex* a, int a_rs, int a_cs,
                scomplex* beta,
                scomplex* c, int c_rs, int c_cs )
{
    // Snapshot the caller's view of every operand; the working copies below
    // may be redirected to temporaries or have their parameters toggled.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    scomplex* a_save    = a;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    int       lda, inca;
    int       ldc, incc;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_ccreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_c * A_c^T
            // effective operation: uplo( C_c ) += A_c * A_c^T
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_r * A_r^T
            // effective operation: uplo( C_c ) += A_c^T * A_c
            bl1_swap_ints( lda, inca );

            bl1_toggle_trans( trans );
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_c * A_c^T
            // effective operation: ~uplo( C_c ) += A_c * A_c^T
            bl1_swap_ints( ldc, incc );

            bl1_toggle_uplo( uplo );
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_r * A_r^T
            // effective operation: ~uplo( C_c ) += A_c^T * A_c
            bl1_swap_ints( ldc, incc );
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
    }

    bl1_csyrk_blas( uplo,
                    trans,
                    m,
                    k,
                    alpha,
                    a, lda,
                    beta,
                    c, ldc );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // Use the saved (untoggled) uplo and m so the write-back matches the
    // caller's original description of C.
    bl1_cfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
// bl1_csyrk_blas
//
// Thin dispatch layer that hands a single-precision complex symmetric
// rank-k update to the external BLAS: cblas_csyrk() with CblasColMajor
// ordering when BLIS1_ENABLE_CBLAS_INTERFACES is defined, F77_csyrk()
// otherwise.
//
// Parameters:
//   uplo, trans - BLIS1 selectors, translated by
//                 bl1_param_map_to_netlib_uplo() / _trans().
//   m, k        - dimensions, forwarded unchanged.
//   alpha, beta - pointers to the scalars, forwarded unchanged.
//   a, lda      - matrix A and its leading dimension.
//   c, ldc      - matrix C and its leading dimension.
//
// References: bl1_param_map_to_netlib_trans(),
//             bl1_param_map_to_netlib_uplo(), cblas_csyrk(), CblasColMajor,
//             and F77_csyrk().
// Referenced by: bl1_csyrk().
void bl1_csyrk_blas( uplo1_t   uplo,
                     trans1_t  trans,
                     int       m,
                     int       k,
                     scomplex* alpha,
                     scomplex* a, int lda,
                     scomplex* beta,
                     scomplex* c, int ldc )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     cblas_order = CblasColMajor;
    enum CBLAS_UPLO      cblas_uplo;
    enum CBLAS_TRANSPOSE cblas_trans;

    // Translate the BLIS1 selectors into their CBLAS enum equivalents.
    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
    bl1_param_map_to_netlib_trans( trans, &cblas_trans );

    cblas_csyrk( cblas_order,
                 cblas_uplo,
                 cblas_trans,
                 m,
                 k,
                 alpha,
                 a, lda,
                 beta,
                 c, ldc );
#else
    char blas_uplo;
    char blas_trans;

    // Translate the BLIS1 selectors into the character codes that the
    // Fortran interface expects.
    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
    bl1_param_map_to_netlib_trans( trans, &blas_trans );

    // The Fortran binding takes every scalar argument by reference.
    F77_csyrk( &blas_uplo,
               &blas_trans,
               &m,
               &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
// bl1_ctrmm
//
// Single-precision complex triangular matrix-matrix multiply for matrices
// described by general row/column strides. B is overwritten with the
// result. The request is reduced to a column-major call to
// bl1_ctrmm_blas():
//
//   1. Return immediately if bl1_zero_dim2( m, n ) reports an empty problem.
//   2. Obtain contiguous versions of A (dim_a-by-dim_a, where dim_a is
//      chosen from m or n by bl1_set_dim_with_side()) and B where necessary.
//   3. Depending on the storage of B and A, swap leading dimension and
//      increment, swap m and n, and toggle uplo/side/trans.
//   4. If the resulting trans is conjugate-no-transpose, conjugate A: in
//      place when A is already a temporary copy, otherwise into a freshly
//      allocated buffer so the caller's A is left untouched.
//   5. Call bl1_ctrmm_blas() and release the temporaries. B is written back
//      via bl1_cfree_saved_contigm().
//
// Parameters:
//   side, uplo, trans, diag - BLIS1 selectors describing the operation.
//   m, n                    - dimensions of B.
//   alpha                   - pointer to the scalar.
//   a, a_rs, a_cs           - triangular matrix A with its strides.
//   b, b_rs, b_cs           - matrix B with its strides.
//
// References: bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(),
//             bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(),
//             bl1_cfree_contigm(), bl1_cfree_saved_contigm(),
//             bl1_ctrmm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(),
//             bl1_set_dim_with_side(), bl1_zero_dim2(), and
//             BLIS1_CONJ_NO_TRANSPOSE.
// Referenced by: bl1_ctrmmsx(), and FLA_Trmm_external().
void bl1_ctrmm( side1_t   side,
                uplo1_t   uplo,
                trans1_t  trans,
                diag1_t   diag,
                int       m,
                int       n,
                scomplex* alpha,
                scomplex* a, int a_rs, int a_cs,
                scomplex* b, int b_rs, int b_cs )
{
    // Snapshot the caller's view of every operand; the working copies below
    // may be redirected to temporaries or have their parameters toggled.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    scomplex* a_conj;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       lda_conj, inca_conj;
    int       a_was_copied;
    int       a_conj_is_temp = 0; // set when a_conj is allocated here

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr( uplo( A_c ) ) * B_c
            // effective operation: B_c := tr( uplo( A_c ) ) * B_c
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr(  uplo( A_r ) )   * B_c
            // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_c ) ) * B_r
            // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
            bl1_swap_ints( ldb, incb );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_trans( trans );
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_r ) ) * B_r
            // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
            bl1_swap_ints( ldb, incb );
            bl1_swap_ints( lda, inca );

            bl1_swap_ints( m, n );

            bl1_toggle_uplo( uplo );
            bl1_toggle_side( side );
        }
    }

    // Initialize with values assuming that trans is not conjnotrans.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // We want to handle the conjnotrans case. The easiest way to do so is
    // by conjugating A before the BLAS call.
    if ( bl1_is_conjnotrans( trans ) )
    {
        // side, m and n may have been changed above, so recompute the
        // dimension of A from their current values.
        bl1_set_dim_with_side( side, m, n, &dim_a );

        if ( a_was_copied )
        {
            // A is already a private temporary; conjugate it in place.
            bl1_cconjmr( uplo,
                         dim_a,
                         dim_a,
                         a_conj, inca_conj, lda_conj );
        }
        else
        {
            // A still refers to the caller's memory, so build a conjugated
            // column-major copy instead of modifying it.
            a_conj         = bl1_callocm( dim_a, dim_a );
            lda_conj       = dim_a;
            inca_conj      = 1;
            a_conj_is_temp = 1;

            bl1_ccopymrt( uplo,
                          BLIS1_CONJ_NO_TRANSPOSE,
                          dim_a,
                          dim_a,
                          a,      inca,      lda,
                          a_conj, inca_conj, lda_conj );
        }
    }

    bl1_ctrmm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_conj, lda_conj,
                    b,      ldb );

    // Release the conjugated copy only if it was allocated above.
    if ( a_conj_is_temp )
        bl1_cfree( a_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // Use the saved (unswapped) m and n so the write-back matches the
    // caller's original description of B.
    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             b_save, b_rs_save, b_cs_save,
                             &b,     &b_rs,     &b_cs );
}
// bl1_ctrmm_blas
//
// Thin dispatch layer that hands a single-precision complex triangular
// matrix-matrix multiply to the external BLAS: cblas_ctrmm() with
// CblasColMajor ordering when BLIS1_ENABLE_CBLAS_INTERFACES is defined,
// F77_ctrmm() otherwise.
//
// Parameters:
//   side, uplo, trans, diag - BLIS1 selectors, translated by the
//                             bl1_param_map_to_netlib_*() helpers.
//   m, n                    - dimensions, forwarded unchanged.
//   alpha                   - pointer to the scalar, forwarded unchanged.
//   a, lda                  - matrix A and its leading dimension.
//   b, ldb                  - matrix B and its leading dimension.
//
// References: bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(),
//             bl1_param_map_to_netlib_trans(),
//             bl1_param_map_to_netlib_uplo(), cblas_ctrmm(), CblasColMajor,
//             and F77_ctrmm().
// Referenced by: bl1_ctrmm().
void bl1_ctrmm_blas( side1_t   side,
                     uplo1_t   uplo,
                     trans1_t  trans,
                     diag1_t   diag,
                     int       m,
                     int       n,
                     scomplex* alpha,
                     scomplex* a, int lda,
                     scomplex* b, int ldb )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER cblas_order = CblasColMajor;
    enum CBLAS_SIDE  cblas_side;
    enum CBLAS_UPLO  cblas_uplo;
    enum CBLAS_TRANSPOSE cblas_trans;
    enum CBLAS_DIAG  cblas_diag;

    // Translate the BLIS1 selectors into their CBLAS enum equivalents.
    bl1_param_map_to_netlib_side( side, &cblas_side );
    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
    bl1_param_map_to_netlib_trans( trans, &cblas_trans );
    bl1_param_map_to_netlib_diag( diag, &cblas_diag );

    cblas_ctrmm( cblas_order,
                 cblas_side,
                 cblas_uplo,
                 cblas_trans,
                 cblas_diag,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char blas_side;
    char blas_uplo;
    char blas_trans;
    char blas_diag;

    // Translate the BLIS1 selectors into the character codes that the
    // Fortran interface expects.
    bl1_param_map_to_netlib_side( side, &blas_side );
    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
    bl1_param_map_to_netlib_trans( trans, &blas_trans );
    bl1_param_map_to_netlib_diag( diag, &blas_diag );

    // The Fortran binding takes every scalar argument by reference.
    F77_ctrmm( &blas_side,
               &blas_uplo,
               &blas_trans,
               &blas_diag,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
// bl1_ctrmmsx
//
// Variant of bl1_ctrmm() that leaves B untouched and accumulates into a
// separate matrix C. The steps are:
//
//   1. Return immediately if bl1_zero_dim2( m, n ) reports an empty problem.
//   2. Obtain contiguous versions of A, B and C where necessary.
//   3. Copy B into a freshly allocated m-by-n buffer whose storage
//      orientation (row or column) matches that of B.
//   4. Run bl1_ctrmm() on the copy.
//   5. Scale C by beta, then add the copy into C.
//   6. Release all temporaries. C is written back via
//      bl1_cfree_saved_contigm().
//
// Parameters:
//   side, uplo, trans, diag - BLIS1 selectors passed through to bl1_ctrmm().
//   m, n                    - dimensions of B and C.
//   alpha, beta             - pointers to the scalars.
//   a, a_rs, a_cs           - triangular matrix A with its strides.
//   b, b_rs, b_cs           - matrix B with its strides (read only).
//   c, c_rs, c_cs           - matrix C with its strides (updated).
//
// References: bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(),
//             bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(),
//             bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(),
//             bl1_ctrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(),
//             bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.
// Referenced by: FLA_Trmmsx_external().
void bl1_ctrmmsx( side1_t   side,
                  uplo1_t   uplo,
                  trans1_t  trans,
                  diag1_t   diag,
                  int       m,
                  int       n,
                  scomplex* alpha,
                  scomplex* a, int a_rs, int a_cs,
                  scomplex* b, int b_rs, int b_cs,
                  scomplex* beta,
                  scomplex* c, int c_rs, int c_cs )
{
    // Snapshot the caller's view of every operand; the working pointers and
    // strides below may be redirected to contiguous temporaries.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    scomplex  one = bl1_c1();
    scomplex* b_copy;
    int       dim_a;
    int       b_copy_rs, b_copy_cs;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Create a copy of B to use in the computation so the original matrix is
    // left untouched.
    b_copy = bl1_callocm( m, n );

    // Match the strides of B_copy to that of B.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        b_copy_rs = 1;
        b_copy_cs = m;
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        b_copy_rs = n;
        b_copy_cs = 1;
    }

    // Copy the contents of B to B_copy.
    bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                 m,
                 n,
                 b,      b_rs,      b_cs,
                 b_copy, b_copy_rs, b_copy_cs );
    
    // Perform the operation on B_copy.
    bl1_ctrmm( side,
               uplo,
               trans,
               diag,
               m,
               n,
               alpha,
               a,      a_rs,      a_cs,
               b_copy, b_copy_rs, b_copy_cs );

    // Scale C by beta.
    bl1_cscalm( BLIS1_NO_CONJUGATE,
                m,
                n,
                beta,
                c, c_rs, c_cs );

    // Add B_copy into C.
    bl1_caxpymt( BLIS1_NO_TRANSPOSE,
                 m,
                 n,
                 &one,
                 b_copy, b_copy_rs, b_copy_cs,
                 c,      c_rs,      c_cs );

    // Free the copy of B.
    bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_ctrsm
//
// Single-precision complex triangular solve with multiple right-hand sides
// for matrices described by general row/column strides. B is overwritten
// with the result. The request is reduced to a column-major call to
// bl1_ctrsm_blas():
//
//   1. Return immediately if bl1_zero_dim2( m, n ) reports an empty problem.
//   2. Obtain contiguous versions of A (dim_a-by-dim_a, where dim_a is
//      chosen from m or n by bl1_set_dim_with_side()) and B where necessary.
//   3. Depending on the storage of B and A, swap leading dimension and
//      increment, swap m and n, and toggle uplo/side/trans.
//   4. If the resulting trans is conjugate-no-transpose, conjugate A: in
//      place when A is already a temporary copy, otherwise into a freshly
//      allocated buffer so the caller's A is left untouched.
//   5. Call bl1_ctrsm_blas() and release the temporaries. B is written back
//      via bl1_cfree_saved_contigm().
//
// Parameters:
//   side, uplo, trans, diag - BLIS1 selectors describing the operation.
//   m, n                    - dimensions of B.
//   alpha                   - pointer to the scalar.
//   a, a_rs, a_cs           - triangular matrix A with its strides.
//   b, b_rs, b_cs           - matrix B with its strides.
//
// References: bl1_callocm(), bl1_cconjmr(), bl1_ccopymrt(),
//             bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(),
//             bl1_cfree_contigm(), bl1_cfree_saved_contigm(),
//             bl1_ctrsm_blas(), bl1_is_col_storage(), bl1_is_conjnotrans(),
//             bl1_set_dim_with_side(), bl1_zero_dim2(), and
//             BLIS1_CONJ_NO_TRANSPOSE.
// Referenced by: bl1_ctrsmsx(), FLA_LU_nopiv_opc_var1(),
//                FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(),
//                FLA_LU_piv_opc_var3(), and FLA_Trsm_external().
void bl1_ctrsm( side1_t   side,
                uplo1_t   uplo,
                trans1_t  trans,
                diag1_t   diag,
                int       m,
                int       n,
                scomplex* alpha,
                scomplex* a, int a_rs, int a_cs,
                scomplex* b, int b_rs, int b_cs )
{
    // Snapshot the caller's view of every operand; the working copies below
    // may be redirected to temporaries or have their parameters toggled.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    scomplex* a_conj;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       lda_conj, inca_conj;
    int       a_was_copied;
    int       a_conj_is_temp = 0; // set when a_conj is allocated here

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := inv( tr( uplo( A_c ) ) ) * B_c
            // effective operation: B_c := inv( tr( uplo( A_c ) ) ) * B_c
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := inv( tr(  uplo( A_r ) )   ) * B_c
            // effective operation: B_c := inv( tr( ~uplo( A_c ) )^T ) * B_c
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := inv( tr( uplo( A_c ) ) ) * B_r
            // effective operation: B_c := B_c * inv( tr( uplo( A_c ) )^T )
            bl1_swap_ints( ldb, incb );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_trans( trans );
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := inv( tr( uplo( A_r ) ) ) * B_r
            // effective operation: B_c := B_c * inv( tr( ~uplo( A_c ) ) )
            bl1_swap_ints( ldb, incb );
            bl1_swap_ints( lda, inca );

            bl1_swap_ints( m, n );

            bl1_toggle_uplo( uplo );
            bl1_toggle_side( side );
        }
    }

    // Initialize with values assuming that trans is not conjnotrans.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // We want to handle the conjnotrans case. The easiest way to do so is
    // by conjugating A before the BLAS call.
    if ( bl1_is_conjnotrans( trans ) )
    {
        // side, m and n may have been changed above, so recompute the
        // dimension of A from their current values.
        bl1_set_dim_with_side( side, m, n, &dim_a );

        if ( a_was_copied )
        {
            // A is already a private temporary; conjugate it in place.
            bl1_cconjmr( uplo,
                         dim_a,
                         dim_a,
                         a_conj, inca_conj, lda_conj );
        }
        else
        {
            // A still refers to the caller's memory, so build a conjugated
            // column-major copy instead of modifying it.
            a_conj         = bl1_callocm( dim_a, dim_a );
            lda_conj       = dim_a;
            inca_conj      = 1;
            a_conj_is_temp = 1;

            bl1_ccopymrt( uplo,
                          BLIS1_CONJ_NO_TRANSPOSE,
                          dim_a,
                          dim_a,
                          a,      inca,      lda,
                          a_conj, inca_conj, lda_conj );
        }
    }

    bl1_ctrsm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_conj, lda_conj,
                    b,      ldb );

    // Release the conjugated copy only if it was allocated above.
    if ( a_conj_is_temp )
        bl1_cfree( a_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // Use the saved (unswapped) m and n so the write-back matches the
    // caller's original description of B.
    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             b_save, b_rs_save, b_cs_save,
                             &b,     &b_rs,     &b_cs );
}
// bl1_ctrsm_blas
//
// Thin dispatch layer that hands a single-precision complex triangular
// solve with multiple right-hand sides to the external BLAS: cblas_ctrsm()
// with CblasColMajor ordering when BLIS1_ENABLE_CBLAS_INTERFACES is defined,
// F77_ctrsm() otherwise.
//
// Parameters:
//   side, uplo, trans, diag - BLIS1 selectors, translated by the
//                             bl1_param_map_to_netlib_*() helpers.
//   m, n                    - dimensions, forwarded unchanged.
//   alpha                   - pointer to the scalar, forwarded unchanged.
//   a, lda                  - matrix A and its leading dimension.
//   b, ldb                  - matrix B and its leading dimension.
//
// References: bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(),
//             bl1_param_map_to_netlib_trans(),
//             bl1_param_map_to_netlib_uplo(), cblas_ctrsm(), CblasColMajor,
//             and F77_ctrsm().
// Referenced by: bl1_ctrsm().
void bl1_ctrsm_blas( side1_t   side,
                     uplo1_t   uplo,
                     trans1_t  trans,
                     diag1_t   diag,
                     int       m,
                     int       n,
                     scomplex* alpha,
                     scomplex* a, int lda,
                     scomplex* b, int ldb )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER cblas_order = CblasColMajor;
    enum CBLAS_SIDE  cblas_side;
    enum CBLAS_UPLO  cblas_uplo;
    enum CBLAS_TRANSPOSE cblas_trans;
    enum CBLAS_DIAG  cblas_diag;

    // Translate the BLIS1 selectors into their CBLAS enum equivalents.
    bl1_param_map_to_netlib_side( side, &cblas_side );
    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
    bl1_param_map_to_netlib_trans( trans, &cblas_trans );
    bl1_param_map_to_netlib_diag( diag, &cblas_diag );

    cblas_ctrsm( cblas_order,
                 cblas_side,
                 cblas_uplo,
                 cblas_trans,
                 cblas_diag,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    char blas_side;
    char blas_uplo;
    char blas_trans;
    char blas_diag;

    // Translate the BLIS1 selectors into the character codes that the
    // Fortran interface expects.
    bl1_param_map_to_netlib_side( side, &blas_side );
    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
    bl1_param_map_to_netlib_trans( trans, &blas_trans );
    bl1_param_map_to_netlib_diag( diag, &blas_diag );

    // The Fortran binding takes every scalar argument by reference.
    F77_ctrsm( &blas_side,
               &blas_uplo,
               &blas_trans,
               &blas_diag,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
// bl1_ctrsmsx
//
// Single-precision complex triangular solve that leaves B untouched and
// accumulates the result into a separate matrix C:
//
//   1. B is copied into a freshly allocated B_copy.
//   2. bl1_ctrsm() is applied to B_copy (with alpha and A).
//   3. C is scaled by beta.
//   4. B_copy is added into C (with a unit scalar).
//
//   side, uplo, trans, diag  Forwarded unchanged to bl1_ctrsm().
//   m, n                     Dimensions of B and C. The dimension of the
//                            square matrix A is chosen from m and n by
//                            bl1_set_dim_with_side() according to side.
//   alpha                    Pointer to the scalar passed to bl1_ctrsm().
//   a, a_rs, a_cs            Matrix A with its row and column strides.
//   b, b_rs, b_cs            Matrix B with its row and column strides
//                            (read only by this routine).
//   beta                     Pointer to the scalar that scales C.
//   c, c_rs, c_cs            Matrix C with its row and column strides
//                            (updated).
//
// Returns immediately, without touching C, when bl1_zero_dim2( m, n ) is
// true.
//
// NOTE: alpha, a, b, beta and c are pointers. The body stores a, b and c
// into scomplex* variables and forwards alpha and beta to routines that are
// elsewhere given the address of a scalar (e.g. &one), so the by-value
// rendering of these parameters was a loss of the '*' declarators.
void bl1_ctrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs,
scomplex *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_c1(), bl1_callocm(), bl1_caxpymt(), bl1_ccopymt(), bl1_ccreate_contigm(), bl1_ccreate_contigmr(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigm(), bl1_cscalm(), bl1_ctrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

{
    // The *_save variables remember the caller's original pointers, strides
    // and dimensions, because a, b, c and their strides may be redirected to
    // temporary contiguous copies below.
    int       m_save    = m;
    int       n_save    = n;
    scomplex* a_save    = a;
    scomplex* b_save    = b;
    scomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    scomplex  one = bl1_c1();
    scomplex* b_copy;
    int       dim_a;
    int       b_copy_rs, b_copy_cs;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_ccreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_ccreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_ccreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Create a copy of B to use in the computation so the original matrix is
    // left untouched.
    b_copy = bl1_callocm( m, n );

    // Match the strides of B_copy to that of B.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        b_copy_rs = 1;
        b_copy_cs = m;
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        b_copy_rs = n;
        b_copy_cs = 1;
    }

    // Copy the contents of B to B_copy.
    bl1_ccopymt( BLIS1_NO_TRANSPOSE,
                 m,
                 n,
                 b,      b_rs,      b_cs,
                 b_copy, b_copy_rs, b_copy_cs );
    
    // Perform the operation on B_copy.
    bl1_ctrsm( side,
               uplo,
               trans,
               diag,
               m,
               n,
               alpha,
               a,      a_rs,      a_cs,
               b_copy, b_copy_rs, b_copy_cs );

    // Scale C by beta.
    bl1_cscalm( BLIS1_NO_CONJUGATE,
                m,
                n,
                beta,
                c, c_rs, c_cs );

    // Add B_copy into C.
    bl1_caxpymt( BLIS1_NO_TRANSPOSE,
                 m,
                 n,
                 &one,
                 b_copy, b_copy_rs, b_copy_cs,
                 c,      c_rs,      c_cs );

    // Free the copy of B.
    bl1_cfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix. Only C goes through the "saved" variant, which is
    // given the original dimensions; A and B were inputs only.
    bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_cfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_cfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_dgemm
//
// Double-precision general matrix-matrix multiply for operands described by
// separate row and column strides. The routine reduces every combination of
// row/column storage of A, B and C to a column-major call to
// bl1_dgemm_blas(), adjusting leading dimensions, transposition flags and
// operand order as needed. In two storage combinations the product is first
// computed into a temporary matrix and then accumulated into C with a
// transpose.
//
//   transa, transb   Transposition parameters for A and B.
//   m, k, n          C is treated as m-by-n; A and B are handed to
//                    bl1_dcreate_contigmt() as m-by-k and k-by-n together
//                    with their trans parameter.
//   alpha, beta      Pointers to the scalars.
//   a, a_rs, a_cs    Matrix A with its row and column strides.
//   b, b_rs, b_cs    Matrix B with its row and column strides.
//   c, c_rs, c_cs    Matrix C with its row and column strides (updated).
//
// NOTE(review): bl1_swap_ints(), bl1_dswap_pointers(), bl1_toggle_trans()
// and bl1_swap_trans() are applied to plain variables and are relied upon to
// modify them in place -- presumably macros; confirm against their
// definitions.
void bl1_dgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dgemm_blas(), bl1_dscalm(), bl1_is_col_storage(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Gemm_external(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opz_var2().

{
    // The *_save variables remember the caller's original pointers, strides
    // and dimensions; a, b, c, m and n may all be modified below.
    int       m_save    = m;
    int       n_save    = n;
    double*   a_save    = a;
    double*   b_save    = b;
    double*   c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero = bl1_d0();
    double    one  = bl1_d1();
    double*   a_unswap;
    double*   b_unswap;
    double*   c_trans;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldc_trans, incc_trans;
    int       m_gemm, n_gemm;
    int       gemm_needs_axpyt = FALSE;

    // Return early if possible. C is still scaled by beta in this case, so
    // the early exit is not a pure no-op.
    if ( bl1_zero_dim3( m, k, n ) )
    {
        bl1_dscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_dcreate_contigmt( transa,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigmt( transb,
                          k,
                          n,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_dcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // These are used to track the original values of a and b prior to any
    // operand swapping that might take place. This is necessary for proper
    // freeing of memory when one is a temporary contiguous matrix.
    a_unswap = a;
    b_unswap = b;

    // These are used to track the dimensions of the product of the
    // A and B operands to the BLAS invocation of gemm. These differ
    // from m and n when the operands need to be swapped.
    m_gemm = m;
    n_gemm = n;

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_c )
                // effective operation: C_c += tr( A_c ) * tr( B_c )
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                
                // requested operation: C_c += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( A_c ) * tr( B_c )^T
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( A_r )^T * tr( B_c )
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transa );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=   tr( A_r ) * tr( B_r )
                // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                // Exchange the roles of A and B for the BLAS call.
                bl1_dswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );

                // The product is formed transposed, so only the product
                // dimensions are swapped here; m and n keep describing C.
                gemm_needs_axpyt = TRUE;
                bl1_swap_ints( m_gemm, n_gemm );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r +=   tr( A_c ) * tr( B_c )
                // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
                bl1_swap_ints( ldc, incc );

                // Here m and n are swapped (C is viewed transposed) while
                // m_gemm and n_gemm keep the original product dimensions.
                bl1_swap_ints( m, n );

                gemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )^T
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transa );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_dswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( B_c )^T * tr( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transb );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_dswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_dswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
    }

    // There are two cases where we need to perform the gemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( gemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, while m_gemm and n_gemm are the
        // dimensions of the actual product op(A)*op(B), which may be n-by-m
        // since the operands may have been swapped.
        c_trans    = bl1_dallocm( m_gemm, n_gemm );
        ldc_trans  = m_gemm;
        incc_trans = 1;

        // Compute tr( A ) * tr( B ), where A and B may have been swapped
        // to reference the other, and store the result in C_trans.
        // The BLAS call is given a zero beta, so C_trans is overwritten and
        // the caller's beta is applied to C separately below.
        bl1_dgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_dscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_daxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_dfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bl1_dgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a, lda,
                        b, ldb,
                        beta,
                        c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix. The un-swapped pointers are used for A and B
    // because a and b may have been exchanged above.
    bl1_dfree_contigm( a_save,    a_rs_save, a_cs_save,
                       &a_unswap, &a_rs,     &a_cs );

    bl1_dfree_contigm( b_save,    b_rs_save, b_cs_save,
                       &b_unswap, &b_rs,     &b_cs );

    bl1_dfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_dgemm_blas
//
// Forwards a double-precision general matrix-matrix multiply to the external
// BLAS. When BLIS1_ENABLE_CBLAS_INTERFACES is defined the call goes to
// cblas_dgemm() using CblasColMajor ordering; otherwise it goes to
// F77_dgemm().
//
//   transa, transb   BLIS1 transposition constants, translated with
//                    bl1_param_map_to_netlib_trans() before the call.
//   m, n, k          Dimensions handed to the BLAS routine, in that order.
//   alpha, beta      Pointers to the scalars.
//   a, lda           Matrix A and its leading dimension.
//   b, ldb           Matrix B and its leading dimension.
//   c, ldc           Matrix C and its leading dimension.
void bl1_dgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_dgemm(), CblasColMajor, and F77_dgemm().

Referenced by bl1_dgemm().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER     cblas_order = CblasColMajor;
    enum CBLAS_TRANSPOSE cblas_transa;
    enum CBLAS_TRANSPOSE cblas_transb;

    bl1_param_map_to_netlib_trans( transa, &cblas_transa );
    bl1_param_map_to_netlib_trans( transb, &cblas_transb );

    // The CBLAS call receives the real scalars by value, hence the
    // dereference of alpha and beta.
    cblas_dgemm( cblas_order,
                 cblas_transa,
                 cblas_transb,
                 m,
                 n,
                 k,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char blas_transa;
    char blas_transb;

    bl1_param_map_to_netlib_trans( transa, &blas_transa );
    bl1_param_map_to_netlib_trans( transb, &blas_transb );

    // The Fortran-77 call receives every argument by address.
    F77_dgemm( &blas_transa,
               &blas_transb,
               &m,
               &n,
               &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
// bl1_dhemm
//
// Double-precision "Hermitian" matrix multiply. This routine performs no
// work of its own: every argument is forwarded unchanged to bl1_dsymm().
// (For real-valued data a Hermitian matrix is simply a symmetric one.)
//
// See bl1_dsymm() for the meaning of the parameters.
void bl1_dhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsymm().

{
    bl1_dsymm( side,
               uplo,
               m,
               n,
               alpha,
               a, a_rs, a_cs,
               b, b_rs, b_cs,
               beta,
               c, c_rs, c_cs );
}
// bl1_dher2k
//
// Double-precision "Hermitian" rank-2k update. This routine performs no work
// of its own: every argument is forwarded unchanged to bl1_dsyr2k().
// (For real-valued data a Hermitian matrix is simply a symmetric one.)
//
// See bl1_dsyr2k() for the meaning of the parameters.
void bl1_dher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyr2k().

{
    bl1_dsyr2k( uplo,
                trans,
                m,
                k,
                alpha,
                a, a_rs, a_cs,
                b, b_rs, b_cs,
                beta,
                c, c_rs, c_cs );
}
// bl1_dherk
//
// Double-precision "Hermitian" rank-k update. This routine performs no work
// of its own: every argument is forwarded unchanged to bl1_dsyrk().
// (For real-valued data a Hermitian matrix is simply a symmetric one.)
//
// See bl1_dsyrk() for the meaning of the parameters.
void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dsyrk().

{
    bl1_dsyrk( uplo,
               trans,
               m,
               k,
               alpha,
               a, a_rs, a_cs,
               beta,
               c, c_rs, c_cs );
}
// bl1_dsymm
//
// Double-precision symmetric matrix multiply for operands described by
// separate row and column strides. The routine reduces every combination of
// row/column storage of A, B and C to a column-major call to
// bl1_dsymm_blas(), by swapping leading dimensions, toggling side/uplo,
// copying B into a column-major temporary, or computing the product into a
// temporary and accumulating it into C with a transpose.
//
//   side             Which side the symmetric matrix A appears on.
//   uplo             Which triangle of A is referenced.
//   m, n             Dimensions of B and C. The dimension of the square
//                    matrix A is chosen from m and n by
//                    bl1_set_dim_with_side() according to side.
//   alpha, beta      Pointers to the scalars.
//   a, a_rs, a_cs    Matrix A with its row and column strides.
//   b, b_rs, b_cs    Matrix B with its row and column strides.
//   c, c_rs, c_cs    Matrix C with its row and column strides (updated).
//
// Returns immediately, without touching C, when bl1_zero_dim2( m, n ) is
// true.
//
// NOTE(review): bl1_swap_ints(), bl1_toggle_side() and bl1_toggle_uplo() are
// applied to plain variables and are relied upon to modify them in place --
// presumably macros; confirm against their definitions.
// NOTE(review): the conj() annotations in the case comments below have no
// effect for real data; they look inherited from the complex variants.
void bl1_dsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d0(), bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dsymm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().

{
    // The *_save variables remember the caller's original pointers, strides
    // and dimensions; a, b, c, m and n may all be modified below.
    int       m_save    = m;
    int       n_save    = n;
    double*   a_save    = a;
    double*   b_save    = b;
    double*   c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero = bl1_d0();
    double    one  = bl1_d1();
    double*   b_copy;
    double*   c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       symm_needs_copyb  = FALSE;
    int       symm_needs_transb = FALSE;
    int       symm_needs_axpyt  = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_dcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_dcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;
    
    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                symm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                symm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_copyb  = TRUE;
                symm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // Until then, b_copy simply aliases b so the non-copy path below can use
    // b_copy unconditionally.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;
    
    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( symm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;
        
        b_copy    = bl1_dallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_dcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the symm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( symm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_dallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the conja/copyb cases, hence the use of a, b, lda, and ldb.
        // The BLAS call is given a zero beta, so C_trans is overwritten and
        // the caller's beta is applied to C separately below.
        bl1_dsymm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_dscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_daxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_dfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bl1_dsymm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a,      lda,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    // b_copy is only owned by this routine when a copy was actually made;
    // otherwise it aliases b and must not be freed here.
    if ( symm_needs_copyb )
        bl1_dfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_dfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_dsymm_blas
//
// Forwards a double-precision symmetric matrix multiply to the external
// BLAS. When BLIS1_ENABLE_CBLAS_INTERFACES is defined the call goes to
// cblas_dsymm() using CblasColMajor ordering; otherwise it goes to
// F77_dsymm().
//
//   side, uplo       BLIS1 parameter constants, translated with
//                    bl1_param_map_to_netlib_side() and
//                    bl1_param_map_to_netlib_uplo() before the call.
//   m, n             Dimensions handed to the BLAS routine.
//   alpha, beta      Pointers to the scalars.
//   a, lda           Matrix A and its leading dimension.
//   b, ldb           Matrix B and its leading dimension.
//   c, ldc           Matrix C and its leading dimension.
void bl1_dsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor, and F77_dsymm().

Referenced by bl1_dsymm().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_ORDER cblas_order = CblasColMajor;
    enum CBLAS_SIDE  cblas_side;
    enum CBLAS_UPLO  cblas_uplo;

    bl1_param_map_to_netlib_side( side, &cblas_side );
    bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );

    // The CBLAS call receives the real scalars by value, hence the
    // dereference of alpha and beta.
    cblas_dsymm( cblas_order,
                 cblas_side,
                 cblas_uplo,
                 m,
                 n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char blas_side;
    char blas_uplo;

    bl1_param_map_to_netlib_side( side, &blas_side );
    bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );

    // The Fortran-77 call receives every argument by address.
    F77_dsymm( &blas_side,
               &blas_uplo,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
// bl1_dsyr2k
//
// Double-precision symmetric rank-2k update for operands described by
// separate row and column strides. The routine reduces every combination of
// row/column storage of A, B and C to a column-major call to
// bl1_dsyr2k_blas(), by swapping leading dimensions, toggling uplo/trans,
// or copying A or B into a column-major temporary when their storage
// schemes differ.
//
//   uplo             Which triangle of C is referenced.
//   trans            Transposition parameter applied to A and B.
//   m, k             C is handled as m-by-m; A and B are handed to
//                    bl1_dcreate_contigmt() as m-by-k together with trans.
//   alpha, beta      Pointers to the scalars.
//   a, a_rs, a_cs    Matrix A with its row and column strides.
//   b, b_rs, b_cs    Matrix B with its row and column strides.
//   c, c_rs, c_cs    Matrix C with its row and column strides (updated).
//
// Returns immediately, without touching C, when bl1_zero_dim2( m, k ) is
// true.
//
// NOTE(review): bl1_swap_ints(), bl1_toggle_uplo() and bl1_toggle_trans()
// are applied to plain variables and are relied upon to modify them in
// place -- presumably macros; confirm against their definitions.
// NOTE(review): the conj() annotations in the case comments below have no
// effect for real data; they look inherited from the complex variants.
void bl1_dsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dallocm(), bl1_dcopymt(), bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyr2k_blas(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

{
    // The *_save variables remember the caller's original values. uplo is
    // saved as well because it may be toggled below, while the final
    // copy-back of C is given the original uplo.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    double*   a_save    = a;
    double*   b_save    = b;
    double*   c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double*   a_copy;
    double*   b_copy;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       syr2k_needs_copya = FALSE;
    int       syr2k_needs_copyb = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_dcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_dcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( trans );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_trans( trans );
            }
        }
    }

    // Until a copy is made, a_copy simply aliases a so the BLAS call below
    // can use a_copy unconditionally.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;
    
    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bl1_dallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_dcopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }
    
    // Until a copy is made, b_copy simply aliases b so the BLAS call below
    // can use b_copy unconditionally.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bl1_dallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_dcopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    bl1_dsyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_copy, lda_copy,
                     b_copy, ldb_copy,
                     beta,
                     c, ldc );

    // a_copy and b_copy are only owned by this routine when a copy was
    // actually made; otherwise they alias a and b and must not be freed here.
    if ( syr2k_needs_copya )
        bl1_dfree( a_copy );

    if ( syr2k_needs_copyb )
        bl1_dfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_dfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_dfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_dsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb,
double *  beta,
double *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_dsyr2k(), CblasColMajor, and F77_dsyr2k().

Referenced by bl1_dsyr2k().

{
    // Dispatch layer for double-precision syr2k: the uplo/trans parameters
    // are mapped to their netlib representation and every other argument is
    // forwarded as-is to cblas_dsyr2k() when BLIS1_ENABLE_CBLAS_INTERFACES is
    // defined, or to F77_dsyr2k() otherwise.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
#else
    char netlib_uplo;
    char netlib_trans;
#endif

    // The BLAS syr2k has no conjugate-transposition constant, so that value
    // is demoted to a plain transposition before being mapped.
    if ( bl1_is_conjtrans( trans ) )
    {
        trans = BLIS1_TRANSPOSE;
    }

    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: scalars are passed by value, order is CblasColMajor.
    cblas_dsyr2k( CblasColMajor, netlib_uplo, netlib_trans,
                  m, k,
                  *alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    // Fortran-77 path: every argument is passed by address.
    F77_dsyr2k( &netlib_uplo, &netlib_trans,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
void bl1_dsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_dcreate_contigmr(), bl1_dcreate_contigmt(), bl1_dfree_contigm(), bl1_dfree_saved_contigmr(), bl1_dsyrk_blas(), bl1_is_col_storage(), and bl1_zero_dim2().

Referenced by bl1_dherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_opd_var1().

{
    // Double-precision symmetric rank-k update for operands with general
    // row/column strides. The storage of A and C is inspected and the
    // uplo/trans flags and leading dimensions are rewritten so the update
    // can be carried out by the column-major bl1_dsyrk_blas().
    //
    //   uplo        - which triangle of C is referenced.
    //   trans       - transposition applied to A.
    //   m, k        - C is handled as m-by-m; A is handled as m-by-k subject
    //                 to trans.
    //   alpha, beta - pointers to the scalars, forwarded untouched.
    //   a, c        - matrices with row strides a_rs/c_rs and column strides
    //                 a_cs/c_cs.

    // The caller's original arguments. They are needed again at the end so
    // that temporaries can be released and C written back.
    uplo1_t   orig_uplo = uplo;
    int       orig_m    = m;
    double*   orig_a    = a;
    double*   orig_c    = c;
    int       orig_a_rs = a_rs;
    int       orig_a_cs = a_cs;
    int       orig_c_rs = c_rs;
    int       orig_c_cs = c_cs;
    int       lda, inca;
    int       ldc, incc;
    int       c_is_col, a_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, k ) ) return;

    // Switch a and c (and their strides) over to contiguous temporaries
    // where that is required.
    bl1_dcreate_contigmt( trans, m, k,
                          orig_a, orig_a_rs, orig_a_cs,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigmr( uplo, m, m,
                          orig_c, orig_c_rs, orig_c_cs,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    c_is_col = bl1_is_col_storage( c_rs, c_cs );
    a_is_col = bl1_is_col_storage( a_rs, a_cs );

    // The adjustments for C and for A touch disjoint variables, so each
    // operand is handled on its own.
    if ( !c_is_col )
    {
        // Row-stored C: view it as column-stored and reference the opposite
        // triangle, i.e. uplo( C_r ) becomes ~uplo( C_c ).
        bl1_swap_ints( ldc, incc );
        bl1_toggle_uplo( uplo );
    }

    if ( !a_is_col )
    {
        // Row-stored A: view it as column-stored and flip the transposition,
        // i.e. A_r * A_r^T becomes A_c^T * A_c.
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( trans );
    }

    bl1_dsyrk_blas( uplo, trans,
                    m, k,
                    alpha,
                    a, lda,
                    beta,
                    c, ldc );

    // Release the temporaries; the C variant also receives the original
    // uplo and dimensions so the result can be stored back.
    bl1_dfree_contigm( orig_a, orig_a_rs, orig_a_cs,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_saved_contigmr( orig_uplo,
                              orig_m, orig_m,
                              orig_c, orig_c_rs, orig_c_cs,
                              &c,     &c_rs,     &c_cs );
}
void bl1_dsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  lda,
double *  beta,
double *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dsyrk(), CblasColMajor, and F77_dsyrk().

Referenced by bl1_dsyrk().

{
    // Dispatch layer for double-precision syrk: maps uplo/trans to their
    // netlib representation and forwards the remaining arguments to
    // cblas_dsyrk() when BLIS1_ENABLE_CBLAS_INTERFACES is defined, or to
    // F77_dsyrk() otherwise.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
#else
    char netlib_uplo;
    char netlib_trans;
#endif

    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: scalars are passed by value, order is CblasColMajor.
    cblas_dsyrk( CblasColMajor, netlib_uplo, netlib_trans,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    // Fortran-77 path: every argument is passed by address.
    F77_dsyrk( &netlib_uplo, &netlib_trans,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
void bl1_dtrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrmm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrmmsx(), and FLA_Trmm_external().

{
    // Double-precision triangular matrix multiply for operands with general
    // row/column strides. B is m-by-n and is overwritten; A is square with
    // its order chosen from m and n according to side. The storage of A and
    // B is inspected and side/uplo/trans, the dimensions and the leading
    // dimensions are rewritten so the product can be carried out by the
    // column-major bl1_dtrmm_blas().

    // The caller's original arguments. They are needed again at the end so
    // that temporaries can be released and B written back.
    int       orig_m    = m;
    int       orig_n    = n;
    double*   orig_a    = a;
    double*   orig_b    = b;
    int       orig_a_rs = a_rs;
    int       orig_a_cs = a_cs;
    int       orig_b_rs = b_rs;
    int       orig_b_cs = b_cs;
    int       a_dim;
    int       lda, inca;
    int       ldb, incb;
    int       b_is_col, a_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Switch a and b (and their strides) over to contiguous temporaries
    // where that is required.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_dcreate_contigmr( uplo, a_dim, a_dim,
                          orig_a, orig_a_rs, orig_a_cs,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigm( m, n,
                         orig_b, orig_b_rs, orig_b_cs,
                         &b,     &b_rs,     &b_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    b_is_col = bl1_is_col_storage( b_rs, b_cs );
    a_is_col = bl1_is_col_storage( a_rs, a_cs );

    // The four storage combinations are kept separate on purpose: the
    // row/row case is not simply the two single-row cases applied in
    // sequence as far as trans is concerned.
    if ( b_is_col )
    {
        if ( !a_is_col )
        {
            // requested: B_c := tr(  uplo( A_r ) )   * B_c
            // effective: B_c := tr( ~uplo( A_c ) )^T * B_c
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
        // else: A and B are both column-stored; nothing to adjust.
    }
    else if ( a_is_col )
    {
        // requested: B_r := tr( uplo( A_c ) ) * B_r
        // effective: B_c := B_c * tr( uplo( A_c ) )^T
        bl1_swap_ints( ldb, incb );

        bl1_swap_ints( m, n );

        bl1_toggle_side( side );
        bl1_toggle_trans( trans );
    }
    else
    {
        // requested: B_r := tr( uplo( A_r ) ) * B_r
        // effective: B_c := B_c * tr( ~uplo( A_c ) )
        bl1_swap_ints( ldb, incb );
        bl1_swap_ints( lda, inca );

        bl1_swap_ints( m, n );

        bl1_toggle_uplo( uplo );
        bl1_toggle_side( side );
    }

    bl1_dtrmm_blas( side, uplo, trans, diag,
                    m, n,
                    alpha,
                    a, lda,
                    b, ldb );

    // Release the temporaries; the B variant also receives the original
    // dimensions so the result can be stored back.
    bl1_dfree_contigm( orig_a, orig_a_rs, orig_a_cs,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_saved_contigm( orig_m, orig_n,
                             orig_b, orig_b_rs, orig_b_cs,
                             &b,     &b_rs,     &b_cs );
}
void bl1_dtrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrmm(), CblasColMajor, and F77_dtrmm().

Referenced by bl1_dtrmm().

{
    // Dispatch layer for double-precision trmm: maps side/uplo/trans/diag to
    // their netlib representation and forwards the remaining arguments to
    // cblas_dtrmm() when BLIS1_ENABLE_CBLAS_INTERFACES is defined, or to
    // F77_dtrmm() otherwise.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      netlib_side;
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
    enum CBLAS_DIAG      netlib_diag;
#else
    char netlib_side;
    char netlib_uplo;
    char netlib_trans;
    char netlib_diag;
#endif

    bl1_param_map_to_netlib_side( side, &netlib_side );
    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );
    bl1_param_map_to_netlib_diag( diag, &netlib_diag );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: alpha is passed by value, order is CblasColMajor.
    cblas_dtrmm( CblasColMajor,
                 netlib_side, netlib_uplo, netlib_trans, netlib_diag,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    // Fortran-77 path: every argument is passed by address.
    F77_dtrmm( &netlib_side, &netlib_uplo, &netlib_trans, &netlib_diag,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bl1_dtrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrmm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

{
    // Variant of bl1_dtrmm() that leaves B intact and accumulates into C:
    //   1. an m-by-n scratch copy of B is made,
    //   2. bl1_dtrmm() is applied to the scratch copy (with alpha),
    //   3. C is scaled by beta,
    //   4. the scratch copy is added into C.

    // The caller's original arguments. They are needed again at the end so
    // that temporaries can be released and C written back.
    int       orig_m    = m;
    int       orig_n    = n;
    double*   orig_a    = a;
    double*   orig_b    = b;
    double*   orig_c    = c;
    int       orig_a_rs = a_rs;
    int       orig_a_cs = a_cs;
    int       orig_b_rs = b_rs;
    int       orig_b_cs = b_cs;
    int       orig_c_rs = c_rs;
    int       orig_c_cs = c_cs;
    double    unit      = bl1_d1();
    double*   b_work;
    int       a_dim;
    int       b_work_rs, b_work_cs;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Switch a, b and c (and their strides) over to contiguous temporaries
    // where that is required.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_dcreate_contigmr( uplo, a_dim, a_dim,
                          orig_a, orig_a_rs, orig_a_cs,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigm( m, n,
                         orig_b, orig_b_rs, orig_b_cs,
                         &b,     &b_rs,     &b_cs );

    bl1_dcreate_contigm( m, n,
                         orig_c, orig_c_rs, orig_c_cs,
                         &c,     &c_rs,     &c_cs );

    // Scratch matrix that receives the triangular product, so that B itself
    // is never written.
    b_work = bl1_dallocm( m, n );

    // Give the scratch matrix the same orientation as B: unit row stride for
    // column storage, unit column stride otherwise.
    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = ( b_is_col ? 1 : n );
    b_work_cs = ( b_is_col ? m : 1 );

    // b_work := B
    bl1_dcopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the triangular multiply to the scratch copy.
    bl1_dtrmm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_dscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_daxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bl1_dfree( b_work );

    // Release the temporaries; the C variant also receives the original
    // dimensions so the result can be stored back.
    bl1_dfree_contigm( orig_a, orig_a_rs, orig_a_cs,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_contigm( orig_b, orig_b_rs, orig_b_cs,
                       &b,     &b_rs,     &b_cs );

    bl1_dfree_saved_contigm( orig_m, orig_n,
                             orig_c, orig_c_rs, orig_c_cs,
                             &c,     &c_rs,     &c_cs );
}
void bl1_dtrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dtrsm_blas(), bl1_is_col_storage(), bl1_set_dim_with_side(), and bl1_zero_dim2().

Referenced by bl1_dtrsmsx(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_piv_opd_var3(), and FLA_Trsm_external().

{
    // Double-precision trsm front end for operands with general row/column
    // strides. B is m-by-n and is overwritten; A is square with its order
    // chosen from m and n according to side. The storage of A and B is
    // inspected and side/uplo/trans, the dimensions and the leading
    // dimensions are rewritten so the operation can be carried out by the
    // column-major bl1_dtrsm_blas().

    // The caller's original arguments. They are needed again at the end so
    // that temporaries can be released and B written back.
    int       orig_m    = m;
    int       orig_n    = n;
    double*   orig_a    = a;
    double*   orig_b    = b;
    int       orig_a_rs = a_rs;
    int       orig_a_cs = a_cs;
    int       orig_b_rs = b_rs;
    int       orig_b_cs = b_cs;
    int       a_dim;
    int       lda, inca;
    int       ldb, incb;
    int       b_is_col, a_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Switch a and b (and their strides) over to contiguous temporaries
    // where that is required.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_dcreate_contigmr( uplo, a_dim, a_dim,
                          orig_a, orig_a_rs, orig_a_cs,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigm( m, n,
                         orig_b, orig_b_rs, orig_b_cs,
                         &b,     &b_rs,     &b_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    b_is_col = bl1_is_col_storage( b_rs, b_cs );
    a_is_col = bl1_is_col_storage( a_rs, a_cs );

    // The four storage combinations are kept separate on purpose: the
    // row/row case is not simply the two single-row cases applied in
    // sequence as far as trans is concerned.
    if ( b_is_col )
    {
        if ( !a_is_col )
        {
            // Row-stored A, column-stored B: view A as column-stored, which
            // references the opposite triangle and flips the transposition.
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
        // else: A and B are both column-stored; nothing to adjust.
    }
    else if ( a_is_col )
    {
        // Column-stored A, row-stored B: view B as column-stored, which
        // swaps its dimensions, moves A to the other side and flips the
        // transposition.
        bl1_swap_ints( ldb, incb );

        bl1_swap_ints( m, n );

        bl1_toggle_side( side );
        bl1_toggle_trans( trans );
    }
    else
    {
        // Row-stored A and B: view both as column-stored, which swaps B's
        // dimensions, references the opposite triangle of A and moves A to
        // the other side; trans is left as given.
        bl1_swap_ints( ldb, incb );
        bl1_swap_ints( lda, inca );

        bl1_swap_ints( m, n );

        bl1_toggle_uplo( uplo );
        bl1_toggle_side( side );
    }

    bl1_dtrsm_blas( side, uplo, trans, diag,
                    m, n,
                    alpha,
                    a, lda,
                    b, ldb );

    // Release the temporaries; the B variant also receives the original
    // dimensions so the result can be stored back.
    bl1_dfree_contigm( orig_a, orig_a_rs, orig_a_cs,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_saved_contigm( orig_m, orig_n,
                             orig_b, orig_b_rs, orig_b_cs,
                             &b,     &b_rs,     &b_cs );
}
void bl1_dtrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_dtrsm(), CblasColMajor, and F77_dtrsm().

Referenced by bl1_dtrsm().

{
    // Dispatch layer for double-precision trsm: maps side/uplo/trans/diag to
    // their netlib representation and forwards the remaining arguments to
    // cblas_dtrsm() when BLIS1_ENABLE_CBLAS_INTERFACES is defined, or to
    // F77_dtrsm() otherwise.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      netlib_side;
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
    enum CBLAS_DIAG      netlib_diag;
#else
    char netlib_side;
    char netlib_uplo;
    char netlib_trans;
    char netlib_diag;
#endif

    bl1_param_map_to_netlib_side( side, &netlib_side );
    bl1_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bl1_param_map_to_netlib_trans( trans, &netlib_trans );
    bl1_param_map_to_netlib_diag( diag, &netlib_diag );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: alpha is passed by value, order is CblasColMajor.
    cblas_dtrsm( CblasColMajor,
                 netlib_side, netlib_uplo, netlib_trans, netlib_diag,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    // Fortran-77 path: every argument is passed by address.
    F77_dtrsm( &netlib_side, &netlib_uplo, &netlib_trans, &netlib_diag,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bl1_dtrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)

References bl1_d1(), bl1_dallocm(), bl1_daxpymt(), bl1_dcopymt(), bl1_dcreate_contigm(), bl1_dcreate_contigmr(), bl1_dfree(), bl1_dfree_contigm(), bl1_dfree_saved_contigm(), bl1_dscalm(), bl1_dtrsm(), bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

{
    // Variant of bl1_dtrsm() that leaves B intact and accumulates into C:
    //   1. an m-by-n scratch copy of B is made,
    //   2. bl1_dtrsm() is applied to the scratch copy (with alpha),
    //   3. C is scaled by beta,
    //   4. the scratch copy is added into C.

    // The caller's original arguments. They are needed again at the end so
    // that temporaries can be released and C written back.
    int       orig_m    = m;
    int       orig_n    = n;
    double*   orig_a    = a;
    double*   orig_b    = b;
    double*   orig_c    = c;
    int       orig_a_rs = a_rs;
    int       orig_a_cs = a_cs;
    int       orig_b_rs = b_rs;
    int       orig_b_cs = b_cs;
    int       orig_c_rs = c_rs;
    int       orig_c_cs = c_cs;
    double    unit      = bl1_d1();
    double*   b_work;
    int       a_dim;
    int       b_work_rs, b_work_cs;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Switch a, b and c (and their strides) over to contiguous temporaries
    // where that is required.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_dcreate_contigmr( uplo, a_dim, a_dim,
                          orig_a, orig_a_rs, orig_a_cs,
                          &a,     &a_rs,     &a_cs );

    bl1_dcreate_contigm( m, n,
                         orig_b, orig_b_rs, orig_b_cs,
                         &b,     &b_rs,     &b_cs );

    bl1_dcreate_contigm( m, n,
                         orig_c, orig_c_rs, orig_c_cs,
                         &c,     &c_rs,     &c_cs );

    // Scratch matrix that receives the trsm result, so that B itself is
    // never written.
    b_work = bl1_dallocm( m, n );

    // Give the scratch matrix the same orientation as B: unit row stride for
    // column storage, unit column stride otherwise.
    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = ( b_is_col ? 1 : n );
    b_work_cs = ( b_is_col ? m : 1 );

    // b_work := B
    bl1_dcopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the triangular operation to the scratch copy.
    bl1_dtrsm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_dscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_daxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    bl1_dfree( b_work );

    // Release the temporaries; the C variant also receives the original
    // dimensions so the result can be stored back.
    bl1_dfree_contigm( orig_a, orig_a_rs, orig_a_cs,
                       &a,     &a_rs,     &a_cs );

    bl1_dfree_contigm( orig_b, orig_b_rs, orig_b_cs,
                       &b,     &b_rs,     &b_cs );

    bl1_dfree_saved_contigm( orig_m, orig_n,
                             orig_c, orig_c_rs, orig_c_cs,
                             &c,     &c_rs,     &c_cs );
}
void bl1_sgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sgemm_blas(), bl1_sscalm(), bl1_zero_dim3(), BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

{
    // Single-precision general matrix-matrix multiply front end for operands
    // with general row/column strides. The storage of A, B and C is
    // inspected and the transposition flags, leading dimensions, dimensions
    // and (in some cases) the A/B operands themselves are rewritten so that
    // the product can be carried out by the column-major bl1_sgemm_blas().
    //
    //   transa, transb - transposition flags for A and B.
    //   m, k, n        - C is handled as m-by-n; A as m-by-k and B as k-by-n,
    //                    each subject to its trans flag. Note the parameter
    //                    order here is (m, k, n), whereas bl1_sgemm_blas()
    //                    takes (m, n, k).
    //   alpha, beta    - pointers to the scalars.
    //   a, b, c        - matrices with row strides *_rs and column strides
    //                    *_cs.

    // Original arguments, kept so the temporaries created below can be
    // released (and C written back) at the end.
    int       m_save    = m;
    int       n_save    = n;
    float*    a_save    = a;
    float*    b_save    = b;
    float*    c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero = bl1_s0();
    float     one  = bl1_s1();
    float*    a_unswap;
    float*    b_unswap;
    float*    c_trans;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldc_trans, incc_trans;
    int       m_gemm, n_gemm;
    int       gemm_needs_axpyt = FALSE;

    // Return early if possible.
    // Even then, C is still scaled by beta before returning.
    if ( bl1_zero_dim3( m, k, n ) )
    {
        bl1_sscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_screate_contigmt( transa,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_screate_contigmt( transb,
                          k,
                          n,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_screate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // These are used to track the original values of a and b prior to any
    // operand swapping that might take place. This is necessary for proper
    // freeing of memory when one is a temporary contiguous matrix.
    a_unswap = a;
    b_unswap = b;

    // These are used to track the dimensions of the product of the
    // A and B operands to the BLAS invocation of gemm. These differ
    // from m and n when the operands need to be swapped.
    m_gemm = m;
    n_gemm = n;

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // Eight cases follow: {C col, C row} x {A col, A row} x {B col, B row}.
    // Two of them (C_c/A_r/B_r and C_r/A_c/B_c) cannot be expressed as a
    // single column-major gemm into C and set gemm_needs_axpyt instead.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_c )
                // effective operation: C_c += tr( A_c ) * tr( B_c )
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                
                // requested operation: C_c += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( A_c ) * tr( B_c )^T
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( A_r )^T * tr( B_c )
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transa );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=   tr( A_r ) * tr( B_r )
                // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                // Exchange the roles of A and B (pointers, strides, trans).
                bl1_sswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );

                // Only the product's dimensions are swapped here; m and n
                // keep describing C.
                gemm_needs_axpyt = TRUE;
                bl1_swap_ints( m_gemm, n_gemm );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r +=   tr( A_c ) * tr( B_c )
                // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
                bl1_swap_ints( ldc, incc );

                // Here m and n are swapped to describe the column-stored
                // view of C, while m_gemm/n_gemm keep the product's
                // dimensions.
                bl1_swap_ints( m, n );

                gemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )^T
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transa );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_sswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( B_c )^T * tr( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transb );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_sswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_sswap_pointers( a, b );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
    }

    // There are two cases where we need to perform the gemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( gemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, while m_gemm and n_gemm are the
        // dimensions of the actual product op(A)*op(B), which may be n-by-m
        // since the operands may have been swapped.
        c_trans    = bl1_sallocm( m_gemm, n_gemm );
        ldc_trans  = m_gemm;
        incc_trans = 1;

        // Compute tr( A ) * tr( B ), where A and B may have been swapped
        // to reference the other, and store the result in C_trans.
        // A zero beta is passed so the contents of the freshly allocated
        // c_trans are not accumulated into.
        bl1_sgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        // (incc, ldc) are passed as the (row stride, column stride) pair.
        bl1_sscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_saxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_sfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // The gemm accumulates directly into C using the caller's beta.
        bl1_sgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a, lda,
                        b, ldb,
                        beta,
                        c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    // a_unswap/b_unswap are used instead of a/b because a and b may have
    // been exchanged above.
    bl1_sfree_contigm( a_save,    a_rs_save, a_cs_save,
                       &a_unswap, &a_rs,     &a_cs );

    bl1_sfree_contigm( b_save,    b_rs_save, b_cs_save,
                       &b_unswap, &b_rs,     &b_cs );

    bl1_sfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_sgemm_blas
//
// Thin dispatch layer that hands a single-precision general matrix-matrix
// multiply to the BLAS. If BLIS1_ENABLE_CBLAS_INTERFACES is defined the
// request goes to cblas_sgemm() with CblasColMajor ordering; otherwise it
// goes to F77_sgemm(). In both builds transa and transb are first translated
// by bl1_param_map_to_netlib_trans().
//
//   transa, transb  transposition parameters for a and b
//   m, n, k         dimensions, passed through unchanged
//   alpha, beta     pointers to the scalars
//   a, b, c         matrix buffers with leading dimensions lda, ldb, ldc
void bl1_sgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_sgemm(), CblasColMajor, and F77_sgemm().

Referenced by bl1_sgemm().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE ta_cblas;
    enum CBLAS_TRANSPOSE tb_cblas;

    // Translate the transposition parameters into CBLAS enum values.
    bl1_param_map_to_netlib_trans( transa, &ta_cblas );
    bl1_param_map_to_netlib_trans( transb, &tb_cblas );

    // The CBLAS interface receives the scalars by value.
    cblas_sgemm( CblasColMajor, ta_cblas, tb_cblas,
                 m, n, k,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char ta_f77;
    char tb_f77;

    // Translate the transposition parameters into BLAS characters.
    bl1_param_map_to_netlib_trans( transa, &ta_f77 );
    bl1_param_map_to_netlib_trans( transb, &tb_f77 );

    // The Fortran-77 interface receives every argument by address.
    F77_sgemm( &ta_f77, &tb_f77,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
// bl1_shemm
//
// Single-precision "hemm" entry point. It performs no work of its own:
// every argument is forwarded unchanged, and in the same order, to
// bl1_ssymm().
void bl1_shemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssymm().

{
    // Delegate to the symmetric routine.
    bl1_ssymm( side, uplo,
               m, n,
               alpha,
               a, a_rs, a_cs,
               b, b_rs, b_cs,
               beta,
               c, c_rs, c_cs );
}
// bl1_sher2k
//
// Single-precision "her2k" entry point. It performs no work of its own:
// every argument is forwarded unchanged, and in the same order, to
// bl1_ssyr2k().
void bl1_sher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyr2k().

{
    // Delegate to the symmetric rank-2k routine.
    bl1_ssyr2k( uplo, trans,
                m, k,
                alpha,
                a, a_rs, a_cs,
                b, b_rs, b_cs,
                beta,
                c, c_rs, c_cs );
}
// bl1_sherk
//
// Single-precision "herk" entry point. It performs no work of its own:
// every argument is forwarded unchanged, and in the same order, to
// bl1_ssyrk().
void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_ssyrk().

{
    // Delegate to the symmetric rank-k routine.
    bl1_ssyrk( uplo, trans,
               m, k,
               alpha,
               a, a_rs, a_cs,
               beta,
               c, c_rs, c_cs );
}
// bl1_ssymm
//
// Single-precision symmetric matrix-matrix multiply front end. A, B and C
// may be supplied with arbitrary row/column strides; the routine rewrites
// the request as an equivalent column-major problem and hands it to
// bl1_ssymm_blas().
//
// Parameters:
//   side           side parameter for the product; it is also passed to
//                  bl1_set_dim_with_side() together with m and n to obtain
//                  dim_a, the order of the square matrix A
//   uplo           triangle selector for A
//   m, n           dimensions of B and C
//   alpha, beta    pointers to the scalars
//   a, a_rs, a_cs  matrix A with its row stride and column stride
//   b, b_rs, b_cs  matrix B with its row stride and column stride
//   c, c_rs, c_cs  matrix C with its row stride and column stride
//
// Outline:
//   1. Return immediately if bl1_zero_dim2( m, n ) reports an empty problem.
//   2. Obtain (possibly temporary) contiguous versions of A, B and C.
//   3. Inspect the storage of C, A and B (eight combinations) and adjust
//      the leading dimensions, side, uplo and m/n accordingly. Some cases
//      additionally require a column-major copy of B (symm_needs_copyb,
//      optionally transposed via symm_needs_transb) or require the product
//      to be formed in a temporary and then added into C with a transpose
//      (symm_needs_axpyt).
//   4. Call bl1_ssymm_blas() and release every temporary, writing the
//      result back to the caller's C.
void bl1_ssymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s0(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_ssymm_blas(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by bl1_shemm(), FLA_Hemm_external(), and FLA_Symm_external().

{
    // Save the caller's original dimensions, pointers and strides. They are
    // needed at the end to release temporaries and copy the result back.
    int       m_save    = m;
    int       n_save    = n;
    float*    a_save    = a;
    float*    b_save    = b;
    float*    c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero = bl1_s0();
    float     one  = bl1_s1();
    float*    b_copy;
    float*    c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    // Flags set by the storage analysis below.
    int       symm_needs_copyb  = FALSE;
    int       symm_needs_transb = FALSE;
    int       symm_needs_axpyt  = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_screate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_screate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_screate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;
    
    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                symm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                symm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_copyb  = TRUE;
                symm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // By default the "copy" simply aliases B itself.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;
    
    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( symm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;
        
        b_copy    = bl1_sallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_scopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the symm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( symm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_sallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the copyb cases, hence the use of a, b, lda, and ldb.
        // Passing &zero as beta means C_trans is used for output only.
        bl1_ssymm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_sscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_saxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_sfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // b_copy is either the column-major copy made above or B itself.
        bl1_ssymm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a,      lda,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    // Release the copy of B only if one was actually allocated.
    if ( symm_needs_copyb )
        bl1_sfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_sfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
// bl1_ssymm_blas
//
// Thin dispatch layer that hands a single-precision symmetric matrix-matrix
// multiply to the BLAS. If BLIS1_ENABLE_CBLAS_INTERFACES is defined the
// request goes to cblas_ssymm() with CblasColMajor ordering; otherwise it
// goes to F77_ssymm(). In both builds side and uplo are first translated by
// bl1_param_map_to_netlib_side() and bl1_param_map_to_netlib_uplo().
//
//   side, uplo    side and triangle parameters
//   m, n          dimensions, passed through unchanged
//   alpha, beta   pointers to the scalars
//   a, b, c       matrix buffers with leading dimensions lda, ldb, ldc
void bl1_ssymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor, and F77_ssymm().

Referenced by bl1_ssymm().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE  side_cblas;
    enum CBLAS_UPLO  uplo_cblas;

    // Translate the parameters into CBLAS enum values.
    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // The CBLAS interface receives the scalars by value.
    cblas_ssymm( CblasColMajor, side_cblas, uplo_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char side_f77;
    char uplo_f77;

    // Translate the parameters into BLAS characters.
    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The Fortran-77 interface receives every argument by address.
    F77_ssymm( &side_f77, &uplo_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
// bl1_ssyr2k
//
// Single-precision symmetric rank-2k update front end. A, B and C may be
// supplied with arbitrary row/column strides; the routine rewrites the
// request as an equivalent column-major problem and hands it to
// bl1_ssyr2k_blas().
//
// Parameters:
//   uplo           triangle selector for C
//   trans          transposition parameter applied to A and B
//   m, k           problem dimensions; C is treated as m-by-m, and the
//                  dimensions of A and B are obtained from trans, m and k
//                  via bl1_set_dims_with_trans()
//   alpha, beta    pointers to the scalars
//   a, a_rs, a_cs  matrix A with its row stride and column stride
//   b, b_rs, b_cs  matrix B with its row stride and column stride
//   c, c_rs, c_cs  matrix C with its row stride and column stride
//
// Outline:
//   1. Return immediately if bl1_zero_dim2( m, k ) reports an empty problem.
//   2. Obtain (possibly temporary) contiguous versions of A, B and C.
//   3. Inspect the storage of C, A and B (eight combinations) and adjust
//      the leading dimensions, uplo and trans accordingly. When exactly one
//      of A and B is row-stored, that matrix is copied into a column-major
//      temporary (syr2k_needs_copya / syr2k_needs_copyb).
//   4. Call bl1_ssyr2k_blas() and release every temporary, writing the
//      result back to the caller's C.
void bl1_ssyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_sallocm(), bl1_scopymt(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_set_dims_with_trans(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyr2k_blas(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().

{
    // Save the caller's original uplo, dimension, pointers and strides.
    // They are needed at the end to release temporaries and copy the result
    // back (uplo itself may be toggled below).
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    float*    a_save    = a;
    float*    b_save    = b;
    float*    c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float*    a_copy;
    float*    b_copy;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    // Flags set by the storage analysis below.
    int       syr2k_needs_copya = FALSE;
    int       syr2k_needs_copyb = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_screate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_screate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_screate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( trans );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_trans( trans );
            }
        }
    }

    // By default the "copy" of A simply aliases A itself.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;
    
    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bl1_sallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_scopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }
    
    // By default the "copy" of B simply aliases B itself.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bl1_sallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_scopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // a_copy / b_copy are either the column-major copies made above or the
    // original A / B.
    bl1_ssyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_copy, lda_copy,
                     b_copy, ldb_copy,
                     beta,
                     c, ldc );

    // Release the copies only if they were actually allocated.
    if ( syr2k_needs_copya )
        bl1_sfree( a_copy );

    if ( syr2k_needs_copyb )
        bl1_sfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_sfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_sfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    bl1_sfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
// bl1_ssyr2k_blas
//
// Thin dispatch layer that hands a single-precision symmetric rank-2k
// update to the BLAS. If BLIS1_ENABLE_CBLAS_INTERFACES is defined the
// request goes to cblas_ssyr2k() with CblasColMajor ordering; otherwise it
// goes to F77_ssyr2k(). A conjugate-transpose request is replaced by
// BLIS1_TRANSPOSE before the parameters are translated.
//
//   uplo, trans   triangle and transposition parameters
//   m, k          dimensions, passed through unchanged
//   alpha, beta   pointers to the scalars
//   a, b, c       matrix buffers with leading dimensions lda, ldb, ldc
void bl1_ssyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb,
float *  beta,
float *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_ssyr2k(), CblasColMajor, and F77_ssyr2k().

Referenced by bl1_ssyr2k().

{
    // BLAS doesn't recognize the conjugate-transposition constant for syr2k,
    // so it is mapped down to regular transposition. Every other value is
    // used as given.
    trans1_t trans_eff = ( bl1_is_conjtrans( trans ) ? BLIS1_TRANSPOSE : trans );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    // Translate the parameters into CBLAS enum values.
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans_eff, &trans_cblas );

    // The CBLAS interface receives the scalars by value.
    cblas_ssyr2k( CblasColMajor, uplo_cblas, trans_cblas,
                  m, k,
                  *alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    // Translate the parameters into BLAS characters.
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans_eff, &trans_f77 );

    // The Fortran-77 interface receives every argument by address.
    F77_ssyr2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
// bl1_ssyrk
//
// Single-precision symmetric rank-k update front end. A and C may be
// supplied with arbitrary row/column strides; the routine rewrites the
// request as an equivalent column-major problem and hands it to
// bl1_ssyrk_blas().
//
//   uplo           triangle selector for C
//   trans          transposition parameter applied to A
//   m, k           problem dimensions; C is treated as m-by-m
//   alpha, beta    pointers to the scalars
//   a, a_rs, a_cs  matrix A with its row stride and column stride
//   c, c_rs, c_cs  matrix C with its row stride and column stride
void bl1_ssyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_screate_contigmr(), bl1_screate_contigmt(), bl1_sfree_contigm(), bl1_sfree_saved_contigmr(), bl1_ssyrk_blas(), and bl1_zero_dim2().

Referenced by bl1_sherk(), FLA_Herk_external(), FLA_Syrk_external(), and FLA_UDdate_UT_ops_var1().

{
    // The caller's original view of the operands. These are needed at the
    // end to release temporaries and write the result back.
    uplo1_t   uplo_orig = uplo;
    int       m_orig    = m;
    float*    a_orig    = a;
    float*    c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    int       lda, inca;
    int       ldc, incc;
    int       a_is_col;
    int       c_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, k ) ) return;

    // Switch to temporary contiguous copies of A and C where necessary.
    bl1_screate_contigmt( trans, m, k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );

    bl1_screate_contigmr( uplo, m, m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    a_is_col = bl1_is_col_storage( a_rs, a_cs );
    c_is_col = bl1_is_col_storage( c_rs, c_cs );

    // The adjustments for A and for C touch disjoint variables, so each
    // matrix can be handled on its own:
    //
    //   requested operation: uplo( C ) += A_r * A_r^T
    //   effective operation: uplo( C ) += A_c^T * A_c
    if ( !a_is_col )
    {
        bl1_swap_ints( lda, inca );

        bl1_toggle_trans( trans );
    }

    //   requested operation:  uplo( C_r ) += ...
    //   effective operation: ~uplo( C_c ) += ...
    if ( !c_is_col )
    {
        bl1_swap_ints( ldc, incc );

        bl1_toggle_uplo( uplo );
    }

    bl1_ssyrk_blas( uplo, trans,
                    m, k,
                    alpha,
                    a, lda,
                    beta,
                    c, ldc );

    // Release any temporaries, copying the result back to the caller's C.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );

    bl1_sfree_saved_contigmr( uplo_orig,
                              m_orig, m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c,     &c_rs,     &c_cs );
}
// bl1_ssyrk_blas
//
// Thin dispatch layer that hands a single-precision symmetric rank-k update
// to the BLAS. If BLIS1_ENABLE_CBLAS_INTERFACES is defined the request goes
// to cblas_ssyrk() with CblasColMajor ordering; otherwise it goes to
// F77_ssyrk(). In both builds uplo and trans are first translated by
// bl1_param_map_to_netlib_uplo() and bl1_param_map_to_netlib_trans().
//
//   uplo, trans   triangle and transposition parameters
//   m, k          dimensions, passed through unchanged
//   alpha, beta   pointers to the scalars
//   a, c          matrix buffers with leading dimensions lda, ldc
void bl1_ssyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  lda,
float *  beta,
float *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ssyrk(), CblasColMajor, and F77_ssyrk().

Referenced by bl1_ssyrk().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    // Translate the parameters into CBLAS enum values.
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    // The CBLAS interface receives the scalars by value.
    cblas_ssyrk( CblasColMajor, uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    // Translate the parameters into BLAS characters.
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran-77 interface receives every argument by address.
    F77_ssyrk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
// bl1_strmm
//
// Single-precision triangular matrix-matrix multiply front end. A and B may
// be supplied with arbitrary row/column strides; the routine rewrites the
// request as an equivalent column-major problem and hands it to
// bl1_strmm_blas(). B is both an input and the output.
//
//   side           side parameter; also passed to bl1_set_dim_with_side()
//                  together with m and n to obtain dim_a, the order of A
//   uplo           triangle selector for A
//   trans          transposition parameter applied to A
//   diag           diagonal parameter, passed through unchanged
//   m, n           dimensions of B
//   alpha          pointer to the scalar
//   a, a_rs, a_cs  matrix A with its row stride and column stride
//   b, b_rs, b_cs  matrix B with its row stride and column stride
void bl1_strmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strmm_blas(), and bl1_zero_dim2().

Referenced by bl1_strmmsx(), and FLA_Trmm_external().

{
    // The caller's original view of the operands. These are needed at the
    // end to release temporaries and write the result back.
    int       m_orig    = m;
    int       n_orig    = n;
    float*    a_orig    = a;
    float*    b_orig    = b;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       a_is_col;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Switch to temporary contiguous copies of A and B where necessary.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );

    bl1_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    a_is_col = bl1_is_col_storage( a_rs, a_cs );
    b_is_col = bl1_is_col_storage( b_rs, b_cs );

    // Pick the adjustments for the storage combination at hand.
    if ( b_is_col && a_is_col )
    {
        // requested operation: B_c := tr( uplo( A_c ) ) * B_c
        // effective operation: B_c := tr( uplo( A_c ) ) * B_c
    }
    else if ( b_is_col )
    {
        // requested operation: B_c := tr(  uplo( A_r ) )   * B_c
        // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
        bl1_swap_ints( lda, inca );

        bl1_toggle_uplo( uplo );
        bl1_toggle_trans( trans );
    }
    else if ( a_is_col )
    {
        // requested operation: B_r := tr( uplo( A_c ) ) * B_r
        // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
        bl1_swap_ints( ldb, incb );

        bl1_swap_ints( m, n );

        bl1_toggle_side( side );
        bl1_toggle_trans( trans );
    }
    else
    {
        // requested operation: B_r := tr( uplo( A_r ) ) * B_r
        // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
        bl1_swap_ints( ldb, incb );
        bl1_swap_ints( lda, inca );

        bl1_swap_ints( m, n );

        bl1_toggle_uplo( uplo );
        bl1_toggle_side( side );
    }

    bl1_strmm_blas( side, uplo, trans, diag,
                    m, n,
                    alpha,
                    a, lda,
                    b, ldb );

    // Release any temporaries, copying the result back to the caller's B.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );

    bl1_sfree_saved_contigm( m_orig, n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b,     &b_rs,     &b_cs );
}
// bl1_strmm_blas
//
// Thin dispatch layer that hands a single-precision triangular
// matrix-matrix multiply to the BLAS. If BLIS1_ENABLE_CBLAS_INTERFACES is
// defined the request goes to cblas_strmm() with CblasColMajor ordering;
// otherwise it goes to F77_strmm(). In both builds side, uplo, trans and
// diag are first translated by the bl1_param_map_to_netlib_*() helpers.
//
//   side, uplo, trans, diag   operation parameters
//   m, n                      dimensions, passed through unchanged
//   alpha                     pointer to the scalar
//   a, b                      matrix buffers with leading dimensions lda, ldb
void bl1_strmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strmm(), CblasColMajor, and F77_strmm().

Referenced by bl1_strmm().

{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    // Translate the parameters into CBLAS enum values.
    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );
    bl1_param_map_to_netlib_diag( diag, &diag_cblas );

    // The CBLAS interface receives the scalar by value.
    cblas_strmm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    // Translate the parameters into BLAS characters.
    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );
    bl1_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran-77 interface receives every argument by address.
    F77_strmm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bl1_strmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strmm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

{
    // Runs bl1_strmm() on a scratch copy of B, scales C by beta, and then
    // accumulates the scratch result into C. B itself is only read.

    // Caller-supplied operands; a, b and c may be redirected below to
    // temporary contiguous copies.
    float*    a_orig    = a;
    float*    b_orig    = b;
    float*    c_orig    = c;
    int       a_rs_orig = a_rs, a_cs_orig = a_cs;
    int       b_rs_orig = b_rs, b_cs_orig = b_cs;
    int       c_rs_orig = c_rs, c_cs_orig = c_cs;
    int       m_orig    = m;
    int       n_orig    = n;
    float     unit      = bl1_s1();
    float*    b_work;
    int       b_work_rs, b_work_cs;
    int       b_is_col;
    int       dim_a;

    // Nothing to compute when bl1_zero_dim2() flags the dimensions.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Obtain contiguous views of A, B and C (temporary copies are made only
    // if necessary). The order of A is chosen by bl1_set_dim_with_side().
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );
    bl1_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );
    bl1_screate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c,     &c_rs,     &c_cs );

    // Scratch m-by-n matrix that receives the bl1_strmm() result so that B
    // is left untouched.
    b_work = bl1_sallocm( m, n );

    // Give the scratch matrix the same storage orientation as B.
    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bl1_scopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the operation to the scratch copy.
    bl1_strmm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_sscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_saxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    // The scratch matrix is no longer needed.
    bl1_sfree( b_work );

    // Release the contiguous temporaries. Only C goes through the "saved"
    // variant, which copies the result back to the original matrix.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
    bl1_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b,     &b_rs,     &b_cs );
    bl1_sfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c,     &c_rs,     &c_cs );
}
void bl1_strsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_strsm_blas(), and bl1_zero_dim2().

Referenced by bl1_strsmsx(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_piv_ops_var3(), and FLA_Trsm_external().

{
    // Prepares A and B for the column-major bl1_strsm_blas() call: makes
    // both operands contiguous, then rewrites side/uplo/trans and the
    // dimensions so that row-stored operands can be handed to the BLAS as
    // if they were column-stored.

    // Caller-supplied operands; a and b may be redirected below to
    // temporary contiguous copies.
    float*    a_orig    = a;
    float*    b_orig    = b;
    int       a_rs_orig = a_rs, a_cs_orig = a_cs;
    int       b_rs_orig = b_rs, b_cs_orig = b_cs;
    int       m_orig    = m;
    int       n_orig    = n;
    int       dim_a;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       a_is_col, b_is_col;

    // Nothing to compute when bl1_zero_dim2() flags the dimensions.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Obtain contiguous views of A and B (temporary copies are made only if
    // necessary). The order of A is chosen by bl1_set_dim_with_side().
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );
    bl1_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );

    // Start from the column-major interpretation of the strides.
    ld_a  = a_cs;
    inc_a = a_rs;
    ld_b  = b_cs;
    inc_b = b_rs;

    b_is_col = bl1_is_col_storage( b_rs, b_cs );
    a_is_col = bl1_is_col_storage( a_rs, a_cs );

    // Row-stored A: read it as column-stored, which flips the triangle
    // that is referenced.
    if ( !a_is_col )
    {
        bl1_swap_ints( ld_a, inc_a );
        bl1_toggle_uplo( uplo );
    }

    // Row-stored B: operate on the column-stored view instead, which
    // exchanges the dimensions and the side A is applied from.
    if ( !b_is_col )
    {
        bl1_swap_ints( ld_b, inc_b );
        bl1_swap_ints( m, n );
        bl1_toggle_side( side );
    }

    // When exactly one operand is row-stored, the reinterpretation leaves
    // an extra transposition of A that must be folded into trans.
    if ( !a_is_col != !b_is_col )
    {
        bl1_toggle_trans( trans );
    }

    bl1_strsm_blas( side, uplo, trans, diag,
                    m, n,
                    alpha,
                    a, ld_a,
                    b, ld_b );

    // Release the contiguous temporaries. B goes through the "saved"
    // variant with the original dimensions, which copies the result back
    // to the original matrix.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
    bl1_sfree_saved_contigm( m_orig, n_orig,
                             b_orig, b_rs_orig, b_cs_orig,
                             &b,     &b_rs,     &b_cs );
}
void bl1_strsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_strsm(), CblasColMajor, and F77_strsm().

Referenced by bl1_strsm().

{
    // Thin dispatch layer: translate the BLIS1 side/uplo/trans/diag
    // parameters with the bl1_param_map_to_netlib_*() helpers and forward
    // the call to cblas_strsm() when BLIS1_ENABLE_CBLAS_INTERFACES is
    // defined, or to F77_strsm() otherwise.

    // Only the type of the mapped parameters depends on the interface.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_nl;
    enum CBLAS_UPLO      uplo_nl;
    enum CBLAS_TRANSPOSE trans_nl;
    enum CBLAS_DIAG      diag_nl;
#else
    char                 side_nl;
    char                 uplo_nl;
    char                 trans_nl;
    char                 diag_nl;
#endif

    // Map each BLIS1 parameter to its netlib counterpart.
    bl1_param_map_to_netlib_side( side, &side_nl );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_nl );
    bl1_param_map_to_netlib_trans( trans, &trans_nl );
    bl1_param_map_to_netlib_diag( diag, &diag_nl );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS variant: column-major order is always requested, and alpha is
    // passed by value.
    cblas_strsm( CblasColMajor,
                 side_nl, uplo_nl, trans_nl, diag_nl,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    // F77 variant: every scalar argument is passed by address.
    F77_strsm( &side_nl, &uplo_nl, &trans_nl, &diag_nl,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bl1_strsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_s1(), bl1_sallocm(), bl1_saxpymt(), bl1_scopymt(), bl1_screate_contigm(), bl1_screate_contigmr(), bl1_set_dim_with_side(), bl1_sfree(), bl1_sfree_contigm(), bl1_sfree_saved_contigm(), bl1_sscalm(), bl1_strsm(), bl1_zero_dim2(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

{
    // Runs bl1_strsm() on a scratch copy of B, scales C by beta, and then
    // accumulates the scratch result into C. B itself is only read.

    // Caller-supplied operands; a, b and c may be redirected below to
    // temporary contiguous copies.
    float*    a_orig    = a;
    float*    b_orig    = b;
    float*    c_orig    = c;
    int       a_rs_orig = a_rs, a_cs_orig = a_cs;
    int       b_rs_orig = b_rs, b_cs_orig = b_cs;
    int       c_rs_orig = c_rs, c_cs_orig = c_cs;
    int       m_orig    = m;
    int       n_orig    = n;
    float     unit      = bl1_s1();
    float*    b_work;
    int       b_work_rs, b_work_cs;
    int       b_is_col;
    int       dim_a;

    // Nothing to compute when bl1_zero_dim2() flags the dimensions.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Obtain contiguous views of A, B and C (temporary copies are made only
    // if necessary). The order of A is chosen by bl1_set_dim_with_side().
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_screate_contigmr( uplo, dim_a, dim_a,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );
    bl1_screate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );
    bl1_screate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c,     &c_rs,     &c_cs );

    // Scratch m-by-n matrix that receives the bl1_strsm() result so that B
    // is left untouched.
    b_work = bl1_sallocm( m, n );

    // Give the scratch matrix the same storage orientation as B.
    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bl1_scopymt( BLIS1_NO_TRANSPOSE, m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // Apply the operation to the scratch copy.
    bl1_strsm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_sscalm( BLIS1_NO_CONJUGATE, m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_saxpymt( BLIS1_NO_TRANSPOSE, m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    // The scratch matrix is no longer needed.
    bl1_sfree( b_work );

    // Release the contiguous temporaries. Only C goes through the "saved"
    // variant, which copies the result back to the original matrix.
    bl1_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
    bl1_sfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b,     &b_rs,     &b_cs );
    bl1_sfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c,     &c_rs,     &c_cs );
}
void bl1_zgemm ( trans1_t  transa,
trans1_t  transb,
int  m,
int  k,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjm(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zero_dim3(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zgemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, and BLIS1_TRANSPOSE.

Referenced by FLA_Gemm_external().

{
    // Double-precision complex matrix-matrix multiply front end. Accepts A,
    // B and C with independent row/column strides, makes contiguous copies
    // where needed, rewrites the problem so it can be handed to the
    // column-major bl1_zgemm_blas(), and finally releases any temporaries.
    // Presumably follows the usual gemm convention
    // C := beta * C + alpha * tr( A ) * tr( B ) -- confirm against the
    // library documentation.

    // Original arguments, saved because a, b, c, their strides, and m/n may
    // all be modified below.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex  zero = bl1_z0();
    dcomplex  one  = bl1_z1();
    dcomplex* a_unswap;
    dcomplex* b_unswap;
    dcomplex* a_conj;
    dcomplex* b_conj;
    dcomplex* c_trans;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_conj, incb_conj;
    int       ldc_trans, incc_trans;
    int       m_gemm, n_gemm;
    // Set when the product must be formed in a temporary and then added
    // into C with a transpose (see the gemm_needs_axpyt branch below).
    int       gemm_needs_axpyt = FALSE;
    int       a_was_copied;
    int       b_was_copied;

    // Return early if possible.
    // C is still scaled by beta before returning.
    if ( bl1_zero_dim3( m, k, n ) )
    {
        bl1_zscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_zcreate_contigmt( transa,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigmt( transb,
                          k,
                          n,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Figure out whether A and/or B was copied to contiguous memory. This
    // is used later to prevent redundant copying.
    a_was_copied = ( a != a_save );
    b_was_copied = ( b != b_save );

    // These are used to track the original values of a and b prior to any
    // operand swapping that might take place. This is necessary for proper
    // freeing of memory when one is a temporary contiguous matrix.
    a_unswap = a;
    b_unswap = b;

    // These are used to track the dimensions of the product of the
    // A and B operands to the BLAS invocation of gemm. These differ
    // from m and n when the operands need to be swapped.
    m_gemm = m;
    n_gemm = n;

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the branches that swap operands, a/b, their strides, their
    // "was copied" flags and transa/transb are all exchanged together, so
    // that from then on "a" denotes the left operand of the BLAS call.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_c )
                // effective operation: C_c += tr( A_c ) * tr( B_c )
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                
                // requested operation: C_c += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( A_c ) * tr( B_c )^T
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( A_r )^T * tr( B_c )
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transa );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=   tr( A_r ) * tr( B_r )
                // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_zswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );

                gemm_needs_axpyt = TRUE;
                bl1_swap_ints( m_gemm, n_gemm );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r +=   tr( A_c ) * tr( B_c )
                // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                gemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )^T
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( transa );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_zswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r )   * tr( B_c )
                // effective operation: C_c += tr( B_c )^T * tr( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_toggle_trans( transb );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_zswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );
                bl1_swap_ints( m_gemm, n_gemm );
                bl1_zswap_pointers( a, b );
                bl1_swap_ints( a_was_copied, b_was_copied );
                bl1_swap_ints( lda, ldb );
                bl1_swap_ints( inca, incb );
                bl1_swap_trans( transa, transb );
            }
        }
    }

    // We need a temporary matrix for the case where A is conjugated.
    // By default a_conj simply aliases a.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // If transa indicates conjugate-no-transpose and A was not already
    // copied, then copy and conjugate it to a temporary matrix. Otherwise,
    // if transa indicates conjugate-no-transpose and A was already copied,
    // just conjugate it.
    if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
    {
        a_conj    = bl1_zallocm( m_gemm, k );
        lda_conj  = m_gemm;
        inca_conj = 1;

        bl1_zcopymt( BLIS1_CONJ_NO_TRANSPOSE,
                     m_gemm,
                     k,
                     a,      inca,      lda,
                     a_conj, inca_conj, lda_conj );
    }
    else if ( bl1_is_conjnotrans( transa ) && a_was_copied )
    {
        // a_conj aliases the temporary contiguous copy here, so the
        // conjugation is applied to that copy.
        bl1_zconjm( m_gemm,
                    k,
                    a_conj, inca_conj, lda_conj );
    }

    // We need a temporary matrix for the case where B is conjugated.
    // By default b_conj simply aliases b.
    b_conj    = b;
    ldb_conj  = ldb;
    incb_conj = incb;

    // If transb indicates conjugate-no-transpose and B was not already
    // copied, then copy and conjugate it to a temporary matrix. Otherwise,
    // if transb indicates conjugate-no-transpose and B was already copied,
    // just conjugate it.
    if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
    {
        b_conj    = bl1_zallocm( k, n_gemm );
        ldb_conj  = k;
        incb_conj = 1;

        bl1_zcopymt( BLIS1_CONJ_NO_TRANSPOSE,
                     k,
                     n_gemm,
                     b,      incb,      ldb,
                     b_conj, incb_conj, ldb_conj );
    }
    else if ( bl1_is_conjnotrans( transb ) && b_was_copied )
    {
        // b_conj aliases the temporary contiguous copy here, so the
        // conjugation is applied to that copy.
        bl1_zconjm( k,
                    n_gemm,
                    b_conj, incb_conj, ldb_conj );
    }

    // There are two cases where we need to perform the gemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( gemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, while m_gemm and n_gemm are the
        // dimensions of the actual product op(A)*op(B), which may be n-by-m
        // since the operands may have been swapped.
        c_trans    = bl1_zallocm( m_gemm, n_gemm );
        ldc_trans  = m_gemm;
        incc_trans = 1;

        // Compute tr( A ) * tr( B ), where A and B may have been swapped
        // to reference the other, and store the result in C_trans.
        // beta is replaced by zero here; the real beta is applied to C
        // separately below.
        bl1_zgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a_conj,  lda_conj,
                        b_conj,  ldb_conj,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_zscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_zaxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_zfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // The BLAS call updates C directly, applying beta itself.
        bl1_zgemm_blas( transa,
                        transb,
                        m_gemm,
                        n_gemm,
                        k,
                        alpha,
                        a_conj, lda_conj,
                        b_conj, ldb_conj,
                        beta,
                        c,      ldc );
    }

    // Free the conjugated temporaries. These conditions mirror the ones
    // under which bl1_zallocm() was called for a_conj / b_conj above.
    if ( bl1_is_conjnotrans( transa ) && !a_was_copied )
        bl1_zfree( a_conj );

    if ( bl1_is_conjnotrans( transb ) && !b_was_copied )
        bl1_zfree( b_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    // The pre-swap pointers are used so that each temporary is paired with
    // the original matrix it was created from.
    bl1_zfree_contigm( a_save,    a_rs_save, a_cs_save,
                       &a_unswap, &a_rs,     &a_cs );

    bl1_zfree_contigm( b_save,    b_rs_save, b_cs_save,
                       &b_unswap, &b_rs,     &b_cs );

    bl1_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bl1_zgemm_blas ( trans1_t  transa,
trans1_t  transb,
int  m,
int  n,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), cblas_zgemm(), CblasColMajor, and F77_zgemm().

Referenced by bl1_zgemm().

{
    // Thin dispatch layer: translate the two BLIS1 trans parameters with
    // bl1_param_map_to_netlib_trans() and forward the call to cblas_zgemm()
    // when BLIS1_ENABLE_CBLAS_INTERFACES is defined, or to F77_zgemm()
    // otherwise.

    // Only the type of the mapped parameters depends on the interface.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE transa_nl;
    enum CBLAS_TRANSPOSE transb_nl;
#else
    char                 transa_nl;
    char                 transb_nl;
#endif

    // Map each BLIS1 parameter to its netlib counterpart.
    bl1_param_map_to_netlib_trans( transa, &transa_nl );
    bl1_param_map_to_netlib_trans( transb, &transb_nl );

#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS variant: column-major order is always requested; alpha and
    // beta are passed through unchanged.
    cblas_zgemm( CblasColMajor,
                 transa_nl, transb_nl,
                 m, n, k,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    // F77 variant: the integer arguments are passed by address.
    F77_zgemm( &transa_nl, &transb_nl,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
void bl1_zhemm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zhemm_blas(), bl1_zscalm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Hemm_external().

{
    // Overview: performs a double-complex Hermitian matrix multiply on
    // operands described by separate row and column strides. The underlying
    // bl1_zhemm_blas() wrapper accepts only one leading dimension per matrix,
    // so each combination of row/column storage among C, A, and B is mapped
    // below onto an equivalent column-major operation, using temporaries
    // (conjugated A, copied B, or a transposed product buffer) where needed.

    // Remember the caller's arguments. The working variables (m, n, a, b, c
    // and their strides) may be swapped or redirected to temporaries below,
    // and the originals are needed again when the temporaries are released.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex  zero = bl1_z0();
    dcomplex  one  = bl1_z1();
    dcomplex* a_conj;
    dcomplex* b_copy;
    dcomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    // Flags selected by the storage-case analysis below; each one enables
    // one of the fix-up steps performed before/after the BLAS call.
    int       hemm_needs_conja  = FALSE;
    int       hemm_needs_copyb  = FALSE;
    int       hemm_needs_transb = FALSE;
    int       hemm_needs_axpyt  = FALSE;
    int       a_was_copied;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    // A is square; its order (dim_a) is derived from side, m, and n.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, X_c / X_r denote X in column / row
    // storage, and ~uplo denotes the toggled uplo value.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                hemm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );

                hemm_needs_conja = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                hemm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                hemm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );

                hemm_needs_conja = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                hemm_needs_copyb  = TRUE;
                hemm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where A is conjugated.
    // By default a_conj simply aliases a, so the non-conjugating cases
    // pass A through unchanged.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    if ( hemm_needs_conja && !a_was_copied )
    {
        // A still refers to the caller's memory, so conjugate into a freshly
        // allocated column-major buffer instead of modifying it.
        // Note: this dim_a shadows the outer one; it is recomputed because
        // side, m, and n may have been toggled/swapped above.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );

        a_conj    = bl1_zallocm( dim_a, dim_a );
        lda_conj  = dim_a;
        inca_conj = 1;

        bl1_zcopymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      dim_a,
                      dim_a,
                      a,      inca,      lda,
                      a_conj, inca_conj, lda_conj );
    }
    else if ( hemm_needs_conja && a_was_copied )
    {
        // A is already a temporary contiguous copy (a_conj aliases it), so
        // it is conjugated in place rather than copied a second time.
        // Note: this dim_a shadows the outer one; it is recomputed because
        // side, m, and n may have been toggled/swapped above.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );

        bl1_zconjmr( uplo,
                     dim_a,
                     dim_a,
                     a_conj, inca_conj, lda_conj );
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // By default b_copy simply aliases b.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( hemm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( hemm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;

        b_copy    = bl1_zallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_zcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the hemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( hemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_zallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the conja/copyb cases, hence the use of a, b, lda, and ldb.
        // beta is replaced by zero here so that the (uninitialized)
        // contents of C_trans do not contribute to the product.
        bl1_zhemm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_zscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );

        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_zaxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_zfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // Use the (possibly conjugated) A and (possibly copied) B; when no
        // temporaries were made these alias a and b.
        bl1_zhemm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a_conj, lda_conj,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    // Release the conjugated copy of A only if one was allocated above; in
    // the a_was_copied case a_conj aliases a, which is released below.
    if ( hemm_needs_conja && !a_was_copied )
        bl1_zfree( a_conj );

    if ( hemm_needs_copyb )
        bl1_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // The saved (unswapped) m and n are used here since m and n may have
    // been exchanged by the storage-case analysis.
    bl1_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bl1_zhemm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zhemm(), CblasColMajor, and F77_zhemm().

Referenced by bl1_zhemm().

{
    // Thin dispatch layer over the vendor zhemm routine. The side and uplo
    // parameters are translated to the form expected by whichever interface
    // (CBLAS or Fortran-77) the library was configured to use; all other
    // arguments are forwarded untouched.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: column-major layout is always requested.
    enum CBLAS_ORDER layout = CblasColMajor;
    enum CBLAS_SIDE  side_cblas;
    enum CBLAS_UPLO  uplo_cblas;

    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    cblas_zhemm( layout, side_cblas, uplo_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#else
    // Fortran-77 path: side/uplo become characters and every integer
    // argument is passed by address.
    char side_f77;
    char uplo_f77;

    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    F77_zhemm( &side_f77, &uplo_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
void bl1_zher2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs
)

References bl1_d0(), bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zher2k_blas(), BLIS1_CONJ_NO_TRANSPOSE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Her2k_external().

{
    // Overview: performs a double-complex Hermitian rank-2k update on
    // operands described by separate row and column strides. The underlying
    // bl1_zher2k_blas() wrapper accepts only one leading dimension per
    // matrix, so each combination of row/column storage among C, A, and B is
    // mapped below onto an equivalent column-major operation, using
    // temporaries (copies of A or B, or a conjugated product buffer) and a
    // conjugated alpha where needed.

    // Remember the caller's arguments. uplo and the working pointers/strides
    // may be toggled or redirected to temporaries below, and the originals
    // are needed again when the temporaries are released.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r = bl1_d0();
    dcomplex  one    = bl1_z1();
    dcomplex  alpha_copy;
    dcomplex* a_copy;
    dcomplex* b_copy;
    dcomplex* c_conj;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       ldc_conj, incc_conj;
    // Flags selected by the storage-case analysis below; each one enables
    // one of the fix-up steps performed before/after the BLAS call.
    int       her2k_needs_copya      = FALSE;
    int       her2k_needs_copyb      = FALSE;
    int       her2k_needs_conj       = FALSE;
    int       her2k_needs_alpha_conj = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, X_c / X_r denote X in column / row
    // storage, and ~uplo denotes the toggled uplo value.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                her2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_conjtrans( trans );

                her2k_needs_conj       = TRUE;
                her2k_needs_alpha_conj = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                her2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );

                her2k_needs_conj = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_conjtrans( trans );

                her2k_needs_alpha_conj = TRUE;
            }
        }
    }

    // Make a copy of alpha and conjugate if necessary.
    // Working on a local copy leaves the caller's alpha untouched.
    alpha_copy = *alpha;
    if ( her2k_needs_alpha_conj )
    {
        bl1_zconjs( &alpha_copy );
    }

    // By default a_copy simply aliases a.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;

    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        a_copy    = bl1_zallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }

    // By default b_copy simply aliases b.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( her2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        b_copy    = bl1_zallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are several cases where we need to perform the rank-2k product
    // and then axpy the result into C with a conjugation. We handle those
    // cases here.
    if ( her2k_needs_conj )
    {
        // We need a temporary matrix for holding the rank-2k product.
        c_conj    = bl1_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-2k product.
        // beta is replaced by zero here so that the (uninitialized)
        // contents of C_conj do not contribute to the product.
        bl1_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         &zero_r,
                         c_conj, ldc_conj );

        // Scale C by beta.
        bl1_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-2k product in C_conj into C
        // with a conjugation.
        bl1_zaxpymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bl1_zfree( c_conj );
    }
    else
    {
        // No conjugation of the product is needed; update C directly.
        bl1_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         beta,
                         c, ldc );
    }

    // Release the column-major copies only if they were allocated above;
    // otherwise a_copy/b_copy alias a/b, which are released below.
    if ( her2k_needs_copya )
        bl1_zfree( a_copy );

    if ( her2k_needs_copyb )
        bl1_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // The saved uplo is used here since uplo may have been toggled by the
    // storage-case analysis.
    bl1_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_zher2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
double *  beta,
dcomplex *  c,
int  ldc
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().

Referenced by bl1_zher2k().

{
    // Thin dispatch layer over the vendor zher2k routine. The uplo and trans
    // parameters are translated to the form expected by whichever interface
    // (CBLAS or Fortran-77) the library was configured to use.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: column-major layout is always requested. Note that the
    // real scalar beta is passed by value here, so it is dereferenced.
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    cblas_zher2k( layout, uplo_cblas, trans_cblas,
                  m, k,
                  alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#else
    // Fortran-77 path: uplo/trans become characters, every integer argument
    // is passed by address, and beta is forwarded as a pointer.
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    F77_zher2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#endif
}
void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs
)

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

{
    // Overview: performs a double-complex Hermitian rank-k update on
    // operands described by separate row and column strides. The underlying
    // bl1_zherk_blas() wrapper accepts only one leading dimension per matrix,
    // so each combination of row/column storage of C and A is mapped below
    // onto an equivalent column-major operation, using a temporary product
    // buffer that is conjugated into C where needed.

    // Remember the caller's arguments. uplo and the working pointers/strides
    // may be toggled or redirected to temporaries below, and the originals
    // are needed again when the temporaries are released.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r = bl1_d0();
    dcomplex  one    = bl1_z1();
    dcomplex* c_conj;
    int       lda, inca;
    int       ldc, incc;
    int       ldc_conj, incc_conj;
    // Set by the storage-case analysis when the product must be conjugated
    // while being accumulated into C.
    int       herk_needs_conj = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the case comments below, X_c / X_r denote X in column / row
    // storage, and ~uplo denotes the toggled uplo value.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_c * A_c'
            // effective operation: uplo( C_c ) += A_c * A_c'
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: uplo( C_c ) += A_r * A_r'
            // effective operation: uplo( C_c ) += conj( A_c' * A_c )
            bl1_swap_ints( lda, inca );

            bl1_toggle_conjtrans( trans );

            herk_needs_conj = TRUE;
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_c * A_c'
            // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
            bl1_swap_ints( ldc, incc );

            bl1_toggle_uplo( uplo );

            herk_needs_conj = TRUE;
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation:  uplo( C_r ) += A_r * A_r'
            // effective operation: ~uplo( C_c ) += A_c' * A_c
            bl1_swap_ints( ldc, incc );
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_conjtrans( trans );
        }
    }

    // There are two cases where we need to perform the rank-k product and
    // then axpy the result into C with a conjugation. We handle those two
    // cases here.
    if ( herk_needs_conj )
    {
        // We need a temporary matrix for holding the rank-k product.
        c_conj    = bl1_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // Compute the rank-k product.
        // beta is replaced by zero here so that the (uninitialized)
        // contents of C_conj do not contribute to the product.
        bl1_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        &zero_r,
                        c_conj, ldc_conj );

        // Scale C by beta.
        bl1_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // And finally, accumulate the rank-k product in C_conj into C
        // with a conjugation.
        bl1_zaxpymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        // Free the temporary matrix for C.
        bl1_zfree( c_conj );
    }
    else
    {
        // No conjugation of the product is needed; update C directly.
        bl1_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // The saved uplo is used here since uplo may have been toggled by the
    // storage-case analysis.
    bl1_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  lda,
double *  beta,
dcomplex *  c,
int  ldc
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().

{
    // Thin dispatch layer over the vendor zherk routine. The uplo and trans
    // parameters are translated to the form expected by whichever interface
    // (CBLAS or Fortran-77) the library was configured to use.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS path: column-major layout is always requested. The real scalars
    // alpha and beta are passed by value here, so both are dereferenced.
    enum CBLAS_ORDER     layout = CblasColMajor;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    cblas_zherk( layout, uplo_cblas, trans_cblas,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    // Fortran-77 path: uplo/trans become characters, every integer argument
    // is passed by address, and alpha/beta are forwarded as pointers.
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    F77_zherk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
void bl1_zsymm ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z0(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_zsymm_blas(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, and BLIS1_TRANSPOSE.

Referenced by FLA_Symm_external().

{
    // Driver for the double-complex symmetric matrix-matrix multiply. It
    // accepts A, B, and C with general row/column strides and reduces every
    // storage combination to a single column-major call to
    // bl1_zsymm_blas(), optionally followed by a scale-and-transposed-axpy
    // step when the product can only be formed as the transpose of C.
    //
    // NOTE(review): several "effective operation" comments in the storage
    // analysis below mention conj() (and, in the C_r/A_c/B_r case, ~uplo)
    // even though the corresponding branch performs no conjugation (or no
    // uplo toggle). Treat the code as authoritative.

    // Remember the caller's dimensions, pointers, and strides. The working
    // copies (m, n, a, b, c, and their strides) may be changed below, and the
    // originals are required by the bl1_zfree_* calls at the end.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    // Scalar constants passed by address in the axpyt path.
    dcomplex  zero = bl1_z0();
    dcomplex  one  = bl1_z1();
    // Temporaries: a column-major copy of B, and a buffer for the transposed
    // product. Each is allocated only when its flag below is set.
    dcomplex* b_copy;
    dcomplex* c_trans;
    int       dim_a;
    // For each matrix, ldX is the stride treated as the leading dimension
    // and incX is the other stride.
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    // Flags set by the storage analysis to request extra work.
    int       symm_needs_copyb  = FALSE;
    int       symm_needs_transb = FALSE;
    int       symm_needs_axpyt  = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    // dim_a is the dimension of the square matrix A, chosen from m and n
    // according to side by bl1_set_dim_with_side().
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;
    
    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote matrix X viewed with column /
    // row storage, and ~uplo denotes the toggled uplo value.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                symm_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c +=  uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bl1_swap_ints( lda, inca );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bl1_swap_ints( ldc, incc );

                bl1_swap_ints( m, n );

                symm_needs_axpyt = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * ~uplo( conj( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );

                bl1_swap_ints( m, n );

                bl1_toggle_side( side );
                bl1_toggle_uplo( uplo );

                symm_needs_copyb  = TRUE;
                symm_needs_transb = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_swap_ints( m, n );

                bl1_toggle_uplo( uplo );
                bl1_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // By default the "copy" simply aliases B, so the non-copy cases can use
    // b_copy and ldb_copy unconditionally.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;
    
    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( symm_needs_copyb )
    {
        trans1_t transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n have already been swapped. So in either case m represents
        // the leading dimension of the copy.
        if ( symm_needs_transb ) transb = BLIS1_TRANSPOSE;
        else                     transb = BLIS1_NO_TRANSPOSE;
        
        // The copy is m-by-n with unit row stride and leading dimension m.
        b_copy    = bl1_zallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bl1_zcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the symm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( symm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bl1_zallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the copyb cases, hence the use of b and ldb here rather than
        // b_copy and ldb_copy. Passing &zero as beta makes the BLAS call
        // overwrite C_trans rather than accumulate into it.
        bl1_zsymm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a,       lda,
                        b,       ldb,
                        &zero,
                        c_trans, ldc_trans );

        // Scale C by beta.
        bl1_zscalm( BLIS1_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );
        
        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bl1_zaxpymt( BLIS1_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        // Free the temporary matrix for C.
        bl1_zfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        // b_copy/ldb_copy refer either to B itself or to its column-major
        // copy, depending on symm_needs_copyb.
        bl1_zsymm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a,      lda,
                        b_copy, ldb_copy,
                        beta,
                        c,      ldc );
    }

    // Release the copy of B only if one was actually allocated; otherwise
    // b_copy aliases b.
    if ( symm_needs_copyb )
        bl1_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // C is the output, so the saved (unswapped) dimensions are passed along
    // with the original pointer and strides.
    bl1_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bl1_zsymm_blas ( side1_t  side,
uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().

Referenced by bl1_zsymm().

{
    // Forwards a column-major zsymm request to the BLAS binding selected at
    // build time: CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, the
    // Fortran-77 interface otherwise. The BLIS1 side/uplo constants are
    // first mapped to the representation the chosen binding expects.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE side_cblas;
    enum CBLAS_UPLO uplo_cblas;

    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );

    // Complex scalars alpha and beta are passed to CBLAS as pointers.
    cblas_zsymm( CblasColMajor, side_cblas, uplo_cblas,
                 m, n,
                 alpha, a, lda,
                        b, ldb,
                 beta,  c, ldc );
#else
    char side_f77;
    char uplo_f77;

    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );

    // The Fortran-77 interface receives every argument by address,
    // including the integer dimensions.
    F77_zsymm( &side_f77, &uplo_f77,
               &m, &n,
               alpha, a, &lda,
                      b, &ldb,
               beta,  c, &ldc );
#endif
}
void bl1_zsyr2k ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dims_with_trans(), bl1_zallocm(), bl1_zcopymt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zsyr2k_blas(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Syr2k_external().

{
    // Driver for the double-complex symmetric rank-2k update of the uplo
    // triangle of C. It accepts A, B, and C with general row/column strides
    // and reduces every storage combination to one column-major call to
    // bl1_zsyr2k_blas(), copying A and/or B to column-major temporaries
    // when their storage differs from what the call requires.
    //
    // NOTE(review): several "effective operation" comments in the storage
    // analysis below mention conj(), yet no branch performs a conjugation.
    // Treat the code as authoritative.

    // Remember the caller's uplo, dimension, pointers, and strides. The
    // working copies may be changed below, and the originals are required by
    // the bl1_zfree_* calls at the end.
    uplo1_t    uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    // Column-major temporaries for A and B. Each is allocated only when its
    // flag below is set; otherwise it aliases the matrix itself.
    dcomplex* a_copy;
    dcomplex* b_copy;
    // For each matrix, ldX is the stride treated as the leading dimension
    // and incX is the other stride.
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       syr2k_needs_copya = FALSE;
    int       syr2k_needs_copyb = FALSE;

    // Return early if possible.
    if ( bl1_zero_dim2( m, k ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bl1_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote matrix X viewed with column /
    // row storage, and ~uplo denotes the toggled uplo value.
    if ( bl1_is_col_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copyb = TRUE;
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r'
                // effective operation: uplo( C_c ) += A_c * B_c' + B_c * A_c'
                syr2k_needs_copya = TRUE;
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r'
                // effective operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_trans( trans );
            }
        }
    }
    else // if ( bl1_is_row_storage( c_rs, c_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_c' + B_c * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_c * B_r' + B_r * A_c'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copyb = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            if ( bl1_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_c' + B_c * A_r'
                // effective operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
                syr2k_needs_copya = TRUE;

                bl1_swap_ints( ldc, incc );

                bl1_toggle_uplo( uplo );
            }
            else // if ( bl1_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation:  uplo( C_r ) += A_r * B_r' + B_r * A_r'
                // effective operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
                bl1_swap_ints( ldc, incc );
                bl1_swap_ints( lda, inca );
                bl1_swap_ints( ldb, incb );

                bl1_toggle_uplo( uplo );
                bl1_toggle_trans( trans );
            }
        }
    }

    // By default the "copy" of A simply aliases A, so the BLAS call below
    // can use a_copy and lda_copy unconditionally.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;
    
    // There are two cases where we need to copy A to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copya )
    {
        int m_a;
        int n_a;

        // Determine the dimensions of A according to the value of trans. We
        // need this in order to set the leading dimension of the copy of A.
        bl1_set_dims_with_trans( trans, m, k, &m_a, &n_a );

        // We need a temporary matrix to hold a column-major copy of A.
        // NOTE(review): the buffer is requested as m-by-k while the copy is
        // laid out as m_a-by-n_a; presumably only the element count matters
        // to bl1_zallocm() -- confirm.
        a_copy    = bl1_zallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        // Copy the contents of A into A_copy.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }
    
    // By default the "copy" of B simply aliases B.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to copy B to column-major storage.
    // We handle those two cases here.
    if ( syr2k_needs_copyb )
    {
        int m_b;
        int n_b;

        // Determine the dimensions of B according to the value of trans. We
        // need this in order to set the leading dimension of the copy of B.
        bl1_set_dims_with_trans( trans, m, k, &m_b, &n_b );

        // We need a temporary matrix to hold a column-major copy of B.
        // NOTE(review): as with A above, the buffer is requested as m-by-k
        // while the copy is laid out as m_b-by-n_b -- confirm.
        b_copy    = bl1_zallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        // Copy the contents of B into B_copy.
        bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // Perform the update using the (possibly adjusted) uplo and trans and
    // the (possibly copied) A and B.
    bl1_zsyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_copy, lda_copy,
                     b_copy, ldb_copy,
                     beta,
                     c, ldc );

    // Release the column-major copies only if they were actually allocated;
    // otherwise a_copy/b_copy alias a/b.
    if ( syr2k_needs_copya )
        bl1_zfree( a_copy );

    if ( syr2k_needs_copyb )
        bl1_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );

    // C is the output, so the saved (untoggled) uplo and the saved
    // dimension are passed along with the original pointer and strides.
    bl1_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bl1_zsyr2k_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_is_conjtrans(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), BLIS1_TRANSPOSE, cblas_zsyr2k(), CblasColMajor, and F77_zsyr2k().

Referenced by bl1_zsyr2k().

{
    // Forwards a column-major zsyr2k request to the BLAS binding selected at
    // build time: CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, the
    // Fortran-77 interface otherwise. In both paths a conjugate-transpose
    // request is downgraded to a plain transpose before the BLIS1 constants
    // are mapped, because BLAS syr2k does not accept it.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    // syr2k has no conjugate-transpose mode; fall back to transpose.
    if ( bl1_is_conjtrans( trans ) )
    {
        trans = BLIS1_TRANSPOSE;
    }

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    cblas_zsyr2k( CblasColMajor, uplo_cblas, trans_cblas,
                  m, k,
                  alpha, a, lda,
                         b, ldb,
                  beta,  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    // syr2k has no conjugate-transpose mode; fall back to transpose.
    if ( bl1_is_conjtrans( trans ) )
    {
        trans = BLIS1_TRANSPOSE;
    }

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran-77 interface receives every argument by address,
    // including the integer dimensions.
    F77_zsyr2k( &uplo_f77, &trans_f77,
                &m, &k,
                alpha, a, &lda,
                       b, &ldb,
                beta,  c, &ldc );
#endif
}
void bl1_zsyrk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zero_dim2(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), and bl1_zsyrk_blas().

Referenced by FLA_Syrk_external().

{
    // Driver for the double-complex symmetric rank-k update of the uplo
    // triangle of C. A and C may have general row/column strides; each
    // storage combination is reduced to one column-major call to
    // bl1_zsyrk_blas() by adjusting the leading dimensions, uplo, and trans.

    // Caller-supplied values, kept for the bl1_zfree_* calls at the end
    // because the working copies may be modified below.
    uplo1_t   uplo_orig = uplo;
    int       m_orig    = m;
    dcomplex* a_orig    = a;
    dcomplex* c_orig    = c;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    // ldX is the stride treated as the leading dimension, incX the other.
    int       lda, inca;
    int       ldc, incc;

    // Nothing to do when either dimension is zero.
    if ( bl1_zero_dim2( m, k ) ) return;

    // Switch to temporary contiguous copies of A and C where required.
    bl1_zcreate_contigmt( trans, m, k,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigmr( uplo, m, m,
                          c_orig, c_rs_orig, c_cs_orig,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of both matrices.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    // The storage of C and the storage of A are handled independently, as
    // each adjustment touches a disjoint set of variables.

    // Row-stored C: view it as column-major and update the opposite
    // triangle instead.
    //   requested operation:  uplo( C_r ) += ...
    //   effective operation: ~uplo( C_c ) += ...
    if ( !bl1_is_col_storage( c_rs, c_cs ) )
    {
        bl1_swap_ints( ldc, incc );
        bl1_toggle_uplo( uplo );
    }

    // Row-stored A: view it as column-major and toggle the transposition.
    //   requested operation: ... += A_r * A_r^T
    //   effective operation: ... += A_c^T * A_c
    if ( !bl1_is_col_storage( a_rs, a_cs ) )
    {
        bl1_swap_ints( lda, inca );
        bl1_toggle_trans( trans );
    }

    bl1_zsyrk_blas( uplo, trans,
                    m, k,
                    alpha, a, lda,
                    beta,  c, ldc );

    // Release any temporary contiguous matrices; for C the result is copied
    // back to the caller's matrix using the original uplo and dimension.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_saved_contigmr( uplo_orig,
                              m_orig, m_orig,
                              c_orig, c_rs_orig, c_cs_orig,
                              &c,     &c_rs,     &c_cs );
}
void bl1_zsyrk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  beta,
dcomplex *  c,
int  ldc 
)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zsyrk(), CblasColMajor, and F77_zsyrk().

Referenced by bl1_zsyrk().

{
    // Forwards a column-major zsyrk request to the BLAS binding selected at
    // build time: CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, the
    // Fortran-77 interface otherwise. The BLIS1 uplo/trans constants are
    // first mapped to the representation the chosen binding expects.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );

    // Complex scalars alpha and beta are passed to CBLAS as pointers.
    cblas_zsyrk( CblasColMajor, uplo_cblas, trans_cblas,
                 m, k,
                 alpha, a, lda,
                 beta,  c, ldc );
#else
    char uplo_f77;
    char trans_f77;

    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );

    // The Fortran-77 interface receives every argument by address,
    // including the integer dimensions.
    F77_zsyrk( &uplo_f77, &trans_f77,
               &m, &k,
               alpha, a, &lda,
               beta,  c, &ldc );
#endif
}
void bl1_ztrmm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrmm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrmmsx(), and FLA_Trmm_external().

{
    // Driver for the double-complex triangular matrix multiply, which
    // overwrites B. It accepts A and B with general row/column strides and
    // reduces every storage combination to one column-major call to
    // bl1_ztrmm_blas(). The conjugate-no-transpose case is handled here by
    // conjugating A (in a copy) before the call.

    // Remember the caller's dimensions, pointers, and strides. The working
    // copies may be changed below, and the originals are required by the
    // bl1_zfree_* calls at the end.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    // The matrix actually handed to BLAS as A: either A itself or a
    // conjugated copy of it.
    dcomplex* a_conj;
    int       dim_a;
    // ldX is the stride treated as the leading dimension, incX the other.
    int       lda, inca;
    int       ldb, incb;
    int       lda_conj, inca_conj;
    int       a_was_copied;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    // dim_a is the dimension of the square matrix A, chosen from m and n
    // according to side by bl1_set_dim_with_side().
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Adjust the parameters based on the storage of each matrix.
    // In the comments below, X_c / X_r denote matrix X viewed with column /
    // row storage, tr() the requested transposition, and ~uplo the toggled
    // uplo value.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr( uplo( A_c ) ) * B_c
            // effective operation: B_c := tr( uplo( A_c ) ) * B_c
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr(  uplo( A_r ) )   * B_c
            // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_c ) ) * B_r
            // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
            bl1_swap_ints( ldb, incb );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_trans( trans );
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_r ) ) * B_r
            // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
            bl1_swap_ints( ldb, incb );
            bl1_swap_ints( lda, inca );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_uplo( uplo );
        }
    }

    // Initialize with values assuming that trans is not conjnotrans.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // We want to handle the conjnotrans case. The easiest way to do so is
    // by making a conjugated copy of A.
    // Note that trans is tested after the storage adjustments above, so the
    // test sees the possibly toggled value.
    if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
    {
        // This declaration shadows the function-scope dim_a. The value is
        // recomputed here because side, m, and n may have been changed by
        // the storage adjustments above.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );
        
        // A still refers to the caller's matrix, so the conjugation is done
        // into a freshly allocated column-major copy to leave A untouched.
        a_conj    = bl1_zallocm( dim_a, dim_a );
        lda_conj  = dim_a;
        inca_conj = 1;

        bl1_zcopymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      dim_a,
                      dim_a,
                      a,      inca,      lda,
                      a_conj, inca_conj, lda_conj );
    }
    else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
    {
        // Shadows the function-scope dim_a, as in the branch above.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );
        
        // A is already a temporary contiguous copy (a_conj aliases it), so
        // it is conjugated in place instead of being copied a second time.
        bl1_zconjmr( uplo,
                     dim_a,
                     dim_a,
                     a_conj, inca_conj, lda_conj );
    }

    bl1_ztrmm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_conj, lda_conj,
                    b,      ldb );

    // Only the separately allocated conjugated copy is released here. When A
    // was conjugated in place, a_conj aliases a, which is released by
    // bl1_zfree_contigm() below.
    if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
        bl1_zfree( a_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // B is the output, so the saved (unswapped) dimensions are passed along
    // with the original pointer and strides.
    bl1_zfree_saved_contigm( m_save,
                             n_save,
                             b_save, b_rs_save, b_cs_save,
                             &b,     &b_rs,     &b_cs );
}
void bl1_ztrmm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrmm(), CblasColMajor, and F77_ztrmm().

Referenced by bl1_ztrmm().

{
    // Forwards a column-major ztrmm request to the BLAS binding selected at
    // build time: CBLAS when BLIS1_ENABLE_CBLAS_INTERFACES is defined, the
    // Fortran-77 interface otherwise. The BLIS1 side/uplo/trans/diag
    // constants are first mapped to the representation the chosen binding
    // expects.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );
    bl1_param_map_to_netlib_diag( diag, &diag_cblas );

    // The complex scalar alpha is passed to CBLAS as a pointer.
    cblas_ztrmm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 alpha, a, lda,
                        b, ldb );
#else
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );
    bl1_param_map_to_netlib_diag( diag, &diag_f77 );

    // The Fortran-77 interface receives every argument by address,
    // including the integer dimensions.
    F77_ztrmm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha, a, &lda,
                      b, &ldb );
#endif
}
void bl1_ztrmmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrmm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trmmsx_external().

{
    // Computes C := beta * C + T, where T is what bl1_ztrmm() produces when
    // run on a private copy of B. B itself is never written by this routine.
    //
    // The *_orig values remember the caller's view of each operand: the
    // bl1_zcreate_contig* calls below may repoint a/b/c and their strides at
    // temporaries, and the matching free calls need the originals.
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    dcomplex* b_orig    = b;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    dcomplex* c_orig    = c;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex  unit      = bl1_z1();
    dcomplex* b_work;
    int       b_work_rs, b_work_cs;
    int       a_dim;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Swap in temporary contiguous copies of A, B and C where needed.
    // A is square; its order depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_zcreate_contigmr( uplo,
                          a_dim, a_dim,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c,     &c_rs,     &c_cs );

    // Scratch m-by-n matrix that receives the in-place bl1_ztrmm() result,
    // laid out with the same storage orientation as B.
    b_work = bl1_zallocm( m, n );

    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                 m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // b_work := result of the triangular multiply applied to b_work.
    bl1_ztrmm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_zscalm( BLIS1_NO_CONJUGATE,
                m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_zaxpymt( BLIS1_NO_TRANSPOSE,
                 m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    // The scratch matrix is no longer needed.
    bl1_zfree( b_work );

    // Release any contiguous temporaries. Only C is handed to the "saved"
    // variant, which also receives the dimensions so the result can be
    // copied back into the caller's matrix.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b,     &b_rs,     &b_cs );

    bl1_zfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c,     &c_rs,     &c_cs );
}
void bl1_ztrsm ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs 
)

References bl1_is_col_storage(), bl1_is_conjnotrans(), bl1_set_dim_with_side(), bl1_zallocm(), bl1_zconjmr(), bl1_zcopymrt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_ztrsm_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by bl1_ztrsmsx(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_piv_opz_var3(), and FLA_Trsm_external().

{
    // Front end for the double-complex triangular solve (the operation of
    // the BLAS ztrsm routine this forwards to) on operands with arbitrary
    // row/column strides. It normalizes A and B into something a
    // column-major BLAS call can consume, adjusts side/uplo/trans/m/n to
    // compensate for row storage, and then calls bl1_ztrsm_blas(). The
    // result is written to B.

    // Saved copies of the caller's arguments. m and n may be swapped below,
    // and a/b (with their strides) may be redirected to temporaries by the
    // bl1_zcreate_contig* calls; the free calls at the end need the
    // originals.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    dcomplex* a_conj;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       lda_conj, inca_conj;
    int       a_was_copied;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    // A is square with order dim_a, chosen from m or n according to side.
    bl1_set_dim_with_side( side, m, n, &dim_a );
    bl1_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );

    // Figure out whether A was copied to contiguous memory. This is used to
    // prevent redundant copying.
    a_was_copied = ( a != a_save );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    // Adjust the parameters based on the storage of each matrix.
    // NOTE(review): the "requested/effective operation" sketches below are
    // written as products; for this solve routine they presumably describe
    // how the operands are reinterpreted rather than the literal arithmetic
    // -- confirm against the library documentation.
    // bl1_swap_ints and bl1_toggle_* are invoked on plain lvalues and their
    // results are consumed afterwards, so they modify their arguments in
    // place; statement order in this section matters.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr( uplo( A_c ) ) * B_c
            // effective operation: B_c := tr( uplo( A_c ) ) * B_c
            // Both operands already match what BLAS expects; nothing to do.
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_c := tr(  uplo( A_r ) )   * B_c
            // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c
            // Row-stored A is read as its column-stored transpose, so the
            // stored triangle and the transposition flag both flip.
            bl1_swap_ints( lda, inca );

            bl1_toggle_uplo( uplo );
            bl1_toggle_trans( trans );
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        if ( bl1_is_col_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_c ) ) * B_r
            // effective operation: B_c := B_c * tr( uplo( A_c ) )^T
            // Row-stored B is read as its column-stored transpose: its
            // dimensions swap and A moves to the other side, transposed.
            bl1_swap_ints( ldb, incb );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_trans( trans );
        }
        else // if ( bl1_is_row_storage( a_rs, a_cs ) )
        {
            // requested operation: B_r := tr( uplo( A_r ) ) * B_r
            // effective operation: B_c := B_c * tr( ~uplo( A_c ) )
            // Both operands are read as column-stored transposes; trans is
            // left alone and only side and uplo flip.
            bl1_swap_ints( ldb, incb );
            bl1_swap_ints( lda, inca );

            bl1_swap_ints( m, n );

            bl1_toggle_side( side );
            bl1_toggle_uplo( uplo );
        }
    }

    // Initialize with values assuming that trans is not conjnotrans.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    // We want to handle the conjnotrans case. The easiest way to do so is
    // by making a conjugated copy of A.
    if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
    {
        // This inner dim_a shadows the function-scope dim_a declared above.
        // It is recomputed from the (possibly toggled/swapped) side, m, n.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );
        
        // A still refers to the caller's memory, so conjugate into a fresh
        // dim_a-by-dim_a buffer with unit row stride and column stride
        // dim_a instead of modifying the caller's data.
        a_conj    = bl1_zallocm( dim_a, dim_a );
        lda_conj  = dim_a;
        inca_conj = 1;

        bl1_zcopymrt( uplo,
                      BLIS1_CONJ_NO_TRANSPOSE,
                      dim_a,
                      dim_a,
                      a,      inca,      lda,
                      a_conj, inca_conj, lda_conj );
    }
    else if ( bl1_is_conjnotrans( trans ) && a_was_copied )
    {
        // This inner dim_a also shadows the function-scope dim_a.
        int dim_a;

        bl1_set_dim_with_side( side, m, n, &dim_a );
        
        // a_conj aliases the temporary contiguous copy of A here, so it is
        // conjugated in place rather than copied a second time.
        // NOTE(review): presumably safe because that temporary is discarded
        // by bl1_zfree_contigm() below without being copied back -- confirm.
        bl1_zconjmr( uplo,
                     dim_a,
                     dim_a,
                     a_conj, inca_conj, lda_conj );
    }

    // Only leading dimensions are forwarded; inca_conj/incb are not passed.
    // NOTE(review): trans may still be conj-no-transpose at this point; the
    // conjugation has already been applied to a_conj, so the parameter
    // mapping inside bl1_ztrsm_blas() presumably treats it as plain
    // no-transpose -- confirm.
    bl1_ztrsm_blas( side,
                    uplo,
                    trans,
                    diag,
                    m,
                    n,
                    alpha,
                    a_conj, lda_conj,
                    b,      ldb );

    // Release the conjugated buffer only when it was separately allocated
    // above (same condition as the allocation branch).
    if ( bl1_is_conjnotrans( trans ) && !a_was_copied )
        bl1_zfree( a_conj );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );

    // B uses the "saved" variant with the original (unswapped) m and n.
    bl1_zfree_saved_contigm( m_save,
                             n_save,
                             b_save, b_rs_save, b_cs_save,
                             &b,     &b_rs,     &b_cs );
}
void bl1_ztrsm_blas ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  lda,
dcomplex *b,
int  ldb 
)

References bl1_param_map_to_netlib_diag(), bl1_param_map_to_netlib_side(), bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_ztrsm(), CblasColMajor, and F77_ztrsm().

Referenced by bl1_ztrsm().

{
    // Hand the triangular solve to whichever BLAS binding this build was
    // configured with. The operands are always described to BLAS as
    // column-major; lda/ldb are the leading dimensions of a and b.
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
    // CBLAS binding: options travel as CBLAS enums, scalars by value.
    enum CBLAS_SIDE      side_cblas;
    enum CBLAS_UPLO      uplo_cblas;
    enum CBLAS_TRANSPOSE trans_cblas;
    enum CBLAS_DIAG      diag_cblas;

    // Translate each BLIS1 option into its CBLAS counterpart.
    bl1_param_map_to_netlib_side( side, &side_cblas );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_cblas );
    bl1_param_map_to_netlib_trans( trans, &trans_cblas );
    bl1_param_map_to_netlib_diag( diag, &diag_cblas );

    cblas_ztrsm( CblasColMajor,
                 side_cblas, uplo_cblas, trans_cblas, diag_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 b, ldb );
#else
    // Fortran-77 binding: options travel as single characters and every
    // argument, including the integer dimensions, is passed by address.
    char side_f77;
    char uplo_f77;
    char trans_f77;
    char diag_f77;

    // Translate each BLIS1 option into its netlib character code.
    bl1_param_map_to_netlib_side( side, &side_f77 );
    bl1_param_map_to_netlib_uplo( uplo, &uplo_f77 );
    bl1_param_map_to_netlib_trans( trans, &trans_f77 );
    bl1_param_map_to_netlib_diag( diag, &diag_f77 );

    F77_ztrsm( &side_f77, &uplo_f77, &trans_f77, &diag_f77,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bl1_ztrsmsx ( side1_t  side,
uplo1_t  uplo,
trans1_t  trans,
diag1_t  diag,
int  m,
int  n,
dcomplex *alpha,
dcomplex *a,
int  a_rs,
int  a_cs,
dcomplex *b,
int  b_rs,
int  b_cs,
dcomplex *beta,
dcomplex *c,
int  c_rs,
int  c_cs 
int  c_rs,
int  c_cs 
)

References bl1_is_col_storage(), bl1_set_dim_with_side(), bl1_z1(), bl1_zallocm(), bl1_zaxpymt(), bl1_zcopymt(), bl1_zcreate_contigm(), bl1_zcreate_contigmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigm(), bl1_zscalm(), bl1_ztrsm(), BLIS1_NO_CONJUGATE, and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Trsmsx_external().

{
    // Computes C := beta * C + T, where T is what bl1_ztrsm() produces when
    // run on a private copy of B. B itself is never written by this routine.
    //
    // The *_orig values remember the caller's view of each operand: the
    // bl1_zcreate_contig* calls below may repoint a/b/c and their strides at
    // temporaries, and the matching free calls need the originals.
    int       m_orig    = m;
    int       n_orig    = n;
    dcomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    dcomplex* b_orig    = b;
    int       b_rs_orig = b_rs;
    int       b_cs_orig = b_cs;
    dcomplex* c_orig    = c;
    int       c_rs_orig = c_rs;
    int       c_cs_orig = c_cs;
    dcomplex  unit      = bl1_z1();
    dcomplex* b_work;
    int       b_work_rs, b_work_cs;
    int       a_dim;
    int       b_is_col;

    // Nothing to do for an empty problem.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Swap in temporary contiguous copies of A, B and C where needed.
    // A is square; its order depends on which side it is applied from.
    bl1_set_dim_with_side( side, m, n, &a_dim );
    bl1_zcreate_contigmr( uplo,
                          a_dim, a_dim,
                          a_orig, a_rs_orig, a_cs_orig,
                          &a,     &a_rs,     &a_cs );

    bl1_zcreate_contigm( m, n,
                         b_orig, b_rs_orig, b_cs_orig,
                         &b,     &b_rs,     &b_cs );

    bl1_zcreate_contigm( m, n,
                         c_orig, c_rs_orig, c_cs_orig,
                         &c,     &c_rs,     &c_cs );

    // Scratch m-by-n matrix that receives the in-place bl1_ztrsm() result,
    // laid out with the same storage orientation as B.
    b_work = bl1_zallocm( m, n );

    b_is_col  = bl1_is_col_storage( b_rs, b_cs );
    b_work_rs = b_is_col ? 1 : n;
    b_work_cs = b_is_col ? m : 1;

    // b_work := B
    bl1_zcopymt( BLIS1_NO_TRANSPOSE,
                 m, n,
                 b,      b_rs,      b_cs,
                 b_work, b_work_rs, b_work_cs );

    // b_work := result of the triangular solve applied to b_work.
    bl1_ztrsm( side, uplo, trans, diag,
               m, n,
               alpha,
               a,      a_rs,      a_cs,
               b_work, b_work_rs, b_work_cs );

    // C := beta * C
    bl1_zscalm( BLIS1_NO_CONJUGATE,
                m, n,
                beta,
                c, c_rs, c_cs );

    // C := C + b_work
    bl1_zaxpymt( BLIS1_NO_TRANSPOSE,
                 m, n,
                 &unit,
                 b_work, b_work_rs, b_work_cs,
                 c,      c_rs,      c_cs );

    // The scratch matrix is no longer needed.
    bl1_zfree( b_work );

    // Release any contiguous temporaries. Only C is handed to the "saved"
    // variant, which also receives the dimensions so the result can be
    // copied back into the caller's matrix.
    bl1_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );

    bl1_zfree_contigm( b_orig, b_rs_orig, b_cs_orig,
                       &b,     &b_rs,     &b_cs );

    bl1_zfree_saved_contigm( m_orig, n_orig,
                             c_orig, c_rs_orig, c_cs_orig,
                             &c,     &c_rs,     &c_cs );
}