libflame  12600
Functions
blis_prototypes_fused1.h File Reference

(r12600)

Go to the source code of this file.

Functions

void bl1_saxmyv2 (conj1_t conjx, int n, float *alpha, float *beta, float *x, int inc_x, float *y, int inc_y, float *z, int inc_z)
void bl1_daxmyv2 (conj1_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z)
void bl1_caxmyv2 (conj1_t conjx, int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *z, int inc_z)
void bl1_zaxmyv2 (conj1_t conjx, int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
void bl1_saxpyv2b (int n, float *beta1, float *beta2, float *a1, int inc_a1, float *a2, int inc_a2, float *w, int inc_w)
void bl1_daxpyv2b (int n, double *beta1, double *beta2, double *a1, int inc_a1, double *a2, int inc_a2, double *w, int inc_w)
void bl1_caxpyv2b (int n, scomplex *beta1, scomplex *beta2, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *w, int inc_w)
void bl1_zaxpyv2b (int n, dcomplex *beta1, dcomplex *beta2, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *w, int inc_w)
void bl1_saxpyv3b (int n, float *beta1, float *beta2, float *beta3, float *a1, int inc_a1, float *a2, int inc_a2, float *a3, int inc_a3, float *w, int inc_w)
void bl1_daxpyv3b (int n, double *beta1, double *beta2, double *beta3, double *a1, int inc_a1, double *a2, int inc_a2, double *a3, int inc_a3, double *w, int inc_w)
void bl1_caxpyv3b (int n, scomplex *beta1, scomplex *beta2, scomplex *beta3, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *a3, int inc_a3, scomplex *w, int inc_w)
void bl1_zaxpyv3b (int n, dcomplex *beta1, dcomplex *beta2, dcomplex *beta3, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *a3, int inc_a3, dcomplex *w, int inc_w)
void bl1_saxpyv2bdotaxpy (int n, float *beta, float *u, int inc_u, float *gamma, float *z, int inc_z, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w)
void bl1_daxpyv2bdotaxpy (int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
void bl1_caxpyv2bdotaxpy (int n, scomplex *beta, scomplex *u, int inc_u, scomplex *gamma, scomplex *z, int inc_z, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w)
void bl1_zaxpyv2bdotaxpy (int n, dcomplex *beta, dcomplex *u, int inc_u, dcomplex *gamma, dcomplex *z, int inc_z, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
void bl1_sdotsv2 (conj1_t conjxy, int n, float *x, int inc_x, float *y, int inc_y, float *z, int inc_z, float *beta, float *rho_xz, float *rho_yz)
void bl1_ddotsv2 (conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
void bl1_cdotsv2 (conj1_t conjxy, int n, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *z, int inc_z, scomplex *beta, scomplex *rho_xz, scomplex *rho_yz)
void bl1_zdotsv2 (conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
void bl1_sdotsv3 (conj1_t conjxyw, int n, float *x, int inc_x, float *y, int inc_y, float *w, int inc_w, float *z, int inc_z, float *beta, float *rho_xz, float *rho_yz, float *rho_wz)
void bl1_ddotsv3 (conj1_t conjxyw, int n, double *x, int inc_x, double *y, int inc_y, double *w, int inc_w, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz, double *rho_wz)
void bl1_cdotsv3 (conj1_t conjxyw, int n, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *w, int inc_w, scomplex *z, int inc_z, scomplex *beta, scomplex *rho_xz, scomplex *rho_yz, scomplex *rho_wz)
void bl1_zdotsv3 (conj1_t conjxyw, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *w, int inc_w, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz, dcomplex *rho_wz)
void bl1_sdotaxpy (int n, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w)
void bl1_ddotaxpy (int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
void bl1_cdotaxpy (int n, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w)
void bl1_zdotaxpy (int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
void bl1_sdotaxmyv2 (int n, float *alpha, float *beta, float *x, int inc_x, float *u, int inc_u, float *rho, float *y, int inc_y, float *z, int inc_z)
void bl1_ddotaxmyv2 (int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
void bl1_cdotaxmyv2 (int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *u, int inc_u, scomplex *rho, scomplex *y, int inc_y, scomplex *z, int inc_z)
void bl1_zdotaxmyv2 (int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *u, int inc_u, dcomplex *rho, dcomplex *y, int inc_y, dcomplex *z, int inc_z)
void bl1_sdotv2axpyv2b (int n, float *a1, int inc_a1, float *a2, int inc_a2, float *x, int inc_x, float *kappa1, float *kappa2, float *rho1, float *rho2, float *w, int inc_w)
void bl1_ddotv2axpyv2b (int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
void bl1_cdotv2axpyv2b (int n, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *x, int inc_x, scomplex *kappa1, scomplex *kappa2, scomplex *rho1, scomplex *rho2, scomplex *w, int inc_w)
void bl1_zdotv2axpyv2b (int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w)
void bl1_zaxpyv2bdots (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y, dcomplex *u, int inc_u, dcomplex *beta, dcomplex *rho)

Function Documentation

void bl1_caxmyv2 ( conj1_t  conjx,
int  n,
scomplex *  alpha,
scomplex *  beta,
scomplex *  x,
int  inc_x,
scomplex *  y,
int  inc_y,
scomplex *  z,
int  inc_z 
)
void bl1_caxpyv2b ( int  n,
scomplex *  beta1,
scomplex *  beta2,
scomplex *  a1,
int  inc_a1,
scomplex *  a2,
int  inc_a2,
scomplex *  w,
int  inc_w 
)
void bl1_caxpyv2bdotaxpy ( int  n,
scomplex *  beta,
scomplex *  u,
int  inc_u,
scomplex *  gamma,
scomplex *  z,
int  inc_z,
scomplex *  a,
int  inc_a,
scomplex *  x,
int  inc_x,
scomplex *  kappa,
scomplex *  rho,
scomplex *  w,
int  inc_w 
)
void bl1_caxpyv3b ( int  n,
scomplex *  beta1,
scomplex *  beta2,
scomplex *  beta3,
scomplex *  a1,
int  inc_a1,
scomplex *  a2,
int  inc_a2,
scomplex *  a3,
int  inc_a3,
scomplex *  w,
int  inc_w 
)
void bl1_cdotaxmyv2 ( int  n,
scomplex *  alpha,
scomplex *  beta,
scomplex *  x,
int  inc_x,
scomplex *  u,
int  inc_u,
scomplex *  rho,
scomplex *  y,
int  inc_y,
scomplex *  z,
int  inc_z 
)
void bl1_cdotaxpy ( int  n,
scomplex *  a,
int  inc_a,
scomplex *  x,
int  inc_x,
scomplex *  kappa,
scomplex *  rho,
scomplex *  w,
int  inc_w 
)
void bl1_cdotsv2 ( conj1_t  conjxy,
int  n,
scomplex *  x,
int  inc_x,
scomplex *  y,
int  inc_y,
scomplex *  z,
int  inc_z,
scomplex *  beta,
scomplex *  rho_xz,
scomplex *  rho_yz 
)
void bl1_cdotsv3 ( conj1_t  conjxyw,
int  n,
scomplex *  x,
int  inc_x,
scomplex *  y,
int  inc_y,
scomplex *  w,
int  inc_w,
scomplex *  z,
int  inc_z,
scomplex *  beta,
scomplex *  rho_xz,
scomplex *  rho_yz,
scomplex *  rho_wz 
)
void bl1_cdotv2axpyv2b ( int  n,
scomplex *  a1,
int  inc_a1,
scomplex *  a2,
int  inc_a2,
scomplex *  x,
int  inc_x,
scomplex *  kappa1,
scomplex *  kappa2,
scomplex *  rho1,
scomplex *  rho2,
scomplex *  w,
int  inc_w 
)
/*
 * bl1_daxmyv2: fused double-precision update of two vectors by one shared
 * vector:
 *
 *     y := y - alpha * x
 *     z := z - beta  * x
 *
 * Parameters:
 *   conjx               - not referenced by this implementation.
 *   n                   - number of elements; n <= 0 leaves y and z untouched.
 *   alpha, beta         - pointers to the two scalars.
 *   x                   - input vector (only read).
 *   y, z                - vectors updated in place.
 *   inc_x, inc_y, inc_z - element strides; any value other than 1 calls
 *                         bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads/stores. When z is not
 * 16-byte aligned, one leading element is handled in scalar code first;
 * bl1_abort() is called if x or y is 16-byte aligned in that case, because
 * the three vectors would then not share the same alignment after the peel.
 * (Presumably every pointer is at least 8-byte aligned -- confirm with
 * callers.)
 */
void bl1_daxmyv2 ( conj1_t  conjx,
int  n,
double *  alpha,
double *  beta,
double *  x,
int  inc_x,
double *  y,
int  inc_y,
double *  z,
int  inc_z 
)

References bl1_abort(), and v2df_t::v.

Referenced by FLA_Fused_UYx_ZVx_opd_var1().

{
    double*   restrict chi1;
    double*   restrict psi1;
    double*   restrict zeta1;
    int       i;

    int       n_pre;
    int       n_run;
    int       n_left;

    v2df_t    a1v, b1v;
    v2df_t    x1v, y1v, z1v;
    v2df_t    x2v, y2v, z2v;

    // Only contiguous vectors are supported.
    if ( inc_x != 1 ||
         inc_y != 1 ||
         inc_z != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) z % 16 != 0 )
    {
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) y % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // The vector loop consumes four elements per iteration; the remainder
    // is finished in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    chi1  = x;
    psi1  = y;
    zeta1 = z;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   alpha_c = *alpha;
        double   beta_c  = *beta;
        double   chi1_c  = *chi1;

        *psi1  -= alpha_c * chi1_c;
        *zeta1 -= beta_c  * chi1_c;

        chi1  += inc_x;
        psi1  += inc_y;
        zeta1 += inc_z;
    }

    // Broadcast each scalar into both lanes of a vector register.
    a1v.v = _mm_loaddup_pd( ( double* )alpha );
    b1v.v = _mm_loaddup_pd( ( double* )beta );

    for ( i = 0; i < n_run; ++i )
    {
        x1v.v = _mm_load_pd( ( double* )chi1 );
        y1v.v = _mm_load_pd( ( double* )psi1 );
        z1v.v = _mm_load_pd( ( double* )zeta1 );

        x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
        y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
        z2v.v = _mm_load_pd( ( double* )(zeta1 + 2) );

        y1v.v = y1v.v - a1v.v * x1v.v;
        z1v.v = z1v.v - b1v.v * x1v.v;

        _mm_store_pd( ( double* )psi1,  y1v.v );
        _mm_store_pd( ( double* )zeta1, z1v.v );

        y2v.v = y2v.v - a1v.v * x2v.v;
        z2v.v = z2v.v - b1v.v * x2v.v;

        _mm_store_pd( ( double* )(psi1 + 2),  y2v.v );
        _mm_store_pd( ( double* )(zeta1 + 2), z2v.v );

        chi1  += 4;
        psi1  += 4;
        zeta1 += 4;
    }

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        double   alpha_c = *alpha;
        double   beta_c  = *beta;

        for( i = 0; i < n_left; ++i )
        {
            double   chi1_c = *chi1;

            *psi1  -= alpha_c * chi1_c;
            *zeta1 -= beta_c  * chi1_c;

            chi1  += inc_x;
            psi1  += inc_y;
            zeta1 += inc_z;
        }
    }
}
/*
 * bl1_daxpyv2b: fused double-precision accumulation of two scaled vectors
 * into a third:
 *
 *     w := w + beta1 * a1 + beta2 * a2
 *
 * Parameters:
 *   n                     - number of elements; n <= 0 leaves w untouched.
 *   beta1, beta2          - pointers to the scalars applied to a1 and a2.
 *   a1, a2                - input vectors (only read).
 *   w                     - vector updated in place.
 *   inc_a1, inc_a2, inc_w - element strides; any value other than 1 calls
 *                           bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads/stores. When w is not
 * 16-byte aligned, one leading element is handled in scalar code first;
 * bl1_abort() is called if a1 or a2 is 16-byte aligned in that case.
 * (Presumably every pointer is at least 8-byte aligned -- confirm with
 * callers.)
 */
void bl1_daxpyv2b ( int  n,
double *  beta1,
double *  beta2,
double *  a1,
int  inc_a1,
double *  a2,
int  inc_a2,
double *  w,
int  inc_w 
)

References bl1_abort(), and v2df_t::v.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

{
    double*   restrict chi1;
    double*   restrict chi2;
    double*   restrict psi1;
    int       i;

    int       n_pre;
    int       n_run;
    int       n_left;

    v2df_t    b1v, b2v;
    v2df_t    x11v, x12v;
    v2df_t    x21v, x22v;
    v2df_t    y1v;
    v2df_t    y2v;

    // Only contiguous vectors are supported.
    if ( inc_a1 != 1 ||
         inc_a2 != 1 ||
         inc_w  != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) w % 16 != 0 )
    {
        if ( ( unsigned long ) a1 % 16 == 0 ||
             ( unsigned long ) a2 % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // Four elements per vector iteration, remainder in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    chi1 = a1;
    chi2 = a2;
    psi1 = w;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   beta1_c = *beta1;
        double   beta2_c = *beta2;
        double   chi11_c = *chi1;
        double   chi12_c = *chi2;
        double   temp1;

        // psi1 = psi1 + beta1 * chi11 + beta2 * chi12;
        temp1 = beta1_c * chi11_c + beta2_c * chi12_c;
        *psi1 = *psi1 + temp1;

        chi1 += inc_a1;
        chi2 += inc_a2;
        psi1 += inc_w;
    }

    // Broadcast each scalar into both lanes of a vector register.
    b1v.v = _mm_loaddup_pd( ( double* )beta1 );
    b2v.v = _mm_loaddup_pd( ( double* )beta2 );

    for ( i = 0; i < n_run; ++i )
    {
        x11v.v = _mm_load_pd( ( double* )chi1 );
        x12v.v = _mm_load_pd( ( double* )chi2 );
        y1v.v  = _mm_load_pd( ( double* )psi1 );

        x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
        x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
        y2v.v  = _mm_load_pd( ( double* )(psi1 + 2) );

        y1v.v += b1v.v * x11v.v + b2v.v * x12v.v;
        y2v.v += b1v.v * x21v.v + b2v.v * x22v.v;

        _mm_store_pd( ( double* )psi1, y1v.v );
        _mm_store_pd( ( double* )(psi1 + 2), y2v.v );

        chi1 += 4;
        chi2 += 4;
        psi1 += 4;
    }

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        double   beta1_c = *beta1;
        double   beta2_c = *beta2;

        for ( i = 0; i < n_left; ++i )
        {
            double   chi11_c = *chi1;
            double   chi12_c = *chi2;
            double   psi1_c  = *psi1;
            double   temp1;

            temp1 = beta1_c * chi11_c + beta2_c * chi12_c;
            *psi1 = psi1_c + temp1;

            chi1 += inc_a1;
            chi2 += inc_a2;
            psi1 += inc_w;
        }
    }
}
/*
 * bl1_daxpyv2bdotaxpy: fused double-precision kernel that performs, element
 * by element and in this order:
 *
 *     a   := a + beta * u + gamma * z
 *     rho := sum over i of a[i] * x[i]      (using the updated a)
 *     w   := w + kappa * a                  (using the updated a)
 *
 * Parameters:
 *   n                  - number of elements; n <= 0 touches no vector and
 *                        stores 0.0 in *rho.
 *   beta, gamma, kappa - pointers to the scalars.
 *   u, z, x            - input vectors (only read).
 *   a, w               - vectors updated in place.
 *   rho                - output; overwritten with the dot product.
 *   inc_u, inc_z, inc_a, inc_x, inc_w - element strides.
 *
 * Vector path (all strides equal to 1): the main loop uses aligned SSE
 * loads/stores. When a is not 16-byte aligned, one leading element is handled
 * in scalar code first; bl1_abort() is called if u, z, x or w is 16-byte
 * aligned in that case. (Presumably every pointer is at least 8-byte
 * aligned -- confirm with callers.)
 *
 * Scalar path (any stride different from 1): the vector loop steps through
 * memory contiguously and therefore cannot be used, so all n elements are
 * processed by the stride-aware scalar loop and no alignment requirement
 * applies.
 */
void bl1_daxpyv2bdotaxpy ( int  n,
double *  beta,
double *  u,
int  inc_u,
double *  gamma,
double *  z,
int  inc_z,
double *  a,
int  inc_a,
double *  x,
int  inc_x,
double *  kappa,
double *  rho,
double *  w,
int  inc_w 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), and FLA_Fused_Her2_Ax_l_opd_var1().

{
    double*   restrict upsilon1;
    double*   restrict zeta1;
    double*   restrict alpha1;
    double*   restrict chi1;
    double*   restrict omega1;
    double             rho_c;
    int                i;
    v2df_t             b1v, g1v, k1v;
    v2df_t             rhov;
    v2df_t             u1v, z1v, a1v;
    v2df_t             u2v, z2v, a2v;
    v2df_t             x1v, w1v;
    v2df_t             x2v, w2v;

    int       unit_stride;
    int       n_pre;
    int       n_run;
    int       n_left;

    // The vector loop below advances every pointer by four contiguous
    // elements, so it is only valid when all strides are 1.
    unit_stride = ( inc_u == 1 &&
                    inc_z == 1 &&
                    inc_a == 1 &&
                    inc_x == 1 &&
                    inc_w == 1 );

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( unit_stride && n > 0 && ( unsigned long ) a % 16 != 0 )
    {
        if ( ( unsigned long ) u % 16 == 0 ||
             ( unsigned long ) z % 16 == 0 ||
             ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) w % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    if ( unit_stride )
    {
        // Four elements per vector iteration, remainder in scalar code.
        n_run   = ( n - n_pre ) / 4;
        n_left  = ( n - n_pre ) % 4;
    }
    else
    {
        // Strided input: skip the vector loop and let the scalar loop,
        // which honors the strides, process every element.
        n_run   = 0;
        n_left  = n;
    }

    upsilon1 = u;
    zeta1    = z;
    alpha1   = a;
    chi1     = x;
    omega1   = w;

    rho_c   = 0.0;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   beta_c     = *beta;
        double   gamma_c    = *gamma;
        double   kappa_c    = *kappa;

        double   upsilon1_c = *upsilon1;
        double   zeta1_c    = *zeta1;
        double   alpha1_c   = *alpha1;
        double   chi1_c     = *chi1;
        double   omega1_c   = *omega1;

        alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
        rho_c += alpha1_c * chi1_c;
        omega1_c += kappa_c * alpha1_c;

        *alpha1 = alpha1_c;
        *omega1 = omega1_c;

        upsilon1 += inc_u;
        zeta1    += inc_z;
        alpha1   += inc_a;
        chi1     += inc_x;
        omega1   += inc_w;
    }

    // Broadcast each scalar into both lanes of a vector register.
    b1v.v = _mm_loaddup_pd( ( double* )beta );
    g1v.v = _mm_loaddup_pd( ( double* )gamma );
    k1v.v = _mm_loaddup_pd( ( double* )kappa );

    // Two-lane partial sums of the dot product.
    rhov.v = _mm_setzero_pd();

    for ( i = 0; i < n_run; ++i )
    {
        u1v.v = _mm_load_pd( ( double* )upsilon1 );
        z1v.v = _mm_load_pd( ( double* )zeta1 );
        a1v.v = _mm_load_pd( ( double* )alpha1 );

        a1v.v += b1v.v * u1v.v + g1v.v * z1v.v;

        u2v.v = _mm_load_pd( ( double* )(upsilon1 + 2) );
        z2v.v = _mm_load_pd( ( double* )(zeta1 + 2) );
        a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );

        a2v.v += b1v.v * u2v.v + g1v.v * z2v.v;

        x1v.v = _mm_load_pd( ( double* )chi1 );
        x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );

        w1v.v = _mm_load_pd( ( double* )omega1 );
        w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );

        rhov.v += a1v.v * x1v.v;
        rhov.v += a2v.v * x2v.v;

        w1v.v += k1v.v * a1v.v;
        w2v.v += k1v.v * a2v.v;

        _mm_store_pd( ( double* )alpha1, a1v.v );
        _mm_store_pd( ( double* )(alpha1 + 2), a2v.v );

        _mm_store_pd( ( double* )omega1, w1v.v );
        _mm_store_pd( ( double* )(omega1 + 2), w2v.v );

        upsilon1 += 4;
        zeta1    += 4;
        alpha1   += 4;
        chi1     += 4;
        omega1   += 4;
    }

    // Fold the two vector lanes into the scalar accumulator.
    rho_c += rhov.d[0] + rhov.d[1];

    // Scalar loop: trailing elements on the vector path, or every element
    // on the strided path.
    if ( n_left > 0 )
    {
        double beta_c  = *beta;
        double gamma_c = *gamma;
        double kappa_c = *kappa;

        for ( i = 0; i < n_left; ++i )
        {
            double   upsilon1_c = *upsilon1;
            double   zeta1_c    = *zeta1;
            double   alpha1_c   = *alpha1;
            double   chi1_c     = *chi1;
            double   omega1_c   = *omega1;

            alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
            rho_c += alpha1_c * chi1_c;
            omega1_c += kappa_c * alpha1_c;

            *alpha1 = alpha1_c;
            *omega1 = omega1_c;

            upsilon1 += inc_u;
            zeta1    += inc_z;
            alpha1   += inc_a;
            chi1     += inc_x;
            omega1   += inc_w;
        }
    }

    *rho = rho_c;
}
/*
 * bl1_daxpyv3b: fused double-precision accumulation of three scaled vectors
 * into a fourth:
 *
 *     w := w + beta1 * a1 + beta2 * a2 + beta3 * a3
 *
 * Parameters:
 *   n                    - number of elements; n <= 0 leaves w untouched.
 *   beta1, beta2, beta3  - pointers to the scalars applied to a1, a2, a3.
 *   a1, a2, a3           - input vectors (only read).
 *   w                    - vector updated in place.
 *   inc_a1, inc_a2, inc_a3, inc_w - element strides; any value other than 1
 *                          calls bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads/stores. When w is not
 * 16-byte aligned, one leading element is handled in scalar code first;
 * bl1_abort() is called if a1, a2 or a3 is 16-byte aligned in that case.
 * (Presumably every pointer is at least 8-byte aligned -- confirm with
 * callers.)
 */
void bl1_daxpyv3b ( int  n,
double *  beta1,
double *  beta2,
double *  beta3,
double *  a1,
int  inc_a1,
double *  a2,
int  inc_a2,
double *  a3,
int  inc_a3,
double *  w,
int  inc_w 
)

References bl1_abort(), and v2df_t::v.

{
    double*   restrict chi1;
    double*   restrict chi2;
    double*   restrict chi3;
    double*   restrict psi1;
    int       i;

    int       n_pre;
    int       n_run;
    int       n_left;

    v2df_t    b1v, b2v, b3v;
    v2df_t    x11v, x12v, x13v;
    v2df_t    x21v, x22v, x23v;
    v2df_t    y1v;
    v2df_t    y2v;

    // Only contiguous vectors are supported.
    if ( inc_a1 != 1 ||
         inc_a2 != 1 ||
         inc_a3 != 1 ||
         inc_w  != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) w % 16 != 0 )
    {
        if ( ( unsigned long ) a1 % 16 == 0 ||
             ( unsigned long ) a2 % 16 == 0 ||
             ( unsigned long ) a3 % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // Four elements per vector iteration, remainder in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    chi1 = a1;
    chi2 = a2;
    chi3 = a3;
    psi1 = w;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   beta1_c = *beta1;
        double   beta2_c = *beta2;
        double   beta3_c = *beta3;
        double   chi11_c = *chi1;
        double   chi12_c = *chi2;
        double   chi13_c = *chi3;

        *psi1 += beta1_c * chi11_c + beta2_c * chi12_c + beta3_c * chi13_c;

        chi1 += inc_a1;
        chi2 += inc_a2;
        chi3 += inc_a3;
        psi1 += inc_w;
    }

    // Broadcast each scalar into both lanes of a vector register.
    b1v.v = _mm_loaddup_pd( ( double* )beta1 );
    b2v.v = _mm_loaddup_pd( ( double* )beta2 );
    b3v.v = _mm_loaddup_pd( ( double* )beta3 );

    for ( i = 0; i < n_run; ++i )
    {
        x11v.v = _mm_load_pd( ( double* )chi1 );
        x12v.v = _mm_load_pd( ( double* )chi2 );
        x13v.v = _mm_load_pd( ( double* )chi3 );
        y1v.v  = _mm_load_pd( ( double* )psi1 );

        y1v.v += b1v.v * x11v.v + b2v.v * x12v.v + b3v.v * x13v.v;

        _mm_store_pd( ( double* )psi1, y1v.v );

        x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
        x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
        x23v.v = _mm_load_pd( ( double* )(chi3 + 2) );
        y2v.v  = _mm_load_pd( ( double* )(psi1 + 2) );

        y2v.v += b1v.v * x21v.v + b2v.v * x22v.v + b3v.v * x23v.v;

        _mm_store_pd( ( double* )(psi1 + 2), y2v.v );

        chi1 += 4;
        chi2 += 4;
        chi3 += 4;
        psi1 += 4;
    }

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        double   beta1_c = *beta1;
        double   beta2_c = *beta2;
        double   beta3_c = *beta3;

        for ( i = 0; i < n_left; ++i )
        {
            double   chi11_c = *chi1;
            double   chi12_c = *chi2;
            double   chi13_c = *chi3;

            *psi1 += beta1_c * chi11_c + beta2_c * chi12_c + beta3_c * chi13_c;

            chi1 += inc_a1;
            chi2 += inc_a2;
            chi3 += inc_a3;
            psi1 += inc_w;
        }
    }
}
/*
 * bl1_ddotaxmyv2: fused double-precision dot product and two vector updates
 * driven by the same vector x:
 *
 *     rho := sum over i of x[i] * u[i]
 *     y   := y - alpha * x
 *     z   := z - beta  * x
 *
 * Parameters:
 *   n            - number of elements; n <= 0 touches no vector and stores
 *                  0.0 in *rho.
 *   alpha, beta  - pointers to the two scalars.
 *   x, u         - input vectors (only read).
 *   rho          - output; overwritten with the dot product.
 *   y, z         - vectors updated in place.
 *   inc_x, inc_u, inc_y, inc_z - element strides; any value other than 1
 *                  calls bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads/stores. When z is not
 * 16-byte aligned, one leading element is handled in scalar code first;
 * bl1_abort() is called if x, u or y is 16-byte aligned in that case.
 * (Presumably every pointer is at least 8-byte aligned -- confirm with
 * callers.)
 */
void bl1_ddotaxmyv2 ( int  n,
double *  alpha,
double *  beta,
double *  x,
int  inc_x,
double *  u,
int  inc_u,
double *  rho,
double *  y,
int  inc_y,
double *  z,
int  inc_z 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opd_var1().

{
    double*   restrict chi1;
    double*   restrict upsilon1;
    double*   restrict psi1;
    double*   restrict zeta1;
    double    rho_c;
    int       i;

    int       n_pre;
    int       n_run;
    int       n_left;

    v2df_t    a1v, b1v;
    v2df_t    rho1v;
    v2df_t    x1v, u1v, y1v, z1v;

    // Only contiguous vectors are supported.
    if ( inc_x != 1 ||
         inc_u != 1 ||
         inc_y != 1 ||
         inc_z != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) z % 16 != 0 )
    {
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) u % 16 == 0 ||
             ( unsigned long ) y % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // This kernel is not unrolled: two elements per vector iteration.
    n_run       = ( n - n_pre ) / 2;
    n_left      = ( n - n_pre ) % 2;

    chi1     = x;
    upsilon1 = u;
    psi1     = y;
    zeta1    = z;

    rho_c   = 0.0;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   alpha_c = *alpha;
        double   beta_c  = *beta;
        double   chi1_c    = *chi1;
        double   upsilon_c = *upsilon1;

        rho_c  += chi1_c * upsilon_c;
        *psi1  -= alpha_c * chi1_c;
        *zeta1 -= beta_c  * chi1_c;

        chi1     += inc_x;
        upsilon1 += inc_u;
        psi1     += inc_y;
        zeta1    += inc_z;
    }

    // Broadcast each scalar into both lanes of a vector register.
    a1v.v = _mm_loaddup_pd( ( double* )alpha );
    b1v.v = _mm_loaddup_pd( ( double* )beta );

    // Two-lane partial sums of the dot product.
    rho1v.v = _mm_setzero_pd();

    for ( i = 0; i < n_run; ++i )
    {
        x1v.v = _mm_load_pd( ( double* )chi1 );
        u1v.v = _mm_load_pd( ( double* )upsilon1 );
        y1v.v = _mm_load_pd( ( double* )psi1 );
        z1v.v = _mm_load_pd( ( double* )zeta1 );

        rho1v.v += x1v.v * u1v.v;
        y1v.v   -= a1v.v * x1v.v;
        z1v.v   -= b1v.v * x1v.v;

        _mm_store_pd( ( double* )psi1,  y1v.v );
        _mm_store_pd( ( double* )zeta1, z1v.v );

        chi1     += 2;
        upsilon1 += 2;
        psi1     += 2;
        zeta1    += 2;
    }

    // Fold the two vector lanes into the scalar accumulator.
    rho_c += rho1v.d[0] + rho1v.d[1];

    // Scalar clean-up of the trailing ( n - n_pre ) % 2 elements.
    if ( n_left > 0 )
    {
        double   alpha_c = *alpha;
        double   beta_c  = *beta;

        for( i = 0; i < n_left; ++i )
        {
            double   chi1_c    = *chi1;
            double   upsilon_c = *upsilon1;

            rho_c  += chi1_c * upsilon_c;
            *psi1  -= alpha_c * chi1_c;
            *zeta1 -= beta_c  * chi1_c;

            chi1     += inc_x;
            upsilon1 += inc_u;
            psi1     += inc_y;
            zeta1    += inc_z;
        }
    }

    *rho = rho_c;
}
/*
 * bl1_ddotaxpy: fused double-precision dot product and axpy sharing the
 * vector a:
 *
 *     rho := sum over i of a[i] * x[i]
 *     w   := w + kappa * a
 *
 * Parameters:
 *   n      - number of elements; n <= 0 touches no vector and stores 0.0
 *            in *rho.
 *   a, x   - input vectors (only read).
 *   kappa  - pointer to the scalar applied to a.
 *   rho    - output; overwritten with the dot product.
 *   w      - vector updated in place.
 *   inc_a, inc_x, inc_w - element strides; any value other than 1 calls
 *            bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads/stores. When a is not
 * 16-byte aligned, one leading element is handled in scalar code first;
 * bl1_abort() is called if x or w is 16-byte aligned in that case.
 * (Presumably every pointer is at least 8-byte aligned -- confirm with
 * callers.)
 */
void bl1_ddotaxpy ( int  n,
double *  a,
int  inc_a,
double *  x,
int  inc_x,
double *  kappa,
double *  rho,
double *  w,
int  inc_w 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

{
    double*   restrict alpha1;
    double*   restrict chi1;
    double*   restrict omega1;
    double             rho_c;
    int                i;

    int                n_pre;
    int                n_run;
    int                n_left;

    v2df_t    k1v, rho1v;
    v2df_t    a1v, x1v, w1v;
    v2df_t    a2v, x2v, w2v;

    // Only contiguous vectors are supported.
    if ( inc_a != 1 ||
         inc_x != 1 ||
         inc_w != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) a % 16 != 0 )
    {
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) w % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // Four elements per vector iteration, remainder in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    alpha1   = a;
    chi1     = x;
    omega1   = w;

    rho_c = 0.0;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        double   kappa_c    = *kappa;
        double   alpha1_c   = *alpha1;
        double   chi1_c     = *chi1;
        double   omega1_c   = *omega1;

        rho_c += alpha1_c * chi1_c;
        omega1_c += kappa_c * alpha1_c;

        *omega1 = omega1_c;

        alpha1   += inc_a;
        chi1     += inc_x;
        omega1   += inc_w;
    }

    // Two-lane partial sums of the dot product.
    rho1v.v = _mm_setzero_pd();

    // Broadcast kappa into both lanes of a vector register.
    k1v.v = _mm_loaddup_pd( ( double* )kappa );

    for ( i = 0; i < n_run; ++i )
    {
        a1v.v = _mm_load_pd( ( double* )alpha1 );
        x1v.v = _mm_load_pd( ( double* )chi1 );
        w1v.v = _mm_load_pd( ( double* )omega1 );

        a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );
        x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
        w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );

        rho1v.v += a1v.v * x1v.v;
        w1v.v += k1v.v * a1v.v;

        _mm_store_pd( ( double* )omega1, w1v.v );

        rho1v.v += a2v.v * x2v.v;
        w2v.v += k1v.v * a2v.v;

        _mm_store_pd( ( double* )(omega1 + 2), w2v.v );

        alpha1   += 4;
        chi1     += 4;
        omega1   += 4;
    }

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        for ( i = 0; i < n_left; ++i )
        {
            double   kappa_c    = *kappa;
            double   alpha1_c   = *alpha1;
            double   chi1_c     = *chi1;
            double   omega1_c   = *omega1;

            rho_c += alpha1_c * chi1_c;
            omega1_c += kappa_c * alpha1_c;

            *omega1 = omega1_c;

            alpha1   += inc_a;
            chi1     += inc_x;
            omega1   += inc_w;
        }
    }

    // Fold the two vector lanes into the scalar accumulator. This is done
    // after the scalar clean-up, so the summation order is
    // peel + tail + vector lanes.
    rho_c += rho1v.d[0] + rho1v.d[1];

    *rho = rho_c;
}
/*
 * bl1_ddotsv2: two fused double-precision dot products against a shared
 * vector z, accumulated into beta-scaled outputs:
 *
 *     rho_xz := beta * rho_xz + sum over i of x[i] * z[i]
 *     rho_yz := beta * rho_yz + sum over i of y[i] * z[i]
 *
 * Parameters:
 *   conjxy          - not referenced by this implementation.
 *   n               - number of elements; n <= 0 reads no vector element
 *                     and only scales the two outputs by beta.
 *   x, y, z         - input vectors (only read).
 *   beta            - pointer to the scalar applied to the previous values
 *                     of the outputs.
 *   rho_xz, rho_yz  - in/out scalars.
 *   inc_x, inc_y, inc_z - element strides; any value other than 1 calls
 *                     bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads. When z is not 16-byte
 * aligned, one leading element is handled in scalar code first; bl1_abort()
 * is called if x or y is 16-byte aligned in that case. (Presumably every
 * pointer is at least 8-byte aligned -- confirm with callers.)
 */
void bl1_ddotsv2 ( conj1_t  conjxy,
int  n,
double *  x,
int  inc_x,
double *  y,
int  inc_y,
double *  z,
int  inc_z,
double *  beta,
double *  rho_xz,
double *  rho_yz 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

{
    double*   restrict x1;
    double*   restrict y1;
    double*   restrict z1;
    double             rho1, rho2;
    double             x1c, y1c, z1c;
    int                i;

    int                n_pre;
    int                n_run;
    int                n_left;

    v2df_t             rho1v, rho2v;
    v2df_t             x1v, y1v, z1v;
    v2df_t             x2v, y2v, z2v;

    // Only contiguous vectors are supported.
    if ( inc_x != 1 ||
         inc_y != 1 ||
         inc_z != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) z % 16 != 0 )
    {
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) y % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // Four elements per vector iteration, remainder in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    x1 = x;
    y1 = y;
    z1 = z;

    rho1 = 0.0;
    rho2 = 0.0;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        x1c = *x1;
        y1c = *y1;
        z1c = *z1;

        rho1 += x1c * z1c;
        rho2 += y1c * z1c;

        x1 += inc_x;
        y1 += inc_y;
        z1 += inc_z;
    }

    // Two-lane partial sums of the two dot products.
    rho1v.v = _mm_setzero_pd();
    rho2v.v = _mm_setzero_pd();

    for ( i = 0; i < n_run; ++i )
    {
        x1v.v = _mm_load_pd( ( double* )x1 );
        y1v.v = _mm_load_pd( ( double* )y1 );
        z1v.v = _mm_load_pd( ( double* )z1 );

        x2v.v = _mm_load_pd( ( double* )(x1 + 2) );
        y2v.v = _mm_load_pd( ( double* )(y1 + 2) );
        z2v.v = _mm_load_pd( ( double* )(z1 + 2) );

        rho1v.v += x1v.v * z1v.v;
        rho2v.v += y1v.v * z1v.v;

        rho1v.v += x2v.v * z2v.v;
        rho2v.v += y2v.v * z2v.v;

        x1 += 4;
        y1 += 4;
        z1 += 4;
    }

    // Fold the vector lanes into the scalar accumulators.
    rho1 += rho1v.d[0] + rho1v.d[1];
    rho2 += rho2v.d[0] + rho2v.d[1];

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        for ( i = 0; i < n_left; ++i )
        {
            x1c = *x1;
            y1c = *y1;
            z1c = *z1;

            rho1 += x1c * z1c;
            rho2 += y1c * z1c;

            x1 += inc_x;
            y1 += inc_y;
            z1 += inc_z;
        }
    }

    // Combine with the beta-scaled previous contents of the outputs.
    *rho_xz = *beta * *rho_xz + rho1;
    *rho_yz = *beta * *rho_yz + rho2;
}
/*
 * bl1_ddotsv3: three fused double-precision dot products against a shared
 * vector z, accumulated into beta-scaled outputs:
 *
 *     rho_xz := beta * rho_xz + sum over i of x[i] * z[i]
 *     rho_yz := beta * rho_yz + sum over i of y[i] * z[i]
 *     rho_wz := beta * rho_wz + sum over i of w[i] * z[i]
 *
 * Parameters:
 *   conjxyw                 - not referenced by this implementation.
 *   n                       - number of elements; n <= 0 reads no vector
 *                             element and only scales the outputs by beta.
 *   x, y, w, z              - input vectors (only read).
 *   beta                    - pointer to the scalar applied to the previous
 *                             values of the outputs.
 *   rho_xz, rho_yz, rho_wz  - in/out scalars.
 *   inc_x, inc_y, inc_w, inc_z - element strides; any value other than 1
 *                             calls bl1_abort().
 *
 * Alignment: the main loop uses aligned SSE loads. When z is not 16-byte
 * aligned, one leading element is handled in scalar code first; bl1_abort()
 * is called if x, y or w is 16-byte aligned in that case. (Presumably every
 * pointer is at least 8-byte aligned -- confirm with callers.)
 */
void bl1_ddotsv3 ( conj1_t  conjxyw,
int  n,
double *  x,
int  inc_x,
double *  y,
int  inc_y,
double *  w,
int  inc_w,
double *  z,
int  inc_z,
double *  beta,
double *  rho_xz,
double *  rho_yz,
double *  rho_wz 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

{
    double*   restrict x1;
    double*   restrict y1;
    double*   restrict w1;
    double*   restrict z1;
    double             rho1, rho2, rho3;
    double             x1c, y1c, w1c, z1c;
    int                i;

    int                n_pre;
    int                n_run;
    int                n_left;

    v2df_t             rho1v, rho2v, rho3v;
    v2df_t             x1v, y1v, w1v, z1v;
    v2df_t             x2v, y2v, w2v, z2v;

    // Only contiguous vectors are supported.
    if ( inc_x != 1 ||
         inc_y != 1 ||
         inc_w != 1 ||
         inc_z != 1 ) bl1_abort();

    // Decide whether one leading element must be peeled off to reach
    // 16-byte alignment. The n > 0 test keeps an empty vector from being
    // dereferenced by the peel step below.
    n_pre = 0;
    if ( n > 0 && ( unsigned long ) z % 16 != 0 )
    {
        if ( ( unsigned long ) x % 16 == 0 ||
             ( unsigned long ) y % 16 == 0 ||
             ( unsigned long ) w % 16 == 0 ) bl1_abort();

        n_pre = 1;
    }

    // Four elements per vector iteration, remainder in scalar code.
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    x1 = x;
    y1 = y;
    w1 = w;
    z1 = z;

    rho1 = 0.0;
    rho2 = 0.0;
    rho3 = 0.0;

    // Scalar peel of the single leading element.
    if ( n_pre == 1 )
    {
        x1c = *x1;
        y1c = *y1;
        w1c = *w1;
        z1c = *z1;

        rho1 += x1c * z1c;
        rho2 += y1c * z1c;
        rho3 += w1c * z1c;

        x1 += inc_x;
        y1 += inc_y;
        w1 += inc_w;
        z1 += inc_z;
    }

    // Two-lane partial sums of the three dot products.
    rho1v.v = _mm_setzero_pd();
    rho2v.v = _mm_setzero_pd();
    rho3v.v = _mm_setzero_pd();

    for ( i = 0; i < n_run; ++i )
    {
        x1v.v = _mm_load_pd( ( double* )x1 );
        y1v.v = _mm_load_pd( ( double* )y1 );
        w1v.v = _mm_load_pd( ( double* )w1 );
        z1v.v = _mm_load_pd( ( double* )z1 );

        rho1v.v += x1v.v * z1v.v;
        rho2v.v += y1v.v * z1v.v;
        rho3v.v += w1v.v * z1v.v;

        x2v.v = _mm_load_pd( ( double* )(x1 + 2) );
        y2v.v = _mm_load_pd( ( double* )(y1 + 2) );
        w2v.v = _mm_load_pd( ( double* )(w1 + 2) );
        z2v.v = _mm_load_pd( ( double* )(z1 + 2) );

        rho1v.v += x2v.v * z2v.v;
        rho2v.v += y2v.v * z2v.v;
        rho3v.v += w2v.v * z2v.v;

        x1 += 4;
        y1 += 4;
        w1 += 4;
        z1 += 4;
    }

    // Fold the vector lanes into the scalar accumulators.
    rho1 += rho1v.d[0] + rho1v.d[1];
    rho2 += rho2v.d[0] + rho2v.d[1];
    rho3 += rho3v.d[0] + rho3v.d[1];

    // Scalar clean-up of the trailing ( n - n_pre ) % 4 elements.
    if ( n_left > 0 )
    {
        for ( i = 0; i < n_left; ++i )
        {
            x1c = *x1;
            y1c = *y1;
            w1c = *w1;
            z1c = *z1;

            rho1 += x1c * z1c;
            rho2 += y1c * z1c;
            rho3 += w1c * z1c;

            x1 += inc_x;
            y1 += inc_y;
            w1 += inc_w;
            z1 += inc_z;
        }
    }

    // Combine with the beta-scaled previous contents of the outputs.
    *rho_xz = *beta * *rho_xz + rho1;
    *rho_yz = *beta * *rho_yz + rho2;
    *rho_wz = *beta * *rho_wz + rho3;
}
/*
 * bl1_ddotv2axpyv2b
 *
 * Fused kernel on double-precision vectors of length n. In one pass it
 * computes two dot products and a two-term axpy update:
 *
 *   *rho1  = sum_i a1[i] * x[i]
 *   *rho2  = sum_i a2[i] * x[i]
 *   w[i]  += (*kappa1) * a1[i] + (*kappa2) * a2[i]
 *
 * Parameters:
 *   n                number of elements; n <= 0 touches no vector element
 *                    and stores 0.0 in *rho1 and *rho2
 *   a1, a2, x        input vectors (only read)
 *   inc_a1, inc_a2,
 *   inc_x, inc_w     strides; all four must be 1, otherwise bl1_abort()
 *   kappa1, kappa2   scalars applied to a1 and a2 when updating w
 *   rho1, rho2       outputs; overwritten, not accumulated into
 *   w                vector updated in place
 *
 * Alignment: the vector loop uses aligned 16-byte loads and stores, so
 * a1, a2, x and w must either all be 16-byte aligned or all be
 * misaligned; any mixture calls bl1_abort(). When they are misaligned a
 * single leading element is processed in scalar code first.
 * NOTE(review): this assumes a misaligned pointer is still 8-byte
 * aligned, so that stepping over one double reaches a 16-byte boundary
 * -- confirm against callers.
 */
void bl1_ddotv2axpyv2b ( int  n,
double *  a1,
int  inc_a1,
double *  a2,
int  inc_a2,
double *  x,
int  inc_x,
double *  kappa1,
double *  kappa2,
double *  rho1,
double *  rho2,
double *  w,
int  inc_w 
)

References bl1_abort(), v2df_t::d, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

{
    double*   restrict alpha1;
    double*   restrict alpha2;
    double*   restrict chi1;
    double*   restrict omega1;
    double             kappa1_c;
    double             kappa2_c;
    double             rho1_c;
    double             rho2_c;
    int                i;

    int                a1_unaligned;
    int                n_pre;
    int                n_run;
    int                n_left;

    v2df_t    k1v, rho1v;
    v2df_t    k2v, rho2v;
    v2df_t    a11v, a12v, x1v, w1v;
    v2df_t    a21v, a22v, x2v, w2v;

    // Only contiguous vectors are supported by this kernel.
    if ( inc_a1 != 1 ||
         inc_a2 != 1 ||
         inc_x  != 1 ||
         inc_w  != 1 ) bl1_abort();

    // Empty input: the dot products are zero and w is left untouched.
    // Without this guard a misaligned a1 would trigger the one-element
    // peel below and access element 0 of vectors that have no elements.
    if ( n <= 0 )
    {
        *rho1 = 0.0;
        *rho2 = 0.0;
        return;
    }

    // All four vectors must share the same 16-byte alignment status:
    // a single peeled element shifts every pointer by the same amount,
    // so a mixture can never be made simultaneously aligned.
    a1_unaligned = ( ( unsigned long ) a1 % 16 != 0 );

    if ( ( ( unsigned long ) a2 % 16 != 0 ) != a1_unaligned ||
         ( ( unsigned long ) x  % 16 != 0 ) != a1_unaligned ||
         ( ( unsigned long ) w  % 16 != 0 ) != a1_unaligned ) bl1_abort();

    n_pre       = ( a1_unaligned ? 1 : 0 );
    n_run       = ( n - n_pre ) / 4;
    n_left      = ( n - n_pre ) % 4;

    alpha1   = a1;
    alpha2   = a2;
    chi1     = x;
    omega1   = w;

    kappa1_c = *kappa1;
    kappa2_c = *kappa2;

    rho1_c = 0.0;
    rho2_c = 0.0;

    // Peel one element in scalar code to reach a 16-byte boundary.
    if ( n_pre == 1 )
    {
        double   alpha1_c   = *alpha1;
        double   alpha2_c   = *alpha2;
        double   chi1_c     = *chi1;
        double   omega1_c   = *omega1;

        rho1_c   += alpha1_c * chi1_c;
        omega1_c += kappa1_c * alpha1_c;

        rho2_c   += alpha2_c * chi1_c;
        omega1_c += kappa2_c * alpha2_c;

        *omega1 = omega1_c;

        alpha1   += inc_a1;
        alpha2   += inc_a2;
        chi1     += inc_x;
        omega1   += inc_w;
    }

    rho1v.v = _mm_setzero_pd();
    rho2v.v = _mm_setzero_pd();

    // Broadcast each scalar into both lanes of a vector register.
    k1v.v = _mm_loaddup_pd( ( double* )kappa1 );
    k2v.v = _mm_loaddup_pd( ( double* )kappa2 );

    // Main loop: four elements per iteration, as two 2-wide vector steps.
    for ( i = 0; i < n_run; ++i )
    {
        a11v.v = _mm_load_pd( ( double* )alpha1 );
        a12v.v = _mm_load_pd( ( double* )alpha2 );
        x1v.v  = _mm_load_pd( ( double* )chi1 );
        w1v.v  = _mm_load_pd( ( double* )omega1 );

        rho1v.v += a11v.v * x1v.v;
        w1v.v += k1v.v * a11v.v;

        rho2v.v += a12v.v * x1v.v;
        w1v.v += k2v.v * a12v.v;

        _mm_store_pd( ( double* )omega1, w1v.v );

        a21v.v = _mm_load_pd( ( double* )(alpha1 + 2) );
        a22v.v = _mm_load_pd( ( double* )(alpha2 + 2) );
        x2v.v  = _mm_load_pd( ( double* )(chi1 + 2) );
        w2v.v  = _mm_load_pd( ( double* )(omega1 + 2) );

        rho1v.v += a21v.v * x2v.v;
        w2v.v += k1v.v * a21v.v;

        rho2v.v += a22v.v * x2v.v;
        w2v.v += k2v.v * a22v.v;

        _mm_store_pd( ( double* )(omega1 + 2), w2v.v );

        alpha1   += 4;
        alpha2   += 4;
        chi1     += 4;
        omega1   += 4;
    }

    // Remaining 0..3 elements in scalar code.
    for ( i = 0; i < n_left; ++i )
    {
        double   alpha1_c   = *alpha1;
        double   alpha2_c   = *alpha2;
        double   chi1_c     = *chi1;
        double   omega1_c   = *omega1;

        rho1_c   += alpha1_c * chi1_c;
        omega1_c += kappa1_c * alpha1_c;

        rho2_c   += alpha2_c * chi1_c;
        omega1_c += kappa2_c * alpha2_c;

        *omega1 = omega1_c;

        alpha1   += inc_a1;
        alpha2   += inc_a2;
        chi1     += inc_x;
        omega1   += inc_w;
    }

    // Fold the two lanes of each vector accumulator into the scalar sums.
    rho1_c += rho1v.d[0] + rho1v.d[1];
    rho2_c += rho2v.d[0] + rho2v.d[1];

    *rho1 = rho1_c;
    *rho2 = rho2_c;
}
/*
 * bl1_saxmyv2: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_saxmyv2 ( conj1_t  conjx,
int  n,
float *  alpha,
float *  beta,
float *  x,
int  inc_x,
float *  y,
int  inc_y,
float *  z,
int  inc_z 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_saxpyv2b: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_saxpyv2b ( int  n,
float *  beta1,
float *  beta2,
float *  a1,
int  inc_a1,
float *  a2,
int  inc_a2,
float *  w,
int  inc_w 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_saxpyv2bdotaxpy: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_saxpyv2bdotaxpy ( int  n,
float *  beta,
float *  u,
int  inc_u,
float *  gamma,
float *  z,
int  inc_z,
float *  a,
int  inc_a,
float *  x,
int  inc_x,
float *  kappa,
float *  rho,
float *  w,
int  inc_w 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_saxpyv3b: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_saxpyv3b ( int  n,
float *  beta1,
float *  beta2,
float *  beta3,
float *  a1,
int  inc_a1,
float *  a2,
int  inc_a2,
float *  a3,
int  inc_a3,
float *  w,
int  inc_w 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_sdotaxmyv2: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_sdotaxmyv2 ( int  n,
float *  alpha,
float *  beta,
float *  x,
int  inc_x,
float *  u,
int  inc_u,
float *  rho,
float *  y,
int  inc_y,
float *  z,
int  inc_z 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_sdotaxpy: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_sdotaxpy ( int  n,
float *  a,
int  inc_a,
float *  x,
int  inc_x,
float *  kappa,
float *  rho,
float *  w,
int  inc_w 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_sdotsv2: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_sdotsv2 ( conj1_t  conjxy,
int  n,
float *  x,
int  inc_x,
float *  y,
int  inc_y,
float *  z,
int  inc_z,
float *  beta,
float *  rho_xz,
float *  rho_yz 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_sdotsv3: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_sdotsv3 ( conj1_t  conjxyw,
int  n,
float *  x,
int  inc_x,
float *  y,
int  inc_y,
float *  w,
int  inc_w,
float *  z,
int  inc_z,
float *  beta,
float *  rho_xz,
float *  rho_yz,
float *  rho_wz 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
/*
 * bl1_sdotv2axpyv2b: float entry point.
 * The body consists solely of a call to bl1_abort(); no argument is read
 * and no output is written.
 * NOTE(review): presumably a placeholder for a kernel that is not
 * implemented for this datatype -- confirm.
 */
void bl1_sdotv2axpyv2b ( int  n,
float *  a1,
int  inc_a1,
float *  a2,
int  inc_a2,
float *  x,
int  inc_x,
float *  kappa1,
float *  kappa2,
float *  rho1,
float *  rho2,
float *  w,
int  inc_w 
)

References bl1_abort().

{
    // Unconditional abort: nothing is computed here.
    bl1_abort();
}
void bl1_zaxmyv2 ( conj1_t  conjx,
int  n,
dcomplex *  alpha,
dcomplex *  beta,
dcomplex *  x,
int  inc_x,
dcomplex *  y,
int  inc_y,
dcomplex *  z,
int  inc_z 
)
void bl1_zaxpyv2b ( int  n,
dcomplex *  beta1,
dcomplex *  beta2,
dcomplex *  a1,
int  inc_a1,
dcomplex *  a2,
int  inc_a2,
dcomplex *  w,
int  inc_w 
)
void bl1_zaxpyv2bdotaxpy ( int  n,
dcomplex *  beta,
dcomplex *  u,
int  inc_u,
dcomplex *  gamma,
dcomplex *  z,
int  inc_z,
dcomplex *  a,
int  inc_a,
dcomplex *  x,
int  inc_x,
dcomplex *  kappa,
dcomplex *  rho,
dcomplex *  w,
int  inc_w 
)
void bl1_zaxpyv2bdots ( int  n,
dcomplex *  alpha1,
dcomplex *  alpha2,
dcomplex *  x1,
int  inc_x1,
dcomplex *  x2,
int  inc_x2,
dcomplex *  y,
int  inc_y,
dcomplex *  u,
int  inc_u,
dcomplex *  beta,
dcomplex *  rho 
)
void bl1_zaxpyv3b ( int  n,
dcomplex *  beta1,
dcomplex *  beta2,
dcomplex *  beta3,
dcomplex *  a1,
int  inc_a1,
dcomplex *  a2,
int  inc_a2,
dcomplex *  a3,
int  inc_a3,
dcomplex *  w,
int  inc_w 
)
void bl1_zdotaxmyv2 ( int  n,
dcomplex *  alpha,
dcomplex *  beta,
dcomplex *  x,
int  inc_x,
dcomplex *  u,
int  inc_u,
dcomplex *  rho,
dcomplex *  y,
int  inc_y,
dcomplex *  z,
int  inc_z 
)
void bl1_zdotaxpy ( int  n,
dcomplex *  a,
int  inc_a,
dcomplex *  x,
int  inc_x,
dcomplex *  kappa,
dcomplex *  rho,
dcomplex *  w,
int  inc_w 
)
void bl1_zdotsv2 ( conj1_t  conjxy,
int  n,
dcomplex *  x,
int  inc_x,
dcomplex *  y,
int  inc_y,
dcomplex *  z,
int  inc_z,
dcomplex *  beta,
dcomplex *  rho_xz,
dcomplex *  rho_yz 
)
void bl1_zdotsv3 ( conj1_t  conjxyw,
int  n,
dcomplex *  x,
int  inc_x,
dcomplex *  y,
int  inc_y,
dcomplex *  w,
int  inc_w,
dcomplex *  z,
int  inc_z,
dcomplex *  beta,
dcomplex *  rho_xz,
dcomplex *  rho_yz,
dcomplex *  rho_wz 
)
void bl1_zdotv2axpyv2b ( int  n,
dcomplex *  a1,
int  inc_a1,
dcomplex *  a2,
int  inc_a2,
dcomplex *  x,
int  inc_x,
dcomplex *  kappa1,
dcomplex *  kappa2,
dcomplex *  rho1,
dcomplex *  rho2,
dcomplex *  w,
int  inc_w 
)