|
libflame
12600
|
Go to the source code of this file.
Functions | |
| void | bl1_saxmyv2 (conj1_t conjx, int n, float *alpha, float *beta, float *x, int inc_x, float *y, int inc_y, float *z, int inc_z) |
| void | bl1_daxmyv2 (conj1_t conjx, int n, double *alpha, double *beta, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z) |
| void | bl1_caxmyv2 (conj1_t conjx, int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *z, int inc_z) |
| void | bl1_zaxmyv2 (conj1_t conjx, int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z) |
| void | bl1_saxpyv2b (int n, float *beta1, float *beta2, float *a1, int inc_a1, float *a2, int inc_a2, float *w, int inc_w) |
| void | bl1_daxpyv2b (int n, double *beta1, double *beta2, double *a1, int inc_a1, double *a2, int inc_a2, double *w, int inc_w) |
| void | bl1_caxpyv2b (int n, scomplex *beta1, scomplex *beta2, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *w, int inc_w) |
| void | bl1_zaxpyv2b (int n, dcomplex *beta1, dcomplex *beta2, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *w, int inc_w) |
| void | bl1_saxpyv3b (int n, float *beta1, float *beta2, float *beta3, float *a1, int inc_a1, float *a2, int inc_a2, float *a3, int inc_a3, float *w, int inc_w) |
| void | bl1_daxpyv3b (int n, double *beta1, double *beta2, double *beta3, double *a1, int inc_a1, double *a2, int inc_a2, double *a3, int inc_a3, double *w, int inc_w) |
| void | bl1_caxpyv3b (int n, scomplex *beta1, scomplex *beta2, scomplex *beta3, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *a3, int inc_a3, scomplex *w, int inc_w) |
| void | bl1_zaxpyv3b (int n, dcomplex *beta1, dcomplex *beta2, dcomplex *beta3, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *a3, int inc_a3, dcomplex *w, int inc_w) |
| void | bl1_saxpyv2bdotaxpy (int n, float *beta, float *u, int inc_u, float *gamma, float *z, int inc_z, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w) |
| void | bl1_daxpyv2bdotaxpy (int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w) |
| void | bl1_caxpyv2bdotaxpy (int n, scomplex *beta, scomplex *u, int inc_u, scomplex *gamma, scomplex *z, int inc_z, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w) |
| void | bl1_zaxpyv2bdotaxpy (int n, dcomplex *beta, dcomplex *u, int inc_u, dcomplex *gamma, dcomplex *z, int inc_z, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w) |
| void | bl1_sdotsv2 (conj1_t conjxy, int n, float *x, int inc_x, float *y, int inc_y, float *z, int inc_z, float *beta, float *rho_xz, float *rho_yz) |
| void | bl1_ddotsv2 (conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz) |
| void | bl1_cdotsv2 (conj1_t conjxy, int n, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *z, int inc_z, scomplex *beta, scomplex *rho_xz, scomplex *rho_yz) |
| void | bl1_zdotsv2 (conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz) |
| void | bl1_sdotsv3 (conj1_t conjxyw, int n, float *x, int inc_x, float *y, int inc_y, float *w, int inc_w, float *z, int inc_z, float *beta, float *rho_xz, float *rho_yz, float *rho_wz) |
| void | bl1_ddotsv3 (conj1_t conjxyw, int n, double *x, int inc_x, double *y, int inc_y, double *w, int inc_w, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz, double *rho_wz) |
| void | bl1_cdotsv3 (conj1_t conjxyw, int n, scomplex *x, int inc_x, scomplex *y, int inc_y, scomplex *w, int inc_w, scomplex *z, int inc_z, scomplex *beta, scomplex *rho_xz, scomplex *rho_yz, scomplex *rho_wz) |
| void | bl1_zdotsv3 (conj1_t conjxyw, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *w, int inc_w, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz, dcomplex *rho_wz) |
| void | bl1_sdotaxpy (int n, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w) |
| void | bl1_ddotaxpy (int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w) |
| void | bl1_cdotaxpy (int n, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w) |
| void | bl1_zdotaxpy (int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w) |
| void | bl1_sdotaxmyv2 (int n, float *alpha, float *beta, float *x, int inc_x, float *u, int inc_u, float *rho, float *y, int inc_y, float *z, int inc_z) |
| void | bl1_ddotaxmyv2 (int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z) |
| void | bl1_cdotaxmyv2 (int n, scomplex *alpha, scomplex *beta, scomplex *x, int inc_x, scomplex *u, int inc_u, scomplex *rho, scomplex *y, int inc_y, scomplex *z, int inc_z) |
| void | bl1_zdotaxmyv2 (int n, dcomplex *alpha, dcomplex *beta, dcomplex *x, int inc_x, dcomplex *u, int inc_u, dcomplex *rho, dcomplex *y, int inc_y, dcomplex *z, int inc_z) |
| void | bl1_sdotv2axpyv2b (int n, float *a1, int inc_a1, float *a2, int inc_a2, float *x, int inc_x, float *kappa1, float *kappa2, float *rho1, float *rho2, float *w, int inc_w) |
| void | bl1_ddotv2axpyv2b (int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w) |
| void | bl1_cdotv2axpyv2b (int n, scomplex *a1, int inc_a1, scomplex *a2, int inc_a2, scomplex *x, int inc_x, scomplex *kappa1, scomplex *kappa2, scomplex *rho1, scomplex *rho2, scomplex *w, int inc_w) |
| void | bl1_zdotv2axpyv2b (int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w) |
| void | bl1_zaxpyv2bdots (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y, dcomplex *u, int inc_u, dcomplex *beta, dcomplex *rho) |
| void bl1_caxmyv2 | ( | conj1_t | conjx, |
| int | n, | ||
| scomplex * | alpha, | ||
| scomplex * | beta, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | y, | ||
| int | inc_y, | ||
| scomplex * | z, | ||
| int | inc_z | ||
| ) |
| void bl1_caxpyv2b | ( | int | n, |
| scomplex * | beta1, | ||
| scomplex * | beta2, | ||
| scomplex * | a1, | ||
| int | inc_a1, | ||
| scomplex * | a2, | ||
| int | inc_a2, | ||
| scomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_caxpyv2bdotaxpy | ( | int | n, |
| scomplex * | beta, | ||
| scomplex * | u, | ||
| int | inc_u, | ||
| scomplex * | gamma, | ||
| scomplex * | z, | ||
| int | inc_z, | ||
| scomplex * | a, | ||
| int | inc_a, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | kappa, | ||
| scomplex * | rho, | ||
| scomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_caxpyv3b | ( | int | n, |
| scomplex * | beta1, | ||
| scomplex * | beta2, | ||
| scomplex * | beta3, | ||
| scomplex * | a1, | ||
| int | inc_a1, | ||
| scomplex * | a2, | ||
| int | inc_a2, | ||
| scomplex * | a3, | ||
| int | inc_a3, | ||
| scomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_cdotaxmyv2 | ( | int | n, |
| scomplex * | alpha, | ||
| scomplex * | beta, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | u, | ||
| int | inc_u, | ||
| scomplex * | rho, | ||
| scomplex * | y, | ||
| int | inc_y, | ||
| scomplex * | z, | ||
| int | inc_z | ||
| ) |
| void bl1_cdotaxpy | ( | int | n, |
| scomplex * | a, | ||
| int | inc_a, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | kappa, | ||
| scomplex * | rho, | ||
| scomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_cdotsv2 | ( | conj1_t | conjxy, |
| int | n, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | y, | ||
| int | inc_y, | ||
| scomplex * | z, | ||
| int | inc_z, | ||
| scomplex * | beta, | ||
| scomplex * | rho_xz, | ||
| scomplex * | rho_yz | ||
| ) |
| void bl1_cdotsv3 | ( | conj1_t | conjxyw, |
| int | n, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | y, | ||
| int | inc_y, | ||
| scomplex * | w, | ||
| int | inc_w, | ||
| scomplex * | z, | ||
| int | inc_z, | ||
| scomplex * | beta, | ||
| scomplex * | rho_xz, | ||
| scomplex * | rho_yz, | ||
| scomplex * | rho_wz | ||
| ) |
| void bl1_cdotv2axpyv2b | ( | int | n, |
| scomplex * | a1, | ||
| int | inc_a1, | ||
| scomplex * | a2, | ||
| int | inc_a2, | ||
| scomplex * | x, | ||
| int | inc_x, | ||
| scomplex * | kappa1, | ||
| scomplex * | kappa2, | ||
| scomplex * | rho1, | ||
| scomplex * | rho2, | ||
| scomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_daxmyv2 | ( | conj1_t | conjx, |
| int | n, | ||
| double * | alpha, | ||
| double * | beta, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | y, | ||
| int | inc_y, | ||
| double * | z, | ||
| int | inc_z | ||
| ) |
References bl1_abort(), and v2df_t::v.
Referenced by FLA_Fused_UYx_ZVx_opd_var1().
{
	/*
	 * Fused update over n contiguous doubles:
	 *   y := y - alpha * x
	 *   z := z - beta  * x
	 * conjx is not consulted anywhere in this body.
	 */
	v2df_t alpha_vec, beta_vec;
	v2df_t x_lo, y_lo, z_lo;
	v2df_t x_hi, y_hi, z_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x == 1 && inc_y == 1 && inc_z == 1 ) )
		bl1_abort();

	// If z does not sit on a 16-byte boundary, one scalar element is peeled
	// off first. x and y must then be off the boundary as well; if either
	// one is already on it, abort.
	int n_pre = 0;
	if ( ( unsigned long ) z % 16 != 0 )
	{
		const int x_on_boundary = ( ( unsigned long ) x % 16 == 0 );
		const int y_on_boundary = ( ( unsigned long ) y % 16 == 0 );

		if ( x_on_boundary || y_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	// Split what remains into blocks of four plus a scalar tail.
	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict xp = x;
	double* restrict yp = y;
	double* restrict zp = z;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double a  = *alpha;
		const double b  = *beta;
		const double xi = *xp;

		*yp = *yp - a * xi;
		*zp = *zp - b * xi;

		xp += inc_x;
		yp += inc_y;
		zp += inc_z;
	}

	// Broadcast both scalars into the two lanes of a vector.
	alpha_vec.v = _mm_loaddup_pd( alpha );
	beta_vec.v  = _mm_loaddup_pd( beta );

	// Main loop: four elements per pass, as two 2-wide halves.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		x_lo.v = _mm_load_pd( &xp[0] );
		y_lo.v = _mm_load_pd( &yp[0] );
		z_lo.v = _mm_load_pd( &zp[0] );

		x_hi.v = _mm_load_pd( &xp[2] );
		y_hi.v = _mm_load_pd( &yp[2] );
		z_hi.v = _mm_load_pd( &zp[2] );

		y_lo.v -= alpha_vec.v * x_lo.v;
		z_lo.v -= beta_vec.v * x_lo.v;

		_mm_store_pd( &yp[0], y_lo.v );
		_mm_store_pd( &zp[0], z_lo.v );

		y_hi.v -= alpha_vec.v * x_hi.v;
		z_hi.v -= beta_vec.v * x_hi.v;

		_mm_store_pd( &yp[2], y_hi.v );
		_mm_store_pd( &zp[2], z_hi.v );

		xp += 4;
		yp += 4;
		zp += 4;
	}

	// Scalar tail for the elements that did not fill a block of four.
	if ( n_left > 0 )
	{
		const double a = *alpha;
		const double b = *beta;

		for ( int k = n_left; k > 0; --k )
		{
			const double xi = *xp;

			*yp = *yp - a * xi;
			*zp = *zp - b * xi;

			xp += inc_x;
			yp += inc_y;
			zp += inc_z;
		}
	}
}
| void bl1_daxpyv2b | ( | int | n, |
| double * | beta1, | ||
| double * | beta2, | ||
| double * | a1, | ||
| int | inc_a1, | ||
| double * | a2, | ||
| int | inc_a2, | ||
| double * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort(), and v2df_t::v.
Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().
{
	/*
	 * Two-term axpy over n contiguous doubles:
	 *   y := y + alpha1 * x1 + alpha2 * x2
	 *
	 * NOTE(review): the identifiers used here (alpha1, alpha2, x1, x2, y and
	 * the matching inc_* strides) differ from the names shown in the
	 * prototype listed for this function -- presumably the definition names
	 * its parameters this way; confirm against the real source.
	 */
	v2df_t alpha1_vec, alpha2_vec;
	v2df_t x1_lo, x2_lo, y_lo;
	v2df_t x1_hi, x2_hi, y_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x1 == 1 && inc_x2 == 1 && inc_y == 1 ) )
		bl1_abort();

	// If y is off a 16-byte boundary, peel one scalar element; x1 and x2
	// must then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) y % 16 != 0 )
	{
		const int x1_on_boundary = ( ( unsigned long ) x1 % 16 == 0 );
		const int x2_on_boundary = ( ( unsigned long ) x2 % 16 == 0 );

		if ( x1_on_boundary || x2_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict x1p = x1;
	double* restrict x2p = x2;
	double* restrict yp  = y;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double s1     = *alpha1;
		const double s2     = *alpha2;
		const double e1     = *x1p;
		const double e2     = *x2p;
		const double update = s1 * e1 + s2 * e2;

		*yp = *yp + update;

		x1p += inc_x1;
		x2p += inc_x2;
		yp  += inc_y;
	}

	// Broadcast both scalars into the two lanes of a vector.
	alpha1_vec.v = _mm_loaddup_pd( alpha1 );
	alpha2_vec.v = _mm_loaddup_pd( alpha2 );

	// Main loop: four elements per pass, as two 2-wide halves.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		x1_lo.v = _mm_load_pd( &x1p[0] );
		x2_lo.v = _mm_load_pd( &x2p[0] );
		y_lo.v  = _mm_load_pd( &yp[0] );

		x1_hi.v = _mm_load_pd( &x1p[2] );
		x2_hi.v = _mm_load_pd( &x2p[2] );
		y_hi.v  = _mm_load_pd( &yp[2] );

		y_lo.v += alpha1_vec.v * x1_lo.v + alpha2_vec.v * x2_lo.v;
		y_hi.v += alpha1_vec.v * x1_hi.v + alpha2_vec.v * x2_hi.v;

		_mm_store_pd( &yp[0], y_lo.v );
		_mm_store_pd( &yp[2], y_hi.v );

		x1p += 4;
		x2p += 4;
		yp  += 4;
	}

	// Scalar tail.
	if ( n_left > 0 )
	{
		const double s1 = *alpha1;
		const double s2 = *alpha2;

		for ( int k = n_left; k > 0; --k )
		{
			const double e1     = *x1p;
			const double e2     = *x2p;
			const double y_old  = *yp;
			const double update = s1 * e1 + s2 * e2;

			*yp = y_old + update;

			x1p += inc_x1;
			x2p += inc_x2;
			yp  += inc_y;
		}
	}
}
| void bl1_daxpyv2bdotaxpy | ( | int | n, |
| double * | beta, | ||
| double * | u, | ||
| int | inc_u, | ||
| double * | gamma, | ||
| double * | z, | ||
| int | inc_z, | ||
| double * | a, | ||
| int | inc_a, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | kappa, | ||
| double * | rho, | ||
| double * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
Referenced by FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), and FLA_Fused_Her2_Ax_l_opd_var1().
{
	/*
	 * Fused kernel over n contiguous doubles:
	 *   a   := a + beta * u + gamma * z
	 *   rho := sum over i of a[i] * x[i]   (using the updated a)
	 *   w   := w + kappa * a               (using the updated a)
	 * *rho is overwritten with the sum; its incoming value is not read.
	 */
	double* restrict upsilon1;
	double* restrict zeta1;
	double* restrict alpha1;
	double* restrict chi1;
	double* restrict omega1;
	double rho_c;
	int i;
	v2df_t b1v, g1v, k1v;
	v2df_t rhov;
	v2df_t u1v, z1v, a1v;
	v2df_t u2v, z2v, a2v;
	v2df_t x1v, w1v;
	v2df_t x2v, w2v;
	int n_pre;
	int n_run;
	int n_left;

	// The main loop below advances every pointer by a fixed 4 elements and
	// uses contiguous two-double loads and stores, so a non-unit stride
	// cannot be honoured. Reject it up front, as the other double-precision
	// kernels in this file do, instead of silently computing on the wrong
	// elements.
	if ( inc_u != 1 ||
	     inc_z != 1 ||
	     inc_a != 1 ||
	     inc_x != 1 ||
	     inc_w != 1 ) bl1_abort();

	// If a is off a 16-byte boundary, one scalar element is peeled first.
	// All other vectors must then be off the boundary as well; if any of
	// them is already on it, abort.
	n_pre = 0;
	if ( ( unsigned long ) a % 16 != 0 )
	{
		if ( ( unsigned long ) u % 16 == 0 ||
		     ( unsigned long ) z % 16 == 0 ||
		     ( unsigned long ) x % 16 == 0 ||
		     ( unsigned long ) w % 16 == 0 ) bl1_abort();

		n_pre = 1;
	}

	// Remaining work: blocks of four plus a scalar tail.
	n_run  = ( n - n_pre ) / 4;
	n_left = ( n - n_pre ) % 4;

	upsilon1 = u;
	zeta1    = z;
	alpha1   = a;
	chi1     = x;
	omega1   = w;

	rho_c = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		double beta_c     = *beta;
		double gamma_c    = *gamma;
		double kappa_c    = *kappa;

		double upsilon1_c = *upsilon1;
		double zeta1_c    = *zeta1;
		double alpha1_c   = *alpha1;
		double chi1_c     = *chi1;
		double omega1_c   = *omega1;

		alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
		rho_c    += alpha1_c * chi1_c;
		omega1_c += kappa_c * alpha1_c;

		*alpha1 = alpha1_c;
		*omega1 = omega1_c;

		upsilon1 += inc_u;
		zeta1    += inc_z;
		alpha1   += inc_a;
		chi1     += inc_x;
		omega1   += inc_w;
	}

	// Broadcast the three scalars into both lanes of a vector.
	b1v.v = _mm_loaddup_pd( ( double* )beta );
	g1v.v = _mm_loaddup_pd( ( double* )gamma );
	k1v.v = _mm_loaddup_pd( ( double* )kappa );

	rhov.v = _mm_setzero_pd();

	// Main loop: four elements per pass, as two 2-wide halves.
	for ( i = 0; i < n_run; ++i )
	{
		u1v.v = _mm_load_pd( ( double* )upsilon1 );
		z1v.v = _mm_load_pd( ( double* )zeta1 );
		a1v.v = _mm_load_pd( ( double* )alpha1 );

		a1v.v += b1v.v * u1v.v + g1v.v * z1v.v;

		u2v.v = _mm_load_pd( ( double* )(upsilon1 + 2) );
		z2v.v = _mm_load_pd( ( double* )(zeta1 + 2) );
		a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );

		a2v.v += b1v.v * u2v.v + g1v.v * z2v.v;

		x1v.v = _mm_load_pd( ( double* )chi1 );
		x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );

		w1v.v = _mm_load_pd( ( double* )omega1 );
		w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );

		// The dot product and the w update both use the freshly updated a.
		rhov.v += a1v.v * x1v.v;
		rhov.v += a2v.v * x2v.v;

		w1v.v += k1v.v * a1v.v;
		w2v.v += k1v.v * a2v.v;

		_mm_store_pd( ( double* )alpha1, a1v.v );
		_mm_store_pd( ( double* )(alpha1 + 2), a2v.v );

		_mm_store_pd( ( double* )omega1, w1v.v );
		_mm_store_pd( ( double* )(omega1 + 2), w2v.v );

		upsilon1 += 4;
		zeta1    += 4;
		alpha1   += 4;
		chi1     += 4;
		omega1   += 4;
	}

	// Fold the two vector lanes into the scalar accumulator.
	rho_c += rhov.d[0] + rhov.d[1];

	// Scalar tail.
	if ( n_left > 0 )
	{
		double beta_c  = *beta;
		double gamma_c = *gamma;
		double kappa_c = *kappa;

		for ( i = 0; i < n_left; ++i )
		{
			double upsilon1_c = *upsilon1;
			double zeta1_c    = *zeta1;
			double alpha1_c   = *alpha1;
			double chi1_c     = *chi1;
			double omega1_c   = *omega1;

			alpha1_c += beta_c * upsilon1_c + gamma_c * zeta1_c;
			rho_c    += alpha1_c * chi1_c;
			omega1_c += kappa_c * alpha1_c;

			*alpha1 = alpha1_c;
			*omega1 = omega1_c;

			upsilon1 += inc_u;
			zeta1    += inc_z;
			alpha1   += inc_a;
			chi1     += inc_x;
			omega1   += inc_w;
		}
	}

	*rho = rho_c;
}
| void bl1_daxpyv3b | ( | int | n, |
| double * | beta1, | ||
| double * | beta2, | ||
| double * | beta3, | ||
| double * | a1, | ||
| int | inc_a1, | ||
| double * | a2, | ||
| int | inc_a2, | ||
| double * | a3, | ||
| int | inc_a3, | ||
| double * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort(), and v2df_t::v.
{
	/*
	 * Three-term axpy over n contiguous doubles:
	 *   y := y + alpha1 * x1 + alpha2 * x2 + alpha3 * x3
	 *
	 * NOTE(review): the identifiers used here (alpha1..3, x1..3, y and the
	 * matching inc_* strides) differ from the names shown in the prototype
	 * listed for this function -- presumably the definition names its
	 * parameters this way; confirm against the real source.
	 */
	v2df_t alpha1_vec, alpha2_vec, alpha3_vec;
	v2df_t x1_lo, x2_lo, x3_lo, y_lo;
	v2df_t x1_hi, x2_hi, x3_hi, y_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x1 == 1 && inc_x2 == 1 && inc_x3 == 1 && inc_y == 1 ) )
		bl1_abort();

	// If y is off a 16-byte boundary, peel one scalar element; x1, x2 and
	// x3 must then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) y % 16 != 0 )
	{
		const int x1_on_boundary = ( ( unsigned long ) x1 % 16 == 0 );
		const int x2_on_boundary = ( ( unsigned long ) x2 % 16 == 0 );
		const int x3_on_boundary = ( ( unsigned long ) x3 % 16 == 0 );

		if ( x1_on_boundary || x2_on_boundary || x3_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict x1p = x1;
	double* restrict x2p = x2;
	double* restrict x3p = x3;
	double* restrict yp  = y;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double s1 = *alpha1;
		const double s2 = *alpha2;
		const double s3 = *alpha3;
		const double e1 = *x1p;
		const double e2 = *x2p;
		const double e3 = *x3p;

		*yp += s1 * e1 + s2 * e2 + s3 * e3;

		x1p += inc_x1;
		x2p += inc_x2;
		x3p += inc_x3;
		yp  += inc_y;
	}

	// Broadcast the three scalars into both lanes of a vector.
	alpha1_vec.v = _mm_loaddup_pd( alpha1 );
	alpha2_vec.v = _mm_loaddup_pd( alpha2 );
	alpha3_vec.v = _mm_loaddup_pd( alpha3 );

	// Main loop: four elements per pass; each 2-wide half is loaded,
	// updated and stored before the next half is touched.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		x1_lo.v = _mm_load_pd( &x1p[0] );
		x2_lo.v = _mm_load_pd( &x2p[0] );
		x3_lo.v = _mm_load_pd( &x3p[0] );
		y_lo.v  = _mm_load_pd( &yp[0] );

		y_lo.v += alpha1_vec.v * x1_lo.v + alpha2_vec.v * x2_lo.v + alpha3_vec.v * x3_lo.v;

		_mm_store_pd( &yp[0], y_lo.v );

		x1_hi.v = _mm_load_pd( &x1p[2] );
		x2_hi.v = _mm_load_pd( &x2p[2] );
		x3_hi.v = _mm_load_pd( &x3p[2] );
		y_hi.v  = _mm_load_pd( &yp[2] );

		y_hi.v += alpha1_vec.v * x1_hi.v + alpha2_vec.v * x2_hi.v + alpha3_vec.v * x3_hi.v;

		_mm_store_pd( &yp[2], y_hi.v );

		x1p += 4;
		x2p += 4;
		x3p += 4;
		yp  += 4;
	}

	// Scalar tail.
	if ( n_left > 0 )
	{
		const double s1 = *alpha1;
		const double s2 = *alpha2;
		const double s3 = *alpha3;

		for ( int k = n_left; k > 0; --k )
		{
			const double e1 = *x1p;
			const double e2 = *x2p;
			const double e3 = *x3p;

			*yp += s1 * e1 + s2 * e2 + s3 * e3;

			x1p += inc_x1;
			x2p += inc_x2;
			x3p += inc_x3;
			yp  += inc_y;
		}
	}
}
| void bl1_ddotaxmyv2 | ( | int | n, |
| double * | alpha, | ||
| double * | beta, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | u, | ||
| int | inc_u, | ||
| double * | rho, | ||
| double * | y, | ||
| int | inc_y, | ||
| double * | z, | ||
| int | inc_z | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
Referenced by FLA_Fused_Uhu_Yhu_Zhu_opd_var1().
{
	/*
	 * Fused kernel over n contiguous doubles:
	 *   rho := sum over i of x[i] * u[i]
	 *   y   := y - alpha * x
	 *   z   := z - beta  * x
	 * *rho is overwritten with the sum; its incoming value is not read.
	 */
	v2df_t alpha_vec, beta_vec;
	v2df_t dot_vec;
	v2df_t xv, uv, yv, zv;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x == 1 && inc_u == 1 && inc_y == 1 && inc_z == 1 ) )
		bl1_abort();

	// If z is off a 16-byte boundary, peel one scalar element; x, u and y
	// must then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) z % 16 != 0 )
	{
		const int x_on_boundary = ( ( unsigned long ) x % 16 == 0 );
		const int u_on_boundary = ( ( unsigned long ) u % 16 == 0 );
		const int y_on_boundary = ( ( unsigned long ) y % 16 == 0 );

		if ( x_on_boundary || u_on_boundary || y_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	// This kernel processes two elements per vector pass (not four).
	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 2;
	const int n_left = n_rest % 2;

	double* restrict xp = x;
	double* restrict up = u;
	double* restrict yp = y;
	double* restrict zp = z;

	double dot = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double a  = *alpha;
		const double b  = *beta;
		const double xi = *xp;
		const double ui = *up;

		dot += xi * ui;
		*yp  = *yp - a * xi;
		*zp  = *zp - b * xi;

		xp += inc_x;
		up += inc_u;
		yp += inc_y;
		zp += inc_z;
	}

	// Broadcast both scalars; start the vector accumulator at zero.
	alpha_vec.v = _mm_loaddup_pd( alpha );
	beta_vec.v  = _mm_loaddup_pd( beta );
	dot_vec.v   = _mm_setzero_pd();

	for ( int pairs = n_run; pairs > 0; --pairs )
	{
		xv.v = _mm_load_pd( xp );
		uv.v = _mm_load_pd( up );
		yv.v = _mm_load_pd( yp );
		zv.v = _mm_load_pd( zp );

		dot_vec.v = dot_vec.v + xv.v * uv.v;
		yv.v      = yv.v - alpha_vec.v * xv.v;
		zv.v      = zv.v - beta_vec.v * xv.v;

		_mm_store_pd( yp, yv.v );
		_mm_store_pd( zp, zv.v );

		xp += 2;
		up += 2;
		yp += 2;
		zp += 2;
	}

	// Fold the two vector lanes into the scalar accumulator before the tail.
	dot += dot_vec.d[0] + dot_vec.d[1];

	// Scalar tail.
	if ( n_left > 0 )
	{
		const double a = *alpha;
		const double b = *beta;

		for ( int k = n_left; k > 0; --k )
		{
			const double xi = *xp;
			const double ui = *up;

			dot += xi * ui;
			*yp  = *yp - a * xi;
			*zp  = *zp - b * xi;

			xp += inc_x;
			up += inc_u;
			yp += inc_y;
			zp += inc_z;
		}
	}

	*rho = dot;
}
| void bl1_ddotaxpy | ( | int | n, |
| double * | a, | ||
| int | inc_a, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | kappa, | ||
| double * | rho, | ||
| double * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().
{
	/*
	 * Fused kernel over n contiguous doubles:
	 *   rho := sum over i of a[i] * x[i]
	 *   w   := w + kappa * a
	 * *rho is overwritten with the sum; its incoming value is not read.
	 */
	v2df_t kappa_vec, dot_vec;
	v2df_t a_lo, x_lo, w_lo;
	v2df_t a_hi, x_hi, w_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_a == 1 && inc_x == 1 && inc_w == 1 ) )
		bl1_abort();

	// If a is off a 16-byte boundary, peel one scalar element; x and w must
	// then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) a % 16 != 0 )
	{
		const int x_on_boundary = ( ( unsigned long ) x % 16 == 0 );
		const int w_on_boundary = ( ( unsigned long ) w % 16 == 0 );

		if ( x_on_boundary || w_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict ap = a;
	double* restrict xp = x;
	double* restrict wp = w;

	double dot = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double k  = *kappa;
		const double ai = *ap;
		const double xi = *xp;
		double       wi = *wp;

		dot += ai * xi;
		wi  += k * ai;
		*wp  = wi;

		ap += inc_a;
		xp += inc_x;
		wp += inc_w;
	}

	dot_vec.v   = _mm_setzero_pd();
	kappa_vec.v = _mm_loaddup_pd( kappa );

	// Main loop: four elements per pass, as two 2-wide halves.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		a_lo.v = _mm_load_pd( &ap[0] );
		x_lo.v = _mm_load_pd( &xp[0] );
		w_lo.v = _mm_load_pd( &wp[0] );

		a_hi.v = _mm_load_pd( &ap[2] );
		x_hi.v = _mm_load_pd( &xp[2] );
		w_hi.v = _mm_load_pd( &wp[2] );

		dot_vec.v = dot_vec.v + a_lo.v * x_lo.v;
		w_lo.v    = w_lo.v + kappa_vec.v * a_lo.v;
		_mm_store_pd( &wp[0], w_lo.v );

		dot_vec.v = dot_vec.v + a_hi.v * x_hi.v;
		w_hi.v    = w_hi.v + kappa_vec.v * a_hi.v;
		_mm_store_pd( &wp[2], w_hi.v );

		ap += 4;
		xp += 4;
		wp += 4;
	}

	// Scalar tail; kappa is re-read on every iteration.
	for ( int k_left = n_left; k_left > 0; --k_left )
	{
		const double k  = *kappa;
		const double ai = *ap;
		const double xi = *xp;
		double       wi = *wp;

		dot += ai * xi;
		wi  += k * ai;
		*wp  = wi;

		ap += inc_a;
		xp += inc_x;
		wp += inc_w;
	}

	// The vector lanes are folded in last, after the scalar tail.
	dot += dot_vec.d[0] + dot_vec.d[1];

	*rho = dot;
}
| void bl1_ddotsv2 | ( | conj1_t | conjxy, |
| int | n, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | y, | ||
| int | inc_y, | ||
| double * | z, | ||
| int | inc_z, | ||
| double * | beta, | ||
| double * | rho_xz, | ||
| double * | rho_yz | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UYx_ZVx_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().
{
	/*
	 * Two dot products against a shared vector z, over n contiguous doubles:
	 *   rho_xz := beta * rho_xz + sum over i of x[i] * z[i]
	 *   rho_yz := beta * rho_yz + sum over i of y[i] * z[i]
	 * conjxy is not consulted anywhere in this body.
	 */
	v2df_t dot_xz_vec, dot_yz_vec;
	v2df_t x_lo, y_lo, z_lo;
	v2df_t x_hi, y_hi, z_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x == 1 && inc_y == 1 && inc_z == 1 ) )
		bl1_abort();

	// If z is off a 16-byte boundary, peel one scalar element; x and y must
	// then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) z % 16 != 0 )
	{
		const int x_on_boundary = ( ( unsigned long ) x % 16 == 0 );
		const int y_on_boundary = ( ( unsigned long ) y % 16 == 0 );

		if ( x_on_boundary || y_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict xp = x;
	double* restrict yp = y;
	double* restrict zp = z;

	double dot_xz = 0.0;
	double dot_yz = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double xi = *xp;
		const double yi = *yp;
		const double zi = *zp;

		dot_xz += xi * zi;
		dot_yz += yi * zi;

		xp += inc_x;
		yp += inc_y;
		zp += inc_z;
	}

	dot_xz_vec.v = _mm_setzero_pd();
	dot_yz_vec.v = _mm_setzero_pd();

	// Main loop: four elements per pass, as two 2-wide halves.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		x_lo.v = _mm_load_pd( &xp[0] );
		y_lo.v = _mm_load_pd( &yp[0] );
		z_lo.v = _mm_load_pd( &zp[0] );

		x_hi.v = _mm_load_pd( &xp[2] );
		y_hi.v = _mm_load_pd( &yp[2] );
		z_hi.v = _mm_load_pd( &zp[2] );

		dot_xz_vec.v = dot_xz_vec.v + x_lo.v * z_lo.v;
		dot_yz_vec.v = dot_yz_vec.v + y_lo.v * z_lo.v;

		dot_xz_vec.v = dot_xz_vec.v + x_hi.v * z_hi.v;
		dot_yz_vec.v = dot_yz_vec.v + y_hi.v * z_hi.v;

		xp += 4;
		yp += 4;
		zp += 4;
	}

	// Fold the vector lanes into the scalar sums before the tail.
	dot_xz += dot_xz_vec.d[0] + dot_xz_vec.d[1];
	dot_yz += dot_yz_vec.d[0] + dot_yz_vec.d[1];

	// Scalar tail.
	for ( int k = n_left; k > 0; --k )
	{
		const double xi = *xp;
		const double yi = *yp;
		const double zi = *zp;

		dot_xz += xi * zi;
		dot_yz += yi * zi;

		xp += inc_x;
		yp += inc_y;
		zp += inc_z;
	}

	// Scale the incoming results by beta and add the new sums.
	*rho_xz = ( *beta ) * ( *rho_xz ) + dot_xz;
	*rho_yz = ( *beta ) * ( *rho_yz ) + dot_yz;
}
| void bl1_ddotsv3 | ( | conj1_t | conjxyw, |
| int | n, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | y, | ||
| int | inc_y, | ||
| double * | w, | ||
| int | inc_w, | ||
| double * | z, | ||
| int | inc_z, | ||
| double * | beta, | ||
| double * | rho_xz, | ||
| double * | rho_yz, | ||
| double * | rho_wz | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
{
	/*
	 * Three dot products against a shared vector z, over n contiguous
	 * doubles:
	 *   rho_xz := beta * rho_xz + sum over i of x[i] * z[i]
	 *   rho_yz := beta * rho_yz + sum over i of y[i] * z[i]
	 *   rho_wz := beta * rho_wz + sum over i of w[i] * z[i]
	 * conjxyw is not consulted anywhere in this body.
	 */
	v2df_t dot_xz_vec, dot_yz_vec, dot_wz_vec;
	v2df_t x_lo, y_lo, w_lo, z_lo;
	v2df_t x_hi, y_hi, w_hi, z_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_x == 1 && inc_y == 1 && inc_w == 1 && inc_z == 1 ) )
		bl1_abort();

	// If z is off a 16-byte boundary, peel one scalar element; x, y and w
	// must then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) z % 16 != 0 )
	{
		const int x_on_boundary = ( ( unsigned long ) x % 16 == 0 );
		const int y_on_boundary = ( ( unsigned long ) y % 16 == 0 );
		const int w_on_boundary = ( ( unsigned long ) w % 16 == 0 );

		if ( x_on_boundary || y_on_boundary || w_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict xp = x;
	double* restrict yp = y;
	double* restrict wp = w;
	double* restrict zp = z;

	double dot_xz = 0.0;
	double dot_yz = 0.0;
	double dot_wz = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double xi = *xp;
		const double yi = *yp;
		const double wi = *wp;
		const double zi = *zp;

		dot_xz += xi * zi;
		dot_yz += yi * zi;
		dot_wz += wi * zi;

		xp += inc_x;
		yp += inc_y;
		wp += inc_w;
		zp += inc_z;
	}

	dot_xz_vec.v = _mm_setzero_pd();
	dot_yz_vec.v = _mm_setzero_pd();
	dot_wz_vec.v = _mm_setzero_pd();

	// Main loop: four elements per pass; the first 2-wide half is loaded
	// and accumulated before the second half is loaded.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		x_lo.v = _mm_load_pd( &xp[0] );
		y_lo.v = _mm_load_pd( &yp[0] );
		w_lo.v = _mm_load_pd( &wp[0] );
		z_lo.v = _mm_load_pd( &zp[0] );

		dot_xz_vec.v = dot_xz_vec.v + x_lo.v * z_lo.v;
		dot_yz_vec.v = dot_yz_vec.v + y_lo.v * z_lo.v;
		dot_wz_vec.v = dot_wz_vec.v + w_lo.v * z_lo.v;

		x_hi.v = _mm_load_pd( &xp[2] );
		y_hi.v = _mm_load_pd( &yp[2] );
		w_hi.v = _mm_load_pd( &wp[2] );
		z_hi.v = _mm_load_pd( &zp[2] );

		dot_xz_vec.v = dot_xz_vec.v + x_hi.v * z_hi.v;
		dot_yz_vec.v = dot_yz_vec.v + y_hi.v * z_hi.v;
		dot_wz_vec.v = dot_wz_vec.v + w_hi.v * z_hi.v;

		xp += 4;
		yp += 4;
		wp += 4;
		zp += 4;
	}

	// Fold the vector lanes into the scalar sums before the tail.
	dot_xz += dot_xz_vec.d[0] + dot_xz_vec.d[1];
	dot_yz += dot_yz_vec.d[0] + dot_yz_vec.d[1];
	dot_wz += dot_wz_vec.d[0] + dot_wz_vec.d[1];

	// Scalar tail.
	for ( int k = n_left; k > 0; --k )
	{
		const double xi = *xp;
		const double yi = *yp;
		const double wi = *wp;
		const double zi = *zp;

		dot_xz += xi * zi;
		dot_yz += yi * zi;
		dot_wz += wi * zi;

		xp += inc_x;
		yp += inc_y;
		wp += inc_w;
		zp += inc_z;
	}

	// Scale the incoming results by beta and add the new sums.
	*rho_xz = ( *beta ) * ( *rho_xz ) + dot_xz;
	*rho_yz = ( *beta ) * ( *rho_yz ) + dot_yz;
	*rho_wz = ( *beta ) * ( *rho_wz ) + dot_wz;
}
| void bl1_ddotv2axpyv2b | ( | int | n, |
| double * | a1, | ||
| int | inc_a1, | ||
| double * | a2, | ||
| int | inc_a2, | ||
| double * | x, | ||
| int | inc_x, | ||
| double * | kappa1, | ||
| double * | kappa2, | ||
| double * | rho1, | ||
| double * | rho2, | ||
| double * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort(), v2df_t::d, and v2df_t::v.
Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().
{
	/*
	 * Fused kernel over n contiguous doubles:
	 *   rho1 := sum over i of a1[i] * x[i]
	 *   rho2 := sum over i of a2[i] * x[i]
	 *   w    := w + kappa1 * a1 + kappa2 * a2
	 * *rho1 and *rho2 are overwritten; their incoming values are not read.
	 */
	v2df_t kappa1_vec, kappa2_vec;
	v2df_t dot1_vec, dot2_vec;
	v2df_t a1_lo, a2_lo, x_lo, w_lo;
	v2df_t a1_hi, a2_hi, x_hi, w_hi;

	// Only unit strides are handled; anything else aborts.
	if ( !( inc_a1 == 1 && inc_a2 == 1 && inc_x == 1 && inc_w == 1 ) )
		bl1_abort();

	// If a1 is off a 16-byte boundary, peel one scalar element; a2, x and w
	// must then be off the boundary too, otherwise abort.
	int n_pre = 0;
	if ( ( unsigned long ) a1 % 16 != 0 )
	{
		const int a2_on_boundary = ( ( unsigned long ) a2 % 16 == 0 );
		const int x_on_boundary  = ( ( unsigned long ) x % 16 == 0 );
		const int w_on_boundary  = ( ( unsigned long ) w % 16 == 0 );

		if ( a2_on_boundary || x_on_boundary || w_on_boundary )
			bl1_abort();

		n_pre = 1;
	}

	const int n_rest = n - n_pre;
	const int n_run  = n_rest / 4;
	const int n_left = n_rest % 4;

	double* restrict a1p = a1;
	double* restrict a2p = a2;
	double* restrict xp  = x;
	double* restrict wp  = w;

	double dot1 = 0.0;
	double dot2 = 0.0;

	// Peeled leading element.
	if ( n_pre == 1 )
	{
		const double k1  = *kappa1;
		const double k2  = *kappa2;
		const double a1i = *a1p;
		const double a2i = *a2p;
		const double xi  = *xp;
		double       wi  = *wp;

		dot1 += a1i * xi;
		wi   += k1 * a1i;
		dot2 += a2i * xi;
		wi   += k2 * a2i;

		*wp = wi;

		a1p += inc_a1;
		a2p += inc_a2;
		xp  += inc_x;
		wp  += inc_w;
	}

	dot1_vec.v   = _mm_setzero_pd();
	dot2_vec.v   = _mm_setzero_pd();
	kappa1_vec.v = _mm_loaddup_pd( kappa1 );
	kappa2_vec.v = _mm_loaddup_pd( kappa2 );

	// Main loop: four elements per pass; each 2-wide half is loaded,
	// accumulated and stored before the next half is touched.
	for ( int blocks = n_run; blocks > 0; --blocks )
	{
		a1_lo.v = _mm_load_pd( &a1p[0] );
		a2_lo.v = _mm_load_pd( &a2p[0] );
		x_lo.v  = _mm_load_pd( &xp[0] );
		w_lo.v  = _mm_load_pd( &wp[0] );

		dot1_vec.v = dot1_vec.v + a1_lo.v * x_lo.v;
		w_lo.v     = w_lo.v + kappa1_vec.v * a1_lo.v;
		dot2_vec.v = dot2_vec.v + a2_lo.v * x_lo.v;
		w_lo.v     = w_lo.v + kappa2_vec.v * a2_lo.v;

		_mm_store_pd( &wp[0], w_lo.v );

		a1_hi.v = _mm_load_pd( &a1p[2] );
		a2_hi.v = _mm_load_pd( &a2p[2] );
		x_hi.v  = _mm_load_pd( &xp[2] );
		w_hi.v  = _mm_load_pd( &wp[2] );

		dot1_vec.v = dot1_vec.v + a1_hi.v * x_hi.v;
		w_hi.v     = w_hi.v + kappa1_vec.v * a1_hi.v;
		dot2_vec.v = dot2_vec.v + a2_hi.v * x_hi.v;
		w_hi.v     = w_hi.v + kappa2_vec.v * a2_hi.v;

		_mm_store_pd( &wp[2], w_hi.v );

		a1p += 4;
		a2p += 4;
		xp  += 4;
		wp  += 4;
	}

	// Scalar tail; kappa1 and kappa2 are re-read on every iteration.
	for ( int k_left = n_left; k_left > 0; --k_left )
	{
		const double k1  = *kappa1;
		const double k2  = *kappa2;
		const double a1i = *a1p;
		const double a2i = *a2p;
		const double xi  = *xp;
		double       wi  = *wp;

		dot1 += a1i * xi;
		wi   += k1 * a1i;
		dot2 += a2i * xi;
		wi   += k2 * a2i;

		*wp = wi;

		a1p += inc_a1;
		a2p += inc_a2;
		xp  += inc_x;
		wp  += inc_w;
	}

	// The vector lanes are folded in last, after the scalar tail.
	dot1 += dot1_vec.d[0] + dot1_vec.d[1];
	dot2 += dot2_vec.d[0] + dot2_vec.d[1];

	*rho1 = dot1;
	*rho2 = dot2;
}
| void bl1_saxmyv2 | ( | conj1_t | conjx, |
| int | n, | ||
| float * | alpha, | ||
| float * | beta, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | y, | ||
| int | inc_y, | ||
| float * | z, | ||
| int | inc_z | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_saxmyv2): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_saxpyv2b | ( | int | n, |
| float * | beta1, | ||
| float * | beta2, | ||
| float * | a1, | ||
| int | inc_a1, | ||
| float * | a2, | ||
| int | inc_a2, | ||
| float * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_saxpyv2b): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_saxpyv2bdotaxpy | ( | int | n, |
| float * | beta, | ||
| float * | u, | ||
| int | inc_u, | ||
| float * | gamma, | ||
| float * | z, | ||
| int | inc_z, | ||
| float * | a, | ||
| int | inc_a, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | kappa, | ||
| float * | rho, | ||
| float * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_saxpyv2bdotaxpy): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_saxpyv3b | ( | int | n, |
| float * | beta1, | ||
| float * | beta2, | ||
| float * | beta3, | ||
| float * | a1, | ||
| int | inc_a1, | ||
| float * | a2, | ||
| int | inc_a2, | ||
| float * | a3, | ||
| int | inc_a3, | ||
| float * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_saxpyv3b): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_sdotaxmyv2 | ( | int | n, |
| float * | alpha, | ||
| float * | beta, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | u, | ||
| int | inc_u, | ||
| float * | rho, | ||
| float * | y, | ||
| int | inc_y, | ||
| float * | z, | ||
| int | inc_z | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_sdotaxmyv2): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_sdotaxpy | ( | int | n, |
| float * | a, | ||
| int | inc_a, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | kappa, | ||
| float * | rho, | ||
| float * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_sdotaxpy): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_sdotsv2 | ( | conj1_t | conjxy, |
| int | n, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | y, | ||
| int | inc_y, | ||
| float * | z, | ||
| int | inc_z, | ||
| float * | beta, | ||
| float * | rho_xz, | ||
| float * | rho_yz | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_sdotsv2): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_sdotsv3 | ( | conj1_t | conjxyw, |
| int | n, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | y, | ||
| int | inc_y, | ||
| float * | w, | ||
| int | inc_w, | ||
| float * | z, | ||
| int | inc_z, | ||
| float * | beta, | ||
| float * | rho_xz, | ||
| float * | rho_yz, | ||
| float * | rho_wz | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_sdotsv3): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_sdotv2axpyv2b | ( | int | n, |
| float * | a1, | ||
| int | inc_a1, | ||
| float * | a2, | ||
| int | inc_a2, | ||
| float * | x, | ||
| int | inc_x, | ||
| float * | kappa1, | ||
| float * | kappa2, | ||
| float * | rho1, | ||
| float * | rho2, | ||
| float * | w, | ||
| int | inc_w | ||
| ) |
References bl1_abort().
{
// Float variant (bl1_sdotv2axpyv2b): no arithmetic is done here; the body only calls bl1_abort().
bl1_abort();
}
| void bl1_zaxmyv2 | ( | conj1_t | conjx, |
| int | n, | ||
| dcomplex * | alpha, | ||
| dcomplex * | beta, | ||
| dcomplex * | x, | ||
| int | inc_x, | ||
| dcomplex * | y, | ||
| int | inc_y, | ||
| dcomplex * | z, | ||
| int | inc_z | ||
| ) |
Referenced by FLA_Fused_UYx_ZVx_opz_var1().
| void bl1_zaxpyv2bdotaxpy | ( | int | n, |
| dcomplex * | beta, | ||
| dcomplex * | u, | ||
| int | inc_u, | ||
| dcomplex * | gamma, | ||
| dcomplex * | z, | ||
| int | inc_z, | ||
| dcomplex * | a, | ||
| int | inc_a, | ||
| dcomplex * | x, | ||
| int | inc_x, | ||
| dcomplex * | kappa, | ||
| dcomplex * | rho, | ||
| dcomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_zaxpyv2bdots | ( | int | n, |
| dcomplex * | alpha1, | ||
| dcomplex * | alpha2, | ||
| dcomplex * | x1, | ||
| int | inc_x1, | ||
| dcomplex * | x2, | ||
| int | inc_x2, | ||
| dcomplex * | y, | ||
| int | inc_y, | ||
| dcomplex * | u, | ||
| int | inc_u, | ||
| dcomplex * | beta, | ||
| dcomplex * | rho | ||
| ) |
| void bl1_zaxpyv3b | ( | int | n, |
| dcomplex * | beta1, | ||
| dcomplex * | beta2, | ||
| dcomplex * | beta3, | ||
| dcomplex * | a1, | ||
| int | inc_a1, | ||
| dcomplex * | a2, | ||
| int | inc_a2, | ||
| dcomplex * | a3, | ||
| int | inc_a3, | ||
| dcomplex * | w, | ||
| int | inc_w | ||
| ) |
| void bl1_zdotaxmyv2 | ( | int | n, |
| dcomplex * | alpha, | ||
| dcomplex * | beta, | ||
| dcomplex * | x, | ||
| int | inc_x, | ||
| dcomplex * | u, | ||
| int | inc_u, | ||
| dcomplex * | rho, | ||
| dcomplex * | y, | ||
| int | inc_y, | ||
| dcomplex * | z, | ||
| int | inc_z | ||
| ) |
| void bl1_zdotsv3 | ( | conj1_t | conjxyw, |
| int | n, | ||
| dcomplex * | x, | ||
| int | inc_x, | ||
| dcomplex * | y, | ||
| int | inc_y, | ||
| dcomplex * | w, | ||
| int | inc_w, | ||
| dcomplex * | z, | ||
| int | inc_z, | ||
| dcomplex * | beta, | ||
| dcomplex * | rho_xz, | ||
| dcomplex * | rho_yz, | ||
| dcomplex * | rho_wz | ||
| ) |
Referenced by FLA_Fused_Uhu_Yhu_Zhu_opz_var1().
| void bl1_zdotv2axpyv2b | ( | int | n, |
| dcomplex * | a1, | ||
| int | inc_a1, | ||
| dcomplex * | a2, | ||
| int | inc_a2, | ||
| dcomplex * | x, | ||
| int | inc_x, | ||
| dcomplex * | kappa1, | ||
| dcomplex * | kappa2, | ||
| dcomplex * | rho1, | ||
| dcomplex * | rho2, | ||
| dcomplex * | w, | ||
| int | inc_w | ||
| ) |
Referenced by FLA_Fused_Ahx_Ax_opz_var1().
1.7.6.1