libflame
12600
|
Functions | |
FLA_Error | FLA_Gemm_nh_blk_var1 (FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl) |
FLA_Error FLA_Gemm_nh_blk_var1 | ( | FLA_Obj | alpha, |
FLA_Obj | A, | ||
FLA_Obj | B, | ||
FLA_Obj | beta, | ||
FLA_Obj | C, | ||
fla_gemm_t * | cntl | ||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_Part_2x1(), and FLA_Repart_2x1_to_3x1().
Referenced by FLA_Gemm_nh().
{ FLA_Obj AT, A0, AB, A1, A2; FLA_Obj CT, C0, CB, C1, C2; dim_t b; FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( C, &CT, &CB, 0, FLA_TOP ); while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* ** */ &A1, AB, &A2, b, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( CT, &C0, /* ** */ /* ** */ &C1, CB, &C2, b, FLA_BOTTOM ); /*------------------------------------------------------------*/ /* C1 = alpha * A1 * B' + C1; */ FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, alpha, A1, B, beta, C1, FLA_Cntl_sub_gemm( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &AT, A0, A1, /* ** */ /* ** */ &AB, A2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &CT, C0, C1, /* ** */ /* ** */ &CB, C2, FLA_TOP ); } return FLA_SUCCESS; }