libflame
12600
Go to the source code of this file.
References FLA_Abort(), FLA_Apply_pivots_internal(), FLA_Print_message(), FLASH_Obj_depth(), FLASH_Queue_disable(), FLASH_Queue_enable(), and FLASH_Queue_get_enabled().
Referenced by FLASH_LU_piv_solve().
{
  FLA_Error r_val;
  FLA_Bool  enable_supermatrix;

  // Check parameters.
  // *** The current Apply_pivots algorithm implemented assumes that
  // the matrix has a hierarchical depth of 1. We check for that here, because
  // we anticipate that we'll use a more general algorithm in the future, and
  // we don't want to forget to remove the constraint. ***
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_Apply_pivots() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Remember whether SuperMatrix is currently enabled.
  enable_supermatrix = FLASH_Queue_get_enabled();

  // Temporarily disable SuperMatrix.
  FLASH_Queue_disable();

  // Invoke FLA_Apply_pivots_internal() with large control tree.
  r_val = FLA_Apply_pivots_internal( side, trans, p, A, flash_appiv_cntl );

  // Restore SuperMatrix to its previous status.
  if ( enable_supermatrix )
    FLASH_Queue_enable();

  return r_val;
}
FLA_Error FLASH_Apply_Q2_UT( FLA_Side   side,
                             FLA_Trans  trans,
                             FLA_Direct direct,
                             FLA_Store  storev,
                             FLA_Obj    D,
                             FLA_Obj    T,
                             FLA_Obj    W,
                             FLA_Obj    C,
                             FLA_Obj    E )
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Apply_Q2_UT_check( side, trans, direct, storev, D, T, W, C, E );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_Apply_Q2_UT_internal() with the standard control tree.
  r_val = FLA_Apply_Q2_UT_internal( side, trans, direct, storev,
                                    D, T, W, C, E, flash_apq2ut_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Apply_Q_UT( FLA_Side   side,
                            FLA_Trans  trans,
                            FLA_Direct direct,
                            FLA_Store  storev,
                            FLA_Obj    A,
                            FLA_Obj    T,
                            FLA_Obj    W,
                            FLA_Obj    B )
Referenced by FLASH_LQ_UT_solve(), and FLASH_QR_UT_solve().
{
  FLA_Error r_val;
  dim_t     b_alg;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Apply_Q_UT_check( side, trans, direct, storev, A, T, W, B );

  // Inspect the length of TTL to get the blocksize used by the QR/LQ
  // factorization, which will be our inner blocksize for Apply_Q_UT.
  b_alg = FLASH_Obj_scalar_length_tl( T );

  // The traditional (non-incremental) Apply_Q_UT algorithm-by-blocks
  // requires that the algorithmic blocksize be equal to the storage
  // blocksize.
  if ( b_alg != FLASH_Obj_scalar_width_tl( T ) )
  {
    FLA_Print_message( "FLASH_Apply_Q_UT() requires that b_alg == b_store",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Adjust the blocksize of the control tree node for the flat subproblem.
  if ( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ) != NULL )
    FLA_Blocksize_set( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ),
                       b_alg, b_alg, b_alg, b_alg );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_Apply_Q_UT_internal() with the standard control tree.
  r_val = FLA_Apply_Q_UT_internal( side, trans, direct, storev,
                                   A, T, W, B, flash_apqut_cntl_blas );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Apply_Q_UT_inc( FLA_Side   side,
                                FLA_Trans  trans,
                                FLA_Direct direct,
                                FLA_Store  storev,
                                FLA_Obj    A,
                                FLA_Obj    TW,
                                FLA_Obj    W1,
                                FLA_Obj    B )
Referenced by FLA_Apply_CAQ_UT_inc_apply_panels(), and FLASH_QR_UT_inc_solve().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Apply_Q_UT_inc_check( side, trans, direct, storev, A, TW, W1, B );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_Apply_Q_UT_inc_internal() with the standard control tree.
  r_val = FLA_Apply_Q_UT_inc_internal( side, trans, direct, storev,
                                       A, TW, W1, B, flash_apqutinc_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Chol( FLA_Uplo uplo,
                      FLA_Obj  A )
References FLA_Check_error_level(), FLA_Chol_check(), FLA_Chol_internal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Chol_check( uplo, A );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_Chol_internal( uplo, A, flash_chol_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
{
  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Chol_solve_check( uplo, A, B, X );

  // Solve A X = B given the Cholesky factor stored in A: two triangular
  // solves, ordered according to which triangle holds the factor.
  FLASH_Copy( B, X );

  if ( uplo == FLA_LOWER_TRIANGULAR )
  {
    FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );
  }
  else // if ( uplo == FLA_UPPER_TRIANGULAR )
  {
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                FLA_NONUNIT_DIAG, FLA_ONE, A, X );
  }

  return FLA_SUCCESS;
}
References FLA_Abort(), FLA_Check_error_level(), FLA_Eig_gest_check(), FLA_Eig_gest_internal(), FLA_Print_message(), FLASH_Obj_create_conf_to(), FLASH_Obj_free(), FLASH_Queue_begin(), FLASH_Queue_end(), and FLASH_Queue_stack_depth().
{
  FLA_Obj   Y;
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Eig_gest_check( inv, uplo, A, B );

  // The temporary matrix object Y must exist when execution occurs, NOT just
  // when enqueuing occurs. So if the SuperMatrix stack depth is positive, then
  // it means the user has declared a "parallel region" in his code, and thus
  // execution won't occur until sometime after FLASH_Eig_gest() returns, at
  // which time Y will have been deallocated. Thus, we disallow this scenario
  // for now, until we can think of a more general solution.
  if ( FLASH_Queue_stack_depth() > 0 )
  {
    FLA_Print_message( "FLASH_Eig_gest() MUST be invoked with standalone parallelism, and may not be called from within a user-level parallel region",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_Eig_gest_internal( inv, uplo, A, Y, B, flash_eig_gest_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  FLASH_Obj_free( &Y );

  return r_val;
}
References FLA_Abort(), FLA_Check_error_level(), FLA_FS_incpiv_check(), FLA_Print_message(), FLASH_FS_incpiv_aux1(), FLASH_Obj_depth(), FLASH_Obj_scalar_width_tl(), FLASH_Queue_disable(), FLASH_Queue_enable(), and FLASH_Queue_get_enabled().
Referenced by FLASH_LU_incpiv_solve().
{
  dim_t     nb_alg;
  FLA_Error r_val;
  FLA_Bool  enable_supermatrix;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_FS_incpiv_check( A, p, L, b );

  // *** The current forward substitution algorithm implemented assumes that
  // the matrix has a hierarchical depth of 1. We check for that here, because
  // we anticipate that we'll use a more general algorithm in the future, and
  // we don't want to forget to remove the constraint. ***
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_FS_incpiv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Inspect the width of a the top-left element of L to get the algorithmic
  // blocksize we'll use throughout the LU_incpiv algorithm.
  nb_alg = FLASH_Obj_scalar_width_tl( L );

  // Remember whether SuperMatrix is currently enabled.
  enable_supermatrix = FLASH_Queue_get_enabled();

  // Temporarily disable SuperMatrix.
  FLASH_Queue_disable();

  // Execute tasks.
  r_val = FLASH_FS_incpiv_aux1( A, p, L, b, nb_alg );

  // Restore SuperMatrix to its previous status.
  if ( enable_supermatrix )
    FLASH_Queue_enable();

  return r_val;
}
FLA_Error FLASH_LQ2_UT( FLA_Obj B,
                        FLA_Obj C,
                        FLA_Obj T )
FLA_Error FLASH_LQ_UT_inv( FLA_Obj A,
                           FLA_Obj TW )
FLA_Error FLASH_LU_incpiv( FLA_Obj A,
                           FLA_Obj p,
                           FLA_Obj L )
References FLA_Abort(), FLA_Check_error_level(), FLA_LU_incpiv_check(), FLA_Print_message(), FLASH_LU_incpiv_noopt(), FLASH_LU_incpiv_opt1(), FLASH_Obj_depth(), and FLASH_Queue_stack_depth().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LU_incpiv_check( A, p, L );

  // *** The current LU_incpiv algorithm implemented assumes that
  // the matrix has a hierarchical depth of 1. We check for that here, because
  // we anticipate that we'll use a more general algorithm in the future, and
  // we don't want to forget to remove the constraint. ***
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_LU_incpiv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Use the optimized variant only when not already inside a user-level
  // parallel region.
  if ( FLASH_Queue_stack_depth() == 0 )
    r_val = FLASH_LU_incpiv_opt1( A, p, L );
  else
    r_val = FLASH_LU_incpiv_noopt( A, p, L );

  return r_val;
}
References FLA_Check_error_level(), FLA_LU_nopiv_check(), FLA_LU_nopiv_internal(), FLASH_LU_find_zero_on_diagonal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LU_nopiv_check( A );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_LU_nopiv_internal( A, flash_lu_nopiv_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  // Check for singularity.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    r_val = FLASH_LU_find_zero_on_diagonal( A );

  return r_val;
}
FLA_Error FLASH_LU_nopiv_solve( FLA_Obj A,
                                FLA_Obj B,
                                FLA_Obj X )
{
  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LU_nopiv_solve_check( A, B, X );

  // Solve A X = B given the LU factors stored in A: a unit lower-triangular
  // solve followed by a non-unit upper-triangular solve.
  FLASH_Copy( B, X );

  FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_UNIT_DIAG, FLA_ONE, A, X );
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, A, X );

  return FLA_SUCCESS;
}
FLA_Error FLASH_LU_piv( FLA_Obj A,
                        FLA_Obj p )
References FLA_Abort(), FLA_Check_error_level(), FLA_LU_piv_check(), FLA_LU_piv_internal(), FLA_Print_message(), FLASH_LU_find_zero_on_diagonal(), FLASH_Obj_depth(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val = FLA_SUCCESS;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LU_piv_check( A, p );

  // *** The current LU_piv algorithm implemented assumes that
  // the matrix has a hierarchical depth of 1. We check for that here, because
  // we anticipate that we'll use a more general algorithm in the future, and
  // we don't want to forget to remove the constraint. ***
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_LU_piv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_LU_piv_internal() with large control tree.
  // NOTE(review): the internal return value is deliberately discarded here;
  // singularity is reported via the diagonal scan below instead.
  FLA_LU_piv_internal( A, p, flash_lu_piv_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  // Check for singularity.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    r_val = FLASH_LU_find_zero_on_diagonal( A );

  return r_val;
}
{
  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_LU_piv_solve_check( A, p, B, X );

  // Solve A X = B given the LU factors and pivots: apply the row pivots to
  // the right-hand side, then perform the two triangular solves.
  FLASH_Copy( B, X );
  FLASH_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, X );

  FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_UNIT_DIAG, FLA_ONE, A, X );
  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, A, X );

  return FLA_SUCCESS;
}
FLA_Error FLASH_QR2_UT( FLA_Obj B,
                        FLA_Obj D,
                        FLA_Obj T )
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_QR2_UT_check( B, D, T );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_QR2_UT_internal() with the standard control tree.
  r_val = FLA_QR2_UT_internal( B, D, T, flash_qr2ut_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_QR_UT( FLA_Obj A,
                       FLA_Obj TW )
{
  FLA_Error r_val;
  dim_t     b_alg, b_flash;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_QR_UT_check( A, TW );

  // *** The current hierarchical QR_UT algorithm assumes that the matrix
  // has a hierarchical depth of 1. We check for that here, because we
  // anticipate that we'll use a more general algorithm in the future, and
  // we don't want to forget to remove the constraint. ***
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_QR_UT() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Inspect the length of TTL to get the blocksize used by the QR
  // factorization, which will be our inner blocksize for Apply_Q_UT.
  b_alg   = FLASH_Obj_scalar_length_tl( TW );
  b_flash = FLASH_Obj_scalar_width_tl( TW );

  // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
  // that the algorithmic blocksize be equal to the storage blocksize.
  if ( b_alg != b_flash )
  {
    FLA_Print_message( "FLASH_QR_UT() requires that b_alg == b_store",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // The traditional (non-incremental) QR_UT algorithm-by-blocks requires
  // that min_dim(A) % b_flash == 0.
  if ( FLASH_Obj_scalar_min_dim( A ) % b_flash != 0 )
  {
    FLA_Print_message( "FLASH_QR_UT() requires that min_dim( A ) %% b_store == 0",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_QR_UT_internal() with hierarchical control tree.
  r_val = FLA_QR_UT_internal( A, TW, flash_qrut_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_QR_UT_inc( FLA_Obj A,
                           FLA_Obj TW )
Referenced by FLA_CAQR_UT_inc_factorize_panels().
{
  FLA_Error r_val;

  // Use the optimized variant only when not already inside a user-level
  // parallel region.
  if ( FLASH_Queue_stack_depth() == 0 )
    r_val = FLASH_QR_UT_inc_opt1( A, TW );
  else
    r_val = FLASH_QR_UT_inc_noopt( A, TW );

  return r_val;
}
{
  FLA_Obj W, Y;
  FLA_Obj AT, AB;
  FLA_Obj YT, YB;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_QR_UT_inc_solve_check( A, TW, B, X );

  // Apply Q' (accumulated incrementally in TW) to a copy of B.
  FLASH_Apply_Q_UT_inc_create_workspace( TW, B, &W );
  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y );

  FLASH_Apply_Q_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                        A, TW, W, Y );

  // Create a temporary hierarchical view of only the top n-by-n part of A in
  // case m > n so that AT captures the upper triangular factor R. We do the
  // same for Y to ensure conformality.
  FLASH_Part_create_2x1( A, &AT,
                            &AB, FLASH_Obj_scalar_width( A ), FLA_TOP );
  FLASH_Part_create_2x1( Y, &YT,
                            &YB, FLASH_Obj_scalar_width( A ), FLA_TOP );

  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, AT, YT );

  FLASH_Copy( YT, X );

  // Free the temporary hierarchical views.
  FLASH_Part_free_2x1( &AT, &AB );
  FLASH_Part_free_2x1( &YT, &YB );

  FLASH_Obj_free( &Y );
  FLASH_Obj_free( &W );

  return FLA_SUCCESS;
}
{
  FLA_Obj W, Y;
  FLA_Obj AT, AB;
  FLA_Obj YT, YB;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_QR_UT_solve_check( A, TW, B, X );

  // Apply Q' (stored in A and TW) to a copy of B.
  FLASH_Apply_Q_UT_create_workspace( TW, B, &W );
  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y );

  FLASH_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                    A, TW, W, Y );

  // View only the top n-by-n part of A (and conformally of Y) so that AT
  // captures the upper triangular factor R.
  FLA_Part_2x1( A, &AT,
                   &AB, FLA_Obj_width( A ), FLA_TOP );
  FLA_Part_2x1( Y, &YT,
                   &YB, FLA_Obj_width( A ), FLA_TOP );

  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, AT, YT );

  FLASH_Copy( YT, X );

  FLASH_Obj_free( &Y );
  FLASH_Obj_free( &W );

  return FLA_SUCCESS;
}
FLA_Error FLASH_SPDinv( FLA_Uplo uplo,
                        FLA_Obj  A )
References FLA_Check_error_level(), FLA_SPDinv_check(), FLA_SPDinv_internal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_SPDinv_check( uplo, A );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_SPDinv_internal( uplo, A, flash_spdinv_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Sylv( FLA_Trans transa,
                      FLA_Trans transb,
                      FLA_Obj   isgn,
                      FLA_Obj   A,
                      FLA_Obj   B,
                      FLA_Obj   C,
                      FLA_Obj   scale )
References FLA_Check_error_level(), FLA_Sylv_check(), FLA_Sylv_internal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Sylv_check( transa, transb, isgn, A, B, C, scale );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_Sylv_internal( transa, transb, isgn,
                             A, B, C, scale, flash_sylv_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Trinv( FLA_Uplo uplo,
                       FLA_Diag diag,
                       FLA_Obj  A )
References FLA_Check_error_level(), FLA_Trinv_check(), FLA_Trinv_internal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Trinv_check( uplo, diag, A );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_Trinv_internal( uplo, diag, A, flash_trinv_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_Ttmm( FLA_Uplo uplo,
                      FLA_Obj  A )
References FLA_Check_error_level(), FLA_Ttmm_check(), FLA_Ttmm_internal(), FLASH_Queue_begin(), and FLASH_Queue_end().
{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Ttmm_check( uplo, A );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Enqueue tasks via a SuperMatrix-aware control tree.
  r_val = FLA_Ttmm_internal( uplo, A, flash_ttmm_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}