libflame
12600
|
typedef volatile unsigned char* t_vcharp |
void FLA_adjust_strides | ( | dim_t | m, |
dim_t | n, | ||
dim_t * | rs, | ||
dim_t * | cs | ||
) |
Referenced by FLA_Obj_attach_buffer(), FLA_Obj_create_buffer(), and FLA_Obj_create_ext().
{ // Check the strides, and modify them if needed. if ( *rs == 0 && *cs == 0 ) { // Default values induce column-major storage, except when m == 1, // because we dont want both strides to be unit. if ( m == 1 && n > 1 ) { *rs = n; *cs = 1; } else { *rs = 1; *cs = m; } } else if ( *rs == 1 && *cs == 1 ) { // If both strides are unit, this is probably a "lazy" request for a // single vector (but could also be a request for a 1xn matrix in column- // major order or an mx1 matrix in row-major order). In libflame, we have // decided to "reserve" the case where rs == cs == 1 for scalars only, as // having unit strides can upset the BLAS error checking when attempting // to induce a row-major operation. Also, there is another special case // where rs == cs == 1 and one or both of m and n equal zero. This last // case is supported to allow creating "empty" objects. if ( m == 0 || n == 0 ) { // Nothing needs to be done for the "empty" case where m and/or n // equal zero. } else if ( m == 1 && n == 1 ) { // Nothing needs to be done for the scalar case where m == n == 1. } else if ( m > 1 && n == 1 ) { // Set the column stride to indicate that this is a column vector stored // in column-major order. This is necessary because, in some cases, we // have to satisify the error checking in the underlying BLAS library, // which expects the leading dimension to be set to at least m, even if // it will never be used for indexing since it is a vector and thus only // has one column of data. *cs = m; } else if ( m == 1 && n > 1 ) { // Set the row stride to indicate that this is a row vector stored // in row-major order. *rs = n; } } }
dim_t FLA_align_ldim | ( | dim_t | ldim, |
dim_t | elem_size | ||
) |
Referenced by FLA_compute_num_elem().
{ #ifdef FLA_ENABLE_MEMORY_ALIGNMENT #ifdef FLA_ENABLE_LDIM_ALIGNMENT // Increase ldim so that ( ldim * elem_size ) is a multiple of the desired // alignment. ldim = ( ( ldim * elem_size + FLA_MEMORY_ALIGNMENT_BOUNDARY - 1 ) / FLA_MEMORY_ALIGNMENT_BOUNDARY ) * FLA_MEMORY_ALIGNMENT_BOUNDARY / elem_size; #endif #endif return ldim; }
References FLA_align_ldim().
Referenced by FLA_Obj_create_buffer(), and FLA_Obj_create_ext().
{ dim_t n_elem; // Determine the amount of space we need to allocate based on the values of // the row and column strides. if ( m == 0 || n == 0 ) { // For empty objects, set the length of the buffer to 0. Row and column // strides should remain unchanged (because alignment is not needed). n_elem = 0; } else if ( *rs == 1 ) { // For column-major storage, use cs for computing the length of the buffer // to allocate. // Align the leading dimension to some user-defined address multiple, // if requested at configure-time. *cs = FLA_align_ldim( *cs, elem_size ); // Compute the length of the buffer needed for the object we're creating. n_elem = ( size_t ) *cs * ( size_t ) n; } else if ( *cs == 1 ) { // For row-major storage, use rs for computing the length of the buffer // to allocate. // Align the leading dimension to some user-defined address multiple, // if requested at configure-time. *rs = FLA_align_ldim( *rs, elem_size ); // Compute the length of the buffer needed for the object we're creating. n_elem = ( size_t ) m * ( size_t ) *rs; } else { // For general storage, use rs and cs to compute the length of the buffer // to allocate. // Compute the size of the buffer needed for the object we're creating. if ( *rs < *cs ) { *cs = FLA_align_ldim( *cs, elem_size ); n_elem = ( size_t ) *cs * ( size_t ) n; } else if ( *rs > *cs ) { *rs = FLA_align_ldim( *rs, elem_size ); n_elem = ( size_t ) m * ( size_t ) *rs; } else // if ( rs == cs ) { //rs = FLA_align_ldim( rs, FLA_Obj_elem_size( *obj ) ); *cs = FLA_align_ldim( *cs, elem_size ); // Note that if rs == cs, then we must be creating either a 1-by-n matrix // or a m-by-1 matrix. This constraint is enforced in // FLA_Check_matrix_strides(). Thus, we can compute the buffer length: // m * n * (rs|cs). n_elem = ( size_t ) m * ( size_t ) n * ( size_t ) *cs; } } return n_elem; }
FLA_Bool FLA_is_owner | ( | void | ) |
References RCCE_ue().
Referenced by FLA_Obj_create_complex_constant(), FLA_Obj_create_constant(), FLA_Obj_create_constant_ext(), FLA_Obj_create_copy_of(), FLASH_Axpy_hierarchy(), FLASH_Copy_hierarchy(), FLASH_Queue_exec(), and FLASH_Queue_init_tasks().
{ if ( RCCE_ue() == 0 ) return TRUE; return FALSE; }
FLA_Error FLA_Obj_attach_buffer | ( | void * | buffer, |
dim_t | rs, | ||
dim_t | cs, | ||
FLA_Obj * | obj | ||
) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Obj_struct::cs, FLA_adjust_strides(), FLA_Check_error_level(), FLA_Obj_attach_buffer_check(), FLA_Obj_length(), FLA_Obj_width(), and FLA_Obj_struct::rs.
Referenced by FLA_Axpy_buffer_to_object(), FLA_Axpy_object_to_buffer(), FLA_Copy_buffer_to_object(), FLA_Copy_object_to_buffer(), FLASH_Axpy_buffer_to_hier(), FLASH_Axpy_hier_to_buffer(), FLASH_Copy_buffer_to_hier(), FLASH_Copy_hier_to_buffer(), FLASH_Obj_attach_buffer(), FLASH_Obj_attach_buffer_hierarchy(), and FLASH_Obj_create_hierarchy().
{ dim_t m, n; m = FLA_Obj_length( *obj ); n = FLA_Obj_width( *obj ); // Adjust the strides, if necessary. FLA_adjust_strides( m, n, &rs, &cs ); if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_attach_buffer_check( buffer, rs, cs, obj ); obj->base->buffer = buffer; obj->base->rs = rs; obj->base->cs = cs; return FLA_SUCCESS; }
FLA_Error FLA_Obj_create | ( | FLA_Datatype | datatype, |
dim_t | m, | ||
dim_t | n, | ||
dim_t | rs, | ||
dim_t | cs, | ||
FLA_Obj * | obj | ||
) |
References FLA_Obj_create_ext().
Referenced by FLA_Apply_Q_blk_external(), FLA_Apply_Q_UT_create_workspace(), FLA_Apply_Q_UT_create_workspace_side(), FLA_Apply_QUD_UT_create_workspace(), FLA_Bidiag_apply_U_external(), FLA_Bidiag_apply_V_external(), FLA_Bidiag_blk_external(), FLA_Bidiag_form_U_external(), FLA_Bidiag_form_V_external(), FLA_Bidiag_unb_external(), FLA_Bidiag_UT_create_T(), FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V_ext(), FLA_Bidiag_UT_l_realify_unb(), FLA_Bidiag_UT_u_blf_var4(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Bidiag_UT_u_ofu_var4(), FLA_Bidiag_UT_u_opt_var4(), FLA_Bidiag_UT_u_opt_var5(), FLA_Bidiag_UT_u_realify_unb(), FLA_Bidiag_UT_u_step_unb_var1(), FLA_Bidiag_UT_u_step_unb_var2(), FLA_Bidiag_UT_u_step_unb_var3(), FLA_Bidiag_UT_u_step_unb_var4(), FLA_Bidiag_UT_u_step_unb_var5(), FLA_Bidiag_UT_u_unb_var4(), FLA_Bidiag_UT_u_unb_var5(), FLA_Bsvd_create_workspace(), FLA_Bsvd_external(), FLA_Bsvdd_external(), FLA_Fill_with_cluster_dist(), FLA_Fill_with_geometric_dist(), FLA_Fill_with_inverse_dist(), FLA_Fill_with_linear_dist(), FLA_Fill_with_logarithmic_dist(), FLA_Fill_with_random_dist(), FLA_Hess_blk_external(), FLA_Hess_unb_external(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_Hess_UT_create_T(), FLA_Hess_UT_step_unb_var1(), FLA_Hess_UT_step_unb_var2(), FLA_Hess_UT_step_unb_var3(), FLA_Hess_UT_step_unb_var4(), FLA_Hess_UT_step_unb_var5(), FLA_Hevd_compute_scaling(), FLA_Hevd_external(), FLA_Hevd_lv_unb_var1(), FLA_Hevd_lv_unb_var2(), FLA_Hevdd_external(), FLA_Hevdr_external(), FLA_LQ_blk_external(), FLA_LQ_unb_external(), FLA_LQ_UT_create_T(), FLA_Lyap_h_unb_var1(), FLA_Lyap_h_unb_var2(), FLA_Lyap_h_unb_var3(), FLA_Lyap_h_unb_var4(), FLA_Lyap_n_unb_var1(), FLA_Lyap_n_unb_var2(), FLA_Lyap_n_unb_var3(), FLA_Lyap_n_unb_var4(), FLA_Norm1(), FLA_Norm_inf(), FLA_Obj_create_complex_constant(), FLA_Obj_create_constant(), FLA_Obj_create_constant_ext(), FLA_QR_blk_external(), FLA_QR_form_Q_external(), FLA_QR_unb_external(), FLA_QR_UT_create_T(), FLA_QR_UT_piv_colnorm(), FLA_QR_UT_piv_unb_var2(), FLA_Svd_compute_scaling(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_external(), FLA_Svd_uv_unb_var1(), FLA_Svd_uv_unb_var2(), FLA_Svdd_external(), FLA_Tevd_external(), FLA_Tevdd_external(), FLA_Tevdr_external(), FLA_Tridiag_apply_Q_external(), FLA_Tridiag_blk_external(), FLA_Tridiag_form_Q_external(), FLA_Tridiag_unb_external(), FLA_Tridiag_UT_create_T(), FLA_Tridiag_UT_l_blf_var3(), FLA_Tridiag_UT_l_blk_var3(), FLA_Tridiag_UT_l_realify_unb(), FLA_Tridiag_UT_l_step_unb_var1(), FLA_Tridiag_UT_l_step_unb_var2(), FLA_Tridiag_UT_l_step_unb_var3(), FLA_Tridiag_UT_u_realify_unb(), FLA_UDdate_UT_create_T(), FLASH_Obj_create_flat_conf_to_hier(), and FLASH_Obj_create_helper().
{ FLA_Obj_create_ext( datatype, FLA_SCALAR, m, n, m, n, rs, cs, obj ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_buffer | ( | dim_t | rs, |
dim_t | cs, | ||
FLA_Obj * | obj | ||
) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Obj_struct::buffer_info, FLA_Obj_struct::cs, FLA_adjust_strides(), FLA_Check_error_level(), FLA_compute_num_elem(), FLA_malloc(), FLA_Obj_create_buffer_check(), FLA_Obj_elem_size(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_width(), FLA_shmalloc(), FLA_Obj_struct::n_elem_alloc, and FLA_Obj_struct::rs.
Referenced by FLA_Obj_create_buffer_task().
{ size_t buffer_size; size_t n_elem; dim_t m, n; m = FLA_Obj_length( *obj ); n = FLA_Obj_width( *obj ); // Adjust the strides, if necessary. FLA_adjust_strides( m, n, &rs, &cs ); if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_buffer_check( rs, cs, obj ); // Compute the number of elements needed for the buffer, adjusting // the strides for alignment if needed. n_elem = FLA_compute_num_elem( FLA_Obj_elem_size( *obj ), m, n, &rs, &cs ); // Compute the buffer size in bytes. buffer_size = ( size_t ) n_elem * ( size_t ) FLA_Obj_elem_size( *obj ); // Allocate the base object's element buffer. #ifdef FLA_ENABLE_SCC obj->base->buffer = ( FLA_Obj_elemtype( *obj ) == FLA_MATRIX ? FLA_malloc( buffer_size ) : FLA_shmalloc( buffer_size ) ); #else obj->base->buffer = FLA_malloc( buffer_size ); #endif obj->base->buffer_info = 0; // Save the number of elements allocated (for use with FLASH). obj->base->n_elem_alloc = n_elem; // Save the row and column strides used in the memory allocation. obj->base->rs = rs; obj->base->cs = cs; return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_complex_constant | ( | double | const_real, |
double | const_imag, | ||
FLA_Obj * | obj | ||
) |
References FLA_Check_error_level(), FLA_is_owner(), FLA_Obj_create(), FLA_Obj_create_complex_constant_check(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
{ int* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_complex_constant_check( const_real, const_imag, obj ); FLA_Obj_create( FLA_CONSTANT, 1, 1, 0, 0, obj ); #ifdef FLA_ENABLE_SCC if ( !FLA_is_owner() ) return FLA_SUCCESS; #endif temp_i = FLA_INT_PTR( *obj ); temp_s = FLA_FLOAT_PTR( *obj ); temp_d = FLA_DOUBLE_PTR( *obj ); temp_c = FLA_COMPLEX_PTR( *obj ); temp_z = FLA_DOUBLE_COMPLEX_PTR( *obj ); *temp_i = ( int ) const_real; *temp_s = ( float ) const_real; *temp_d = const_real; temp_c->real = ( float ) const_real; temp_c->imag = ( float ) const_imag; temp_z->real = const_real; temp_z->imag = const_imag; return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_conf_to | ( | FLA_Trans | trans, |
FLA_Obj | obj_cur, | ||
FLA_Obj * | obj_new | ||
) |
References FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_create_conf_to_check(), FLA_Obj_create_ext(), FLA_Obj_datatype(), FLA_Obj_elemtype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
Referenced by FLA_Apply_H2_UT_l_unb_var1(), FLA_Apply_H2_UT_r_unb_var1(), FLA_Eig_gest(), FLA_Hess_UT_ofu_var4(), FLA_Hess_UT_opt_var4(), FLA_Hess_UT_opt_var5(), FLA_Hess_UT_unb_var4(), FLA_Hess_UT_unb_var5(), FLA_Lyap_h_opt_var1(), FLA_Lyap_h_opt_var2(), FLA_Lyap_h_opt_var3(), FLA_Lyap_h_opt_var4(), FLA_Lyap_h_unb_var1(), FLA_Lyap_h_unb_var2(), FLA_Lyap_h_unb_var3(), FLA_Lyap_h_unb_var4(), FLA_Lyap_n_opt_var1(), FLA_Lyap_n_opt_var2(), FLA_Lyap_n_opt_var3(), FLA_Lyap_n_opt_var4(), FLA_Lyap_n_unb_var1(), FLA_Lyap_n_unb_var2(), FLA_Lyap_n_unb_var3(), FLA_Lyap_n_unb_var4(), FLA_Obj_create_copy_of(), FLA_Random_spd_matrix(), FLA_Random_unitary_matrix(), FLA_Svd_ext_u_unb_var1(), FLA_Tridiag_UT_l_ofu_var3(), FLA_Tridiag_UT_l_opt_var3(), FLA_Tridiag_UT_l_unb_var3(), FLA_Trmvsx_external(), and FLA_Trsvsx_external().
{ FLA_Datatype datatype; FLA_Elemtype elemtype; dim_t m, n; dim_t rs, cs; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_conf_to_check( trans, obj_cur, obj_new ); datatype = FLA_Obj_datatype( obj_cur ); elemtype = FLA_Obj_elemtype( obj_cur ); // Query the dimensions of the existing object. if ( trans == FLA_NO_TRANSPOSE || trans == FLA_CONJ_NO_TRANSPOSE ) { m = FLA_Obj_length( obj_cur ); n = FLA_Obj_width( obj_cur ); } else // if ( trans == FLA_TRANSPOSE || trans == FLA_CONJ_TRANSPOSE ) { m = FLA_Obj_width( obj_cur ); n = FLA_Obj_length( obj_cur ); } // Query the row and column strides of the existing object. We don't care // about the actual leading dimension of the existing object, only whether // it is in row- or column-major format. rs = FLA_Obj_row_stride( obj_cur ); cs = FLA_Obj_col_stride( obj_cur ); if ( ( rs == 1 && cs == 1 ) ) { // Do nothing. This special case will be handled by FLA_adjust_strides(). ; } else if ( rs == 1 ) { // For column-major storage, use the m dimension as the column stride. // Row stride is already set to 1. cs = m; } else if ( cs == 1 ) { // For row-major storage, use the n dimension as the row stride. // Column stride is already set to 1. rs = n; } // Handle empty views. if ( m == 0 ) cs = 1; if ( n == 0 ) rs = 1; FLA_Obj_create_ext( datatype, elemtype, m, n, m, n, rs, cs, obj_new ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_constant | ( | double | const_real, |
FLA_Obj * | obj | ||
) |
References FLA_Check_error_level(), FLA_is_owner(), FLA_Obj_create(), FLA_Obj_create_constant_check(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by FLA_Init_constants().
{ int* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_constant_check( const_real, obj ); FLA_Obj_create( FLA_CONSTANT, 1, 1, 0, 0, obj ); #ifdef FLA_ENABLE_SCC if ( !FLA_is_owner() ) return FLA_SUCCESS; #endif temp_i = FLA_INT_PTR( *obj ); temp_s = FLA_FLOAT_PTR( *obj ); temp_d = FLA_DOUBLE_PTR( *obj ); temp_c = FLA_COMPLEX_PTR( *obj ); temp_z = FLA_DOUBLE_COMPLEX_PTR( *obj ); *temp_i = ( int ) const_real; *temp_s = ( float ) const_real; *temp_d = const_real; temp_c->real = ( float ) const_real; temp_c->imag = ( float ) 0.0; temp_z->real = const_real; temp_z->imag = 0.0; return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_constant_ext | ( | float | const_s, |
double | const_d, | ||
FLA_Obj * | obj | ||
) |
References FLA_Check_error_level(), FLA_is_owner(), FLA_Obj_create(), FLA_Obj_create_constant_ext_check(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.
Referenced by FLA_Init_constants().
{ int* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_constant_ext_check( const_s, const_d, obj ); FLA_Obj_create( FLA_CONSTANT, 1, 1, 0, 0, obj ); #ifdef FLA_ENABLE_SCC if ( !FLA_is_owner() ) return FLA_SUCCESS; #endif temp_i = FLA_INT_PTR( *obj ); temp_s = FLA_FLOAT_PTR( *obj ); temp_d = FLA_DOUBLE_PTR( *obj ); temp_c = FLA_COMPLEX_PTR( *obj ); temp_z = FLA_DOUBLE_COMPLEX_PTR( *obj ); *temp_i = ( int ) const_s; *temp_s = const_s; *temp_d = const_d; temp_c->real = const_s; temp_c->imag = 0.0F; temp_z->real = const_d; temp_z->imag = 0.0; return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_copy_of | ( | FLA_Trans | trans, |
FLA_Obj | obj_cur, | ||
FLA_Obj * | obj_new | ||
) |
References FLA_Copyt_external(), FLA_is_owner(), and FLA_Obj_create_conf_to().
Referenced by FLA_QR_UT_solve(), FLA_Sort(), FLA_UDdate_UT_update_rhs(), and FLASH_Obj_create_copy_of().
{ // Create a new object conformal to the current object. FLA_Obj_create_conf_to( trans, obj_cur, obj_new ); #ifdef FLA_ENABLE_SCC if ( !FLA_is_owner() ) return FLA_SUCCESS; #endif // Copy the contents of the current object to the new object. FLA_Copyt_external( trans, obj_cur, *obj_new ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_ext | ( | FLA_Datatype | datatype, |
FLA_Elemtype | elemtype, | ||
dim_t | m, | ||
dim_t | n, | ||
dim_t | m_inner, | ||
dim_t | n_inner, | ||
dim_t | rs, | ||
dim_t | cs, | ||
FLA_Obj * | obj | ||
) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Obj_struct::buffer_info, FLA_Obj_struct::cs, FLA_Obj_struct::datatype, FLA_Obj_struct::elemtype, FLA_adjust_strides(), FLA_Check_error_level(), FLA_compute_num_elem(), FLA_malloc(), FLA_Obj_create_ext_check(), FLA_Obj_elem_size(), FLA_Obj_elemtype(), FLA_shmalloc(), FLA_Obj_struct::id, FLA_Obj_struct::m, FLA_Obj_view::m, FLA_Obj_struct::m_index, FLA_Obj_struct::m_inner, FLA_Obj_view::m_inner, FLA_Obj_struct::n, FLA_Obj_view::n, FLA_Obj_struct::n_elem_alloc, FLA_Obj_struct::n_index, FLA_Obj_struct::n_inner, FLA_Obj_view::n_inner, FLA_Obj_struct::n_read_tasks, FLA_Obj_view::offm, FLA_Obj_view::offn, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLA_Obj_struct::rs, and FLA_Obj_struct::write_task.
Referenced by FLA_Obj_create(), FLA_Obj_create_conf_to(), and FLASH_Obj_create_hierarchy().
{ size_t buffer_size; size_t n_elem; // Adjust the strides, if necessary. FLA_adjust_strides( m, n, &rs, &cs ); if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_ext_check( datatype, elemtype, m, n, m_inner, n_inner, rs, cs, obj ); // Populate the fields in the view object. obj->m = m; obj->n = n; obj->offm = 0; obj->offn = 0; obj->m_inner = m_inner; obj->n_inner = n_inner; // Allocate the base object field. obj->base = ( FLA_Base_obj * ) FLA_malloc( sizeof( FLA_Base_obj ) ); // Populate the fields in the base object. obj->base->datatype = datatype; obj->base->elemtype = elemtype; obj->base->m = m; obj->base->n = n; obj->base->m_inner = m_inner; obj->base->n_inner = n_inner; obj->base->id = ( unsigned long ) obj->base; obj->base->m_index = 0; obj->base->n_index = 0; // Compute the number of elements needed for the buffer, adjusting // the strides for alignment if needed. n_elem = FLA_compute_num_elem( FLA_Obj_elem_size( *obj ), m, n, &rs, &cs ); // Compute the buffer size in bytes. buffer_size = ( size_t ) n_elem * ( size_t ) FLA_Obj_elem_size( *obj ); // Allocate the base object's element buffer. #ifdef FLA_ENABLE_SCC obj->base->buffer = ( FLA_Obj_elemtype( *obj ) == FLA_MATRIX ? FLA_malloc( buffer_size ) : FLA_shmalloc( buffer_size ) ); #else obj->base->buffer = FLA_malloc( buffer_size ); #endif obj->base->buffer_info = 0; // Just in case this is a FLASH object, save the number of elements // allocated so that we can more easily free the elements later on. obj->base->n_elem_alloc = n_elem; // Save the row and column strides used in the memory allocation. obj->base->rs = rs; obj->base->cs = cs; #ifdef FLA_ENABLE_SUPERMATRIX // Initialize SuperMatrix fields. obj->base->n_read_tasks = 0; obj->base->read_task_head = NULL; obj->base->read_task_tail = NULL; obj->base->write_task = NULL; #endif return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_without_buffer | ( | FLA_Datatype | datatype, |
dim_t | m, | ||
dim_t | n, | ||
FLA_Obj * | obj | ||
) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Obj_struct::buffer_info, FLA_Obj_struct::cs, FLA_Obj_struct::datatype, FLA_Obj_struct::elemtype, FLA_Check_error_level(), FLA_malloc(), FLA_Obj_create_without_buffer_check(), FLA_Obj_struct::id, FLA_Obj_struct::m, FLA_Obj_view::m, FLA_Obj_struct::m_index, FLA_Obj_struct::m_inner, FLA_Obj_view::m_inner, FLA_Obj_struct::n, FLA_Obj_view::n, FLA_Obj_struct::n_elem_alloc, FLA_Obj_struct::n_index, FLA_Obj_struct::n_inner, FLA_Obj_view::n_inner, FLA_Obj_struct::n_read_tasks, FLA_Obj_view::offm, FLA_Obj_view::offn, FLA_Obj_struct::read_task_head, FLA_Obj_struct::read_task_tail, FLA_Obj_struct::rs, and FLA_Obj_struct::write_task.
Referenced by FLA_Axpy_buffer_to_object(), FLA_Axpy_object_to_buffer(), FLA_Copy_buffer_to_object(), FLA_Copy_object_to_buffer(), FLASH_Axpy_buffer_to_hier(), FLASH_Axpy_hier_to_buffer(), FLASH_Copy_buffer_to_hier(), FLASH_Copy_hier_to_buffer(), FLASH_Obj_attach_buffer(), FLASH_Obj_create_helper(), and FLASH_Obj_create_hierarchy().
{ if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_without_buffer_check( datatype, m, n, obj ); // Populate the fields in the view object. obj->m = m; obj->n = n; obj->offm = 0; obj->offn = 0; obj->m_inner = m; obj->n_inner = n; // Allocate the base object field. obj->base = ( FLA_Base_obj * ) FLA_malloc( sizeof( FLA_Base_obj ) ); // Populate the fields in the base object. obj->base->datatype = datatype; obj->base->elemtype = FLA_SCALAR; obj->base->m = m; obj->base->n = n; obj->base->m_inner = m; obj->base->n_inner = n; obj->base->id = ( unsigned long ) obj->base; obj->base->m_index = 0; obj->base->n_index = 0; // Set the row and column strides to invalid values. obj->base->rs = 0; obj->base->cs = 0; // Initialize the base object's element buffer to NULL. obj->base->buffer = NULL; obj->base->buffer_info = 0; obj->base->n_elem_alloc = 0; #ifdef FLA_ENABLE_SUPERMATRIX // Initialize SuperMatrix fields. obj->base->n_read_tasks = 0; obj->base->read_task_head = NULL; obj->base->read_task_tail = NULL; obj->base->write_task = NULL; #endif return FLA_SUCCESS; }
FLA_Error FLA_Obj_flip_base | ( | FLA_Obj * | obj | ) |
References FLA_Obj_view::base, FLA_Obj_struct::cs, FLA_Check_error_level(), FLA_Check_null_pointer(), FLA_Obj_struct::m, FLA_Obj_struct::m_index, FLA_Obj_struct::m_inner, FLA_Obj_struct::n, FLA_Obj_struct::n_index, FLA_Obj_struct::n_inner, and FLA_Obj_struct::rs.
Referenced by FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V_ext(), FLA_Bidiag_UT_internal(), FLA_LQ_UT_form_Q(), FLA_Svd(), FLA_Svd_ext(), and FLA_Svd_ext_u_unb_var1().
{ FLA_Error e_val; dim_t temp; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) { e_val = FLA_Check_null_pointer( obj ); FLA_Check_error_code( e_val ); e_val = FLA_Check_null_pointer( obj->base ); FLA_Check_error_code( e_val ); } exchange( obj->base->m, obj->base->n, temp ); exchange( obj->base->cs, obj->base->rs, temp ); exchange( obj->base->m_inner, obj->base->n_inner, temp ); exchange( obj->base->m_index, obj->base->n_index, temp ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_flip_view | ( | FLA_Obj * | obj | ) |
References FLA_Check_error_level(), FLA_Check_null_pointer(), FLA_Obj_view::m, FLA_Obj_view::m_inner, FLA_Obj_view::n, FLA_Obj_view::n_inner, FLA_Obj_view::offm, and FLA_Obj_view::offn.
Referenced by FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V_ext(), FLA_Bidiag_UT_internal(), FLA_LQ_UT_form_Q(), FLA_Svd(), FLA_Svd_ext(), and FLA_Svd_ext_u_unb_var1().
{ FLA_Error e_val; dim_t temp; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) { e_val = FLA_Check_null_pointer( obj ); FLA_Check_error_code( e_val ); } exchange( obj->offm, obj->offn, temp ); exchange( obj->m, obj->n, temp ); exchange( obj->m_inner, obj->n_inner, temp ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_free | ( | FLA_Obj * | obj | ) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Check_error_level(), FLA_free(), FLA_Obj_elemtype(), FLA_Obj_free_check(), FLA_shfree(), FLA_Obj_view::m, FLA_Obj_view::n, FLA_Obj_view::offm, and FLA_Obj_view::offn.
Referenced by FLA_Apply_H2_UT_l_unb_var1(), FLA_Apply_H2_UT_r_unb_var1(), FLA_Apply_Q_blk_external(), FLA_Bidiag_apply_U_external(), FLA_Bidiag_apply_V_external(), FLA_Bidiag_blk_external(), FLA_Bidiag_form_U_external(), FLA_Bidiag_form_V_external(), FLA_Bidiag_unb_external(), FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V_ext(), FLA_Bidiag_UT_l_realify_unb(), FLA_Bidiag_UT_u_blf_var4(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Bidiag_UT_u_ofu_var4(), FLA_Bidiag_UT_u_opt_var4(), FLA_Bidiag_UT_u_opt_var5(), FLA_Bidiag_UT_u_realify_unb(), FLA_Bidiag_UT_u_step_unb_var1(), FLA_Bidiag_UT_u_step_unb_var2(), FLA_Bidiag_UT_u_step_unb_var3(), FLA_Bidiag_UT_u_step_unb_var4(), FLA_Bidiag_UT_u_step_unb_var5(), FLA_Bidiag_UT_u_unb_var4(), FLA_Bidiag_UT_u_unb_var5(), FLA_Bsvd_external(), FLA_Bsvdd_external(), FLA_Eig_gest(), FLA_Fill_with_cluster_dist(), FLA_Fill_with_geometric_dist(), FLA_Fill_with_inverse_dist(), FLA_Fill_with_linear_dist(), FLA_Fill_with_logarithmic_dist(), FLA_Fill_with_random_dist(), FLA_Finalize_constants(), FLA_Hess_blk_external(), FLA_Hess_unb_external(), FLA_Hess_UT_blf_var2(), FLA_Hess_UT_blf_var3(), FLA_Hess_UT_blf_var4(), FLA_Hess_UT_blk_var1(), FLA_Hess_UT_blk_var2(), FLA_Hess_UT_blk_var3(), FLA_Hess_UT_blk_var4(), FLA_Hess_UT_blk_var5(), FLA_Hess_UT_ofu_var4(), FLA_Hess_UT_opt_var4(), FLA_Hess_UT_opt_var5(), FLA_Hess_UT_step_unb_var1(), FLA_Hess_UT_step_unb_var2(), FLA_Hess_UT_step_unb_var3(), FLA_Hess_UT_step_unb_var4(), FLA_Hess_UT_step_unb_var5(), FLA_Hess_UT_unb_var4(), FLA_Hess_UT_unb_var5(), FLA_Hevd_compute_scaling(), FLA_Hevd_external(), FLA_Hevd_lv_unb_var1(), FLA_Hevd_lv_unb_var2(), FLA_Hevdd_external(), FLA_Hevdr_external(), FLA_LQ_blk_external(), FLA_LQ_unb_external(), FLA_LQ_UT_macro_task(), FLA_LQ_UT_solve(), FLA_LU_piv_macro_task(), FLA_Lyap_h_opt_var1(), FLA_Lyap_h_opt_var2(), FLA_Lyap_h_opt_var3(), FLA_Lyap_h_opt_var4(), FLA_Lyap_h_unb_var1(), FLA_Lyap_h_unb_var2(), FLA_Lyap_h_unb_var3(), FLA_Lyap_h_unb_var4(), FLA_Lyap_n_opt_var1(), FLA_Lyap_n_opt_var2(), FLA_Lyap_n_opt_var3(), FLA_Lyap_n_opt_var4(), FLA_Lyap_n_unb_var1(), FLA_Lyap_n_unb_var2(), FLA_Lyap_n_unb_var3(), FLA_Lyap_n_unb_var4(), FLA_Norm1(), FLA_Norm_inf(), FLA_QR_blk_external(), FLA_QR_form_Q_external(), FLA_QR_unb_external(), FLA_QR_UT_form_Q(), FLA_QR_UT_macro_task(), FLA_QR_UT_piv_colnorm(), FLA_QR_UT_piv_unb_var2(), FLA_QR_UT_solve(), FLA_Random_spd_matrix(), FLA_Random_unitary_matrix(), FLA_Sort(), FLA_Svd_compute_scaling(), FLA_Svd_ext_u_unb_var1(), FLA_Svd_external(), FLA_Svd_uv_unb_var1(), FLA_Svd_uv_unb_var2(), FLA_Svdd_external(), FLA_Tevd_external(), FLA_Tevdd_external(), FLA_Tevdr_external(), FLA_Tridiag_apply_Q_external(), FLA_Tridiag_blk_external(), FLA_Tridiag_form_Q_external(), FLA_Tridiag_unb_external(), FLA_Tridiag_UT_l_blf_var3(), FLA_Tridiag_UT_l_blk_var3(), FLA_Tridiag_UT_l_ofu_var3(), FLA_Tridiag_UT_l_opt_var3(), FLA_Tridiag_UT_l_realify_unb(), FLA_Tridiag_UT_l_step_unb_var1(), FLA_Tridiag_UT_l_step_unb_var2(), FLA_Tridiag_UT_l_step_unb_var3(), FLA_Tridiag_UT_l_unb_var3(), FLA_Tridiag_UT_u_realify_unb(), FLA_Trmvsx_external(), FLA_Trsvsx_external(), FLA_UDdate_UT_update_rhs(), FLASH_Hermitianize(), FLASH_Max_elemwise_diff(), FLASH_Norm1(), FLASH_Obj_create_copy_of(), FLASH_Obj_free(), FLASH_Obj_free_hierarchy(), FLASH_Random_matrix(), FLASH_Set(), FLASH_Shift_diag(), and FLASH_Triangularize().
{ if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_free_check( obj ); if ( obj->base != NULL ) { #ifdef FLA_ENABLE_SCC ( FLA_Obj_elemtype( *obj ) == FLA_MATRIX ? FLA_free( obj->base->buffer ) : FLA_shfree( obj->base->buffer ) ); #else //printf( "freeing buff %p\n", obj->base->buffer ); fflush( stdout ); FLA_free( obj->base->buffer ); #endif //printf( "freeing base %p\n", obj->base ); fflush( stdout ); FLA_free( ( void * ) obj->base ); } obj->offm = 0; obj->offn = 0; obj->m = 0; obj->n = 0; return FLA_SUCCESS; }
FLA_Error FLA_Obj_free_buffer | ( | FLA_Obj * | obj | ) |
References FLA_Obj_view::base, FLA_Obj_struct::buffer, FLA_Check_error_level(), FLA_free(), FLA_Obj_elemtype(), FLA_Obj_free_buffer_check(), and FLA_shfree().
Referenced by FLA_Obj_free_buffer_task().
{ if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_free_buffer_check( obj ); #ifdef FLA_ENABLE_SCC ( FLA_Obj_elemtype( *obj ) == FLA_MATRIX ? FLA_free( obj->base->buffer ) : FLA_shfree( obj->base->buffer ) ); #else FLA_free( obj->base->buffer ); #endif obj->base->buffer = NULL; return FLA_SUCCESS; }
FLA_Error FLA_Obj_free_without_buffer | ( | FLA_Obj * | obj | ) |
References FLA_Obj_view::base, FLA_Check_error_level(), FLA_free(), FLA_Obj_free_without_buffer_check(), FLA_Obj_view::m, FLA_Obj_view::n, FLA_Obj_view::offm, and FLA_Obj_view::offn.
Referenced by FLA_Axpy_buffer_to_object(), FLA_Axpy_object_to_buffer(), FLA_Copy_buffer_to_object(), FLA_Copy_object_to_buffer(), FLASH_Axpy_buffer_to_hier(), FLASH_Axpy_hier_to_buffer(), FLASH_Copy_buffer_to_hier(), FLASH_Copy_hier_to_buffer(), FLASH_Obj_attach_buffer(), FLASH_Obj_create_helper(), FLASH_Obj_create_hierarchy(), FLASH_Obj_free_hierarchy(), and FLASH_Obj_free_without_buffer().
{ if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_free_without_buffer_check( obj ); FLA_free( ( void * ) obj->base ); obj->offm = 0; obj->offn = 0; obj->m = 0; obj->n = 0; return FLA_SUCCESS; }
FLA_Error FLA_Obj_nullify | ( | FLA_Obj * | obj | ) |
void FLA_shfree | ( | void * | ptr | ) |
References RCCE_shfree().
Referenced by FLA_Obj_free(), FLA_Obj_free_buffer(), FLASH_Obj_free(), and FLASH_Queue_exec().
{ RCCE_shfree( ( t_vcharp ) ptr ); }
void* FLA_shmalloc | ( | size_t | size | ) |
References RCCE_shmalloc().
Referenced by FLA_Obj_create_buffer(), FLA_Obj_create_ext(), and FLASH_Queue_exec().
{ return ( void * ) RCCE_shmalloc( size ); }
void RCCE_shfree | ( | t_vcharp | ) |
Referenced by FLA_shfree().
t_vcharp RCCE_shmalloc | ( | size_t | ) |
Referenced by FLA_shmalloc().
int RCCE_ue | ( | void | ) |
Referenced by FLA_is_owner(), and FLASH_Queue_exec_parallel_function().