libflame  12600
Functions
FLA_Apply_pivots_macro_external.c File Reference

(r12600)

Functions

FLA_Error FLA_Apply_pivots_macro_external (FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)

Function Documentation

References bl1_cswapv(), bl1_dswapv(), bl1_sswapv(), bl1_zswapv(), FLA_Obj_buffer_at_view(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Apply_pivots_macro_task().

{
   int          i, j;
   int          ipiv;
   int*         buf_p    = ( int* ) FLA_Obj_buffer_at_view( p );
   FLA_Obj*     blocks   = FLASH_OBJ_PTR_AT( A );
   int          m_blocks = FLA_Obj_length( A );
   int          m_A      = FLA_Obj_length( *blocks );
   int          n_A      = FLA_Obj_width( *blocks );
   FLA_Datatype datatype = FLA_Obj_datatype( A );

#ifdef FLA_ENABLE_WINDOWS_BUILD
   int* m  = ( int* ) _alloca( m_blocks * sizeof( int ) );
   int* cs = ( int* ) _alloca( m_blocks * sizeof( int ) );
#else
   int* m  = ( int* ) malloc( m_blocks * sizeof( int ) );
   int* cs = ( int* ) malloc( m_blocks * sizeof( int ) );
   //int m[m_blocks];
   //int cs[m_blocks];
#endif

   if ( side != FLA_LEFT || trans != FLA_NO_TRANSPOSE )
      FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

   switch ( datatype )
   {
      case FLA_FLOAT:
      {
#ifdef FLA_ENABLE_WINDOWS_BUILD
         float** buffer = ( float** ) _alloca( m_blocks * sizeof( float* ) );
#else
         float** buffer = ( float** ) malloc( m_blocks * sizeof( float* ) );
         //float*  buffer[m_blocks];
#endif
         for ( i = 0; i < m_blocks; i++ )
         {
            buffer[i] = ( float* ) FLA_Obj_buffer_at_view( blocks[i] );
            
            m[i] = FLA_Obj_length( blocks[i] );
            cs[i] = FLA_Obj_col_stride( blocks[i] );
         }
         
         for ( j = 0; j < m_A; j++ )
         {
            ipiv = buf_p[j] + j;
            
            if ( ipiv != j )
            {
               i = 0;
               
               while ( ipiv >= m[i] )
               {
                  ipiv = ipiv - m[i];
                  i++;
               }

               bl1_sswapv( n_A,
                           buffer[0] + j,    cs[0],
                           buffer[i] + ipiv, cs[i] );
            }
         }
#ifdef FLA_ENABLE_WINDOWS_BUILD
#else
         free( buffer ); 
#endif
         break;
      }
      case FLA_DOUBLE:
      {
#ifdef FLA_ENABLE_WINDOWS_BUILD
         double** buffer = ( double** ) _alloca( m_blocks * sizeof( double* ) );
#else
         double** buffer = ( double** ) malloc( m_blocks * sizeof( double* ) );
         //double*  buffer[m_blocks];
#endif
         for ( i = 0; i < m_blocks; i++ )
         {
            buffer[i] = ( double* ) FLA_Obj_buffer_at_view( blocks[i] );
            
            m[i] = FLA_Obj_length( blocks[i] );
            cs[i] = FLA_Obj_col_stride( blocks[i] );
         }
         
         for ( j = 0; j < m_A; j++ )
         {
            ipiv = buf_p[j] + j;
            
            if ( ipiv != j )
            {
               i = 0;
               
               while ( ipiv >= m[i] )
               {
                  ipiv = ipiv - m[i];
                  i++;
               }

               bl1_dswapv( n_A,
                           buffer[0] + j,    cs[0],
                           buffer[i] + ipiv, cs[i] );
            }
         }
#ifdef FLA_ENABLE_WINDOWS_BUILD
#else
         free( buffer ); 
#endif
         break;
      }
      case FLA_COMPLEX:
      {
#ifdef FLA_ENABLE_WINDOWS_BUILD
         scomplex** buffer = ( scomplex** ) _alloca( m_blocks * sizeof( scomplex* ) );
#else
         scomplex** buffer = ( scomplex** ) malloc( m_blocks * sizeof( scomplex* ) );
         //scomplex*  buffer[m_blocks];
#endif
         for ( i = 0; i < m_blocks; i++ )
         {
            buffer[i] = ( scomplex* ) FLA_Obj_buffer_at_view( blocks[i] );
            
            m[i] = FLA_Obj_length( blocks[i] );
            cs[i] = FLA_Obj_col_stride( blocks[i] );
         }
         
         for ( j = 0; j < m_A; j++ )
         {
            ipiv = buf_p[j] + j;
            
            if ( ipiv != j )
            {
               i = 0;
               
               while ( ipiv >= m[i] )
               {
                  ipiv = ipiv - m[i];
                  i++;
               }

               bl1_cswapv( n_A,
                           buffer[0] + j,    cs[0],
                           buffer[i] + ipiv, cs[i] );
            }
         }
#ifdef FLA_ENABLE_WINDOWS_BUILD
#else
         free( buffer ); 
#endif
         break;
      }
      case FLA_DOUBLE_COMPLEX:
      {
#ifdef FLA_ENABLE_WINDOWS_BUILD
         dcomplex** buffer = ( dcomplex** ) _alloca( m_blocks * sizeof( dcomplex* ) );
#else
         dcomplex** buffer = ( dcomplex** ) malloc( m_blocks * sizeof( dcomplex* ) );
         //dcomplex*  buffer[m_blocks];
#endif
         for ( i = 0; i < m_blocks; i++ )
         {
            buffer[i] = ( dcomplex* ) FLA_Obj_buffer_at_view( blocks[i] );
            
            m[i] = FLA_Obj_length( blocks[i] );
            cs[i] = FLA_Obj_col_stride( blocks[i] );
         }
         
         for ( j = 0; j < m_A; j++ )
         {
            ipiv = buf_p[j] + j;
            
            if ( ipiv != j )
            {
               i = 0;
               
               while ( ipiv >= m[i] )
               {
                  ipiv = ipiv - m[i];
                  i++;
               }

               bl1_zswapv( n_A,
                           buffer[0] + j,    cs[0],
                           buffer[i] + ipiv, cs[i] );
            }
         }
#ifdef FLA_ENABLE_WINDOWS_BUILD
#else
         free( buffer ); 
#endif
         break;
      }
   }

#ifdef FLA_ENABLE_WINDOWS_BUILD
#else
   free( m );
   free( cs );
#endif

   return FLA_SUCCESS;
}