#include "PLA.h"

/*
 * PLA_Chol_blk_var3_opt2
 *
 * Blocked sweep over A from the top-left to the bottom-right corner.  In
 * every iteration the current diagonal block A11 is factored locally
 * (lower triangular), the panel below it is updated as
 * A21 = A21 * inv( tril( A11 )' ), and the trailing block is updated with
 * a symmetric rank-k update A22 = A22 - A21 * A21'.  A is overwritten.
 *
 * Parameters
 *   A       object to be factored in place; the loop advances by comparing
 *           PLA_Obj_length( ATL ) with PLA_Obj_length( A ).
 *   nb_alg  upper bound on the block size used per iteration.  A value
 *           <= 0 means "no extra bound": the block size is then limited
 *           only by the split sizes reported for ABR.  (Previously such a
 *           value produced a block size <= 0, so the loop made no
 *           progress.)
 *
 * Returns
 *   PLA_SUCCESS, unconditionally.
 *
 * NOTE(review): the return values of the PLA_* calls, in particular
 * PLA_Local_chol, are not inspected here -- confirm whether a failed local
 * factorization should be reported to the caller.
 */
int PLA_Chol_blk_var3_opt2( PLA_Obj A, int nb_alg )
{
  /* Views into A: the 2x2 partitioning and its 3x3 refinement */
  PLA_Obj ATL=NULL,   ATR=NULL,      A00=NULL, A01=NULL, A02=NULL, 
          ABL=NULL,   ABR=NULL,      A10=NULL, A11=NULL, A12=NULL,
                                     A20=NULL, A21=NULL, A22=NULL;

  /* Scalar constants conformal to A (ZERO is created but not used below) */
  PLA_Obj MINUS_ONE=NULL, ZERO=NULL, ONE=NULL;

  /* Per-iteration duplicate of A11 */
  PLA_Obj A11_dmsc=NULL;

  int b, b_top, b_left, dummy;

  PLA_Create_constants_conf_to( A, &MINUS_ONE, &ZERO, &ONE );

  /* Start with an empty top-left quadrant: everything lives in ABR */
  PLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, PLA_TL );

  while ( PLA_Obj_length( ATL ) < PLA_Obj_length( A ) ){

    /* Limit the size of A11 so that it is guaranteed to be on one node */
    PLA_Obj_split_size( ABR, PLA_SIDE_TOP,  &b_top,  &dummy );
    PLA_Obj_split_size( ABR, PLA_SIDE_LEFT, &b_left, &dummy );
    b = ( b_top < b_left ) ? b_top : b_left;

    /* Apply the algorithmic block size only when it is a usable bound.
       A nonpositive nb_alg would otherwise yield b <= 0 and the loop
       would never advance. */
    if ( nb_alg > 0 && nb_alg < b ){
      b = nb_alg;
    }

    PLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, PLA_BR );

    /*------------------------------------------------------------*/

    /* Call a routine that only performs a local Cholesky factorization on
       the node that owns A11 */
    PLA_Local_chol( PLA_LOWER_TRIANGULAR, A11 );

    /* Copy A11 to every node in the same column of nodes where A11 exists */
    PLA_Mscalar_create_conf_to( A11, PLA_ALL_ROWS, PLA_INHERIT, &A11_dmsc );
    PLA_Copy( A11, A11_dmsc );

    /* This allows A21 = A21 * inv( tril( A11 )' ) to proceed independently
       on each processor that owns part of A21 */
    PLA_Local_trsm( PLA_RIGHT, PLA_LOWER_TRIANGULAR, 
                    PLA_TRANSPOSE, PLA_NONUNIT_DIAG,
                    ONE, A11_dmsc, A21 );

    /* The duplicate is only needed for the solve above; release it before
       the next iteration creates a new one */
    PLA_Obj_free( &A11_dmsc );

    /* A22 = A22 - A21 * A21' (lower triangle) */
    PLA_Syrk( PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE,
              MINUS_ONE, A21, ONE, A22 );

    /*------------------------------------------------------------*/

    /* Move the boundary: A11 joins the top-left quadrant */
    PLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              PLA_TL );

  }

  /* Release all views and the scalar constants */
  PLA_Obj_free( &ATL ); PLA_Obj_free( &ATR );
  PLA_Obj_free( &ABL ); PLA_Obj_free( &ABR );
  PLA_Obj_free( &A00 ); PLA_Obj_free( &A01 ); PLA_Obj_free( &A02 );
  PLA_Obj_free( &A10 ); PLA_Obj_free( &A11 ); PLA_Obj_free( &A12 );
  PLA_Obj_free( &A20 ); PLA_Obj_free( &A21 ); PLA_Obj_free( &A22 );

  PLA_Obj_free( &MINUS_ONE ); PLA_Obj_free( &ZERO ); PLA_Obj_free( &ONE );

  return PLA_SUCCESS;
}