/*
 *             Automatically Tuned Linear Algebra Software v3.0Beta
 *                    (C) Copyright 1999 R. Clint Whaley                     
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the University, the ATLAS group, or the names of its 
 *      contributers may not be used to endorse or promote products derived
 *      from this software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE. 
 *
 */

#include "atlas_misc.h"
#include "atlas_lvl3.h"
#include "atlas_level3.h"
#include "atlas_level1.h"
#include "atlas_lapack.h"

#ifdef TCPLX
   #define ATL_CplxInv(in, out) Mjoin(PATL,cplxinvert)(1, in, 1, out, 1);
#endif

int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major factorization of form 
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit diagonal
 * elements (lower trapazoidal if M > N), and U is upper triangular (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero}; 
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

   if (MN > 1)
   {
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv);  /* factor left to L & U */
      if (i) if (!ierr) ierr = i;
/*
 *    Update trailing submatrix
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                 Nleft, Nright, one, A, lda, Ac, lda);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright, 
                 Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
   else if (MN == 1)
   {
      *ipiv = i = cblas_iamax(M, A, 1);  /* find pivot */
      #ifdef TREAL
         tmp = A[i];
         if (tmp != ATL_rzero)
         {
            cblas_scal(M, ATL_rone/tmp, A, 1);
            A[i] = *A;
            *A = tmp;
         }
         else ierr = 1;
      #else
         i <<= 1;
         tmp[0] = A[i];
         tmp[1] = A[i+1];
         if (tmp[0] != ATL_rzero || tmp[1] != ATL_rzero)
         {
            ATL_CplxInv(tmp, inv);
            cblas_scal(M, inv, A, 1);
            A[i] = *A;
            A[i+1] = A[1];
            *A = tmp[0];
            A[1] = tmp[1];
         }
         else ierr = 1;
      #endif
   }
   return(ierr);
}

