#ifndef _SKIT_IC0_ALGO_H
#define _SKIT_IC0_ALGO_H
///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
/// 
/// =========================================================================
namespace rheolef { 

// ic0 : inspired by the sparselib++ algorithms
//  store A = L + D + L^t
//  with (L+D) in CSC (by column instead of by row)
//  then apply a KIJ in-place Cholesky algorithm
// -------------------------------------------------------------
// part 1: data structure conversion
// -------------------------------------------------------------
// Count the stored entries of a CSR matrix that lie strictly above the
// diagonal, i.e. whose column index is greater than their row index.
//
//   n       number of rows
//   astart  [n+1]  position of the first entry of each row
//   acol    [annz] column index of each stored entry
//
// Returns the number of such entries.
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2
>
Size
csr_nnz_extra_diag_upper (
  Size                 n,
  InputSizeIterator1   astart, // [n+1]
  InputSizeIterator2   acol)   // [annz]
{
  Size count = 0;
  Size row = 0;
  while (row < n) {
    const Size row_end = astart[row+1];
    Size pos = astart[row];
    while (pos < row_end) {
      // strictly upper: the diagonal itself is not counted
      if (acol[pos] > row) {
        ++count;
      }
      ++pos;
    }
    ++row;
  }
  return count;
}
// csr_upper (or csc_lower)
//   Copy the upper triangle, diagonal included, of a CSR matrix A into
//   a second CSR matrix B. Entries keep the order they have in A.
//   Read by columns instead of rows, the very same arrays describe the
//   lower triangle in CSC format, which is the layout used by IC0.
//
//   n       number of rows
//   astart  [n+1]   row pointers of A
//   acol    [annz]  column indices of A
//   aval    [annz]  values of A
//   bstart  [n+1]   out: row pointers of B, with bstart[0] = 0
//   bcol    [bsize] out: column indices of B
//   bval    [bsize] out: values of B
//   The output arrays are provided by the caller; bsize = (annz + n)/2
//   when the pattern of A is symmetric and its diagonal is fully stored.
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2,
  class InputValueIterator,
  class OutputSizeIterator1,
  class OutputSizeIterator2,
  class OutputValueIterator
>
void
csr_extract_upper (
  Size                 n,
  InputSizeIterator1   astart, // [n+1]
  InputSizeIterator2   acol,   // [annz]
  InputValueIterator   aval,   // [annz]
  OutputSizeIterator1  bstart, // [n+1]
  OutputSizeIterator2  bcol,   // [bsize]
  OutputValueIterator  bval)   // [bsize] where bsize = (annz + n)/2
{
  Size out = 0; // next free slot in bcol/bval
  bstart[0] = 0;
  for (Size row = 0; row < n; ++row) {
    const Size row_end = astart[row+1];
    for (Size pos = astart[row]; pos < row_end; ++pos) {
      if (acol[pos] < row) continue; // strictly lower entry: dropped
      bcol[out] = acol[pos];
      bval[out] = aval[pos];
      ++out;
    }
    bstart[row+1] = out;
  }
}
// -------------------------------------------------------------
// part 2: factorization
// -------------------------------------------------------------
// In-place incomplete Cholesky factorization IC(0) of a matrix whose lower
// triangle (diagonal included) is stored by columns (CSC).
// Inspired by sparselib++-1.7/src/icpre.cc.
//
//   n       order of the matrix; nothing is done when n == 0
//   astart  [n+1]   position of the first entry of each column
//   aline   [asize] row index of each stored entry
//   aval    [asize] in : values of the lower triangle of A
//                   out: values of the factor L, on the same pattern
//
// Requirements on the storage: in every column the diagonal entry comes
// first and the row indices are strictly increasing.
// The sign of a pivot is not checked before its square root is taken.
//
// For each column k:
//   l_kk  = sqrt(a_kk)
//   l_ik  = a_ik / l_kk                for the stored rows i > k
//   a_ij -= l_ik * l_jk                for the stored rows i >= j > k of
//                                      column k such that (i,j) is also
//                                      stored in column j (no fill-in)
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2,
  class InputValueIterator
>
void
csc_ic0 (
  Size                 n,
  InputSizeIterator1   astart, // [n+1]
  InputSizeIterator2   aline,  // [asize]
  InputValueIterator   aval)   // [asize]
{
  typedef typename std::iterator_traits<InputValueIterator>::value_type T;
  // empty matrix: avoids n-1 wrapping around when Size is unsigned
  if (n == 0) return;
  for (Size k = 0; k + 1 < n; k++) {
    const Size zkk    = astart[k];
    const Size zk_end = astart[k+1];
    const T lkk = aval[zkk] = ::sqrt(aval[zkk]);
    for (Size zik = zkk + 1; zik < zk_end; zik++) {
      aval[zik] /= lkk;
    }
    for (Size zjk = zkk + 1; zjk < zk_end; zjk++) {
      const Size j   = aline[zjk];
      const T    ljk = aval [zjk];
      // Merge of two sorted row lists: column j (cursor zij) against the
      // tail of column k starting at row j (cursor zik).
      Size zik = zjk;
      for (Size zij = astart[j]; zij < astart[j+1] && zik < zk_end; zij++) {
        // skip the rows of column k that column j does not store
        while (zik < zk_end && aline[zik] < aline[zij]) zik++;
        // update only when row i is present in both columns
        if (zik < zk_end && aline[zik] == aline[zij]) {
          aval[zij] -= ljk*aval[zik];
        }
      }
    }
  }
  // last column: only its diagonal entry is left to process
  const Size znn = astart[n-1];
  aval[znn] = ::sqrt(aval[znn]);
}
// -------------------------------------------------------------
// part 3: solve
// -------------------------------------------------------------
// In-place forward substitution with a lower triangular matrix stored by
// columns (CSC), the diagonal entry being the first one of each column.
//
//   n       order of the matrix
//   astart  [n+1]   position of the first entry of each column
//   aline   [asize] row index of each stored entry
//   aval    [asize] values
//   x       [n]     in: right-hand side; out: solution
//
// Column-oriented sweep: once x[j] is known, its contribution is removed
// from every row listed below the diagonal of column j.
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2,
  class InputValueIterator,
  class OutputValueRandomIterator
>
void
csc_lower_diag_solve(
  Size                       n,
  InputSizeIterator1         astart, // [n+1]
  InputSizeIterator2         aline,  // [asize]
  InputValueIterator         aval,   // [asize]
  OutputValueRandomIterator  x)      // [n]
{
  typedef typename std::iterator_traits<OutputValueRandomIterator>::value_type T;
  Size col = 0;
  while (col < n) {
    const Size zdiag = astart[col];
    const Size zlast = astart[col+1];
    const T x_col = x[col] / aval[zdiag];
    x[col] = x_col;
    Size zpos = zdiag + 1;
    while (zpos < zlast) {
      x[aline[zpos]] -= aval[zpos]*x_col;
      ++zpos;
    }
    ++col;
  }
}
// Backward substitution with an upper triangular matrix stored by rows
// (CSR), the diagonal entry being the first one of each row.
//
//   n       order of the matrix
//   astart  [n+1]   position of the first entry of each row
//   acol    [asize] column index of each stored entry
//   aval    [asize] values
//   b       [n]     right-hand side
//   x       [n]     out: solution
//
// b[i] is read before x[i] is written, so b and x may designate the same
// array (in-place solve).
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2,
  class InputValueIterator1,
  class InputValueIterator2,
  class OutputValueRandomIterator
>
void
csr_upper_diag_solve(
  Size                       n,
  InputSizeIterator1         astart, // [n+1]
  InputSizeIterator2         acol,   // [asize]
  InputValueIterator1        aval,   // [asize]
  InputValueIterator2        b,      // [n]
  OutputValueRandomIterator  x)      // [n]
{
  typedef typename std::iterator_traits<OutputValueRandomIterator>::value_type T;
  // Countdown n-1, ..., 0 carried out in Size itself: no conversion to a
  // possibly narrower signed type, and valid for unsigned Size as well.
  for (Size i = n; i-- > 0; ) {
    T sum = 0;
    for (Size zij = astart[i] + 1; zij < astart[i+1]; zij++) {
      sum += x[acol[zij]] * aval[zij];
    }
    // The unknown of row i is x[i]: same 0-based indexing as the reads
    // x[acol[zij]] above.
    x[i] = (b[i] - sum) / aval[astart[i]];
  }
}
// Apply the IC0 preconditioner: solve (L L^t) x = b, where the factor L is
// stored by columns (CSC), diagonal entry first in each column.
//
//   n       order of the matrix
//   astart  [n+1]   position of the first entry of each column of L
//   aline   [asize] row index of each stored entry of L
//   aval    [asize] values of L
//   b       [n]     right-hand side, left untouched
//   x       [n]     out: solution
//
// Two triangular solves are chained, both in place in x:
//   1) L   y = b   forward substitution, L read by columns
//   2) L^t x = y   backward substitution: the same arrays, read by rows,
//                  describe the upper triangular matrix L^t
template<
  class Size,
  class InputSizeIterator1,
  class InputSizeIterator2,
  class InputValueIterator1,
  class InputValueIterator2,
  class OutputValueRandomIterator
>
void
ic0_solve(
  Size                       n,
  InputSizeIterator1         astart, // [n+1]
  InputSizeIterator2         aline,  // [asize]
  InputValueIterator1        aval,   // [asize]
  InputValueIterator2        b,      // [n]
  OutputValueRandomIterator  x)      // [n]
{
  std::copy (b, b+n, x);
  csc_lower_diag_solve (n, astart, aline, aval, x);
  // The right-hand side of the second stage is the result y of the first
  // one, held in x, and not the original b: with b, the forward solve
  // would have no effect on the result.
  csr_upper_diag_solve (n, astart, aline, aval, x, x);
}
}// namespace rheolef
#endif // _SKIT_IC0_ALGO_H
