PageRenderTime 26ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2002-11-30/octave-forge/main/sparse/sparse_ops.h

#
C Header | 528 lines | 371 code | 17 blank | 140 comment | 102 complexity | 04bfd70eb5c9d20e99c2a4b22dad5bc5 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Copyright (C) 1999 Andy Adler
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful, but
  8. WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with Octave; see the file COPYING. If not, write to the Free
  13. Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. $Id: sparse_ops.h 663 2002-11-27 04:46:42Z pkienzle $
  15. $Log$
  16. Revision 1.6 2002/11/27 04:46:42 pkienzle
  17. Use new exception handling infrastructure.
  18. Revision 1.5 2002/01/04 15:53:57 pkienzle
  19. Changes required to compile for gcc-3.0 in debian hppa/unstable
  20. Revision 1.4 2001/11/04 19:54:49 aadler
  21. fix bug with multiple entries in sparse creation.
  22. Added "summation" mode for matrix creation
  23. Revision 1.3 2001/10/14 03:06:31 aadler
  24. fixed memory leak in complex sparse solve
  25. fixed malloc bugs for zero size allocs
  26. Revision 1.2 2001/10/12 02:24:28 aadler
  27. Mods to fix bugs
  28. add support for all zero sparse matrices
  29. add support for complex sparse inverse
  30. Revision 1.2 2001/04/04 02:13:46 aadler
  31. complete complex_sparse, templates, fix memory leaks
  32. Revision 1.1 2001/03/30 04:34:23 aadler
  33. "template" functions for sparse operations
  34. */
// Element-wise binary operation between two sparse matrices: X = A _OP_ B.
// (I would like to do this with templates, but I don't think you can
// specify operators.)
//
// TYPX -> output type ( Complex or double)
// _OP_ -> operation to implement ( + , - , != , .* )
// A_B_INTERACT -> evaluate operations where A or B ==0, i.e. true when
//                 (a _OP_ 0) or (0 _OP_ b) can be nonzero, so entries
//                 present in only one operand must still be emitted
//
// I'm assuming that compiler optimization will remove
// the if (0) and x+0 operations
//
// Context expected at the expansion site: Anr/Anc/Bnr/Bnc (dimensions),
// cidxA/ridxA/coefA and cidxB/ridxB/coefB (compressed-column arrays),
// and nnz, the output allocation size.
// NOTE(review): there is no check_bounds() growth here, so nnz must be a
// true upper bound on the output count (presumably nnzA+nnzB) -- confirm
// at the call sites.
// On dimension mismatch, gripe_nonconformant is called and X is left
// uninitialized.
#define SPARSE_EL_OP( TYPX, _OP_, A_B_INTERACT ) \
SuperMatrix X; \
if ( (Anc != Bnc) || (Anr != Bnr) ) { \
gripe_nonconformant ("operator " #_OP_, Anr, Anc, Bnr, Bnc); \
} else { \
assert(Anr == Bnr); assert(Anc == Bnc); \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
\
int jx= 0; /* next free slot in the output arrays */ \
for (int i= 0 ; i < Anc ; i++ ) { /* merge column i of A and of B */ \
int ja= cidxA[ i ]; \
int ja_max= cidxA[ i+1 ]; \
bool ja_lt_max= ja < ja_max; \
\
int jb= cidxB[ i ]; \
int jb_max= cidxB[ i+1 ]; \
bool jb_lt_max= jb < jb_max; \
\
while( ja_lt_max || jb_lt_max ) { \
OCTAVE_QUIT; \
/* entry exists only in A (or B's column is exhausted) */ \
if( ( !jb_lt_max ) || \
( ja_lt_max && (ridxA[ja] < ridxB[jb]) ) ) \
{ \
if (A_B_INTERACT) { \
ridxX[ jx ] = ridxA[ja]; \
coefX[ jx ] = coefA[ ja ] _OP_ 0.0; \
jx++; \
} \
ja++; ja_lt_max= ja < ja_max; \
} else \
/* entry exists only in B (or A's column is exhausted) */ \
if( ( !ja_lt_max ) || \
( jb_lt_max && (ridxB[jb] < ridxA[ja]) ) ) \
{ \
if (A_B_INTERACT) { \
ridxX[ jx ] = ridxB[jb]; \
coefX[ jx ] = 0.0 _OP_ coefB[ jb ]; \
jx++; \
} \
jb++; jb_lt_max= jb < jb_max; \
} else \
{ /* entry exists in both: apply the op, drop exact zeros */ \
assert( ridxA[ja] == ridxB[jb] ); \
TYPX tmpval= coefA[ ja ] _OP_ coefB[ jb ]; \
if (tmpval !=0.0) { \
coefX[ jx ] = tmpval; \
ridxX[ jx ] = ridxA[ja]; \
jx++; \
} \
ja++; ja_lt_max= ja < ja_max; \
jb++; jb_lt_max= jb < jb_max; \
} \
} \
cidxX[i+1] = jx; \
} \
maybe_shrink( jx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Anc, jx, coefX, ridxX, cidxX ); \
}
// Element-wise product of sparse A with full matrix B: X = A .* B.
// Only positions stored in A can be nonzero in the result, so we walk A's
// compressed-column arrays and probe B at each stored position.
//
// Context: Anr/Anc/Bnr/Bnc, cidxA/ridxA/coefA, B (full matrix, indexed as
// B(row,col)), and nnz (A's nonzero count, an upper bound on the result).
// NOTE(review): the _OP_ parameter is accepted but unused -- the body
// hard-codes '*' (and the gripe message says ".*"); confirm no caller
// expects another operator here.
#define SPARSE_MATRIX_EL_OP( TYPX, _OP_ ) \
SuperMatrix X; \
if ( (Anc != Bnc) || (Anr != Bnr) ) { \
gripe_nonconformant ("operator .*", Anr, Anc, Bnr, Bnc); \
} else { \
assert(Anr == Bnr); assert(Anc == Bnc); \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
\
int cx= 0; /* next free slot in the output arrays */ \
for (int i=0; i < Anc ; i++) { \
for (int j= cidxA[i] ; \
j< cidxA[i+1]; j++) { \
OCTAVE_QUIT; \
int rowidx = ridxA[j]; \
TYPX tmpval = B(rowidx,i); \
if (tmpval != 0.0) { /* drop entries where B is zero */ \
ridxX[ cx ] = rowidx; \
coefX[ cx ] = tmpval * coefA[ j ]; \
cx++; \
} \
} \
cidxX[i+1] = cx; \
} \
maybe_shrink( cx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Anc, cx, coefX, ridxX, cidxX ); \
}
// multiply type ops: X = A * B where A is sparse (CCS arrays) and B full.
// TYPM is the output (full) matrix type
// TYPX is the sparse matrix element type
//
// For each stored A(ridxA[j], i) = coefA[j], accumulates
//   X(ridxA[j], k) += coefA[j] * B(i, k)   for every column k of B.
// NOTE(review): on dimension mismatch X is returned as an Anr x Bnc
// matrix that was never zero-initialized (only the else branch clears
// it) -- confirm callers check for the gripe before using X.
#define SPARSE_MATRIX_MUL( TYPM, TYPX) \
TYPM X (Anr , Bnc); \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr); \
for (int i=0; i< Anr; i++ ) \
for (int j=0; j< Bnc; j++ ) \
X.elem(i,j)=0.; \
for ( int i=0; i < Anc ; i++) { \
for ( int j= cidxA[i]; j< cidxA[i+1]; j++) { \
OCTAVE_QUIT; \
int col = ridxA[j]; /* despite the name, this is a ROW index of A */ \
TYPX tmpval = coefA[j]; \
for ( int k=0 ; k< Bnc; k++) { \
X.elem(col , k)+= tmpval * B(i,k); \
} \
} \
} \
}
// X = A * B where A is a full matrix and B is sparse (CCS arrays).
// TYPM is the output (full) matrix type; TYPX the sparse element type.
//
// For each stored B(ridxB[j], i) = coefB[j], accumulates
//   X(k, i) += A(k, ridxB[j]) * coefB[j]   for every row k of A.
// NOTE(review): on dimension mismatch X is returned as an Anr x Bnc
// matrix that was never zero-initialized (only the else branch clears
// it) -- confirm callers check for the gripe before using X.
#define MATRIX_SPARSE_MUL( TYPM, TYPX ) \
TYPM X (Anr , Bnc); \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr); \
for (int i=0; i< Anr; i++ ) \
for (int j=0; j< Bnc; j++ ) \
X.elem(i,j)=0.; \
for ( int i=0; i < Bnc ; i++) { \
for ( int j= cidxB[i]; j< cidxB[i+1]; j++) { \
OCTAVE_QUIT; \
int col = ridxB[j]; /* row index of B = column index into A */ \
TYPX tmpval = coefB[j]; \
for ( int k=0 ; k< Anr; k++) { \
X(k, i)+= A(k,col) * tmpval; \
} \
} \
} \
}
//
// Multiply sparse by sparse: X = A * B (matrix product, both in CCS form).
// This algorithm allocates a full column of the output
// matrix (Xcol, length Anr) as dense scratch. Hopefully that won't be a
// storage problem.
//
// TODO: allocate a row or column depending on the larger
// dimension.
//
// I'm sure there are good sparse multiplication algorithms
// available in the literature. I invented this one
// to fill a gap. Tell me if you know of better ones.
//
// Context: Anr/Anc/Bnr/Bnc, cidxA/ridxA/coefA, cidxB/ridxB/coefB, and
// NCFA/NCFB (SuperLU stores, used for the nnz counts).  The initial
// output estimate is nnzA+nnzB; check_bounds grows the arrays when the
// estimate is exceeded.
//
#define SPARSE_SPARSE_MUL( TYPX ) \
SuperMatrix X; \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr ); \
int nnz = NCFA->nnz + NCFB->nnz ; \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Bnc+1)*sizeof(int)); cidxX[0]= 0; \
\
TYPX * Xcol= (TYPX*)oct_sparse_malloc( Anr * sizeof(TYPX)); \
int cx= 0; \
for ( int i=0; i < Bnc ; i++) { /* build output column i densely */ \
OCTAVE_QUIT; \
for (int k=0; k<Anr; k++) Xcol[k]= 0.; \
for (int j= cidxB[i]; j< cidxB[i+1]; j++) { \
int col= ridxB[j]; /* row of B = column of A */ \
TYPX tmpval = coefB[j]; \
for (int k= cidxA[col] ; k< cidxA[col+1]; k++) \
Xcol[ ridxA[k] ]+= tmpval * coefA[k]; \
} \
/* compress the dense column back into the output arrays */ \
for (int k=0; k<Anr; k++) { \
if ( Xcol[k] !=0. ) { \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[ cx ]= k; \
coefX[ cx ]= Xcol[k]; \
cx++; \
} \
} \
cidxX[i+1] = cx; \
} \
free( Xcol ); \
maybe_shrink( cx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Bnc, cx, coefX, ridxX, cidxX ); \
}
  222. // assemble a sparse matrix from elements
  223. // called by > 1 args for sparse
  224. // NOTE: index vectors are 1 based!
  225. //
  226. // NOTE2: be careful about when we convert ri to int,
  227. // otherwise the maximum matrix size will be m*n < maxint/2
  228. #define ASSEMBLE_SPARSE( TYPX ) \
  229. int nnz= MAX( ridxA.length(), cidxA.length() ); \
  230. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  231. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  232. int * cidxX= (int *)oct_sparse_malloc( (n+1)* sizeof(int)); cidxX[0]= 0; \
  233. \
  234. bool ri_scalar = (ridxA.length() == 1); \
  235. bool ci_scalar = (cidxA.length() == 1); \
  236. bool cf_scalar = (coefA.length() == 1); \
  237. \
  238. sort_idxl sidx[ nnz ]; \
  239. OCTAVE_QUIT; \
  240. for (int i=0; i<nnz; i++) { \
  241. sidx[i].val = (long) ( \
  242. ( ri_scalar ? ridxA(0) : ridxA(i) ) - 1 + \
  243. m * (( ci_scalar ? cidxA(0) : cidxA(i) ) - 1) ); \
  244. sidx[i].idx = i; \
  245. } \
  246. \
  247. OCTAVE_QUIT; \
  248. qsort( sidx, nnz, sizeof(sort_idxl), sidxl_comp ); \
  249. \
  250. OCTAVE_QUIT; \
  251. int cx= 0; \
  252. long prev_val=-1; \
  253. int ii= -1; \
  254. for (int i=0; i<nnz; i++) { \
  255. OCTAVE_QUIT; \
  256. long idx= (long) sidx[i].idx; \
  257. long val= (long) sidx[i].val; \
  258. if (prev_val < val) { \
  259. ii++; \
  260. coefX[ii]= ( cf_scalar ? coefA(0) : coefA(idx) ); \
  261. double ri = ( ri_scalar ? ridxA(0) : ridxA(idx) ) - 1 ; \
  262. ridxX[ii]= (long) (ri - ((long) (ri/m))*m ) ; \
  263. long ci = (long)( ci_scalar ? cidxA(0) : cidxA(idx) ) - 1 ; \
  264. while( cx < ci ) cidxX[++cx]= ii; \
  265. } else { \
  266. if (assemble_do_sum) \
  267. coefX[ii]+= ( cf_scalar ? coefA(0) : coefA(idx) ); \
  268. else \
  269. coefX[ii] = ( cf_scalar ? coefA(0) : coefA(idx) ); \
  270. } \
  271. prev_val= val;\
  272. } \
  273. while( cx < n ) cidxX[++cx]= ii+1; \
  274. OCTAVE_QUIT; \
  275. maybe_shrink( ii+1, nnz, ridxX, coefX ); \
  276. \
  277. OCTAVE_QUIT; \
  278. SuperMatrix X= create_SuperMatrix( m, n, ii+1, coefX, ridxX, cidxX );
  279. // assemble a sparse matrix from full
  280. // called by one arg for sparse
  281. // start with an initial estimate for nnz and
  282. // work with it.
  283. #define MATRIX_TO_SPARSE( TYPX ) \
  284. int nnz= 100; \
  285. TYPX * coef = (TYPX *) oct_sparse_malloc ( nnz * sizeof(TYPX) ); \
  286. int * ridx = (int *) oct_sparse_malloc ( nnz * sizeof(int) ); \
  287. int * cidx = (int *) oct_sparse_malloc ((Anc+1)* sizeof(int) ); cidx[0]= 0; \
  288. int jx=0; \
  289. for (int j= 0; j<Anc ; j++ ) { \
  290. for (int i= 0; i<Anr ; i++ ) { \
  291. OCTAVE_QUIT; \
  292. TYPX tmpval= A(i,j); \
  293. if (tmpval != 0.) { \
  294. check_bounds( jx, nnz, ridx, coef ); \
  295. ridx[ jx ]= i; \
  296. coef[ jx ]= tmpval; \
  297. jx++; \
  298. } \
  299. } \
  300. cidx[j+1]= jx; \
  301. } \
  302. maybe_shrink( jx, nnz, ridx, coef ); \
  303. SuperMatrix X= create_SuperMatrix( Anr, Anc, jx, coef, ridx, cidx );
  304. //
  305. // Next three functions
  306. //
  307. // Fix the row ordering problems that
  308. // LUExtract seems to cause
  309. // NOTE: we don't try to fix other structural errors
  310. // in the generated matrices, but we bail out
  311. // if we find any. This should work since I
  312. // haven't seen any problems other than ordering
  313. //
  314. // NOTE: The right way to fix this, of course, is to
  315. // track down the bug in the superlu codebase.
  316. // define a struct and qsort
  317. // comparison function to do the reordering
  318. #define FIX_ROW_ORDER_SORT_FUNCTIONS( TYPE ) \
  319. typedef struct { unsigned int idx; \
  320. TYPE val; } fixrow_sort; \
  321. static inline int \
  322. fixrow_comp( const void *i, const void *j) \
  323. { \
  324. return ((fixrow_sort *) i)->idx - \
  325. ((fixrow_sort *) j)->idx ; \
  326. }
  327. // this define ends function like
  328. // void
  329. // fix_row_order ## TYPE( SuperMatrix X )
  330. // {
  331. // DEBUGMSG("fix_row_order");
  332. // DEFINE_SP_POINTERS_REAL( X )
  333. // FIX_ROW_ORDER_FUNCTIONS( TYPE )
  334. // }
// Body of the fix_row_order* functions (see the comment above): scan each
// column of X and, where the row indices within a column are out of order,
// sort that column's (ridx, coef) pairs with qsort using the helpers from
// FIX_ROW_ORDER_SORT_FUNCTIONS.  Other structural errors are only caught
// by the asserts (debug builds), per the note above.
// Context: Xnr/Xnc, NCFX (for nnz), ridxX/cidxX/coefX.
// NOTE(review): the outer loop runs over Xnr while indexing cidxX -- the
// matrix is apparently held in row-compressed (NR) form here; confirm.
#define FIX_ROW_ORDER_FUNCTIONS \
int nnz= NCFX->nnz; \
for ( int i=0; i < Xnr ; i++) { \
OCTAVE_QUIT; \
assert( cidxX[i] >= 0.); \
assert( cidxX[i] < nnz); /* NOTE(review): an empty trailing column gives cidxX[i]==nnz; should this be <= ? confirm */ \
assert( cidxX[i] <= cidxX[i+1]); \
int reorder=0; \
for( int j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
assert( ridxX[j] >= 0.); \
assert( ridxX[j] < Xnc); /* NOTE(review): bounds a row index by the column count -- only equivalent for square X; verify */ \
assert( coefX[j] != 0.); /* don't keep zero values */ \
if (j> cidxX[i]) \
if ( ridxX[j-1] > ridxX[j] ) \
reorder=1; \
} /* for j */ \
if(reorder) { \
int snum= cidxX[i+1] - cidxX[i]; \
fixrow_sort arry[snum]; /* NOTE(review): VLA -- assumes one column's nnz fits on the stack */ \
/* copy to the sort struct */ \
for( int k=0, \
j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
arry[k].idx= ridxX[j]; \
arry[k].val= coefX[j]; \
k++; \
} \
OCTAVE_QUIT; \
qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp ); \
OCTAVE_QUIT; \
/* copy back to the position */ \
for( int k=0, \
j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
ridxX[j]= arry[k].idx; \
coefX[j]= arry[k].val; \
k++; \
} \
} \
} // for i
// endof FIX_ROW_ORDER_FUNCTIONS
  380. // calculate the inverse of an
  381. // upper triangular sparse matrix
  382. //
  383. // iUt = inv(U)
  384. // Note that the transpose is returned
  385. //
  386. // CODE:
  387. //
  388. // note that the input matrix is accessed in
  389. // row major order, and the output is accessed
  390. // in column major. This is the reason that the
  391. // output matrix is the transpose of the input
  392. //
  393. // I= eye( size(A) );
  394. // for n=1:N # across
  395. // for m= n+1:N # down
  396. // v=0;
  397. // for i= m-1:-1:n
  398. // v=v- A(m,i)*I(i,n);
  399. // end
  400. // I(m,n)= v/A(m,m);
  401. // end
  402. // I(:,n)= I(:,n)/A(n,n); <- for non unit norm
  403. // end
  404. //
  405. // we need to be careful here,
  406. // U is uppertriangular, but we're treating
  407. // it as though it is a lower triag matrix in NR form
  408. //
  409. // debug code
  410. // printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
  411. // n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
  412. // scolXp,scolUp, v);
  413. #if 0
  414. // this is a pain - we can't use c++ constructors because
  415. // dmalloc complains when they're free'd
  416. int * ridxX = new int [nnz]; \
  417. TYPE* coefX = new TYPE [nnz]; \
  418. int * cidxX = new int [Unc+1]; \
  419. #endif
// Compute the transpose of inv(U) for a sparse upper-triangular U in
// compressed form (see the derivation and caveats in the comment block
// above; U is traversed as though it were lower triangular in NR form).
// Context: Unr/Unc, cidxU/ridxU/coefU, NCFU (for the nnz estimate), nnzU,
// check_bounds, maybe_shrink, oct_sparse_malloc, gripe_divide_by_zero.
// Leaves ridxX/coefX/cidxX and cx (the final nonzero count) defined for
// the expansion site.
#define SPARSE_INV_UPPERTRIANG( TYPE ) \
assert ( Unc == Unr ); \
/* estimate inverse nnz= input nnz */ \
int nnz = NCFU->nnz; \
int * ridxX = (int * ) oct_sparse_malloc ( sizeof(int) *nnz); \
TYPE* coefX = (TYPE* ) oct_sparse_malloc ( sizeof(TYPE)*nnz); \
int * cidxX = (int * ) oct_sparse_malloc ( sizeof(int )*(Unc+1)); \
\
int cx= 0; \
\
/* iterate across columns of output matrix */ \
for ( int n=0; n < Unr ; n++) { \
OCTAVE_QUIT; \
/* place the 1 in the identity position */ \
int cx_colstart= cx; \
\
cidxX[n]= cx; \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[cx]= n; \
coefX[cx]= 1.0; \
cx++; \
\
/* iterate across columns of input matrix */ \
for ( int m= n+1; m< Unr; m++) { \
TYPE v=0.; \
/* iterate to calculate sum: sparse dot product of the already */ \
/* built X(:,n) with U(:,m) over their shared row indices */ \
int colXp= cidxX[n]; \
int colUp= cidxU[m]; \
\
int rpX, rpU; \
do { \
OCTAVE_QUIT; \
rpX= ridxX [ colXp ]; \
rpU= ridxU [ colUp ]; \
\
if (rpX < rpU) { \
colXp++; \
} else \
if (rpX > rpU) { \
colUp++; \
} else { \
assert(rpX == rpU); \
assert(rpX >= n); \
\
v-= coefX[ colXp ]*coefU[ colUp ]; \
colXp++; \
colUp++; \
} \
\
} while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU)); \
\
/* get A(m,m): last stored entry of column m (upper triangular) */ \
colUp= cidxU[m+1]-1; \
assert (ridxU[ colUp ] == m ); /* assert U is upper triangular */ \
\
TYPE pivot= coefU[ colUp ]; \
if (pivot == 0.) gripe_divide_by_zero (); \
\
if (v!=0.) { /* store only nonzero results */ \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[cx]= m; \
coefX[cx]= v / pivot; \
cx++; \
} \
} /* for m */ \
\
/* get A(n,n) and rescale the column for non-unit pivots */ \
int colUp= cidxU[n+1]-1; \
assert (ridxU[ colUp ] == n ); /* assert U upper triangular */ \
TYPE pivot= coefU[ colUp ]; \
if (pivot == 0.) gripe_divide_by_zero (); \
\
if (pivot!=1.0) \
for( int i= cx_colstart; i< cx; i++) \
coefX[i]/= pivot; \
} /* for n */ \
cidxX[Unr]= cx; \
maybe_shrink( cx, nnz, ridxX, coefX );