PageRenderTime 26ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/R2002-11-30/octave-forge/main/sparse/sparse_ops.h

#
C Header | 528 lines | 371 code | 17 blank | 140 comment | 102 complexity | 04bfd70eb5c9d20e99c2a4b22dad5bc5 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Copyright (C) 1999 Andy Adler
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful, but
  8. WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with Octave; see the file COPYING. If not, write to the Free
  13. Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. $Id: sparse_ops.h 663 2002-11-27 04:46:42Z pkienzle $
  15. $Log$
  16. Revision 1.6 2002/11/27 04:46:42 pkienzle
  17. Use new exception handling infrastructure.
  18. Revision 1.5 2002/01/04 15:53:57 pkienzle
  19. Changes required to compile for gcc-3.0 in debian hppa/unstable
  20. Revision 1.4 2001/11/04 19:54:49 aadler
  21. fix bug with multiple entries in sparse creation.
  22. Added "summation" mode for matrix creation
  23. Revision 1.3 2001/10/14 03:06:31 aadler
  24. fixed memory leak in complex sparse solve
  25. fixed malloc bugs for zero size allocs
  26. Revision 1.2 2001/10/12 02:24:28 aadler
  27. Mods to fix bugs
  28. add support for all zero sparse matrices
  29. add support for complex sparse inverse
  30. Revision 1.2 2001/04/04 02:13:46 aadler
  31. complete complex_sparse, templates, fix memory leaks
  32. Revision 1.1 2001/03/30 04:34:23 aadler
  33. "template" functions for sparse operations
  34. */
// Element-wise binary operation between two sparse matrices: X = A _OP_ B.
// (I would like to do this with templates, but I don't think you can
// specify operators.)
//
// TYPX -> output type ( Complex or double)
// _OP_ -> operation to implement ( + , - , != , .* )
// A_B_INTERACT -> evaluate operations where A or B ==0, i.e. true when
//                 (a _OP_ 0) or (0 _OP_ b) can be nonzero, so entries
//                 present in only one operand must still be emitted
//
// I'm assuming that compiler optimization will remove
// the if (0) and x+0 operations
//
// Context expected at the expansion site: Anr/Anc/Bnr/Bnc (dimensions),
// cidxA/ridxA/coefA and cidxB/ridxB/coefB (compressed-column arrays),
// and nnz, the output allocation size.
// NOTE(review): there is no check_bounds() growth here, so nnz must be a
// true upper bound on the output count (presumably nnzA+nnzB) -- confirm
// at the call sites.
// On dimension mismatch, gripe_nonconformant is called and X is left
// uninitialized.
#define SPARSE_EL_OP( TYPX, _OP_, A_B_INTERACT ) \
SuperMatrix X; \
if ( (Anc != Bnc) || (Anr != Bnr) ) { \
gripe_nonconformant ("operator " #_OP_, Anr, Anc, Bnr, Bnc); \
} else { \
assert(Anr == Bnr); assert(Anc == Bnc); \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
\
int jx= 0; /* next free slot in the output arrays */ \
for (int i= 0 ; i < Anc ; i++ ) { /* merge column i of A and of B */ \
int ja= cidxA[ i ]; \
int ja_max= cidxA[ i+1 ]; \
bool ja_lt_max= ja < ja_max; \
\
int jb= cidxB[ i ]; \
int jb_max= cidxB[ i+1 ]; \
bool jb_lt_max= jb < jb_max; \
\
while( ja_lt_max || jb_lt_max ) { \
OCTAVE_QUIT; \
/* entry exists only in A (or B's column is exhausted) */ \
if( ( !jb_lt_max ) || \
( ja_lt_max && (ridxA[ja] < ridxB[jb]) ) ) \
{ \
if (A_B_INTERACT) { \
ridxX[ jx ] = ridxA[ja]; \
coefX[ jx ] = coefA[ ja ] _OP_ 0.0; \
jx++; \
} \
ja++; ja_lt_max= ja < ja_max; \
} else \
/* entry exists only in B (or A's column is exhausted) */ \
if( ( !ja_lt_max ) || \
( jb_lt_max && (ridxB[jb] < ridxA[ja]) ) ) \
{ \
if (A_B_INTERACT) { \
ridxX[ jx ] = ridxB[jb]; \
coefX[ jx ] = 0.0 _OP_ coefB[ jb ]; \
jx++; \
} \
jb++; jb_lt_max= jb < jb_max; \
} else \
{ /* entry exists in both: apply the op, drop exact zeros */ \
assert( ridxA[ja] == ridxB[jb] ); \
TYPX tmpval= coefA[ ja ] _OP_ coefB[ jb ]; \
if (tmpval !=0.0) { \
coefX[ jx ] = tmpval; \
ridxX[ jx ] = ridxA[ja]; \
jx++; \
} \
ja++; ja_lt_max= ja < ja_max; \
jb++; jb_lt_max= jb < jb_max; \
} \
} \
cidxX[i+1] = jx; \
} \
maybe_shrink( jx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Anc, jx, coefX, ridxX, cidxX ); \
}
// Element-wise product of sparse A with full matrix B: X = A .* B.
// Only positions stored in A can be nonzero in the result, so we walk A's
// compressed-column arrays and probe B at each stored position.
//
// Context: Anr/Anc/Bnr/Bnc, cidxA/ridxA/coefA, B (full matrix, indexed as
// B(row,col)), and nnz (A's nonzero count, an upper bound on the result).
// NOTE(review): the _OP_ parameter is accepted but unused -- the body
// hard-codes '*' (and the gripe message says ".*"); confirm no caller
// expects another operator here.
#define SPARSE_MATRIX_EL_OP( TYPX, _OP_ ) \
SuperMatrix X; \
if ( (Anc != Bnc) || (Anr != Bnr) ) { \
gripe_nonconformant ("operator .*", Anr, Anc, Bnr, Bnc); \
} else { \
assert(Anr == Bnr); assert(Anc == Bnc); \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
\
int cx= 0; /* next free slot in the output arrays */ \
for (int i=0; i < Anc ; i++) { \
for (int j= cidxA[i] ; \
j< cidxA[i+1]; j++) { \
OCTAVE_QUIT; \
int rowidx = ridxA[j]; \
TYPX tmpval = B(rowidx,i); \
if (tmpval != 0.0) { /* drop entries where B is zero */ \
ridxX[ cx ] = rowidx; \
coefX[ cx ] = tmpval * coefA[ j ]; \
cx++; \
} \
} \
cidxX[i+1] = cx; \
} \
maybe_shrink( cx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Anc, cx, coefX, ridxX, cidxX ); \
}
// multiply type ops: X = A * B where A is sparse (CCS arrays) and B full.
// TYPM is the output (full) matrix type
// TYPX is the sparse matrix element type
//
// For each stored A(ridxA[j], i) = coefA[j], accumulates
//   X(ridxA[j], k) += coefA[j] * B(i, k)   for every column k of B.
// NOTE(review): on dimension mismatch X is returned as an Anr x Bnc
// matrix that was never zero-initialized (only the else branch clears
// it) -- confirm callers check for the gripe before using X.
#define SPARSE_MATRIX_MUL( TYPM, TYPX) \
TYPM X (Anr , Bnc); \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr); \
for (int i=0; i< Anr; i++ ) \
for (int j=0; j< Bnc; j++ ) \
X.elem(i,j)=0.; \
for ( int i=0; i < Anc ; i++) { \
for ( int j= cidxA[i]; j< cidxA[i+1]; j++) { \
OCTAVE_QUIT; \
int col = ridxA[j]; /* despite the name, this is a ROW index of A */ \
TYPX tmpval = coefA[j]; \
for ( int k=0 ; k< Bnc; k++) { \
X.elem(col , k)+= tmpval * B(i,k); \
} \
} \
} \
}
// X = A * B where A is a full matrix and B is sparse (CCS arrays).
// TYPM is the output (full) matrix type; TYPX the sparse element type.
//
// For each stored B(ridxB[j], i) = coefB[j], accumulates
//   X(k, i) += A(k, ridxB[j]) * coefB[j]   for every row k of A.
// NOTE(review): on dimension mismatch X is returned as an Anr x Bnc
// matrix that was never zero-initialized (only the else branch clears
// it) -- confirm callers check for the gripe before using X.
#define MATRIX_SPARSE_MUL( TYPM, TYPX ) \
TYPM X (Anr , Bnc); \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr); \
for (int i=0; i< Anr; i++ ) \
for (int j=0; j< Bnc; j++ ) \
X.elem(i,j)=0.; \
for ( int i=0; i < Bnc ; i++) { \
for ( int j= cidxB[i]; j< cidxB[i+1]; j++) { \
OCTAVE_QUIT; \
int col = ridxB[j]; /* row index of B = column index into A */ \
TYPX tmpval = coefB[j]; \
for ( int k=0 ; k< Anr; k++) { \
X(k, i)+= A(k,col) * tmpval; \
} \
} \
} \
}
//
// Multiply sparse by sparse: X = A * B (matrix product, both in CCS form).
// This algorithm allocates a full column of the output
// matrix (Xcol, length Anr) as dense scratch. Hopefully that won't be a
// storage problem.
//
// TODO: allocate a row or column depending on the larger
// dimension.
//
// I'm sure there are good sparse multiplication algorithms
// available in the literature. I invented this one
// to fill a gap. Tell me if you know of better ones.
//
// Context: Anr/Anc/Bnr/Bnc, cidxA/ridxA/coefA, cidxB/ridxB/coefB, and
// NCFA/NCFB (SuperLU stores, used for the nnz counts).  The initial
// output estimate is nnzA+nnzB; check_bounds grows the arrays when the
// estimate is exceeded.
//
#define SPARSE_SPARSE_MUL( TYPX ) \
SuperMatrix X; \
if (Anc != Bnr) { \
gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
} else { \
assert (Anc == Bnr ); \
int nnz = NCFA->nnz + NCFB->nnz ; \
TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
int * cidxX= (int *)oct_sparse_malloc((Bnc+1)*sizeof(int)); cidxX[0]= 0; \
\
TYPX * Xcol= (TYPX*)oct_sparse_malloc( Anr * sizeof(TYPX)); \
int cx= 0; \
for ( int i=0; i < Bnc ; i++) { /* build output column i densely */ \
OCTAVE_QUIT; \
for (int k=0; k<Anr; k++) Xcol[k]= 0.; \
for (int j= cidxB[i]; j< cidxB[i+1]; j++) { \
int col= ridxB[j]; /* row of B = column of A */ \
TYPX tmpval = coefB[j]; \
for (int k= cidxA[col] ; k< cidxA[col+1]; k++) \
Xcol[ ridxA[k] ]+= tmpval * coefA[k]; \
} \
/* compress the dense column back into the output arrays */ \
for (int k=0; k<Anr; k++) { \
if ( Xcol[k] !=0. ) { \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[ cx ]= k; \
coefX[ cx ]= Xcol[k]; \
cx++; \
} \
} \
cidxX[i+1] = cx; \
} \
free( Xcol ); \
maybe_shrink( cx, nnz, ridxX, coefX ); \
X= create_SuperMatrix( Anr, Bnc, cx, coefX, ridxX, cidxX ); \
}
  222. // assemble a sparse matrix from elements
  223. // called by > 1 args for sparse
  224. // NOTE: index vectors are 1 based!
  225. //
  226. // NOTE2: be careful about when we convert ri to int,
  227. // otherwise the maximum matrix size will be m*n < maxint/2
  228. #define ASSEMBLE_SPARSE( TYPX ) \
  229. int nnz= MAX( ridxA.length(), cidxA.length() ); \
  230. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  231. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  232. int * cidxX= (int *)oct_sparse_malloc( (n+1)* sizeof(int)); cidxX[0]= 0; \
  233. \
  234. bool ri_scalar = (ridxA.length() == 1); \
  235. bool ci_scalar = (cidxA.length() == 1); \
  236. bool cf_scalar = (coefA.length() == 1); \
  237. \
  238. sort_idxl sidx[ nnz ]; \
  239. OCTAVE_QUIT; \
  240. for (int i=0; i<nnz; i++) { \
  241. sidx[i].val = (long) ( \
  242. ( ri_scalar ? ridxA(0) : ridxA(i) ) - 1 + \
  243. m * (( ci_scalar ? cidxA(0) : cidxA(i) ) - 1) ); \
  244. sidx[i].idx = i; \
  245. } \
  246. \
  247. OCTAVE_QUIT; \
  248. qsort( sidx, nnz, sizeof(sort_idxl), sidxl_comp ); \
  249. \
  250. OCTAVE_QUIT; \
  251. int cx= 0; \
  252. long prev_val=-1; \
  253. int ii= -1; \
  254. for (int i=0; i<nnz; i++) { \
  255. OCTAVE_QUIT; \
  256. long idx= (long) sidx[i].idx; \
  257. long val= (long) sidx[i].val; \
  258. if (prev_val < val) { \
  259. ii++; \
  260. coefX[ii]= ( cf_scalar ? coefA(0) : coefA(idx) ); \
  261. double ri = ( ri_scalar ? ridxA(0) : ridxA(idx) ) - 1 ; \
  262. ridxX[ii]= (long) (ri - ((long) (ri/m))*m ) ; \
  263. long ci = (long)( ci_scalar ? cidxA(0) : cidxA(idx) ) - 1 ; \
  264. while( cx < ci ) cidxX[++cx]= ii; \
  265. } else { \
  266. if (assemble_do_sum) \
  267. coefX[ii]+= ( cf_scalar ? coefA(0) : coefA(idx) ); \
  268. else \
  269. coefX[ii] = ( cf_scalar ? coefA(0) : coefA(idx) ); \
  270. } \
  271. prev_val= val;\
  272. } \
  273. while( cx < n ) cidxX[++cx]= ii+1; \
  274. OCTAVE_QUIT; \
  275. maybe_shrink( ii+1, nnz, ridxX, coefX ); \
  276. \
  277. OCTAVE_QUIT; \
  278. SuperMatrix X= create_SuperMatrix( m, n, ii+1, coefX, ridxX, cidxX );
  279. // assemble a sparse matrix from full
  280. // called by one arg for sparse
  281. // start with an initial estimate for nnz and
  282. // work with it.
  283. #define MATRIX_TO_SPARSE( TYPX ) \
  284. int nnz= 100; \
  285. TYPX * coef = (TYPX *) oct_sparse_malloc ( nnz * sizeof(TYPX) ); \
  286. int * ridx = (int *) oct_sparse_malloc ( nnz * sizeof(int) ); \
  287. int * cidx = (int *) oct_sparse_malloc ((Anc+1)* sizeof(int) ); cidx[0]= 0; \
  288. int jx=0; \
  289. for (int j= 0; j<Anc ; j++ ) { \
  290. for (int i= 0; i<Anr ; i++ ) { \
  291. OCTAVE_QUIT; \
  292. TYPX tmpval= A(i,j); \
  293. if (tmpval != 0.) { \
  294. check_bounds( jx, nnz, ridx, coef ); \
  295. ridx[ jx ]= i; \
  296. coef[ jx ]= tmpval; \
  297. jx++; \
  298. } \
  299. } \
  300. cidx[j+1]= jx; \
  301. } \
  302. maybe_shrink( jx, nnz, ridx, coef ); \
  303. SuperMatrix X= create_SuperMatrix( Anr, Anc, jx, coef, ridx, cidx );
  304. //
  305. // Next three functions
  306. //
  307. // Fix the row ordering problems that
  308. // LUExtract seems to cause
  309. // NOTE: we don't try to fix other structural errors
  310. // in the generated matrices, but we bail out
  311. // if we find any. This should work since I
  312. // haven't seen any problems other than ordering
  313. //
  314. // NOTE: The right way to fix this, of course, is to
  315. // track down the bug in the superlu codebase.
  316. // define a struct and qsort
  317. // comparison function to do the reordering
  318. #define FIX_ROW_ORDER_SORT_FUNCTIONS( TYPE ) \
  319. typedef struct { unsigned int idx; \
  320. TYPE val; } fixrow_sort; \
  321. static inline int \
  322. fixrow_comp( const void *i, const void *j) \
  323. { \
  324. return ((fixrow_sort *) i)->idx - \
  325. ((fixrow_sort *) j)->idx ; \
  326. }
  327. // this define ends function like
  328. // void
  329. // fix_row_order ## TYPE( SuperMatrix X )
  330. // {
  331. // DEBUGMSG("fix_row_order");
  332. // DEFINE_SP_POINTERS_REAL( X )
  333. // FIX_ROW_ORDER_FUNCTIONS( TYPE )
  334. // }
// Body of the fix_row_order* functions (see the comment above): scan each
// column of X and, where the row indices within a column are out of order,
// sort that column's (ridx, coef) pairs with qsort using the helpers from
// FIX_ROW_ORDER_SORT_FUNCTIONS.  Other structural errors are only caught
// by the asserts (debug builds), per the note above.
// Context: Xnr/Xnc, NCFX (for nnz), ridxX/cidxX/coefX.
// NOTE(review): the outer loop runs over Xnr while indexing cidxX -- the
// matrix is apparently held in row-compressed (NR) form here; confirm.
#define FIX_ROW_ORDER_FUNCTIONS \
int nnz= NCFX->nnz; \
for ( int i=0; i < Xnr ; i++) { \
OCTAVE_QUIT; \
assert( cidxX[i] >= 0.); \
assert( cidxX[i] < nnz); /* NOTE(review): an empty trailing column gives cidxX[i]==nnz; should this be <= ? confirm */ \
assert( cidxX[i] <= cidxX[i+1]); \
int reorder=0; \
for( int j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
assert( ridxX[j] >= 0.); \
assert( ridxX[j] < Xnc); /* NOTE(review): bounds a row index by the column count -- only equivalent for square X; verify */ \
assert( coefX[j] != 0.); /* don't keep zero values */ \
if (j> cidxX[i]) \
if ( ridxX[j-1] > ridxX[j] ) \
reorder=1; \
} /* for j */ \
if(reorder) { \
int snum= cidxX[i+1] - cidxX[i]; \
fixrow_sort arry[snum]; /* NOTE(review): VLA -- assumes one column's nnz fits on the stack */ \
/* copy to the sort struct */ \
for( int k=0, \
j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
arry[k].idx= ridxX[j]; \
arry[k].val= coefX[j]; \
k++; \
} \
OCTAVE_QUIT; \
qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp ); \
OCTAVE_QUIT; \
/* copy back to the position */ \
for( int k=0, \
j= cidxX[i]; \
j< cidxX[i+1]; \
j++ ) { \
ridxX[j]= arry[k].idx; \
coefX[j]= arry[k].val; \
k++; \
} \
} \
} // for i
// endof FIX_ROW_ORDER_FUNCTIONS
  380. // calculate the inverse of an
  381. // upper triangular sparse matrix
  382. //
  383. // iUt = inv(U)
  384. // Note that the transpose is returned
  385. //
  386. // CODE:
  387. //
  388. // note that the input matrix is accessed in
  389. // row major order, and the output is accessed
  390. // in column major. This is the reason that the
  391. // output matrix is the transpose of the input
  392. //
  393. // I= eye( size(A) );
  394. // for n=1:N # across
  395. // for m= n+1:N # down
  396. // v=0;
  397. // for i= m-1:-1:n
  398. // v=v- A(m,i)*I(i,n);
  399. // end
  400. // I(m,n)= v/A(m,m);
  401. // end
  402. // I(:,n)= I(:,n)/A(n,n); <- for non unit norm
  403. // end
  404. //
  405. // we need to be careful here,
  406. // U is uppertriangular, but we're treating
  407. // it as though it is a lower triag matrix in NR form
  408. //
  409. // debug code
  410. // printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
  411. // n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
  412. // scolXp,scolUp, v);
  413. #if 0
  414. // this is a pain - we can't use c++ constructors because
  415. // dmalloc complains when they're free'd
  416. int * ridxX = new int [nnz]; \
  417. TYPE* coefX = new TYPE [nnz]; \
  418. int * cidxX = new int [Unc+1]; \
  419. #endif
// Compute the transpose of inv(U) for a sparse upper-triangular U in
// compressed form (see the derivation and caveats in the comment block
// above; U is traversed as though it were lower triangular in NR form).
// Context: Unr/Unc, cidxU/ridxU/coefU, NCFU (for the nnz estimate), nnzU,
// check_bounds, maybe_shrink, oct_sparse_malloc, gripe_divide_by_zero.
// Leaves ridxX/coefX/cidxX and cx (the final nonzero count) defined for
// the expansion site.
#define SPARSE_INV_UPPERTRIANG( TYPE ) \
assert ( Unc == Unr ); \
/* estimate inverse nnz= input nnz */ \
int nnz = NCFU->nnz; \
int * ridxX = (int * ) oct_sparse_malloc ( sizeof(int) *nnz); \
TYPE* coefX = (TYPE* ) oct_sparse_malloc ( sizeof(TYPE)*nnz); \
int * cidxX = (int * ) oct_sparse_malloc ( sizeof(int )*(Unc+1)); \
\
int cx= 0; \
\
/* iterate across columns of output matrix */ \
for ( int n=0; n < Unr ; n++) { \
OCTAVE_QUIT; \
/* place the 1 in the identity position */ \
int cx_colstart= cx; \
\
cidxX[n]= cx; \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[cx]= n; \
coefX[cx]= 1.0; \
cx++; \
\
/* iterate across columns of input matrix */ \
for ( int m= n+1; m< Unr; m++) { \
TYPE v=0.; \
/* iterate to calculate sum: sparse dot product of the already */ \
/* built X(:,n) with U(:,m) over their shared row indices */ \
int colXp= cidxX[n]; \
int colUp= cidxU[m]; \
\
int rpX, rpU; \
do { \
OCTAVE_QUIT; \
rpX= ridxX [ colXp ]; \
rpU= ridxU [ colUp ]; \
\
if (rpX < rpU) { \
colXp++; \
} else \
if (rpX > rpU) { \
colUp++; \
} else { \
assert(rpX == rpU); \
assert(rpX >= n); \
\
v-= coefX[ colXp ]*coefU[ colUp ]; \
colXp++; \
colUp++; \
} \
\
} while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU)); \
\
/* get A(m,m): last stored entry of column m (upper triangular) */ \
colUp= cidxU[m+1]-1; \
assert (ridxU[ colUp ] == m ); /* assert U is upper triangular */ \
\
TYPE pivot= coefU[ colUp ]; \
if (pivot == 0.) gripe_divide_by_zero (); \
\
if (v!=0.) { /* store only nonzero results */ \
check_bounds( cx, nnz, ridxX, coefX ); \
ridxX[cx]= m; \
coefX[cx]= v / pivot; \
cx++; \
} \
} /* for m */ \
\
/* get A(n,n) and rescale the column for non-unit pivots */ \
int colUp= cidxU[n+1]-1; \
assert (ridxU[ colUp ] == n ); /* assert U upper triangular */ \
TYPE pivot= coefU[ colUp ]; \
if (pivot == 0.) gripe_divide_by_zero (); \
\
if (pivot!=1.0) \
for( int i= cx_colstart; i< cx; i++) \
coefX[i]/= pivot; \
} /* for n */ \
cidxX[Unr]= cx; \
maybe_shrink( cx, nnz, ridxX, coefX );