PageRenderTime 94ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/R2004-09-09/octave-forge/main/sparse/sparse_ops.h

#
C Header | 623 lines | 427 code | 21 blank | 175 comment | 117 complexity | 59801b87960804a2be55c0cd1b2f3aac MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Copyright (C) 1999 Andy Adler
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful, but
  8. WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with Octave; see the file COPYING. If not, write to the Free
  13. Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. $Id: sparse_ops.h 1697 2004-08-31 15:23:45Z adb014 $
  15. $Log$
  16. Revision 1.14 2004/08/31 15:23:45 adb014
  17. Small build fix for the macro SPARSE_RESIZE
  18. Revision 1.13 2004/08/25 16:13:57 adb014
  19. Working, but inefficient, concatentaion code
  20. Revision 1.12 2004/08/03 14:45:31 aadler
  21. clean up ASSEMBLE_SPARSE macro
  22. Revision 1.11 2003/12/22 15:13:23 pkienzle
  23. Use error/return rather than SP_FATAL_ERROR where possible.
  24. Test for zero elements from scalar multiply/power and shrink sparse
  25. accordingly; accomodate libstdc++ bugs with mixed real/complex power.
  26. Revision 1.10 2003/10/13 03:44:26 aadler
  27. check for out of bounds in allocation
  28. Revision 1.9 2003/04/03 22:06:41 aadler
  29. sparse create bug - need to use heap for large temp vars
  30. Revision 1.8 2003/02/20 23:03:59 pkienzle
  31. Use of "T x[n]" where n is not constant is a g++ extension so replace it with
  32. OCTAVE_LOCAL_BUFFER(T,x,n), and other things to keep the picky MipsPRO CC
  33. compiler happy.
  34. Revision 1.7 2002/12/25 01:33:00 aadler
  35. fixed bug which allowed zero values to be stored in sparse matrices.
  36. improved print output
  37. Revision 1.6 2002/11/27 04:46:42 pkienzle
  38. Use new exception handling infrastructure.
  39. Revision 1.5 2002/01/04 15:53:57 pkienzle
  40. Changes required to compile for gcc-3.0 in debian hppa/unstable
  41. Revision 1.4 2001/11/04 19:54:49 aadler
  42. fix bug with multiple entries in sparse creation.
  43. Added "summation" mode for matrix creation
  44. Revision 1.3 2001/10/14 03:06:31 aadler
  45. fixed memory leak in complex sparse solve
  46. fixed malloc bugs for zero size allocs
  47. Revision 1.2 2001/10/12 02:24:28 aadler
  48. Mods to fix bugs
  49. add support for all zero sparse matrices
  50. add support fom complex sparse inverse
  51. Revision 1.2 2001/04/04 02:13:46 aadler
  52. complete complex_sparse, templates, fix memory leaks
  53. Revision 1.1 2001/03/30 04:34:23 aadler
  54. "template" functions for sparse operations
  55. */
  56. // I would like to do this with templates,
  57. // but I don't think you can specify operators
  58. //
  59. // TYPX -> output type ( Complex or double)
  60. // _OP_ -> operation to implement ( + , - , != , .* )
  61. // A_B_INTERACT -> evaluate operations where A or B ==0
  62. //
  63. // I'm assuming that compiler optimization will remove
  64. // the if (0) and x+0 operations
  65. #define SPARSE_EL_OP( TYPX, _OP_, A_B_INTERACT ) \
  66. SuperMatrix X; \
  67. if ( (Anc != Bnc) || (Anr != Bnr) ) { \
  68. gripe_nonconformant ("operator " #_OP_, Anr, Anc, Bnr, Bnc); \
  69. } else { \
  70. assert(Anr == Bnr); assert(Anc == Bnc); \
  71. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  72. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  73. int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
  74. \
  75. int jx= 0; \
  76. for (int i= 0 ; i < Anc ; i++ ) { \
  77. int ja= cidxA[ i ]; \
  78. int ja_max= cidxA[ i+1 ]; \
  79. bool ja_lt_max= ja < ja_max; \
  80. \
  81. int jb= cidxB[ i ]; \
  82. int jb_max= cidxB[ i+1 ]; \
  83. bool jb_lt_max= jb < jb_max; \
  84. \
  85. while( ja_lt_max || jb_lt_max ) { \
  86. OCTAVE_QUIT; \
  87. if( ( !jb_lt_max ) || \
  88. ( ja_lt_max && (ridxA[ja] < ridxB[jb]) ) ) \
  89. { \
  90. if (A_B_INTERACT) { \
  91. ridxX[ jx ] = ridxA[ja]; \
  92. coefX[ jx ] = coefA[ ja ] _OP_ 0.0; \
  93. jx++; \
  94. } \
  95. ja++; ja_lt_max= ja < ja_max; \
  96. } else \
  97. if( ( !ja_lt_max ) || \
  98. ( jb_lt_max && (ridxB[jb] < ridxA[ja]) ) ) \
  99. { \
  100. if (A_B_INTERACT) { \
  101. ridxX[ jx ] = ridxB[jb]; \
  102. coefX[ jx ] = 0.0 _OP_ coefB[ jb ]; \
  103. jx++; \
  104. } \
  105. jb++; jb_lt_max= jb < jb_max; \
  106. } else \
  107. { \
  108. assert( ridxA[ja] == ridxB[jb] ); \
  109. TYPX tmpval= coefA[ ja ] _OP_ coefB[ jb ]; \
  110. if (tmpval !=0.0) { \
  111. coefX[ jx ] = tmpval; \
  112. ridxX[ jx ] = ridxA[ja]; \
  113. jx++; \
  114. } \
  115. ja++; ja_lt_max= ja < ja_max; \
  116. jb++; jb_lt_max= jb < jb_max; \
  117. } \
  118. } \
  119. cidxX[i+1] = jx; \
  120. } \
  121. maybe_shrink( jx, nnz, ridxX, coefX ); \
  122. X= create_SuperMatrix( Anr, Anc, jx, coefX, ridxX, cidxX ); \
  123. }
  124. #define SPARSE_MATRIX_EL_OP( TYPX, _OP_ ) \
  125. SuperMatrix X; \
  126. if ( (Anc != Bnc) || (Anr != Bnr) ) { \
  127. gripe_nonconformant ("operator .*", Anr, Anc, Bnr, Bnc); \
  128. } else { \
  129. assert(Anr == Bnr); assert(Anc == Bnc); \
  130. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  131. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  132. int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
  133. \
  134. int cx= 0; \
  135. for (int i=0; i < Anc ; i++) { \
  136. for (int j= cidxA[i] ; \
  137. j< cidxA[i+1]; j++) { \
  138. OCTAVE_QUIT; \
  139. int rowidx = ridxA[j]; \
  140. TYPX tmpval = B(rowidx,i); \
  141. if (tmpval != 0.0) { \
  142. ridxX[ cx ] = rowidx; \
  143. coefX[ cx ] = tmpval * coefA[ j ]; \
  144. cx++; \
  145. } \
  146. } \
  147. cidxX[i+1] = cx; \
  148. } \
  149. maybe_shrink( cx, nnz, ridxX, coefX ); \
  150. X= create_SuperMatrix( Anr, Anc, cx, coefX, ridxX, cidxX ); \
  151. }
  152. // multiply type ops
  153. // TYPM is the output matrix type
  154. // TYPX is the sparse matrix type
  155. #define SPARSE_MATRIX_MUL( TYPM, TYPX) \
  156. TYPM X (Anr , Bnc); \
  157. if (Anc != Bnr) { \
  158. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  159. } else { \
  160. assert (Anc == Bnr); \
  161. for (int i=0; i< Anr; i++ ) \
  162. for (int j=0; j< Bnc; j++ ) \
  163. X.elem(i,j)=0.; \
  164. for ( int i=0; i < Anc ; i++) { \
  165. for ( int j= cidxA[i]; j< cidxA[i+1]; j++) { \
  166. OCTAVE_QUIT; \
  167. int col = ridxA[j]; \
  168. TYPX tmpval = coefA[j]; \
  169. for ( int k=0 ; k< Bnc; k++) { \
  170. X.elem(col , k)+= tmpval * B(i,k); \
  171. } \
  172. } \
  173. } \
  174. }
  175. #define MATRIX_SPARSE_MUL( TYPM, TYPX ) \
  176. TYPM X (Anr , Bnc); \
  177. if (Anc != Bnr) { \
  178. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  179. } else { \
  180. assert (Anc == Bnr); \
  181. for (int i=0; i< Anr; i++ ) \
  182. for (int j=0; j< Bnc; j++ ) \
  183. X.elem(i,j)=0.; \
  184. for ( int i=0; i < Bnc ; i++) { \
  185. for ( int j= cidxB[i]; j< cidxB[i+1]; j++) { \
  186. OCTAVE_QUIT; \
  187. int col = ridxB[j]; \
  188. TYPX tmpval = coefB[j]; \
  189. for ( int k=0 ; k< Anr; k++) { \
  190. X(k, i)+= A(k,col) * tmpval; \
  191. } \
  192. } \
  193. } \
  194. }
  195. //
  196. // Multiply sparse by sparse, element by element
  197. // This algorithm allocates a full column of the output
  198. // matrix. Hopefully that won't be a storage problem.
  199. //
  200. // TODO: allocate a row or column depending on the larger
  201. // dimention.
  202. //
  203. // I'm sure there are good sparse multiplication algorithms
  204. // available in the litterature. I invented this one
  205. // to fill a gap. Tell me if you know of better ones.
  206. //
  207. #define SPARSE_SPARSE_MUL( TYPX ) \
  208. SuperMatrix X; \
  209. if (Anc != Bnr) { \
  210. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  211. } else { \
  212. assert (Anc == Bnr ); \
  213. int nnz = NCFA->nnz + NCFB->nnz ; \
  214. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  215. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  216. int * cidxX= (int *)oct_sparse_malloc((Bnc+1)*sizeof(int)); cidxX[0]= 0; \
  217. \
  218. TYPX * Xcol= (TYPX*)oct_sparse_malloc( Anr * sizeof(TYPX)); \
  219. int cx= 0; \
  220. for ( int i=0; i < Bnc ; i++) { \
  221. OCTAVE_QUIT; \
  222. for (int k=0; k<Anr; k++) Xcol[k]= 0.; \
  223. for (int j= cidxB[i]; j< cidxB[i+1]; j++) { \
  224. int col= ridxB[j]; \
  225. TYPX tmpval = coefB[j]; \
  226. for (int k= cidxA[col] ; k< cidxA[col+1]; k++) \
  227. Xcol[ ridxA[k] ]+= tmpval * coefA[k]; \
  228. } \
  229. for (int k=0; k<Anr; k++) { \
  230. if ( Xcol[k] !=0. ) { \
  231. check_bounds( cx, nnz, ridxX, coefX ); \
  232. ridxX[ cx ]= k; \
  233. coefX[ cx ]= Xcol[k]; \
  234. cx++; \
  235. } \
  236. } \
  237. cidxX[i+1] = cx; \
  238. } \
  239. free( Xcol ); \
  240. maybe_shrink( cx, nnz, ridxX, coefX ); \
  241. X= create_SuperMatrix( Anr, Bnc, cx, coefX, ridxX, cidxX ); \
  242. }
  243. // assemble a sparse matrix from elements
  244. // called by > 1 args for sparse
  245. // NOTE: index vectors are 1 based!
  246. //
  247. // NOTE2: be careful about when we convert ri to int,
  248. // otherwise the maximum matrix size will be m*n < maxint/2
  249. //
  250. // The OCTAVE_LOCAL_BUFFER can't be used for the sort index
  251. // when the requested size is too big. we need to put it on the
  252. // heap.
  253. #define ASSEMBLE_SPARSE( TYPX , REJECT_OUT_OF_RANGE ) \
  254. int nnz= MAX( ridxA.length(), cidxA.length() ); \
  255. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  256. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  257. int * cidxX= (int *)oct_sparse_malloc( (n+1)* sizeof(int)); cidxX[0]= 0; \
  258. \
  259. bool ri_scalar = (ridxA.length() == 1); \
  260. bool ci_scalar = (cidxA.length() == 1); \
  261. bool cf_scalar = (coefA.length() == 1); \
  262. \
  263. sort_idxl* sidx = (sort_idxl *)oct_sparse_malloc( nnz* sizeof(sort_idxl) );\
  264. int actual_nnz=0; \
  265. OCTAVE_QUIT; \
  266. for (int i=0; i<nnz; i++) { \
  267. double rowidx= ( ri_scalar ? ridxA(0) : ridxA(i) ) - 1; \
  268. double colidx= ( ci_scalar ? cidxA(0) : cidxA(i) ) - 1; \
  269. if (rowidx < m && rowidx >= 0 && \
  270. colidx < n && colidx >= 0 ) { \
  271. if ( coefA( cf_scalar ? 0 : i ) != 0. ) { \
  272. sidx[actual_nnz].val = (long) ( rowidx + m * colidx ); \
  273. sidx[actual_nnz].idx = i; \
  274. actual_nnz++; \
  275. } \
  276. } \
  277. else { \
  278. if( REJECT_OUT_OF_RANGE ) { \
  279. error("sparse (row,col) index (%d,%d) out of range", rowidx,colidx);\
  280. actual_nnz=0; \
  281. break; \
  282. } \
  283. } \
  284. } \
  285. \
  286. OCTAVE_QUIT; \
  287. qsort( sidx, actual_nnz, sizeof(sort_idxl), sidxl_comp ); \
  288. \
  289. OCTAVE_QUIT; \
  290. int cx= 0; \
  291. long prev_val=-1; \
  292. int ii= -1; \
  293. for (int i=0; i<actual_nnz; i++) { \
  294. OCTAVE_QUIT; \
  295. long idx= (long) sidx[i].idx; \
  296. long val= (long) sidx[i].val; \
  297. if (prev_val < val) { \
  298. ii++; \
  299. coefX[ii]= coefA( cf_scalar ? 0 : idx ); \
  300. double ri = ridxA( ri_scalar ? 0 : idx ) - 1 ; \
  301. ridxX[ii]= (long) (ri - ((long) (ri/m))*m ) ; \
  302. long ci = (long) cidxA( ci_scalar ? 0 : idx ) - 1 ; \
  303. while( cx < ci ) cidxX[++cx]= ii; \
  304. } else { \
  305. if (assemble_do_sum) \
  306. coefX[ii]+= coefA( cf_scalar ? 0 : idx ); \
  307. else \
  308. coefX[ii] = coefA( cf_scalar ? 0 : idx ); \
  309. } \
  310. prev_val= val;\
  311. } \
  312. while( cx < n ) cidxX[++cx]= ii+1; \
  313. OCTAVE_QUIT; \
  314. maybe_shrink( ii+1, actual_nnz, ridxX, coefX ); \
  315. \
  316. OCTAVE_QUIT; \
  317. SuperMatrix X= create_SuperMatrix( m, n, ii+1, coefX, ridxX, cidxX ); \
  318. oct_sparse_free( sidx );
  319. // destructively resize a sparse matrix
  320. #define SPARSE_RESIZE( TYPX, VECTYP1, VECTYP2) \
  321. /* Special case the redimensioning to an empty matrix */ \
  322. if (dv.numel() == 0) \
  323. return new octave_sparse (create_SuperMatrix (0, 0, 0, (TYPX *)0, \
  324. 0, 0)); \
  325. \
  326. /* Special case the redimensioning of an empty matrix */ \
  327. NCformat * Xstore = (NCformat * ) X.Store; \
  328. if (Xstore->nnz == 0) \
  329. { \
  330. int * colptr = (int *) oct_sparse_malloc ((dv(1) + 1) * sizeof(int)); \
  331. for (int i = 0; i <= dv(1); i++) \
  332. colptr [i] = 0; \
  333. return new octave_sparse (create_SuperMatrix (dv(0), dv(1), 0, \
  334. (TYPX *)0, 0, colptr)); \
  335. } \
  336. \
  337. octave_value_list find_val= find(); \
  338. int n= (int) dv(1); \
  339. int m= (int) dv(0); \
  340. ColumnVector ridxA= find_val(0).column_vector_value(); \
  341. ColumnVector cidxA= find_val(1).column_vector_value(); \
  342. VECTYP1 ## ColumnVector coefA= find_val(2). VECTYP2 ## column_vector_value(); \
  343. int assemble_do_sum = 0; \
  344. ASSEMBLE_SPARSE( TYPX, false);
  345. // assemble a sparse matrix from full
  346. // called by one arg for sparse
  347. // start with an initial estimate for nnz and
  348. // work with it.
  349. #define MATRIX_TO_SPARSE( TYPX ) \
  350. int nnz= 100; \
  351. TYPX * coef = (TYPX *) oct_sparse_malloc ( nnz * sizeof(TYPX) ); \
  352. int * ridx = (int *) oct_sparse_malloc ( nnz * sizeof(int) ); \
  353. int * cidx = (int *) oct_sparse_malloc ((Anc+1)* sizeof(int) ); cidx[0]= 0; \
  354. int jx=0; \
  355. for (int j= 0; j<Anc ; j++ ) { \
  356. for (int i= 0; i<Anr ; i++ ) { \
  357. OCTAVE_QUIT; \
  358. TYPX tmpval= A(i,j); \
  359. if (tmpval != 0.) { \
  360. check_bounds( jx, nnz, ridx, coef ); \
  361. ridx[ jx ]= i; \
  362. coef[ jx ]= tmpval; \
  363. jx++; \
  364. } \
  365. } \
  366. cidx[j+1]= jx; \
  367. } \
  368. maybe_shrink( jx, nnz, ridx, coef ); \
  369. SuperMatrix X= create_SuperMatrix( Anr, Anc, jx, coef, ridx, cidx );
  370. //
  371. // Next three functions
  372. //
  373. // Fix the row ordering problems that
  374. // LUExtract seems to cause
  375. // NOTE: we don't try to fix other structural errors
  376. // in the generated matrices, but we bail out
  377. // if we find any. This should work since I
  378. // haven't seen any problems other than ordering
  379. //
  380. // NOTE: The right way to fix this, of course, is to
  381. // track down the bug in the superlu codebase.
  382. // define a struct and qsort
  383. // comparison function to do the reordering
  384. #define FIX_ROW_ORDER_SORT_FUNCTIONS( TYPE ) \
  385. typedef struct { unsigned int idx; \
  386. TYPE val; } fixrow_sort; \
  387. static inline int \
  388. fixrow_comp( const void *i, const void *j) \
  389. { \
  390. return ((fixrow_sort *) i)->idx - \
  391. ((fixrow_sort *) j)->idx ; \
  392. }
  393. // this define ends function like
  394. // void
  395. // fix_row_order ## TYPE( SuperMatrix X )
  396. // {
  397. // DEBUGMSG("fix_row_order");
  398. // DEFINE_SP_POINTERS_REAL( X )
  399. // FIX_ROW_ORDER_FUNCTIONS( TYPE )
  400. // }
  401. #define FIX_ROW_ORDER_FUNCTIONS \
  402. for ( int i=0; i < Xnr ; i++) { \
  403. OCTAVE_QUIT; \
  404. assert( cidxX[i] >= 0.); \
  405. assert( cidxX[i] <= NCFX->nnz); \
  406. assert( cidxX[i] <= cidxX[i+1]); \
  407. int reorder=0; \
  408. for( int j= cidxX[i]; \
  409. j< cidxX[i+1]; \
  410. j++ ) { \
  411. assert( ridxX[j] >= 0.); \
  412. assert( ridxX[j] < Xnc); \
  413. assert( coefX[j] != 0.); /* don't keep zero values */ \
  414. if (j> cidxX[i]) \
  415. if ( ridxX[j-1] > ridxX[j] ) \
  416. reorder=1; \
  417. } /* for j */ \
  418. if(reorder) { \
  419. int snum= cidxX[i+1] - cidxX[i]; \
  420. OCTAVE_LOCAL_BUFFER (fixrow_sort, arry, snum); \
  421. /* copy to the sort struct */ \
  422. for( int k=0, \
  423. j= cidxX[i]; \
  424. j< cidxX[i+1]; \
  425. j++ ) { \
  426. arry[k].idx= ridxX[j]; \
  427. arry[k].val= coefX[j]; \
  428. k++; \
  429. } \
  430. OCTAVE_QUIT; \
  431. qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp ); \
  432. OCTAVE_QUIT; \
  433. /* copy back to the position */ \
  434. for( int k=0, \
  435. j= cidxX[i]; \
  436. j< cidxX[i+1]; \
  437. j++ ) { \
  438. ridxX[j]= arry[k].idx; \
  439. coefX[j]= arry[k].val; \
  440. k++; \
  441. } \
  442. } \
  443. } // for i
  444. // endof FIX_ROW_ORDER_FUNCTIONS
  445. // calculate the inverse of an
  446. // upper triangular sparse matrix
  447. //
  448. // iUt = inv(U)
  449. // Note that the transpose is returned
  450. //
  451. // CODE:
  452. //
  453. // note that the input matrix is accesses in
  454. // row major order, and the output is accessed
  455. // in column major. This is the reason that the
  456. // output matrix is the transpose of the input
  457. //
  458. // I= eye( size(A) );
  459. // for n=1:N # across
  460. // for m= n+1:N # down
  461. // v=0;
  462. // for i= m-1:-1:n
  463. // v=v- A(m,i)*I(i,n);
  464. // end
  465. // I(m,n)= v/A(m,m);
  466. // end
  467. // I(:,n)= I(:,n)/A(n,n); <- for non unit norm
  468. // end
  469. //
  470. // we need to be careful here,
  471. // U is uppertriangular, but we're treating
  472. // it as though it is a lower triag matrix in NR form
  473. //
  474. // debug code
  475. // printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
  476. // n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
  477. // scolXp,scolUp, v);
  478. #if 0
  479. // this is a pain - we can't use c++ constructors because
  480. // dmalloc complains when they're free'd
  481. int * ridxX = new int [nnz]; \
  482. TYPE* coefX = new TYPE [nnz]; \
  483. int * cidxX = new int [Unc+1]; \
  484. #endif
  485. #define SPARSE_INV_UPPERTRIANG( TYPE ) \
  486. assert ( Unc == Unr ); \
  487. /* estimate inverse nnz= input nnz */ \
  488. int nnz = NCFU->nnz; \
  489. int * ridxX = (int * ) oct_sparse_malloc ( sizeof(int) *nnz); \
  490. TYPE* coefX = (TYPE* ) oct_sparse_malloc ( sizeof(TYPE)*nnz); \
  491. int * cidxX = (int * ) oct_sparse_malloc ( sizeof(int )*(Unc+1)); \
  492. \
  493. int cx= 0; \
  494. \
  495. /* iterate accross columns of output matrix */ \
  496. for ( int n=0; n < Unr ; n++) { \
  497. OCTAVE_QUIT; \
  498. /* place the 1 in the identity position */ \
  499. int cx_colstart= cx; \
  500. \
  501. cidxX[n]= cx; \
  502. check_bounds( cx, nnz, ridxX, coefX ); \
  503. ridxX[cx]= n; \
  504. coefX[cx]= 1.0; \
  505. cx++; \
  506. \
  507. /* iterate accross columns of input matrix */ \
  508. for ( int m= n+1; m< Unr; m++) { \
  509. TYPE v=0.; \
  510. /* iterate to calculate sum */ \
  511. int colXp= cidxX[n]; \
  512. int colUp= cidxU[m]; \
  513. \
  514. int rpX, rpU; \
  515. do { \
  516. OCTAVE_QUIT; \
  517. rpX= ridxX [ colXp ]; \
  518. rpU= ridxU [ colUp ]; \
  519. \
  520. if (rpX < rpU) { \
  521. colXp++; \
  522. } else \
  523. if (rpX > rpU) { \
  524. colUp++; \
  525. } else { \
  526. assert(rpX == rpU); \
  527. assert(rpX >= n); \
  528. \
  529. v-= coefX[ colXp ]*coefU[ colUp ]; \
  530. colXp++; \
  531. colUp++; \
  532. } \
  533. \
  534. } while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU)); \
  535. \
  536. /* get A(m,m) */ \
  537. colUp= cidxU[m+1]-1; \
  538. assert (ridxU[ colUp ] == m ); /* assert U is upper triangular */ \
  539. \
  540. TYPE pivot= coefU[ colUp ]; \
  541. if (pivot == 0.) gripe_divide_by_zero (); \
  542. \
  543. if (v!=0.) { \
  544. check_bounds( cx, nnz, ridxX, coefX ); \
  545. ridxX[cx]= m; \
  546. coefX[cx]= v / pivot; \
  547. cx++; \
  548. } \
  549. } /* for m */ \
  550. \
  551. /* get A(m,m) */ \
  552. int colUp= cidxU[n+1]-1; \
  553. assert (ridxU[ colUp ] == n ); /* assert U upper triangular */ \
  554. TYPE pivot= coefU[ colUp ]; \
  555. if (pivot == 0.) gripe_divide_by_zero (); \
  556. \
  557. if (pivot!=1.0) \
  558. for( int i= cx_colstart; i< cx; i++) \
  559. coefX[i]/= pivot; \
  560. } /* for n */ \
  561. cidxX[Unr]= cx; \
  562. maybe_shrink( cx, nnz, ridxX, coefX );
  563. #ifdef HAVE_OCTAVE_CONCAT
  564. #define DEFCATOP_SPARSE_FN(name, ret, t1, t2, e1, e2, f) \
  565. CATOPDECL (name, a1, a2) \
  566. { \
  567. DEBUGMSG("CATOP ## name ## t1 ## t2 ## f"); \
  568. CAST_BINOP_ARGS (const octave_ ## t1&, const octave_ ## t2&); \
  569. SuperMatrix B = f (v1.e1 (), v2.e2 (), ra_idx); \
  570. if (error_state) \
  571. return new octave_ ## ret (); \
  572. else \
  573. return new octave_ ## ret (B); \
  574. }
  575. #define INSTALL_SPARSE_CATOP(t1, t2, f) INSTALL_CATOP(t1, t2, f)
  576. #else
  577. #define DEFCATOP_SPARSE_FN(name, ret, t1, t2, e1, e2, f)
  578. #define INSTALL_SPARSE_CATOP(t1, t2, f)
  579. #endif