PageRenderTime 28ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/branches/R2-1-x/octave-forge/main/sparse/sparse_ops.h

#
C++ Header | 647 lines | 441 code | 22 blank | 184 comment | 119 complexity | 0f35f1cfc16732b49809f12684761fcd MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. /*
  2. Copyright (C) 1999 Andy Adler
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful, but
  8. WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with Octave; see the file COPYING. If not, write to the Free
  13. Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. $Id: sparse_ops.h 2276 2006-01-03 02:06:58Z pkienzle $
  15. $Log$
  16. Revision 1.17 2006/01/03 02:06:58 pkienzle
  17. Sparse row was being set to one in the constructor.
  18. Revision 1.16 2005/11/14 18:24:53 aadler
  19. Define INT64 type to properly handle large matrices
  20. Revision 1.15 2004/11/09 23:34:49 adb014
  21. Fix concatenation for recent octave core CVS changes
  22. Revision 1.14 2004/08/31 15:23:45 adb014
  23. Small build fix for the macro SPARSE_RESIZE
  24. Revision 1.13 2004/08/25 16:13:57 adb014
  25. Working, but inefficient, concatentaion code
  26. Revision 1.12 2004/08/03 14:45:31 aadler
  27. clean up ASSEMBLE_SPARSE macro
  28. Revision 1.11 2003/12/22 15:13:23 pkienzle
  29. Use error/return rather than SP_FATAL_ERROR where possible.
  30. Test for zero elements from scalar multiply/power and shrink sparse
  31. accordingly; accomodate libstdc++ bugs with mixed real/complex power.
  32. Revision 1.10 2003/10/13 03:44:26 aadler
  33. check for out of bounds in allocation
  34. Revision 1.9 2003/04/03 22:06:41 aadler
  35. sparse create bug - need to use heap for large temp vars
  36. Revision 1.8 2003/02/20 23:03:59 pkienzle
  37. Use of "T x[n]" where n is not constant is a g++ extension so replace it with
  38. OCTAVE_LOCAL_BUFFER(T,x,n), and other things to keep the picky MipsPRO CC
  39. compiler happy.
  40. Revision 1.7 2002/12/25 01:33:00 aadler
  41. fixed bug which allowed zero values to be stored in sparse matrices.
  42. improved print output
  43. Revision 1.6 2002/11/27 04:46:42 pkienzle
  44. Use new exception handling infrastructure.
  45. Revision 1.5 2002/01/04 15:53:57 pkienzle
  46. Changes required to compile for gcc-3.0 in debian hppa/unstable
  47. Revision 1.4 2001/11/04 19:54:49 aadler
  48. fix bug with multiple entries in sparse creation.
  49. Added "summation" mode for matrix creation
  50. Revision 1.3 2001/10/14 03:06:31 aadler
  51. fixed memory leak in complex sparse solve
  52. fixed malloc bugs for zero size allocs
  53. Revision 1.2 2001/10/12 02:24:28 aadler
  54. Mods to fix bugs
  55. add support for all zero sparse matrices
  56. add support fom complex sparse inverse
  57. Revision 1.2 2001/04/04 02:13:46 aadler
  58. complete complex_sparse, templates, fix memory leaks
  59. Revision 1.1 2001/03/30 04:34:23 aadler
  60. "template" functions for sparse operations
  61. */
  62. // I would like to do this with templates,
  63. // but I don't think you can specify operators
  64. //
  65. // TYPX -> output type ( Complex or double)
  66. // _OP_ -> operation to implement ( + , - , != , .* )
  67. // A_B_INTERACT -> evaluate operations where A or B ==0
  68. //
  69. // I'm assuming that compiler optimization will remove
  70. // the if (0) and x+0 operations
  71. #define SPARSE_EL_OP( TYPX, _OP_, A_B_INTERACT ) \
  72. SuperMatrix X; \
  73. if ( (Anc != Bnc) || (Anr != Bnr) ) { \
  74. gripe_nonconformant ("operator " #_OP_, Anr, Anc, Bnr, Bnc); \
  75. } else { \
  76. assert(Anr == Bnr); assert(Anc == Bnc); \
  77. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  78. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  79. int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
  80. \
  81. int jx= 0; \
  82. for (int i= 0 ; i < Anc ; i++ ) { \
  83. int ja= cidxA[ i ]; \
  84. int ja_max= cidxA[ i+1 ]; \
  85. bool ja_lt_max= ja < ja_max; \
  86. \
  87. int jb= cidxB[ i ]; \
  88. int jb_max= cidxB[ i+1 ]; \
  89. bool jb_lt_max= jb < jb_max; \
  90. \
  91. while( ja_lt_max || jb_lt_max ) { \
  92. OCTAVE_QUIT; \
  93. if( ( !jb_lt_max ) || \
  94. ( ja_lt_max && (ridxA[ja] < ridxB[jb]) ) ) \
  95. { \
  96. if (A_B_INTERACT) { \
  97. ridxX[ jx ] = ridxA[ja]; \
  98. coefX[ jx ] = coefA[ ja ] _OP_ 0.0; \
  99. jx++; \
  100. } \
  101. ja++; ja_lt_max= ja < ja_max; \
  102. } else \
  103. if( ( !ja_lt_max ) || \
  104. ( jb_lt_max && (ridxB[jb] < ridxA[ja]) ) ) \
  105. { \
  106. if (A_B_INTERACT) { \
  107. ridxX[ jx ] = ridxB[jb]; \
  108. coefX[ jx ] = 0.0 _OP_ coefB[ jb ]; \
  109. jx++; \
  110. } \
  111. jb++; jb_lt_max= jb < jb_max; \
  112. } else \
  113. { \
  114. assert( ridxA[ja] == ridxB[jb] ); \
  115. TYPX tmpval= coefA[ ja ] _OP_ coefB[ jb ]; \
  116. if (tmpval !=0.0) { \
  117. coefX[ jx ] = tmpval; \
  118. ridxX[ jx ] = ridxA[ja]; \
  119. jx++; \
  120. } \
  121. ja++; ja_lt_max= ja < ja_max; \
  122. jb++; jb_lt_max= jb < jb_max; \
  123. } \
  124. } \
  125. cidxX[i+1] = jx; \
  126. } \
  127. maybe_shrink( jx, nnz, ridxX, coefX ); \
  128. X= create_SuperMatrix( Anr, Anc, jx, coefX, ridxX, cidxX ); \
  129. }
  130. #define SPARSE_MATRIX_EL_OP( TYPX, _OP_ ) \
  131. SuperMatrix X; \
  132. if ( (Anc != Bnc) || (Anr != Bnr) ) { \
  133. gripe_nonconformant ("operator .*", Anr, Anc, Bnr, Bnc); \
  134. } else { \
  135. assert(Anr == Bnr); assert(Anc == Bnc); \
  136. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  137. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  138. int * cidxX= (int *)oct_sparse_malloc((Anc+1)*sizeof(int)); cidxX[0]= 0; \
  139. \
  140. int cx= 0; \
  141. for (int i=0; i < Anc ; i++) { \
  142. for (int j= cidxA[i] ; \
  143. j< cidxA[i+1]; j++) { \
  144. OCTAVE_QUIT; \
  145. int rowidx = ridxA[j]; \
  146. TYPX tmpval = B(rowidx,i); \
  147. if (tmpval != 0.0) { \
  148. ridxX[ cx ] = rowidx; \
  149. coefX[ cx ] = tmpval * coefA[ j ]; \
  150. cx++; \
  151. } \
  152. } \
  153. cidxX[i+1] = cx; \
  154. } \
  155. maybe_shrink( cx, nnz, ridxX, coefX ); \
  156. X= create_SuperMatrix( Anr, Anc, cx, coefX, ridxX, cidxX ); \
  157. }
  158. // multiply type ops
  159. // TYPM is the output matrix type
  160. // TYPX is the sparse matrix type
  161. #define SPARSE_MATRIX_MUL( TYPM, TYPX) \
  162. TYPM X (Anr , Bnc); \
  163. if (Anc != Bnr) { \
  164. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  165. } else { \
  166. assert (Anc == Bnr); \
  167. for (int i=0; i< Anr; i++ ) \
  168. for (int j=0; j< Bnc; j++ ) \
  169. X.elem(i,j)=0.; \
  170. for ( int i=0; i < Anc ; i++) { \
  171. for ( int j= cidxA[i]; j< cidxA[i+1]; j++) { \
  172. OCTAVE_QUIT; \
  173. int col = ridxA[j]; \
  174. TYPX tmpval = coefA[j]; \
  175. for ( int k=0 ; k< Bnc; k++) { \
  176. X.elem(col , k)+= tmpval * B(i,k); \
  177. } \
  178. } \
  179. } \
  180. }
  181. #define MATRIX_SPARSE_MUL( TYPM, TYPX ) \
  182. TYPM X (Anr , Bnc); \
  183. if (Anc != Bnr) { \
  184. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  185. } else { \
  186. assert (Anc == Bnr); \
  187. for (int i=0; i< Anr; i++ ) \
  188. for (int j=0; j< Bnc; j++ ) \
  189. X.elem(i,j)=0.; \
  190. for ( int i=0; i < Bnc ; i++) { \
  191. for ( int j= cidxB[i]; j< cidxB[i+1]; j++) { \
  192. OCTAVE_QUIT; \
  193. int col = ridxB[j]; \
  194. TYPX tmpval = coefB[j]; \
  195. for ( int k=0 ; k< Anr; k++) { \
  196. X(k, i)+= A(k,col) * tmpval; \
  197. } \
  198. } \
  199. } \
  200. }
  201. //
  202. // Multiply sparse by sparse, element by element
  203. // This algorithm allocates a full column of the output
  204. // matrix. Hopefully that won't be a storage problem.
  205. //
  206. // TODO: allocate a row or column depending on the larger
  207. // dimention.
  208. //
  209. // I'm sure there are good sparse multiplication algorithms
  210. // available in the litterature. I invented this one
  211. // to fill a gap. Tell me if you know of better ones.
  212. //
  213. #define SPARSE_SPARSE_MUL( TYPX ) \
  214. SuperMatrix X; \
  215. if (Anc != Bnr) { \
  216. gripe_nonconformant ("operator *", Anr, Anc, Bnr, Bnc); \
  217. } else { \
  218. assert (Anc == Bnr ); \
  219. int nnz = NCFA->nnz + NCFB->nnz ; \
  220. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  221. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  222. int * cidxX= (int *)oct_sparse_malloc((Bnc+1)*sizeof(int)); cidxX[0]= 0; \
  223. \
  224. TYPX * Xcol= (TYPX*)oct_sparse_malloc( Anr * sizeof(TYPX)); \
  225. int cx= 0; \
  226. for ( int i=0; i < Bnc ; i++) { \
  227. OCTAVE_QUIT; \
  228. for (int k=0; k<Anr; k++) Xcol[k]= 0.; \
  229. for (int j= cidxB[i]; j< cidxB[i+1]; j++) { \
  230. int col= ridxB[j]; \
  231. TYPX tmpval = coefB[j]; \
  232. for (int k= cidxA[col] ; k< cidxA[col+1]; k++) \
  233. Xcol[ ridxA[k] ]+= tmpval * coefA[k]; \
  234. } \
  235. for (int k=0; k<Anr; k++) { \
  236. if ( Xcol[k] !=0. ) { \
  237. check_bounds( cx, nnz, ridxX, coefX ); \
  238. ridxX[ cx ]= k; \
  239. coefX[ cx ]= Xcol[k]; \
  240. cx++; \
  241. } \
  242. } \
  243. cidxX[i+1] = cx; \
  244. } \
  245. free( Xcol ); \
  246. maybe_shrink( cx, nnz, ridxX, coefX ); \
  247. X= create_SuperMatrix( Anr, Bnc, cx, coefX, ridxX, cidxX ); \
  248. }
  249. // assemble a sparse matrix from elements
  250. // called by > 1 args for sparse
  251. // NOTE: index vectors are 1 based!
  252. //
  253. // NOTE2: be careful about when we convert ri to int,
  254. // otherwise the maximum matrix size will be m*n < maxint/2
  255. //
  256. // The OCTAVE_LOCAL_BUFFER can't be used for the sort index
  257. // when the requested size is too big. we need to put it on the
  258. // heap.
  259. #define ASSEMBLE_SPARSE( TYPX , REJECT_OUT_OF_RANGE ) \
  260. int nnz= MAX( ridxA.length(), cidxA.length() ); \
  261. TYPX* coefX= (TYPX*)oct_sparse_malloc( nnz * sizeof(TYPX)); \
  262. int * ridxX= (int *)oct_sparse_malloc( nnz * sizeof(int) ); \
  263. int * cidxX= (int *)oct_sparse_malloc( (n+1)* sizeof(int)); cidxX[0]= 0; \
  264. \
  265. bool ri_scalar = (ridxA.length() == 1); \
  266. bool ci_scalar = (cidxA.length() == 1); \
  267. bool cf_scalar = (coefA.length() == 1); \
  268. \
  269. sort_idxl* sidx = (sort_idxl *)oct_sparse_malloc( nnz* sizeof(sort_idxl) );\
  270. int actual_nnz=0; \
  271. INT64 m64= m; \
  272. OCTAVE_QUIT; \
  273. for (int i=0; i<nnz; i++) { \
  274. double rowidx= ( ri_scalar ? ridxA(0) : ridxA(i) ) - 1; \
  275. double colidx= ( ci_scalar ? cidxA(0) : cidxA(i) ) - 1; \
  276. if (rowidx < m && rowidx >= 0 && \
  277. colidx < n && colidx >= 0 ) { \
  278. if ( coefA( cf_scalar ? 0 : i ) != 0. ) { \
  279. sidx[actual_nnz].val = rowidx + m64 * colidx; \
  280. sidx[actual_nnz].idx = i; \
  281. actual_nnz++; \
  282. } \
  283. } \
  284. else { \
  285. if( REJECT_OUT_OF_RANGE ) { \
  286. error("sparse (row,col) index (%d,%d) out of range", rowidx,colidx);\
  287. actual_nnz=0; \
  288. break; \
  289. } \
  290. } \
  291. } \
  292. \
  293. OCTAVE_QUIT; \
  294. qsort( sidx, actual_nnz, sizeof(sort_idxl), sidxl_comp ); \
  295. \
  296. OCTAVE_QUIT; \
  297. int cx= 0; \
  298. INT64 prev_val= -1; \
  299. int ii= -1; \
  300. for (int i=0; i<actual_nnz; i++) { \
  301. OCTAVE_QUIT; \
  302. unsigned long idx= sidx[i].idx; \
  303. INT64 val= sidx[i].val; \
  304. if (prev_val < val) { \
  305. ii++; \
  306. coefX[ii]= coefA( cf_scalar ? 0 : idx ); \
  307. double ri= ridxA( ri_scalar ? 0 : idx ) - 1 ; \
  308. ridxX[ii]= ri - floor(ri/m64)*m64 ; \
  309. double ci= cidxA( ci_scalar ? 0 : idx ) - 1 ; \
  310. while( cx < ci ) cidxX[++cx]= ii; \
  311. } else { \
  312. if (assemble_do_sum) \
  313. coefX[ii]+= coefA( cf_scalar ? 0 : idx ); \
  314. else \
  315. coefX[ii] = coefA( cf_scalar ? 0 : idx ); \
  316. } \
  317. prev_val= val;\
  318. } \
  319. while( cx < n ) cidxX[++cx]= ii+1; \
  320. OCTAVE_QUIT; \
  321. maybe_shrink( ii+1, actual_nnz, ridxX, coefX ); \
  322. \
  323. OCTAVE_QUIT; \
  324. SuperMatrix X= create_SuperMatrix( m, n, ii+1, coefX, ridxX, cidxX ); \
  325. oct_sparse_free( sidx );
  326. // destructively resize a sparse matrix
  327. #define SPARSE_RESIZE( TYPX, VECTYP1, VECTYP2) \
  328. /* Special case the redimensioning to an empty matrix */ \
  329. if (dv.numel() == 0) \
  330. return new octave_sparse (create_SuperMatrix (0, 0, 0, (TYPX *)0, \
  331. 0, 0)); \
  332. \
  333. /* Special case the redimensioning of an empty matrix */ \
  334. NCformat * Xstore = (NCformat * ) X.Store; \
  335. if (Xstore->nnz == 0) \
  336. { \
  337. int * colptr = (int *) oct_sparse_malloc ((dv(1) + 1) * sizeof(int)); \
  338. for (int i = 0; i <= dv(1); i++) \
  339. colptr [i] = 0; \
  340. return new octave_sparse (create_SuperMatrix (dv(0), dv(1), 0, \
  341. (TYPX *)0, 0, colptr)); \
  342. } \
  343. \
  344. octave_value_list find_val= find(); \
  345. int n= (int) dv(1); \
  346. int m= (int) dv(0); \
  347. ColumnVector ridxA= find_val(0).column_vector_value(); \
  348. ColumnVector cidxA= find_val(1).column_vector_value(); \
  349. VECTYP1 ## ColumnVector coefA= find_val(2). VECTYP2 ## column_vector_value(); \
  350. int assemble_do_sum = 0; \
  351. ASSEMBLE_SPARSE( TYPX, false);
  352. // assemble a sparse matrix from full
  353. // called by one arg for sparse
  354. // start with an initial estimate for nnz and
  355. // work with it.
  356. #define MATRIX_TO_SPARSE( TYPX ) \
  357. int nnz= 100; \
  358. TYPX * coef = (TYPX *) oct_sparse_malloc ( nnz * sizeof(TYPX) ); \
  359. int * ridx = (int *) oct_sparse_malloc ( nnz * sizeof(int) ); \
  360. int * cidx = (int *) oct_sparse_malloc ((Anc+1)* sizeof(int) ); cidx[0]= 0; \
  361. int jx=0; \
  362. for (int j= 0; j<Anc ; j++ ) { \
  363. for (int i= 0; i<Anr ; i++ ) { \
  364. OCTAVE_QUIT; \
  365. TYPX tmpval= A(i,j); \
  366. if (tmpval != 0.) { \
  367. check_bounds( jx, nnz, ridx, coef ); \
  368. ridx[ jx ]= i; \
  369. coef[ jx ]= tmpval; \
  370. jx++; \
  371. } \
  372. } \
  373. cidx[j+1]= jx; \
  374. } \
  375. maybe_shrink( jx, nnz, ridx, coef ); \
  376. SuperMatrix X= create_SuperMatrix( Anr, Anc, jx, coef, ridx, cidx );
  377. //
  378. // Next three functions
  379. //
  380. // Fix the row ordering problems that
  381. // LUExtract seems to cause
  382. // NOTE: we don't try to fix other structural errors
  383. // in the generated matrices, but we bail out
  384. // if we find any. This should work since I
  385. // haven't seen any problems other than ordering
  386. //
  387. // NOTE: The right way to fix this, of course, is to
  388. // track down the bug in the superlu codebase.
  389. // define a struct and qsort
  390. // comparison function to do the reordering
  391. #define FIX_ROW_ORDER_SORT_FUNCTIONS( TYPE ) \
  392. typedef struct { unsigned int idx; \
  393. TYPE val; } fixrow_sort; \
  394. static inline int \
  395. fixrow_comp( const void *i, const void *j) \
  396. { \
  397. return ((fixrow_sort *) i)->idx - \
  398. ((fixrow_sort *) j)->idx ; \
  399. }
  400. // this define ends function like
  401. // void
  402. // fix_row_order ## TYPE( SuperMatrix X )
  403. // {
  404. // DEBUGMSG("fix_row_order");
  405. // DEFINE_SP_POINTERS_REAL( X )
  406. // FIX_ROW_ORDER_FUNCTIONS( TYPE )
  407. // }
  408. #define FIX_ROW_ORDER_FUNCTIONS \
  409. for ( int i=0; i < Xnr ; i++) { \
  410. OCTAVE_QUIT; \
  411. assert( cidxX[i] >= 0.); \
  412. assert( cidxX[i] <= NCFX->nnz); \
  413. assert( cidxX[i] <= cidxX[i+1]); \
  414. int reorder=0; \
  415. for( int j= cidxX[i]; \
  416. j< cidxX[i+1]; \
  417. j++ ) { \
  418. assert( ridxX[j] >= 0.); \
  419. assert( ridxX[j] < Xnc); \
  420. assert( coefX[j] != 0.); /* don't keep zero values */ \
  421. if (j> cidxX[i]) \
  422. if ( ridxX[j-1] > ridxX[j] ) \
  423. reorder=1; \
  424. } /* for j */ \
  425. if(reorder) { \
  426. int snum= cidxX[i+1] - cidxX[i]; \
  427. OCTAVE_LOCAL_BUFFER (fixrow_sort, arry, snum); \
  428. /* copy to the sort struct */ \
  429. for( int k=0, \
  430. j= cidxX[i]; \
  431. j< cidxX[i+1]; \
  432. j++ ) { \
  433. arry[k].idx= ridxX[j]; \
  434. arry[k].val= coefX[j]; \
  435. k++; \
  436. } \
  437. OCTAVE_QUIT; \
  438. qsort( arry, snum, sizeof(fixrow_sort), fixrow_comp ); \
  439. OCTAVE_QUIT; \
  440. /* copy back to the position */ \
  441. for( int k=0, \
  442. j= cidxX[i]; \
  443. j< cidxX[i+1]; \
  444. j++ ) { \
  445. ridxX[j]= arry[k].idx; \
  446. coefX[j]= arry[k].val; \
  447. k++; \
  448. } \
  449. } \
  450. } // for i
  451. // endof FIX_ROW_ORDER_FUNCTIONS
  452. // calculate the inverse of an
  453. // upper triangular sparse matrix
  454. //
  455. // iUt = inv(U)
  456. // Note that the transpose is returned
  457. //
  458. // CODE:
  459. //
  460. // note that the input matrix is accesses in
  461. // row major order, and the output is accessed
  462. // in column major. This is the reason that the
  463. // output matrix is the transpose of the input
  464. //
  465. // I= eye( size(A) );
  466. // for n=1:N # across
  467. // for m= n+1:N # down
  468. // v=0;
  469. // for i= m-1:-1:n
  470. // v=v- A(m,i)*I(i,n);
  471. // end
  472. // I(m,n)= v/A(m,m);
  473. // end
  474. // I(:,n)= I(:,n)/A(n,n); <- for non unit norm
  475. // end
  476. //
  477. // we need to be careful here,
  478. // U is uppertriangular, but we're treating
  479. // it as though it is a lower triag matrix in NR form
  480. //
  481. // debug code
  482. // printf("n=%d, m=%d, X[%d]=%7.4f U[%d]=%7.4f cXp=%d cUp=%d v=%7.4f\n",
  483. // n,m, rpX, coefX[ scolXp ], rpU, coefU[ scolUp ],
  484. // scolXp,scolUp, v);
  485. #if 0
  486. // this is a pain - we can't use c++ constructors because
  487. // dmalloc complains when they're free'd
  488. int * ridxX = new int [nnz]; \
  489. TYPE* coefX = new TYPE [nnz]; \
  490. int * cidxX = new int [Unc+1]; \
  491. #endif
  492. #define SPARSE_INV_UPPERTRIANG( TYPE ) \
  493. assert ( Unc == Unr ); \
  494. /* estimate inverse nnz= input nnz */ \
  495. int nnz = NCFU->nnz; \
  496. int * ridxX = (int * ) oct_sparse_malloc ( sizeof(int) *nnz); \
  497. TYPE* coefX = (TYPE* ) oct_sparse_malloc ( sizeof(TYPE)*nnz); \
  498. int * cidxX = (int * ) oct_sparse_malloc ( sizeof(int )*(Unc+1)); \
  499. \
  500. int cx= 0; \
  501. \
  502. /* iterate accross columns of output matrix */ \
  503. for ( int n=0; n < Unr ; n++) { \
  504. OCTAVE_QUIT; \
  505. /* place the 1 in the identity position */ \
  506. int cx_colstart= cx; \
  507. \
  508. cidxX[n]= cx; \
  509. check_bounds( cx, nnz, ridxX, coefX ); \
  510. ridxX[cx]= n; \
  511. coefX[cx]= 1.0; \
  512. cx++; \
  513. \
  514. /* iterate accross columns of input matrix */ \
  515. for ( int m= n+1; m< Unr; m++) { \
  516. TYPE v=0.; \
  517. /* iterate to calculate sum */ \
  518. int colXp= cidxX[n]; \
  519. int colUp= cidxU[m]; \
  520. \
  521. int rpX, rpU; \
  522. do { \
  523. OCTAVE_QUIT; \
  524. rpX= ridxX [ colXp ]; \
  525. rpU= ridxU [ colUp ]; \
  526. \
  527. if (rpX < rpU) { \
  528. colXp++; \
  529. } else \
  530. if (rpX > rpU) { \
  531. colUp++; \
  532. } else { \
  533. assert(rpX == rpU); \
  534. assert(rpX >= n); \
  535. \
  536. v-= coefX[ colXp ]*coefU[ colUp ]; \
  537. colXp++; \
  538. colUp++; \
  539. } \
  540. \
  541. } while ((rpX<m) && (rpU<m) && (colXp<cx) && (colUp<nnzU)); \
  542. \
  543. /* get A(m,m) */ \
  544. colUp= cidxU[m+1]-1; \
  545. assert (ridxU[ colUp ] == m ); /* assert U is upper triangular */ \
  546. \
  547. TYPE pivot= coefU[ colUp ]; \
  548. if (pivot == 0.) gripe_divide_by_zero (); \
  549. \
  550. if (v!=0.) { \
  551. check_bounds( cx, nnz, ridxX, coefX ); \
  552. ridxX[cx]= m; \
  553. coefX[cx]= v / pivot; \
  554. cx++; \
  555. } \
  556. } /* for m */ \
  557. \
  558. /* get A(m,m) */ \
  559. int colUp= cidxU[n+1]-1; \
  560. assert (ridxU[ colUp ] == n ); /* assert U upper triangular */ \
  561. TYPE pivot= coefU[ colUp ]; \
  562. if (pivot == 0.) gripe_divide_by_zero (); \
  563. \
  564. if (pivot!=1.0) \
  565. for( int i= cx_colstart; i< cx; i++) \
  566. coefX[i]/= pivot; \
  567. } /* for n */ \
  568. cidxX[Unr]= cx; \
  569. maybe_shrink( cx, nnz, ridxX, coefX );
  570. #ifdef HAVE_OLD_OCTAVE_CONCAT
  571. #define DEFCATOP_SPARSE_FN(name, ret, t1, t2, e1, e2, f) \
  572. CATOPDECL (name, a1, a2) \
  573. { \
  574. DEBUGMSG("CATOP ## name ## t1 ## t2 ## f"); \
  575. CAST_BINOP_ARGS (const octave_ ## t1&, const octave_ ## t2&); \
  576. SuperMatrix B = f (v1.e1 (), v2.e2 (), ra_idx); \
  577. if (error_state) \
  578. return new octave_ ## ret (); \
  579. else \
  580. return new octave_ ## ret (B); \
  581. }
  582. #define INSTALL_SPARSE_CATOP(t1, t2, f) INSTALL_CATOP(t1, t2, f)
  583. #elif defined (HAVE_OCTAVE_CONCAT)
  584. #define DEFCATOP_SPARSE_FN(name, ret, t1, t2, e1, e2, f) \
  585. CATOPDECL (name, a1, a2) \
  586. { \
  587. DEBUGMSG("CATOP ## name ## t1 ## t2 ## f"); \
  588. CAST_BINOP_ARGS (octave_ ## t1&, const octave_ ## t2&); \
  589. SuperMatrix B = f (v1.e1 (), v2.e2 (), ra_idx); \
  590. if (error_state) \
  591. return new octave_ ## ret (); \
  592. else \
  593. return new octave_ ## ret (B); \
  594. }
  595. #define INSTALL_SPARSE_CATOP(t1, t2, f) INSTALL_CATOP(t1, t2, f)
  596. #else
  597. #define DEFCATOP_SPARSE_FN(name, ret, t1, t2, e1, e2, f)
  598. #define INSTALL_SPARSE_CATOP(t1, t2, f)
  599. #endif