PageRenderTime 56ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/opensees-websocket/OTHER/SuperLU_MT/dsp_blas2.c

https://code.google.com/
C | 459 lines | 287 code | 40 blank | 132 comment | 112 complexity | c2f0accab470d1bba8f12568d4828528 MD5 | raw file
  1. /*
  2. * -- SuperLU MT routine (version 1.0) --
  3. * Univ. of California Berkeley, Xerox Palo Alto Research Center,
  4. * and Lawrence Berkeley National Lab.
  5. * August 15, 1997
  6. *
  7. * Sparse BLAS-2, using some dense BLAS-2 operations.
  8. */
  9. #include "pdsp_defs.h"
  10. /*
  11. * Function prototypes
  12. */
  13. extern void dusolve(int, int, double*, double*);
  14. extern void dlsolve(int, int, double*, double*);
  15. extern void dmatvec(int, int, int, double*, double*, double*);
  16. int
  17. sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, SuperMatrix *U,
  18. double *x, int *info)
  19. {
  20. /*
  21. * Purpose
  22. * =======
  23. *
  24. * sp_dtrsv() solves one of the systems of equations
  25. * A*x = b, or A'*x = b,
  26. * where b and x are n element vectors and A is a sparse unit , or
  27. * non-unit, upper or lower triangular matrix.
  28. * No test for singularity or near-singularity is included in this
  29. * routine. Such tests must be performed before calling this routine.
  30. *
  31. * Parameters
  32. * ==========
  33. *
  34. * uplo - (input) char*
  35. * On entry, uplo specifies whether the matrix is an upper or
  36. * lower triangular matrix as follows:
  37. * uplo = 'U' or 'u' A is an upper triangular matrix.
  38. * uplo = 'L' or 'l' A is a lower triangular matrix.
  39. *
  40. * trans - (input) char*
  41. * On entry, trans specifies the equations to be solved as
  42. * follows:
  43. * trans = 'N' or 'n' A*x = b.
  44. * trans = 'T' or 't' A'*x = b.
  45. * trans = 'C' or 'c' A'*x = b.
  46. *
  47. * diag - (input) char*
  48. * On entry, diag specifies whether or not A is unit
  49. * triangular as follows:
  50. * diag = 'U' or 'u' A is assumed to be unit triangular.
  51. * diag = 'N' or 'n' A is not assumed to be unit
  52. * triangular.
  53. *
  54. * L - (input) SuperMatrix*
  55. * The factor L from the factorization Pr*A*Pc=L*U. Use
  56. * compressed row subscripts storage for supernodes, i.e., L
  57. * has types: Stype = SCP, Dtype = _D, Mtype = TRLU.
  58. *
  59. * U - (input) SuperMatrix*
  60. * The factor U from the factorization Pr*A*Pc=L*U.
  61. * U has types: Stype = NCP, Dtype = _D, Mtype = TRU.
  62. *
  63. * x - (input/output) double*
  64. * Before entry, the incremented array X must contain the n
  65. * element right-hand side vector b. On exit, X is overwritten
  66. * with the solution vector x.
  67. *
  68. * info - (output) int*
  69. * If *info = -i, the i-th argument had an illegal value.
  70. *
  71. */
  72. #if ( MACH==CRAY_PVP )
  73. _fcd ftcs1, ftcs2, ftcs3;
  74. #endif
  75. SCPformat *Lstore;
  76. NCPformat *Ustore;
  77. double *Lval, *Uval;
  78. int incx = 1;
  79. #ifdef USE_VENDOR_BLAS
  80. int incy = 1;
  81. double alpha = 1.0, beta = 1.0;
  82. #endif
  83. register int fsupc, luptr, istart, irow, k, iptr, jcol, nsuper;
  84. int nsupr, nsupc, nrow, i;
  85. double *work;
  86. flops_t solve_ops;
  87. /* Test the input parameters */
  88. *info = 0;
  89. if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
  90. else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
  91. else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
  92. else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
  93. else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
  94. if ( *info ) {
  95. i = -(*info);
  96. xerbla_("sp_dtrsv", &i);
  97. return 0;
  98. }
  99. Lstore = L->Store;
  100. Lval = Lstore->nzval;
  101. Ustore = U->Store;
  102. Uval = Ustore->nzval;
  103. nsuper = Lstore->nsuper;
  104. solve_ops = 0;
  105. if ( !(work = doubleCalloc(L->nrow)) )
  106. ABORT("Malloc fails for work in sp_dtrsv().");
  107. if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
  108. if ( lsame_(uplo, "L") ) {
  109. /* Form x := inv(L)*x */
  110. if ( L->nrow == 0 ) return 0; /* Quick return */
  111. for (k = 0; k <= nsuper; k++) {
  112. fsupc = L_FST_SUPC(k);
  113. istart = L_SUB_START(fsupc);
  114. nsupr = L_SUB_END(fsupc) - istart;
  115. nsupc = L_LAST_SUPC(k) - fsupc;
  116. luptr = L_NZ_START(fsupc);
  117. nrow = nsupr - nsupc;
  118. solve_ops += nsupc * (nsupc - 1);
  119. solve_ops += 2 * nrow * nsupc;
  120. if ( nsupc == 1 ) {
  121. for (iptr=istart+1; iptr < L_SUB_END(fsupc); ++iptr) {
  122. irow = L_SUB(iptr);
  123. ++luptr;
  124. x[irow] -= x[fsupc] * Lval[luptr];
  125. }
  126. } else {
  127. #ifdef USE_VENDOR_BLAS
  128. #if ( MACH==CRAY_PVP )
  129. ftcs1 = _cptofcd("L", strlen("L"));
  130. ftcs2 = _cptofcd("N", strlen("N"));
  131. ftcs3 = _cptofcd("U", strlen("U"));
  132. STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
  133. &x[fsupc], &incx);
  134. SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
  135. &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
  136. #else
  137. dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
  138. &x[fsupc], &incx);
  139. dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
  140. &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
  141. #endif
  142. #else
  143. dlsolve (nsupr, nsupc, &Lval[luptr], &x[fsupc]);
  144. dmatvec (nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
  145. &x[fsupc], &work[0] );
  146. #endif
  147. iptr = istart + nsupc;
  148. for (i = 0; i < nrow; ++i, ++iptr) {
  149. irow = L_SUB(iptr);
  150. x[irow] -= work[i]; /* Scatter */
  151. work[i] = 0.0;
  152. }
  153. }
  154. } /* for k ... */
  155. } else {
  156. /* Form x := inv(U)*x */
  157. if ( U->nrow == 0 ) return 0; /* Quick return */
  158. for (k = nsuper; k >= 0; k--) {
  159. fsupc = L_FST_SUPC(k);
  160. nsupr = L_SUB_END(fsupc) - L_SUB_START(fsupc);
  161. nsupc = L_LAST_SUPC(k) - fsupc;
  162. luptr = L_NZ_START(fsupc);
  163. solve_ops += nsupc * (nsupc + 1);
  164. if ( nsupc == 1 ) {
  165. x[fsupc] /= Lval[luptr];
  166. for (i = U_NZ_START(fsupc); i < U_NZ_END(fsupc); ++i) {
  167. irow = U_SUB(i);
  168. x[irow] -= x[fsupc] * Uval[i];
  169. }
  170. } else {
  171. #ifdef USE_VENDOR_BLAS
  172. #if ( MACH==CRAY_PVP )
  173. ftcs1 = _cptofcd("U", strlen("U"));
  174. ftcs2 = _cptofcd("N", strlen("N"));
  175. ftcs3 = _cptofcd("N", strlen("N"));
  176. STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
  177. &x[fsupc], &incx);
  178. #else
  179. dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
  180. &x[fsupc], &incx);
  181. #endif
  182. #else
  183. dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
  184. #endif
  185. for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
  186. solve_ops += 2*(U_NZ_END(jcol) - U_NZ_START(jcol));
  187. for (i = U_NZ_START(jcol); i < U_NZ_END(jcol); i++) {
  188. irow = U_SUB(i);
  189. x[irow] -= x[jcol] * Uval[i];
  190. }
  191. }
  192. }
  193. } /* for k ... */
  194. }
  195. } else { /* Form x := inv(A')*x */
  196. if ( lsame_(uplo, "L") ) {
  197. /* Form x := inv(L')*x */
  198. if ( L->nrow == 0 ) return 0; /* Quick return */
  199. for (k = nsuper; k >= 0; --k) {
  200. fsupc = L_FST_SUPC(k);
  201. istart = L_SUB_START(fsupc);
  202. nsupr = L_SUB_END(fsupc) - istart;
  203. nsupc = L_LAST_SUPC(k) - fsupc;
  204. luptr = L_NZ_START(fsupc);
  205. solve_ops += 2 * (nsupr - nsupc) * nsupc;
  206. for (jcol = fsupc; jcol < L_LAST_SUPC(k); jcol++) {
  207. iptr = istart + nsupc;
  208. for (i = L_NZ_START(jcol) + nsupc;
  209. i < L_NZ_END(jcol); i++) {
  210. irow = L_SUB(iptr);
  211. x[jcol] -= x[irow] * Lval[i];
  212. iptr++;
  213. }
  214. }
  215. if ( nsupc > 1 ) {
  216. solve_ops += nsupc * (nsupc - 1);
  217. #if ( MACH==CRAY_PVP )
  218. ftcs1 = _cptofcd("L", strlen("L"));
  219. ftcs2 = _cptofcd("T", strlen("T"));
  220. ftcs3 = _cptofcd("U", strlen("U"));
  221. STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
  222. &x[fsupc], &incx);
  223. #else
  224. dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
  225. &x[fsupc], &incx);
  226. #endif
  227. }
  228. }
  229. } else {
  230. /* Form x := inv(U')*x */
  231. if ( U->nrow == 0 ) return 0; /* Quick return */
  232. for (k = 0; k <= nsuper; k++) {
  233. fsupc = L_FST_SUPC(k);
  234. nsupr = L_SUB_END(fsupc) - L_SUB_START(fsupc);
  235. nsupc = L_LAST_SUPC(k) - fsupc;
  236. luptr = L_NZ_START(fsupc);
  237. for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
  238. solve_ops += 2*(U_NZ_END(jcol) - U_NZ_START(jcol));
  239. for (i = U_NZ_START(jcol); i < U_NZ_END(jcol); i++) {
  240. irow = U_SUB(i);
  241. x[jcol] -= x[irow] * Uval[i];
  242. }
  243. }
  244. solve_ops += nsupc * (nsupc + 1);
  245. if ( nsupc == 1 ) {
  246. x[fsupc] /= Lval[luptr];
  247. } else {
  248. #if ( MACH==CRAY_PVP )
  249. ftcs1 = _cptofcd("U", strlen("U"));
  250. ftcs2 = _cptofcd("T", strlen("T"));
  251. ftcs3 = _cptofcd("N", strlen("N"));
  252. STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
  253. &x[fsupc], &incx);
  254. #else
  255. dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
  256. &x[fsupc], &incx);
  257. #endif
  258. }
  259. } /* for k ... */
  260. }
  261. }
  262. SUPERLU_FREE(work);
  263. return 0;
  264. }
  265. int
  266. sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, int incx,
  267. double beta, double *y, int incy)
  268. {
  269. /* Purpose
  270. =======
  271. sp_dgemv() performs one of the matrix-vector operations
  272. y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
  273. where alpha and beta are scalars, x and y are vectors and A is a
  274. sparse A->nrow by A->ncol matrix.
  275. Parameters
  276. ==========
  277. TRANS - (input) char*
  278. On entry, TRANS specifies the operation to be performed as
  279. follows:
  280. TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
  281. TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
  282. TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
  283. ALPHA - (input) double
  284. On entry, ALPHA specifies the scalar alpha.
  285. A - (input) SuperMatrix*
  286. Before entry, the leading m by n part of the array A must
  287. contain the matrix of coefficients.
  288. X - (input) double*, array of DIMENSION at least
  289. ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
  290. and at least
  291. ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
  292. Before entry, the incremented array X must contain the
  293. vector x.
  294. INCX - (input) int
  295. On entry, INCX specifies the increment for the elements of
  296. X. INCX must not be zero.
  297. BETA - (input) double
  298. On entry, BETA specifies the scalar beta. When BETA is
  299. supplied as zero then Y need not be set on input.
  300. Y - (output) double*, array of DIMENSION at least
  301. ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
  302. and at least
  303. ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
  304. Before entry with BETA non-zero, the incremented array Y
  305. must contain the vector y. On exit, Y is overwritten by the
  306. updated vector y.
  307. INCY - (input) int
  308. On entry, INCY specifies the increment for the elements of
  309. Y. INCY must not be zero.
  310. ==== Sparse Level 2 Blas routine.
  311. */
  312. /* Local variables */
  313. NCformat *Astore;
  314. double *Aval;
  315. int info;
  316. double temp;
  317. int lenx, leny, i, j, irow;
  318. int iy, jx, jy, kx, ky;
  319. int notran;
  320. notran = lsame_(trans, "N");
  321. Astore = A->Store;
  322. Aval = Astore->nzval;
  323. /* Test the input parameters */
  324. info = 0;
  325. if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
  326. else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
  327. else if (incx == 0) info = 5;
  328. else if (incy == 0) info = 8;
  329. if (info != 0) {
  330. xerbla_("sp_dgemv ", &info);
  331. return 0;
  332. }
  333. /* Quick return if possible. */
  334. if (A->nrow == 0 || A->ncol == 0 || alpha == 0. && beta == 1.)
  335. return 0;
  336. /* Set LENX and LENY, the lengths of the vectors x and y, and set
  337. up the start points in X and Y. */
  338. if (lsame_(trans, "N")) {
  339. lenx = A->ncol;
  340. leny = A->nrow;
  341. } else {
  342. lenx = A->nrow;
  343. leny = A->ncol;
  344. }
  345. if (incx > 0) kx = 0;
  346. else kx = - (lenx - 1) * incx;
  347. if (incy > 0) ky = 0;
  348. else ky = - (leny - 1) * incy;
  349. /* Start the operations. In this version the elements of A are
  350. accessed sequentially with one pass through A. */
  351. /* First form y := beta*y. */
  352. if (beta != 1.) {
  353. if (incy == 1) {
  354. if (beta == 0.)
  355. for (i = 0; i < leny; ++i) y[i] = 0.;
  356. else
  357. for (i = 0; i < leny; ++i) y[i] = beta * y[i];
  358. } else {
  359. iy = ky;
  360. if (beta == 0.)
  361. for (i = 0; i < leny; ++i) {
  362. y[iy] = 0.;
  363. iy += incy;
  364. }
  365. else
  366. for (i = 0; i < leny; ++i) {
  367. y[iy] = beta * y[iy];
  368. iy += incy;
  369. }
  370. }
  371. }
  372. if (alpha == 0.) return 0;
  373. if ( notran ) {
  374. /* Form y := alpha*A*x + y. */
  375. jx = kx;
  376. if (incy == 1) {
  377. for (j = 0; j < A->ncol; ++j) {
  378. if (x[jx] != 0.) {
  379. temp = alpha * x[jx];
  380. for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
  381. irow = Astore->rowind[i];
  382. y[irow] += temp * Aval[i];
  383. }
  384. }
  385. jx += incx;
  386. }
  387. } else {
  388. ABORT("Not implemented.");
  389. }
  390. } else {
  391. /* Form y := alpha*A'*x + y. */
  392. jy = ky;
  393. if (incx == 1) {
  394. for (j = 0; j < A->ncol; ++j) {
  395. temp = 0.;
  396. for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
  397. irow = Astore->rowind[i];
  398. temp += Aval[i] * x[irow];
  399. }
  400. y[jy] += alpha * temp;
  401. jy += incy;
  402. }
  403. } else {
  404. ABORT("Not implemented.");
  405. }
  406. }
  407. return 0;
  408. } /* sp_dgemv */