PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/scipy/sparse/linalg/_dsolve/SuperLU/SRC/ilu_ddrop_row.c

https://github.com/matthew-brett/scipy
C | 339 lines | 273 code | 21 blank | 45 comment | 57 complexity | 1e4507e0bcd0ad18a44bdc98a459407f MD5 | raw file
  1. /*! \file
  2. Copyright (c) 2003, The Regents of the University of California, through
  3. Lawrence Berkeley National Laboratory (subject to receipt of any required
  4. approvals from U.S. Dept. of Energy)
  5. All rights reserved.
  6. The source code is distributed under BSD license, see the file License.txt
  7. at the top-level directory.
  8. */
  9. /*! @file ilu_ddrop_row.c
  10. * \brief Drop small rows from L
  11. *
  12. * <pre>
  13. * -- SuperLU routine (version 4.1) --
  14. * Lawrence Berkeley National Laboratory.
  15. * June 30, 2009
  16. * </pre>
  17. */
  18. #include <math.h>
  19. #include <stdlib.h>
  20. #include "slu_ddefs.h"
  21. extern void dswap_(int *, double [], int *, double [], int *);
  22. extern void daxpy_(int *, double *, double [], int *, double [], int *);
  23. extern void dcopy_(int *, double [], int *, double [], int *);
  24. extern double dasum_(int *, double *, int *);
  25. extern double dnrm2_(int *, double *, int *);
  26. extern double dnrm2_(int *, double [], int *);
  27. extern int idamax_(int *, double [], int *);
  28. static double *A; /* used in _compare_ only */
  29. static int _compare_(const void *a, const void *b)
  30. {
  31. register int *x = (int *)a, *y = (int *)b;
  32. if (A[*x] - A[*y] > 0.0) return -1;
  33. else if (A[*x] - A[*y] < 0.0) return 1;
  34. else return 0;
  35. }
  36. /*! \brief
  37. * <pre>
  38. * Purpose
  39. * =======
  40. * ilu_ddrop_row() - Drop some small rows from the previous
  41. * supernode (L-part only).
  42. * </pre>
  43. */
  44. int ilu_ddrop_row(
  45. superlu_options_t *options, /* options */
  46. int first, /* index of the first column in the supernode */
  47. int last, /* index of the last column in the supernode */
  48. double drop_tol, /* dropping parameter */
  49. int quota, /* maximum nonzero entries allowed */
  50. int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */
  51. double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots,
  52. * does not change if options->ILU_MILU != SMILU1 */
  53. GlobalLU_t *Glu, /* modified */
  54. double dwork[], /* working space
  55. * the length of dwork[] should be no less than
  56. * the number of rows in the supernode */
  57. double dwork2[], /* working space with the same size as dwork[],
  58. * used only by the second dropping rule */
  59. int lastc /* if lastc == 0, there is nothing after the
  60. * working supernode [first:last];
  61. * if lastc == 1, there is one more column after
  62. * the working supernode. */ )
  63. {
  64. register int i, j, k, m1;
  65. register int nzlc; /* number of nonzeros in column last+1 */
  66. register int xlusup_first, xlsub_first;
  67. int m, n; /* m x n is the size of the supernode */
  68. int r = 0; /* number of dropped rows */
  69. register double *temp;
  70. register double *lusup = (double *) Glu->lusup;
  71. register int *lsub = Glu->lsub;
  72. register int *xlsub = Glu->xlsub;
  73. register int *xlusup = Glu->xlusup;
  74. register double d_max = 0.0, d_min = 1.0;
  75. int drop_rule = options->ILU_DropRule;
  76. milu_t milu = options->ILU_MILU;
  77. norm_t nrm = options->ILU_Norm;
  78. double zero = 0.0;
  79. double one = 1.0;
  80. double none = -1.0;
  81. int i_1 = 1;
  82. int inc_diag; /* inc_diag = m + 1 */
  83. int nzp = 0; /* number of zero pivots */
  84. double alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);
  85. xlusup_first = xlusup[first];
  86. xlsub_first = xlsub[first];
  87. m = xlusup[first + 1] - xlusup_first;
  88. n = last - first + 1;
  89. m1 = m - 1;
  90. inc_diag = m + 1;
  91. nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
  92. temp = dwork - n;
  93. /* Quick return if nothing to do. */
  94. if (m == 0 || m == n || drop_rule == NODROP)
  95. {
  96. *nnzLj += m * n;
  97. return 0;
  98. }
  99. /* basic dropping: ILU(tau) */
  100. for (i = n; i <= m1; )
  101. {
  102. /* the average abs value of ith row */
  103. switch (nrm)
  104. {
  105. case ONE_NORM:
  106. temp[i] = dasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
  107. break;
  108. case TWO_NORM:
  109. temp[i] = dnrm2_(&n, &lusup[xlusup_first + i], &m)
  110. / sqrt((double)n);
  111. break;
  112. case INF_NORM:
  113. default:
  114. k = idamax_(&n, &lusup[xlusup_first + i], &m) - 1;
  115. temp[i] = fabs(lusup[xlusup_first + i + m * k]);
  116. break;
  117. }
  118. /* drop small entries due to drop_tol */
  119. if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
  120. {
  121. r++;
  122. /* drop the current row and move the last undropped row here */
  123. if (r > 1) /* add to last row */
  124. {
  125. /* accumulate the sum (for MILU) */
  126. switch (milu)
  127. {
  128. case SMILU_1:
  129. case SMILU_2:
  130. daxpy_(&n, &one, &lusup[xlusup_first + i], &m,
  131. &lusup[xlusup_first + m - 1], &m);
  132. break;
  133. case SMILU_3:
  134. for (j = 0; j < n; j++)
  135. lusup[xlusup_first + (m - 1) + j * m] +=
  136. fabs(lusup[xlusup_first + i + j * m]);
  137. break;
  138. case SILU:
  139. default:
  140. break;
  141. }
  142. dcopy_(&n, &lusup[xlusup_first + m1], &m,
  143. &lusup[xlusup_first + i], &m);
  144. } /* if (r > 1) */
  145. else /* move to last row */
  146. {
  147. dswap_(&n, &lusup[xlusup_first + m1], &m,
  148. &lusup[xlusup_first + i], &m);
  149. if (milu == SMILU_3)
  150. for (j = 0; j < n; j++) {
  151. lusup[xlusup_first + m1 + j * m] =
  152. fabs(lusup[xlusup_first + m1 + j * m]);
  153. }
  154. }
  155. lsub[xlsub_first + i] = lsub[xlsub_first + m1];
  156. m1--;
  157. continue;
  158. } /* if dropping */
  159. else
  160. {
  161. if (temp[i] > d_max) d_max = temp[i];
  162. if (temp[i] < d_min) d_min = temp[i];
  163. }
  164. i++;
  165. } /* for */
  166. /* Secondary dropping: drop more rows according to the quota. */
  167. quota = ceil((double)quota / (double)n);
  168. if (drop_rule & DROP_SECONDARY && m - r > quota)
  169. {
  170. register double tol = d_max;
  171. /* Calculate the second dropping tolerance */
  172. if (quota > n)
  173. {
  174. if (drop_rule & DROP_INTERP) /* by interpolation */
  175. {
  176. d_max = 1.0 / d_max; d_min = 1.0 / d_min;
  177. tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
  178. }
  179. else /* by quick select */
  180. {
  181. int len = m1 - n + 1;
  182. dcopy_(&len, dwork, &i_1, dwork2, &i_1);
  183. tol = dqselect(len, dwork2, quota - n);
  184. #if 0
  185. register int *itemp = iwork - n;
  186. A = temp;
  187. for (i = n; i <= m1; i++) itemp[i] = i;
  188. qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
  189. tol = temp[itemp[quota]];
  190. #endif
  191. }
  192. }
  193. for (i = n; i <= m1; )
  194. {
  195. if (temp[i] <= tol)
  196. {
  197. register int j;
  198. r++;
  199. /* drop the current row and move the last undropped row here */
  200. if (r > 1) /* add to last row */
  201. {
  202. /* accumulate the sum (for MILU) */
  203. switch (milu)
  204. {
  205. case SMILU_1:
  206. case SMILU_2:
  207. daxpy_(&n, &one, &lusup[xlusup_first + i], &m,
  208. &lusup[xlusup_first + m - 1], &m);
  209. break;
  210. case SMILU_3:
  211. for (j = 0; j < n; j++)
  212. lusup[xlusup_first + (m - 1) + j * m] +=
  213. fabs(lusup[xlusup_first + i + j * m]);
  214. break;
  215. case SILU:
  216. default:
  217. break;
  218. }
  219. dcopy_(&n, &lusup[xlusup_first + m1], &m,
  220. &lusup[xlusup_first + i], &m);
  221. } /* if (r > 1) */
  222. else /* move to last row */
  223. {
  224. dswap_(&n, &lusup[xlusup_first + m1], &m,
  225. &lusup[xlusup_first + i], &m);
  226. if (milu == SMILU_3)
  227. for (j = 0; j < n; j++) {
  228. lusup[xlusup_first + m1 + j * m] =
  229. fabs(lusup[xlusup_first + m1 + j * m]);
  230. }
  231. }
  232. lsub[xlsub_first + i] = lsub[xlsub_first + m1];
  233. m1--;
  234. temp[i] = temp[m1];
  235. continue;
  236. }
  237. i++;
  238. } /* for */
  239. } /* if secondary dropping */
  240. for (i = n; i < m; i++) temp[i] = 0.0;
  241. if (r == 0)
  242. {
  243. *nnzLj += m * n;
  244. return 0;
  245. }
  246. /* add dropped entries to the diagnal */
  247. if (milu != SILU)
  248. {
  249. register int j;
  250. double t;
  251. double omega;
  252. for (j = 0; j < n; j++)
  253. {
  254. t = lusup[xlusup_first + (m - 1) + j * m];
  255. if (t == zero) continue;
  256. if (t > zero)
  257. omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / t, 1.0);
  258. else
  259. omega = SUPERLU_MAX(2.0 * (1.0 - alpha) / t, -1.0);
  260. t *= omega;
  261. switch (milu)
  262. {
  263. case SMILU_1:
  264. if (t != none) {
  265. lusup[xlusup_first + j * inc_diag] *= (one + t);
  266. }
  267. else
  268. {
  269. lusup[xlusup_first + j * inc_diag] *= *fill_tol;
  270. #ifdef DEBUG
  271. printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
  272. fflush(stdout);
  273. #endif
  274. nzp++;
  275. }
  276. break;
  277. case SMILU_2:
  278. lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t));
  279. break;
  280. case SMILU_3:
  281. lusup[xlusup_first + j * inc_diag] *= (one + t);
  282. break;
  283. case SILU:
  284. default:
  285. break;
  286. }
  287. }
  288. if (nzp > 0) *fill_tol = -nzp;
  289. }
  290. /* Remove dropped entries from the memory and fix the pointers. */
  291. m1 = m - r;
  292. for (j = 1; j < n; j++)
  293. {
  294. register int tmp1, tmp2;
  295. tmp1 = xlusup_first + j * m1;
  296. tmp2 = xlusup_first + j * m;
  297. for (i = 0; i < m1; i++)
  298. lusup[i + tmp1] = lusup[i + tmp2];
  299. }
  300. for (i = 0; i < nzlc; i++)
  301. lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
  302. for (i = 0; i < nzlc; i++)
  303. lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
  304. for (i = first + 1; i <= last + 1; i++)
  305. {
  306. xlusup[i] -= r * (i - first);
  307. xlsub[i] -= r;
  308. }
  309. if (lastc)
  310. {
  311. xlusup[last + 2] -= r * n;
  312. xlsub[last + 2] -= r;
  313. }
  314. *nnzLj += (m - r) * n;
  315. return r;
  316. }