PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/extlibs/taucs-2.2--my-fix/progs/direct.c

https://code.google.com/p/inla/
C | 756 lines | 547 code | 120 blank | 89 comment | 139 complexity | 916a851e0ef4a8bb3694cf63fea3b1a8 MD5 | raw file
  1. /*********************************************************/
  2. /* TAUCS */
  3. /* Author: Sivan Toledo */
  4. /*********************************************************/
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include <math.h>
  9. #include <assert.h>
  10. #ifndef WIN32
  11. #include <unistd.h>
  12. #include <pthread.h>
  13. #endif
  14. #include <taucs.h>
  15. /* extern functions omer*/
  16. extern int zscal_(int*, taucs_dcomplex*,
  17. double*, int*);
  18. extern int zaxpy_(int*,
  19. taucs_dcomplex*, double*, int*, double*,
  20. int*);
  21. /*********************************************************/
  22. /* */
  23. /*********************************************************/
  24. /* added ifndef omer*/
  25. #ifndef max
  26. #define max(x,y) ( ((x) > (y)) ? (x) : (y) )
  27. #endif
  28. int M,N; /* Size of matrix */
  29. /*********************************************************/
  30. /* */
  31. /*********************************************************/
  32. #if 0
  33. static double twonorm(int n, double* v)
  34. {
  35. /*
  36. double norm;
  37. int i;
  38. for (i=0, norm=0.0; i<n; i++) norm += v[i]*v[i];
  39. norm = sqrt(norm);
  40. return norm;
  41. */
  42. double ssq, scale, absvi;
  43. int i;
  44. if (n==1) return fabs(v[0]);
  45. scale = 0.0;
  46. ssq = 1.0;
  47. for (i=0; i<n; i++) {
  48. if ( v[i] != 0 ) {
  49. absvi = fabs(v[i]);
  50. if (scale < absvi) {
  51. ssq = 1.0 + ssq * (scale/absvi)*(scale/absvi);
  52. scale = absvi;
  53. } else
  54. ssq = ssq + (absvi/scale)*(absvi/scale);
  55. }
  56. }
  57. return scale * sqrt( ssq );
  58. }
  59. #endif
  60. /*********************************************************/
  61. /* */
  62. /*********************************************************/
  63. void usage(int argc, char* argv[])
  64. {
  65. printf("%s OPTIONS\n",argv[0]);
  66. printf(" -help or -h or no arguments at all\n");
  67. printf(" this help\n");
  68. printf(" MATRIX OPTIONS:\n");
  69. printf(" -hb Harwll-Boeing-filename (matrix from file)\n");
  70. printf(" -ijv ijvfilename (matrix from file)\n");
  71. printf(" -mtx mtxfilename (matrix from file)\n");
  72. printf(" -ccs ccsfilename (matrix from file)\n");
  73. printf(" -mesh3d X Y Z (X-by-Y-by-Z poisson problem)\n");
  74. printf(" -mesh2d n (n-by-n poisson problem)\n");
  75. printf(" -mat_type [anisotropic_y/anisotropic_x/dirichlet/neumann]\n");
  76. printf(" mat_type only applicable to -mesh2d\n");
  77. printf(" (3D meshes are always Neumann BC)\n");
  78. printf(" anisotropic problems are Neumann\n");
  79. printf(" RIGHT-HAND SIDE OPTIONS:\n");
  80. printf(" -n+rhs filename (order n followed by rhs, ascii)\n");
  81. printf(" FACTOR/SOLVE OPTIONS:\n");
  82. printf(" -ldlt\n");
  83. printf(" LDL^T factorization (as opposed to LL^T)\n");
  84. printf(" -snmf\n");
  85. printf(" supernodal-multifrontal\n");
  86. printf(" -snll\n");
  87. printf(" supernodal-left-looking\n");
  88. printf(" -symb\n");
  89. printf(" supernodal-multifrontal, separate symbolic & numeric phases\n");
  90. printf(" -ooc\n");
  91. printf(" out-of-core supernodal left-looking\n");
  92. printf(" OOC OPTIONS:\n");
  93. printf(" -matrixfile basename\n");
  94. printf(" base name for files; default is /tmp/taucs\n");
  95. printf(" -memory M\n");
  96. printf(" amount of memory in megabytes to use;\n");
  97. printf(" if not given, TAUCS tries to figure an optimal amount;\n");
  98. printf(" OTHER OPTIONS:\n");
  99. printf(" -log filename\n");
  100. printf(" write log into filename; can be stdout and stderr\n");
  101. exit(0);
  102. }
  103. /*********************************************************/
  104. /* */
  105. /*********************************************************/
  106. #if 1
  107. int main(int argc, char* argv[])
  108. {
  109. int actual_main(int argc, char* argv[]);
  110. return actual_main(argc,argv);
  111. }
  112. #else
  113. struct arg { int c; char** v; };
  114. void* start_routine(void* varg) {
  115. struct arg* cv = (struct arg*) varg;
  116. int actual_main(int argc, char* argv[]);
  117. fprintf(stderr,"***3\n");
  118. (void) actual_main(cv->c, cv->v);
  119. fprintf(stderr,"***4\n");
  120. return NULL;
  121. }
  122. int main(int argc, char* argv[])
  123. {
  124. pthread_t thread;
  125. pthread_attr_t attr;
  126. struct arg cv;
  127. void* ret_val;
  128. void* (*start_routine)(void *);
  129. cv.c = argc;
  130. cv.v = argv;
  131. pthread_attr_init(&attr);
  132. fprintf(stderr,"***1\n");
  133. pthread_create(&thread, NULL, start_routine, (void*) &cv);
  134. fprintf(stderr,"***2\n");
  135. /*pthread_join(thread, &ret_val);*/
  136. pthread_join(thread, NULL);
  137. return 0;
  138. }
  139. #endif
  140. /*********************************************************/
  141. /* */
  142. /*********************************************************/
  143. int actual_main(int argc, char* argv[])
  144. {
  145. double wtime_order;
  146. double wtime_permute;
  147. double wtime_factor;
  148. double wtime_solve;
  149. /*double wtime_precond_create; omer*/
  150. double ctime_factor;
  151. int i;/*,j omer*/
  152. double NormErr;
  153. taucs_ccs_matrix* A = NULL;
  154. taucs_ccs_matrix* PAPT = NULL;
  155. taucs_ccs_matrix* L = NULL;
  156. double* Xd = NULL;
  157. double* Bd = NULL;
  158. double* PXd = NULL;
  159. double* PBd = NULL;
  160. double* NXd = NULL;
  161. float* Xs = NULL;
  162. float* Bs = NULL;
  163. float* PXs = NULL;
  164. float* PBs = NULL;
  165. float* NXs = NULL;
  166. taucs_dcomplex* Xz = NULL;
  167. taucs_dcomplex* Bz = NULL;
  168. taucs_dcomplex* PXz = NULL;
  169. taucs_dcomplex* PBz = NULL;
  170. taucs_dcomplex* NXz = NULL;
  171. char* ordering = "metis";
  172. char* mat_type = "neumann";
  173. int* perm;
  174. int* invperm;
  175. int precision = TAUCS_DOUBLE;
  176. int ldlt_flag = 0;
  177. int snmf_flag = 0;
  178. int snll_flag = 0;
  179. int symb_flag = 0;
  180. int mesh2d_flag = 0,mesh2d_size = 0;
  181. int ooc_flag = 0;
  182. int panelize = 0;
  183. double memory_mb = -1.0;
  184. char* matrixfile = "/tmp/taucs.L";
  185. taucs_io_handle* oocL = NULL;
  186. int (*precond_fn)(void*,void* x,void* b);
  187. void* precond_args;
  188. /***********************************************************/
  189. /* Read arguments: log file, matrix, memory size, ooc name */
  190. /***********************************************************/
  191. if ((argc == 1) ||((argc == 2) && !strncmp(argv[1],"-h",2)))
  192. usage(argc,argv);
  193. A = NULL;
  194. for (i=1; i<argc; i++) {
  195. if (!strcmp(argv[i],"-single")) precision = TAUCS_SINGLE;
  196. if (!strcmp(argv[i],"-dcomplex")) precision = TAUCS_DCOMPLEX;
  197. if (!strcmp(argv[i],"-ldlt")) ldlt_flag = 1;
  198. if (!strcmp(argv[i],"-snmf")) snmf_flag = 1;
  199. if (!strcmp(argv[i],"-snll")) snll_flag = 1;
  200. if (!strcmp(argv[i],"-symb")) symb_flag = 1;
  201. if (!strcmp(argv[i],"-ooc")) ooc_flag = 1;
  202. if (!strcmp(argv[i],"-log") && i <= argc-1 ) {
  203. i++;
  204. taucs_logfile(argv[i]);
  205. }
  206. if (!strcmp(argv[i],"-panelize") && i <= argc-1) {
  207. i++;
  208. if (sscanf(argv[i],"%d",&panelize) != 1) {
  209. taucs_printf("0 (smart), 1 (in-core), or 2 (single supernode) follow -panelize argument\n");
  210. exit(1);
  211. }
  212. }
  213. if (!strcmp(argv[i],"-memory") && i <= argc-1) {
  214. i++;
  215. if (sscanf(argv[i],"%lf",&memory_mb) != 1) {
  216. taucs_printf("memory size in MB must follow -memory argument\n");
  217. exit(1);
  218. }
  219. }
  220. if (!strcmp(argv[i],"-matrixfile") && i <= argc-1 ) {
  221. i++;
  222. matrixfile = argv[i];
  223. }
  224. if (!strcmp(argv[i],"-ordering") && i <= argc-1 ) {
  225. i++;
  226. ordering = argv[i];
  227. }
  228. if (!strcmp(argv[i],"-mat_type") && i <= argc-1 ) {
  229. i++;
  230. mat_type = argv[i];
  231. }
  232. #if 0
  233. if (!strcmp(argv[i],"-hb") && i <= argc-1 ) {
  234. int nrows,ncols,nnz,j;
  235. char fname[256];
  236. char type[3];
  237. i++;
  238. for (j=0; j<256; j++) fname[j] = ' ';
  239. strcpy(fname,argv[i]);
  240. taucs_printf("main: reading HB matrix %s\n",argv[i]);
  241. ireadhb_(fname,type,&nrows,&ncols,&nnz);
  242. A = taucs_dccs_creagte(nrows,ncols,nnz);
  243. if (type[1] == 's' || type[1] == 'S')
  244. A->flags |= TAUCS_SYMMETRIC | TAUCS_LOWER;
  245. dreadhb_(fname,&nrows,&ncols,&nnz,
  246. A->colptr,A->rowind,A->values);
  247. /* make indices 0-based */
  248. for (j=0; j<=ncols; j++) ((A->colptr)[j])--;
  249. for (j=0; j<nnz; j++) ((A->rowind)[j])--;
  250. taucs_printf("main: done reading\n");
  251. }
  252. #endif
  253. if (!strcmp(argv[i],"-hb") && i <= argc-1) {
  254. i++;
  255. taucs_printf("main: reading hb matrix %s\n",argv[i]);
  256. switch (precision) {
  257. case TAUCS_SINGLE:
  258. A = taucs_ccs_read_hb (argv[i], TAUCS_SINGLE); break;
  259. case TAUCS_DOUBLE:
  260. A = taucs_ccs_read_hb (argv[i], TAUCS_DOUBLE); break;
  261. case TAUCS_DCOMPLEX:
  262. A = taucs_ccs_read_hb (argv[i], TAUCS_DCOMPLEX); break;
  263. default:
  264. taucs_printf("main: unknown precision\n");
  265. exit(1);
  266. }
  267. taucs_printf("main: done reading\n");
  268. }
  269. if (!strcmp(argv[i],"-mtx") && i <= argc-1) {
  270. i++;
  271. taucs_printf("main: reading mtx matrix %s\n",argv[i]);
  272. A = taucs_ccs_read_mtx (argv[i],TAUCS_SYMMETRIC | TAUCS_PATTERN);
  273. taucs_printf("main: done reading\n");
  274. }
  275. if (!strcmp(argv[i],"-ijv") && i <= argc-1) {
  276. printf(">>> ijv\n");
  277. i++;
  278. taucs_printf("main: reading ijv matrix %s\n",argv[i]);
  279. switch (precision) {
  280. case TAUCS_SINGLE:
  281. A = taucs_ccs_read_ijv (argv[i],TAUCS_SYMMETRIC | TAUCS_SINGLE); break;
  282. case TAUCS_DOUBLE:
  283. A = taucs_ccs_read_ijv (argv[i],TAUCS_SYMMETRIC | TAUCS_DOUBLE); break;
  284. case TAUCS_DCOMPLEX:
  285. A = taucs_ccs_read_ijv (argv[i],TAUCS_HERMITIAN | TAUCS_DCOMPLEX); break;
  286. default:
  287. taucs_printf("main: unknown precision\n");
  288. exit(1);
  289. }
  290. taucs_printf("main: done reading\n");
  291. }
  292. if (!strcmp(argv[i],"-ccs") && i <= argc-1) {
  293. i++;
  294. taucs_printf("main: reading ccs matrix %s\n",argv[i]);
  295. A = taucs_ccs_read_ccs (argv[i],TAUCS_SYMMETRIC);
  296. taucs_printf("main: done reading\n");
  297. }
  298. if (!strcmp(argv[i],"-mesh2d") && i <= argc-1) {
  299. mesh2d_flag = 1;
  300. taucs_printf("A is a mesh2d\n");
  301. i++;
  302. if (sscanf(argv[i],"%d",&mesh2d_size) != 1) {
  303. taucs_printf("mesh size (n, where the mesh is n-by-n) must follow -mesh2d argument\n");
  304. exit(1);
  305. }
  306. }
  307. if (!strcmp(argv[i],"-mesh3d") && i <= argc-3) {
  308. int X,Y,Z;
  309. taucs_printf("A is a mesh3d\n");
  310. if (sscanf(argv[i+1],"%d",&X) != 1
  311. || sscanf(argv[i+2],"%d",&Y) != 1
  312. || sscanf(argv[i+3],"%d",&Z) != 1) {
  313. taucs_printf("mesh size (X Y Z must follow -mesh3d argument\n");
  314. exit(1);
  315. }
  316. i += 3;
  317. taucs_printf("main: creating mesh\n");
  318. A = taucs_ccs_generate_mesh3d(X,Y,Z);
  319. }
  320. if (!strcmp(argv[i],"-n+rhs") && i <= argc-1) {
  321. FILE* f;
  322. int n,j,nnz;
  323. i++;
  324. taucs_printf("main: reading right-hand side %s\n",argv[i]);
  325. f=fopen(argv[i],"r");
  326. assert(f);
  327. fscanf(f,"%d",&n);
  328. Bd=(double*) malloc(n*sizeof(double));
  329. nnz = 0;
  330. for (j=0; j<n; j++) {
  331. fscanf(f,"%lg",(Bd)+j);
  332. if (Bd[j]) nnz++;
  333. }
  334. fclose(f);
  335. taucs_printf("main: done reading rhs, %d nonzeros\n",nnz);
  336. }
  337. }
  338. taucs_printf("Chosen Ordering: %s\n",ordering);
  339. if (mesh2d_flag)
  340. {
  341. taucs_printf("Matrix type is %s\n",mat_type);
  342. taucs_printf("Grid Size is %d\n",mesh2d_size);
  343. A = taucs_ccs_generate_mesh2d(mesh2d_size,mat_type);
  344. }
  345. if (!A) {
  346. taucs_printf("matrix argument not given or matrix file not found\n");
  347. usage(argc,argv);
  348. }
  349. N = M = A->n;
  350. /*taucs_maximize_stacksize();*/
  351. /***********************************************************/
  352. /* Create exact solution, compute right-hand-side */
  353. /***********************************************************/
  354. if (A->flags & TAUCS_SINGLE) {
  355. if (! (Xs)) {
  356. Xs = (float*)malloc(N*sizeof(float));
  357. /*for(i=0; i<N; i++) (Xs)[i]=(double)random()/RAND_MAX; omer*/
  358. for(i=0; i<N; i++) (Xs)[i]=(float)((double)rand()/RAND_MAX);
  359. } else
  360. taucs_printf("iter: not using a random X, already allocated\n");
  361. if (!(Bs)) {
  362. Bs = (float*)malloc(N*sizeof(float));
  363. taucs_ccs_times_vec(A,Xs,Bs);
  364. } else {
  365. /*double zero1 = 0.0;
  366. double nan = zero1 / zero1; omer*/
  367. double nan = taucs_get_nan();
  368. for(i=0; i<N; i++) Xs[i]= (float)nan;
  369. }
  370. NXs=(float*)malloc(N*sizeof(float));
  371. PXs=(float*)malloc(N*sizeof(float));
  372. PBs=(float*)malloc(N*sizeof(float));
  373. }
  374. if (A->flags & TAUCS_DOUBLE) {
  375. if (! (Xd)) {
  376. Xd =(double*)malloc(N*sizeof(double));
  377. /*for(i=0; i<N; i++) (Xd)[i]=(double)rand()/RAND_MAX; omer*/
  378. for(i=0; i<N; i++) (Xd)[i]=(float)((double)rand()/RAND_MAX);
  379. } else
  380. taucs_printf("iter: not using a random X, already allocated\n");
  381. if (!(Bd)) {
  382. Bd =(double*)malloc(N*sizeof(double));
  383. taucs_ccs_times_vec(A,Xd,Bd);
  384. } else {
  385. /*double zero1 = 0.0;
  386. double nan = zero1 / zero1; omer*/
  387. double nan = taucs_get_nan();
  388. for(i=0; i<N; i++) Xd[i]= (float)nan;
  389. }
  390. NXd=(double*)malloc(N*sizeof(double));
  391. PXd=(double*)malloc(N*sizeof(double));
  392. PBd=(double*)malloc(N*sizeof(double));
  393. }
  394. if (A->flags & TAUCS_DCOMPLEX) {
  395. if (!(Xz)) {
  396. double* p;
  397. taucs_printf("direct: creating a random dcomplex X\n");
  398. Xz =(taucs_dcomplex*)malloc(N*sizeof(taucs_dcomplex));
  399. p = (double*) Xz;
  400. for(i=0; i<2*N; i++) p[i] = (double)rand()/RAND_MAX;
  401. } else
  402. taucs_printf("iter: not using a random X, already allocated\n");
  403. if (!(Bz)) {
  404. Bz =(taucs_dcomplex*)malloc(N*sizeof(taucs_dcomplex));
  405. taucs_ccs_times_vec(A,Xz,Bz);
  406. } else {
  407. double* p;
  408. /*double zero1 = 0.0;
  409. double nan = zero1 / zero1; omer*/
  410. double nan = taucs_get_nan();
  411. p = (double*) Xz;
  412. for(i=0; i<2*N; i++) p[i] = nan;
  413. }
  414. NXz=(taucs_dcomplex*)malloc(N*sizeof(taucs_dcomplex));
  415. PXz=(taucs_dcomplex*)malloc(N*sizeof(taucs_dcomplex));
  416. PBz=(taucs_dcomplex*)malloc(N*sizeof(taucs_dcomplex));
  417. }
  418. /***********************************************************/
  419. /* Compute column ordering */
  420. /***********************************************************/
  421. /***********************************************************/
  422. /* factor */
  423. /***********************************************************/
  424. {
  425. int n;
  426. double unit;
  427. n = A->n;
  428. unit = (n-1.)+n;
  429. wtime_order = taucs_wtime();
  430. taucs_ccs_order(A,&perm,&invperm,ordering);
  431. wtime_order = taucs_wtime() - wtime_order;
  432. taucs_printf("\tOrdering time = % 10.3f seconds\n",wtime_order);
  433. if (!perm) {
  434. taucs_printf("\tOrdering Failed\n");
  435. exit(1);
  436. }
  437. if (0) {
  438. int i;
  439. FILE* f;
  440. f=fopen("p.ijv","w");
  441. for (i=0; i<n; i++) fprintf(f,"%d\n",perm[i]+1);
  442. fclose(f);
  443. }
  444. if (A->flags & TAUCS_SYMMETRIC || A->flags & TAUCS_HERMITIAN) {
  445. wtime_permute = taucs_wtime();
  446. PAPT = taucs_ccs_permute_symmetrically(A,perm,invperm);
  447. wtime_permute = taucs_wtime() - wtime_permute;
  448. taucs_printf("\tPermute time = % 10.3f seconds\n",wtime_permute);
  449. }
  450. wtime_factor = taucs_wtime();
  451. ctime_factor = taucs_ctime();
  452. if (ldlt_flag) {
  453. L = taucs_ccs_factor_ldlt(PAPT);
  454. precond_args = L;
  455. precond_fn = taucs_ccs_solve_ldlt;
  456. } else if (snmf_flag) {
  457. /*taucs_ccs_matrix* C;*/
  458. L = taucs_ccs_factor_llt_mf(PAPT);
  459. precond_args = L;
  460. precond_fn = taucs_supernodal_solve_llt;
  461. {
  462. taucs_ccs_matrix* C;
  463. C = taucs_supernodal_factor_to_ccs(L);
  464. /*taucs_ccs_write_ijv(PAPT,"PAPT.ijv");*/
  465. /*C->flags = TAUCS_DCOMPLEX | TAUCS_TRIANGULAR | TAUCS_LOWER;*/
  466. precond_args = C;
  467. precond_fn = taucs_ccs_solve_llt;
  468. /*taucs_ccs_write_ijv(C,"L.ijv");*/
  469. /*
  470. {
  471. int i;
  472. double* diag = taucs_supernodal_factor_get_diag(L);
  473. for (i=0; i<C->n; i++) {
  474. printf("%.2le\n",diag[i]);
  475. }
  476. }
  477. */
  478. }
  479. } else if (ooc_flag) {
  480. if (A->flags & TAUCS_SYMMETRIC || A->flags & TAUCS_HERMITIAN) {
  481. #define TESTING
  482. #ifdef TESTING
  483. int taucs_ooc_factor_llt_panelchoice(taucs_ccs_matrix* A,
  484. taucs_io_handle* handle,
  485. double memory,
  486. int panelization_method);
  487. /*int c; omer*/
  488. oocL = taucs_io_create_multifile(matrixfile);
  489. assert(oocL);
  490. if (memory_mb == -1.0) memory_mb = taucs_available_memory_size()/1048576.0;
  491. taucs_ooc_factor_llt_panelchoice(PAPT, oocL, memory_mb*1048576.0,panelize);
  492. precond_args = oocL;
  493. precond_fn = taucs_ooc_solve_llt;
  494. #else
  495. /*int c;*/
  496. oocL = taucs_io_create_multifile(matrixfile);
  497. assert(oocL);
  498. if (memory_mb == -1.0) memory_mb = taucs_available_memory_size()/1048576.0;
  499. taucs_ooc_factor_llt(PAPT, oocL, memory_mb*1048576.0);
  500. precond_args = oocL;
  501. precond_fn = taucs_ooc_solve_llt;
  502. #endif
  503. } else {
  504. if (memory_mb == -1.0) memory_mb = taucs_available_memory_size()/1048576.0;
  505. oocL = taucs_io_create_multifile(matrixfile);
  506. taucs_ooc_factor_lu(A, perm, oocL, memory_mb*1048576.0);
  507. precond_args = matrixfile;
  508. precond_fn = NULL;
  509. }
  510. } else if (snll_flag) {
  511. L = taucs_ccs_factor_llt_ll(PAPT);
  512. precond_args = L;
  513. precond_fn = taucs_supernodal_solve_llt;
  514. } else if (symb_flag) {
  515. L = taucs_ccs_factor_llt_symbolic(PAPT);
  516. taucs_ccs_factor_llt_numeric(PAPT,L); /* should check error code */
  517. precond_args = L;
  518. precond_fn = taucs_supernodal_solve_llt;
  519. } else {
  520. L = taucs_ccs_factor_llt(PAPT,0.0,0);
  521. precond_args = L;
  522. precond_fn = taucs_ccs_solve_llt;
  523. }
  524. wtime_factor = taucs_wtime() - wtime_factor;
  525. ctime_factor = taucs_ctime() - ctime_factor;
  526. taucs_printf("\tFactor time = % 10.3f seconds ",wtime_factor);
  527. taucs_printf("(%.3f cpu time)\n",ctime_factor);
  528. }
  529. if (!L && !ooc_flag /* no L in ooc */) {
  530. taucs_printf("\tFactorization Failed\n");
  531. exit(1);
  532. }
  533. /*taucs_ccs_write_ijv(PAPT,"A.ijv",1);*/ /* 1 = complete the upper part */
  534. /*taucs_ccs_write_ijv(L,"L.ijv",0);*/
  535. /***********************************************************/
  536. /* solve */
  537. /***********************************************************/
  538. if (!L) {
  539. taucs_printf("FACTORIZATION FAILED!\n");
  540. exit(1);
  541. }
  542. if (A->flags & TAUCS_SYMMETRIC || A->flags & TAUCS_HERMITIAN) {
  543. if (A->flags & TAUCS_DOUBLE)
  544. taucs_vec_permute(A->n,A->flags,Bd,PBd,perm);
  545. if (A->flags & TAUCS_SINGLE)
  546. taucs_vec_permute(A->n,A->flags,Bs,PBs,perm);
  547. if (A->flags & TAUCS_DCOMPLEX)
  548. taucs_vec_permute(A->n,A->flags,Bz,PBz,perm);
  549. wtime_solve = taucs_wtime();
  550. if (A->flags & TAUCS_DOUBLE)
  551. precond_fn(precond_args,PXd,PBd); /* direct solver */
  552. if (A->flags & TAUCS_SINGLE)
  553. precond_fn(precond_args,PXs,PBs); /* direct solver */
  554. if (A->flags & TAUCS_DCOMPLEX)
  555. precond_fn(precond_args,PXz,PBz); /* direct solver */
  556. #ifdef TAUCS_CONFIG_SINGLE
  557. if (A->flags & TAUCS_SINGLE) {
  558. taucs_sccs_times_vec_dacc(PAPT,PXs,NXs);
  559. for(i=0; i<(A->n); i++) NXs[i] -= PBs[i];
  560. precond_fn(precond_args,PBs,NXs); /* direct solver */
  561. for(i=0; i<(A->n); i++) PXs[i] -= PBs[i];
  562. }
  563. #endif
  564. wtime_solve = taucs_wtime() - wtime_solve;
  565. taucs_printf("\tSolve time = % 10.3f seconds\n",wtime_solve);
  566. if (A->flags & TAUCS_DOUBLE)
  567. taucs_vec_ipermute(A->n,A->flags,PXd,NXd,perm);
  568. if (A->flags & TAUCS_SINGLE)
  569. taucs_vec_ipermute(A->n,A->flags,PXs,NXs,perm);
  570. if (A->flags & TAUCS_DCOMPLEX)
  571. taucs_vec_ipermute(A->n,A->flags,PXz,NXz,perm);
  572. } else {
  573. taucs_ooc_solve_lu(oocL, NXd, Bd);
  574. }
  575. /***********************************************************/
  576. /* delete out-of-core matrices */
  577. /***********************************************************/
  578. if (ooc_flag) {
  579. taucs_io_delete(oocL);
  580. /*taucs_io_close(oocL);*/
  581. }
  582. /***********************************************************/
  583. /* Compute norm of forward error */
  584. /***********************************************************/
  585. if (A->flags & TAUCS_SINGLE) {
  586. float snrm2_();
  587. int one = 1;
  588. NormErr = 0.0;
  589. for(i=0; i<N; i++) NormErr = max(NormErr,fabs((NXs[i]-Xs[i])/Xs[i]));
  590. for(i=0; i<N; i++) PXs[i] = NXs[i]-Xs[i];
  591. taucs_printf("main: max relative error = %1.6e, 2-norm relative error %.2e \n",
  592. NormErr,
  593. snrm2_(&(A->n),PXs,&one)/snrm2_(&(A->n),Xs,&one));
  594. }
  595. if (A->flags & TAUCS_DOUBLE) {
  596. double dnrm2_();
  597. int one = 1;
  598. NormErr = 0.0;
  599. for(i=0; i<N; i++) NormErr = max(NormErr,fabs((NXd[i]-Xd[i])/Xd[i]));
  600. for(i=0; i<N; i++) PXd[i] = NXd[i]-Xd[i];
  601. taucs_printf("main: max relative error = %1.6e, 2-norm relative error %.2e \n",
  602. NormErr,
  603. dnrm2_(&(A->n),PXd,&one)/dnrm2_(&(A->n),Xd,&one));
  604. }
  605. #ifdef TAUCS_CONFIG_DCOMPLEX
  606. if (A->flags & TAUCS_DCOMPLEX) {
  607. double dznrm2_();
  608. int one = 1;
  609. double* pX = (double*) Xz;
  610. double* pNX = (double*) NXz;
  611. double* pPX = (double*) PXz;
  612. taucs_dcomplex zzero = taucs_zzero_const;
  613. taucs_dcomplex zone = taucs_zone_const;
  614. taucs_dcomplex zmone = taucs_zneg(taucs_zone_const);
  615. NormErr = 0.0;
  616. /*
  617. for(i=0; i<N; i++) NormErr = max(NormErr,fabs((NXd[i]-Xd[i])/Xd[i]));
  618. */
  619. /*for(i=0; i<N; i++) PXd[i] = NXd[i]-Xd[i];*/
  620. /*for(i=0; i<N; i++) PXz[i] = taucs_add(NXz[i],taucs_neg(Xz[i]));*/
  621. zscal_(&(A->n),&zzero,pPX,&one);
  622. zaxpy_(&(A->n),&zone ,pNX,&one,pPX,&one);
  623. zaxpy_(&(A->n),&zmone,pX ,&one,pPX,&one);
  624. taucs_printf("main: max relative error = %1.6e, 2-norm relative error %.2e \n",
  625. NormErr,
  626. dznrm2_(&(A->n),PXz,&one)/dznrm2_(&(A->n),Xz,&one));
  627. }
  628. #endif
  629. /***********************************************************/
  630. /* Exit */
  631. /***********************************************************/
  632. taucs_printf("main: done\n");
  633. return 0;
  634. }