PageRenderTime 25ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/cscalapack/pdpotrf.c

https://bitbucket.org/bosilca/dague.public
C | 254 lines | 215 code | 27 blank | 12 comment | 36 complexity | 966006e95d5e7a47f884c8294f0c5e13 MD5 | raw file
  1. /*
  2. * Copyright (c) 2009-2013 The University of Tennessee and The University
  3. * of Tennessee Research Foundation. All rights
  4. * reserved.
  5. * Copyright (c) 2010 University of Denver, Colorado.
  6. */
  7. #include <stdlib.h>
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <assert.h>
  11. #include <mpi.h>
  12. #include <math.h>
  13. #include "myscalapack.h"
  14. #include "../../dplasma/testing/flops.h"
  15. #ifndef max
  16. #define max(_a, _b) ( (_a) < (_b) ? (_b) : (_a) )
  17. #define min(_a, _b) ( (_a) > (_b) ? (_b) : (_a) )
  18. #endif
  19. static int i0=0, i1=1;
  20. static double m1=-1e0, p0=0e0, p1=1e0;
  21. typedef enum {
  22. PARAM_BLACS_CTX,
  23. PARAM_RANK,
  24. PARAM_M,
  25. PARAM_N,
  26. PARAM_NB,
  27. PARAM_SEED,
  28. PARAM_VALIDATE,
  29. PARAM_NRHS
  30. } params_enum_t;
  31. static void setup_params( int params[], int argc, char* argv[] );
  32. static void random_matrix( double* M, int descM[], int seed, double diagbump );
  33. static double check_solution( int params[], double *Allt );
  34. int main( int argc, char **argv ) {
  35. int params[8];
  36. int info;
  37. int ictxt, nprow, npcol, myrow, mycol, iam;
  38. int m, n, nb, s, mloc, nloc;
  39. double *A=NULL; int descA[9];
  40. double resid;
  41. double telapsed, gflops, pgflops;
  42. setup_params( params, argc, argv );
  43. ictxt = params[PARAM_BLACS_CTX];
  44. iam = params[PARAM_RANK];
  45. m = params[PARAM_M];
  46. n = params[PARAM_N];
  47. nb = params[PARAM_NB];
  48. s = params[PARAM_NRHS];
  49. Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
  50. mloc = numroc_( &m, &nb, &myrow, &i0, &nprow );
  51. nloc = numroc_( &n, &nb, &mycol, &i0, &npcol );
  52. descinit_( descA, &m, &n, &nb, &nb, &i0, &i0, &ictxt, &mloc, &info );
  53. assert( 0 == info );
  54. A = malloc( sizeof(double)*mloc*nloc );
  55. random_matrix( A, descA, iam*n*max(m,s), n );
  56. { double t1, t2;
  57. t1 = MPI_Wtime();
  58. pdpotrf_( "L", &n, A, &i1, &i1, descA, &info );
  59. assert( 0 == info );
  60. t2 = MPI_Wtime();
  61. telapsed = t2-t1;
  62. }
  63. resid = check_solution( params, A );
  64. if( 0 != iam )
  65. MPI_Reduce( &telapsed, NULL, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
  66. else {
  67. MPI_Reduce( MPI_IN_PLACE, &telapsed, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
  68. gflops = FLOPS_DPOTRF((double)n)/1e+9/telapsed;
  69. pgflops = gflops/(((double)nprow)*((double)npcol));
  70. printf( "### PDPOTRF ###\n"
  71. "#%4sx%-4s %7s %7s %4s %4s # %10s %10s %10s %11s\n", "P", "Q", "M", "N", "NB", "NRHS", "resid", "time(s)", "gflops", "gflops/PxQ" );
  72. printf( " %4d %-4d %7d %7d %4d %4d %10.3e %10.3g %10.3g %11.3g\n", nprow, npcol, m, n, nb, s, resid, telapsed, gflops, pgflops );
  73. }
  74. free( A ); A = NULL;
  75. Cblacs_exit( 0 );
  76. return 0;
  77. }
  78. static double check_solution( int params[], double* Allt ) {
  79. double resid = NAN;
  80. if( params[PARAM_VALIDATE] ) {
  81. int info;
  82. int ictxt = params[PARAM_BLACS_CTX],
  83. iam = params[PARAM_RANK];
  84. int m = params[PARAM_M],
  85. n = params[PARAM_N],
  86. nb = params[PARAM_NB],
  87. s = params[PARAM_NRHS];
  88. int nprow, npcol, myrow, mycol;
  89. int mloc, nloc, sloc;
  90. double *A=NULL; int descA[9];
  91. double *B=NULL; int descB[9];
  92. double *X=NULL;
  93. double eps, Anorm, Bnorm, Xnorm, Rnorm;
  94. Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
  95. mloc = numroc_( &m, &nb, &myrow, &i0, &nprow );
  96. nloc = numroc_( &n, &nb, &mycol, &i0, &npcol );
  97. sloc = numroc_( &s, &nb, &mycol, &i0, &npcol );
  98. /* recreate A */
  99. descinit_( descA, &m, &n, &nb, &nb, &i0, &i0, &ictxt, &mloc, &info );
  100. assert( 0 == info );
  101. A = malloc( sizeof(double)*mloc*nloc );
  102. random_matrix( A, descA, iam*n*max(m,s), n );
  103. /* create B and copy it to X */
  104. descinit_( descB, &n, &s, &nb, &nb, &i0, &i0, &ictxt, &mloc, &info );
  105. assert( 0 == info );
  106. B = malloc( sizeof(double)*mloc*sloc );
  107. X = malloc( sizeof(double)*mloc*sloc );
  108. random_matrix( B, descB, -1, 0e0 );
  109. pdlacpy_( "All", &n, &s, B, &i1, &i1, descB, X, &i1, &i1, descB );
  110. { double ldw = nb*ceil(ceil(mloc/(double)nb)/(ilcm_(&nprow, &npcol)/nprow));
  111. double *work = malloc( sizeof(double)*(2*nloc + mloc + ldw) );
  112. Anorm = pdlansy_( "I", "L", &n, A, &i1, &i1, descA, work );
  113. Bnorm = pdlange_( "I", &n, &s, B, &i1, &i1, descB, work );
  114. free( work );
  115. }
  116. /* Compute X from Allt */
  117. pdpotrs_( "L", &n, &s, Allt, &i1, &i1, descA, X, &i1, &i1, descB, &info );
  118. assert( 0 == info );
  119. /* Compute B-AX */
  120. pdsymm_( "L", "L", &n, &s, &m1, A, &i1, &i1, descA, X, &i1, &i1, descB,
  121. &p1, B, &i1, &i1, descB);
  122. { double *work = malloc( sizeof(double)*mloc );
  123. Xnorm = pdlange_( "I", &n, &s, X, &i1, &i1, descB, work );
  124. Rnorm = pdlange_( "I", &n, &s, B, &i1, &i1, descB, work );
  125. free( work );
  126. }
  127. eps = pdlamch_( &ictxt, "Epsilon" );
  128. resid = Rnorm / ( (Bnorm + Anorm * Xnorm) * fmax(m, n) * eps );
  129. free( A ); free( B ); free( X );
  130. }
  131. return resid;
  132. }
  133. /* not that smart..., if only pdmatgen was available */
  134. static void random_matrix( double* M, int descM[], int seed, double diagbump ) {
  135. int m = descM[2],
  136. n = descM[3],
  137. nb = descM[5];
  138. pdlaset_( "All", &m, &n, &p0, &diagbump, M, &i1, &i1, descM );
  139. { int ictxt = descM[1];
  140. int nprow, npcol, myrow, mycol;
  141. int mloc, nloc;
  142. int i, j, k = 0;
  143. Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
  144. mloc = numroc_( &m, &nb, &myrow, &i0, &nprow );
  145. nloc = numroc_( &n, &nb, &mycol, &i0, &npcol );
  146. if( seed >= 0 ) srand( seed );
  147. for( i = 0; i < mloc; i++ ) {
  148. for( j = 0; j < nloc; j++ ) {
  149. M[k] += ((double)rand()) / ((double)RAND_MAX) - 0.5;
  150. k++;
  151. }
  152. }
  153. }
  154. }
  155. static void setup_params( int params[], int argc, char* argv[] ) {
  156. int i;
  157. int ictxt, iam, nprocs, p, q;
  158. MPI_Init( &argc, &argv );
  159. Cblacs_pinfo( &iam, &nprocs );
  160. Cblacs_get( -1, 0, &ictxt );
  161. p = 1;
  162. q = 1;
  163. params[PARAM_M] = 0;
  164. params[PARAM_N] = 1000;
  165. params[PARAM_NB] = 64;
  166. params[PARAM_SEED] = 0;
  167. params[PARAM_VALIDATE] = 1;
  168. params[PARAM_NRHS] = 1;
  169. for( i = 1; i < argc; i++ ) {
  170. if( strcmp( argv[i], "-p" ) == 0 ) {
  171. p = atoi(argv[i+1]);
  172. i++;
  173. continue;
  174. }
  175. if( strcmp( argv[i], "-q" ) == 0 ) {
  176. q = atoi(argv[i+1]);
  177. i++;
  178. continue;
  179. }
  180. if( strcmp( argv[i], "-n" ) == 0 ) {
  181. params[PARAM_N] = atoi(argv[i+1]);
  182. i++;
  183. continue;
  184. }
  185. if( strcmp( argv[i], "-b" ) == 0 ) {
  186. params[PARAM_NB] = atoi(argv[i+1]);
  187. i++;
  188. continue;
  189. }
  190. if( strcmp( argv[i], "-x" ) == 0 ) {
  191. params[PARAM_VALIDATE] = 0;
  192. continue;
  193. }
  194. if( strcmp( argv[i], "-s" ) == 0 ) {
  195. params[PARAM_NRHS] = atoi(argv[i+1]);
  196. i++;
  197. continue;
  198. }
  199. fprintf( stderr, "### USAGE: %s [-p NUM][-q NUM][-n NUM][-b NUM][-x][-s NUM]\n"
  200. "# -p: number of rows in the PxQ process grid\n"
  201. "# -q: number of columns in the PxQ process grid\n"
  202. "# -n: dimension of the matrix (NxN)\n"
  203. "# -b: block size (NB)\n"
  204. "# -s: number of right hand sides for backward error computation (NRHS)\n"
  205. "# -x: disable verification\n", argv[0] );
  206. Cblacs_abort( ictxt, i );
  207. }
  208. /* Validity checks etc. */
  209. if( params[PARAM_NB] > params[PARAM_N] )
  210. params[PARAM_NB] = params[PARAM_N];
  211. if( 0 == params[PARAM_M] )
  212. params[PARAM_M] = params[PARAM_N];
  213. if( p*q > nprocs ) {
  214. if( 0 == iam )
  215. fprintf( stderr, "### ERROR: we do not have enough processes available to make a p-by-q process grid ###\n"
  216. "### Bye-bye ###\n" );
  217. Cblacs_abort( ictxt, 1 );
  218. }
  219. if( params[PARAM_VALIDATE] && (params[PARAM_M] != params[PARAM_N]) ) {
  220. if( 0 == iam )
  221. fprintf( stderr, "### WARNING: Unable to validate on a non-square matrix. Canceling validation.\n" );
  222. params[PARAM_VALIDATE] = 0;
  223. }
  224. Cblacs_gridinit( &ictxt, "Row", p, q );
  225. params[PARAM_BLACS_CTX] = ictxt;
  226. params[PARAM_RANK] = iam;
  227. }