PageRenderTime 61ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/staggered_invert_test.cpp

https://github.com/jinhou/quda
C++ | 592 lines | 457 code | 116 blank | 19 comment | 58 complexity | 4c9a5e0fcf0d005499572b332fc8f88a MD5 | raw file
Possible License(s): BSD-3-Clause
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <time.h>
  4. #include <math.h>
  5. #include <test_util.h>
  6. #include <dslash_util.h>
  7. #include <blas_reference.h>
  8. #include <staggered_dslash_reference.h>
  9. #include <quda.h>
  10. #include <string.h>
  11. #include <face_quda.h>
  12. #include "misc.h"
  13. #include <gauge_field.h>
  14. #include <blas_quda.h>
  15. #if defined(QMP_COMMS)
  16. #include <qmp.h>
  17. #elif defined(MPI_COMMS)
  18. #include <mpi.h>
  19. #endif
  20. #ifdef MULTI_GPU
  21. #include <face_quda.h>
  22. #endif
  23. #define MAX(a,b) ((a)>(b)?(a):(b))
  24. #define mySpinorSiteSize 6
  25. extern void usage(char** argv);
  26. void *qdp_fatlink[4];
  27. void *qdp_longlink[4];
  28. void *fatlink;
  29. void *longlink;
  30. #ifdef MULTI_GPU
  31. void** ghost_fatlink, **ghost_longlink;
  32. #endif
  33. extern int device;
  34. extern bool tune;
  35. extern QudaReconstructType link_recon;
  36. extern QudaPrecision prec;
  37. QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
  38. extern QudaReconstructType link_recon_sloppy;
  39. extern QudaPrecision prec_sloppy;
  40. cpuColorSpinorField* in;
  41. cpuColorSpinorField* out;
  42. cpuColorSpinorField* ref;
  43. cpuColorSpinorField* tmp;
  44. cpuGaugeField *cpuFat = NULL;
  45. cpuGaugeField *cpuLong = NULL;
  46. static double tol = 1e-7;
  47. extern int test_type;
  48. extern int xdim;
  49. extern int ydim;
  50. extern int zdim;
  51. extern int tdim;
  52. extern int gridsize_from_cmdline[];
  53. static void end();
  54. template<typename Float>
  55. void constructSpinorField(Float *res) {
  56. for(int i = 0; i < Vh; i++) {
  57. for (int s = 0; s < 1; s++) {
  58. for (int m = 0; m < 3; m++) {
  59. res[i*(1*3*2) + s*(3*2) + m*(2) + 0] = rand() / (Float)RAND_MAX;
  60. res[i*(1*3*2) + s*(3*2) + m*(2) + 1] = rand() / (Float)RAND_MAX;
  61. }
  62. }
  63. }
  64. }
  65. static void
  66. set_params(QudaGaugeParam* gaugeParam, QudaInvertParam* inv_param,
  67. int X1, int X2, int X3, int X4,
  68. QudaPrecision cpu_prec, QudaPrecision prec, QudaPrecision prec_sloppy,
  69. QudaReconstructType link_recon, QudaReconstructType link_recon_sloppy,
  70. double mass, double tol, int maxiter, double reliable_delta,
  71. double tadpole_coeff
  72. )
  73. {
  74. gaugeParam->X[0] = X1;
  75. gaugeParam->X[1] = X2;
  76. gaugeParam->X[2] = X3;
  77. gaugeParam->X[3] = X4;
  78. gaugeParam->cpu_prec = cpu_prec;
  79. gaugeParam->cuda_prec = prec;
  80. gaugeParam->reconstruct = link_recon;
  81. gaugeParam->cuda_prec_sloppy = prec_sloppy;
  82. gaugeParam->reconstruct_sloppy = link_recon_sloppy;
  83. gaugeParam->gauge_fix = QUDA_GAUGE_FIXED_NO;
  84. gaugeParam->anisotropy = 1.0;
  85. gaugeParam->tadpole_coeff = tadpole_coeff;
  86. gaugeParam->scale = -1.0/(24.0*tadpole_coeff*tadpole_coeff);
  87. gaugeParam->t_boundary = QUDA_ANTI_PERIODIC_T;
  88. gaugeParam->gauge_order = QUDA_MILC_GAUGE_ORDER;
  89. gaugeParam->ga_pad = X1*X2*X3/2;
  90. inv_param->verbosity = QUDA_VERBOSE;
  91. inv_param->mass = mass;
  92. // outer solver parameters
  93. inv_param->inv_type = QUDA_CG_INVERTER;
  94. inv_param->tol = tol;
  95. inv_param->maxiter = 500000;
  96. inv_param->reliable_delta = 1e-1;
  97. #if __COMPUTE_CAPABILITY__ >= 200
  98. // require both L2 relative and heavy quark residual to determine convergence
  99. inv_param->residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL | QUDA_HEAVY_QUARK_RESIDUAL);
  100. inv_param->tol_hq = 1e-3; // specify a tolerance for the residual for heavy quark residual
  101. #else
  102. // Pre Fermi architecture only supports L2 relative residual norm
  103. inv_param->residual_type = QUDA_L2_RELATIVE_RESIDUAL;
  104. #endif
  105. //inv_param->inv_type = QUDA_GCR_INVERTER;
  106. //inv_param->gcrNkrylov = 10;
  107. // domain decomposition preconditioner parameters
  108. //inv_param->inv_type_precondition = QUDA_MR_INVERTER;
  109. //inv_param->tol_precondition = 1e-1;
  110. //inv_param->maxiter_precondition = 100;
  111. //inv_param->verbosity_precondition = QUDA_SILENT;
  112. //inv_param->prec_precondition = prec_sloppy;
  113. inv_param->solution_type = QUDA_MATPCDAG_MATPC_SOLUTION;
  114. inv_param->solve_type = QUDA_NORMOP_PC_SOLVE;
  115. inv_param->matpc_type = QUDA_MATPC_EVEN_EVEN;
  116. inv_param->dagger = QUDA_DAG_NO;
  117. inv_param->mass_normalization = QUDA_MASS_NORMALIZATION;
  118. inv_param->cpu_prec = cpu_prec;
  119. inv_param->cuda_prec = prec;
  120. inv_param->cuda_prec_sloppy = prec_sloppy;
  121. inv_param->preserve_source = QUDA_PRESERVE_SOURCE_YES;
  122. inv_param->gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // this is meaningless, but must be thus set
  123. inv_param->dirac_order = QUDA_DIRAC_ORDER;
  124. inv_param->dslash_type = QUDA_ASQTAD_DSLASH;
  125. inv_param->tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;
  126. inv_param->sp_pad = X1*X2*X3/2;
  127. inv_param->use_init_guess = QUDA_USE_INIT_GUESS_YES;
  128. inv_param->input_location = QUDA_CPU_FIELD_LOCATION;
  129. inv_param->output_location = QUDA_CPU_FIELD_LOCATION;
  130. }
  131. int
  132. invert_test(void)
  133. {
  134. QudaGaugeParam gaugeParam = newQudaGaugeParam();
  135. QudaInvertParam inv_param = newQudaInvertParam();
  136. double mass = 0.5;
  137. set_params(&gaugeParam, &inv_param,
  138. xdim, ydim, zdim, tdim,
  139. cpu_prec, prec, prec_sloppy,
  140. link_recon, link_recon_sloppy, mass, tol, 500, 1e-3,
  141. 0.8);
  142. // declare the dimensions of the communication grid
  143. initCommsGridQuda(4, gridsize_from_cmdline, NULL, NULL);
  144. // this must be before the FaceBuffer is created (this is because it allocates pinned memory - FIXME)
  145. initQuda(device);
  146. setDims(gaugeParam.X);
  147. setSpinorSiteSize(6);
  148. size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
  149. for (int dir = 0; dir < 4; dir++) {
  150. qdp_fatlink[dir] = malloc(V*gaugeSiteSize*gSize);
  151. qdp_longlink[dir] = malloc(V*gaugeSiteSize*gSize);
  152. }
  153. fatlink = malloc(4*V*gaugeSiteSize*gSize);
  154. longlink = malloc(4*V*gaugeSiteSize*gSize);
  155. construct_fat_long_gauge_field(qdp_fatlink, qdp_longlink, 1, gaugeParam.cpu_prec, &gaugeParam);
  156. const double cos_pi_3 = 0.5; // Cos(pi/3)
  157. const double sin_pi_3 = sqrt(0.75); // Sin(pi/3)
  158. for(int dir=0; dir<4; ++dir){
  159. for(int i=0; i<V; ++i){
  160. for(int j=0; j<gaugeSiteSize; ++j){
  161. if(gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION){
  162. ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j] = 0.5*rand()/RAND_MAX;
  163. if(link_recon != QUDA_RECONSTRUCT_8 && link_recon != QUDA_RECONSTRUCT_12){ // incorporate non-trivial phase into long links
  164. if(j%2 == 0){
  165. const double real = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j];
  166. const double imag = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
  167. ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
  168. ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
  169. }
  170. }
  171. ((double*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
  172. ((double*)longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j];
  173. }else{
  174. ((float*)qdp_fatlink[dir])[i] = 0.5*rand()/RAND_MAX;
  175. if(link_recon != QUDA_RECONSTRUCT_8 && link_recon != QUDA_RECONSTRUCT_12){ // incorporate non-trivial phase into long links
  176. if(j%2 == 0){
  177. const float real = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j];
  178. const float imag = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
  179. ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
  180. ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
  181. }
  182. }
  183. ((double*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
  184. ((float*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((float*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
  185. ((float*)longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j];
  186. }
  187. }
  188. }
  189. }
  190. ColorSpinorParam csParam;
  191. csParam.nColor=3;
  192. csParam.nSpin=1;
  193. csParam.nDim=4;
  194. for(int d = 0; d < 4; d++) {
  195. csParam.x[d] = gaugeParam.X[d];
  196. }
  197. csParam.x[0] /= 2;
  198. csParam.precision = inv_param.cpu_prec;
  199. csParam.pad = 0;
  200. csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
  201. csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
  202. csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
  203. csParam.gammaBasis = inv_param.gamma_basis;
  204. csParam.create = QUDA_ZERO_FIELD_CREATE;
  205. in = new cpuColorSpinorField(csParam);
  206. out = new cpuColorSpinorField(csParam);
  207. ref = new cpuColorSpinorField(csParam);
  208. tmp = new cpuColorSpinorField(csParam);
  209. if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION){
  210. constructSpinorField((float*)in->V());
  211. }else{
  212. constructSpinorField((double*)in->V());
  213. }
  214. #ifdef MULTI_GPU
  215. int tmp_value = MAX(ydim*zdim*tdim/2, xdim*zdim*tdim/2);
  216. tmp_value = MAX(tmp_value, xdim*ydim*tdim/2);
  217. tmp_value = MAX(tmp_value, xdim*ydim*zdim/2);
  218. int fat_pad = tmp_value;
  219. int link_pad = 3*tmp_value;
  220. gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
  221. gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
  222. GaugeFieldParam cpuFatParam(fatlink, gaugeParam);
  223. cpuFat = new cpuGaugeField(cpuFatParam);
  224. ghost_fatlink = (void**)cpuFat->Ghost();
  225. gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
  226. GaugeFieldParam cpuLongParam(longlink, gaugeParam);
  227. cpuLong = new cpuGaugeField(cpuLongParam);
  228. ghost_longlink = (void**)cpuLong->Ghost();
  229. gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
  230. gaugeParam.ga_pad = fat_pad;
  231. gaugeParam.reconstruct= gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
  232. loadGaugeQuda(fatlink, &gaugeParam);
  233. gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
  234. gaugeParam.ga_pad = link_pad;
  235. gaugeParam.reconstruct= link_recon;
  236. gaugeParam.reconstruct_sloppy = link_recon_sloppy;
  237. loadGaugeQuda(longlink, &gaugeParam);
  238. #else
  239. gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
  240. gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
  241. loadGaugeQuda(fatlink, &gaugeParam);
  242. gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
  243. gaugeParam.reconstruct = link_recon;
  244. gaugeParam.reconstruct_sloppy = link_recon_sloppy;
  245. loadGaugeQuda(longlink, &gaugeParam);
  246. #endif
  247. double time0 = -((double)clock()); // Start the timer
  248. double nrm2=0;
  249. double src2=0;
  250. int ret = 0;
  251. switch(test_type){
  252. case 0: //even
  253. inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
  254. invertQuda(out->V(), in->V(), &inv_param);
  255. time0 += clock();
  256. time0 /= CLOCKS_PER_SEC;
  257. #ifdef MULTI_GPU
  258. matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
  259. out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_EVEN_PARITY);
  260. #else
  261. matdagmat(ref->V(), qdp_fatlink, qdp_longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_EVEN_PARITY);
  262. #endif
  263. mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  264. nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  265. src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  266. break;
  267. case 1: //odd
  268. inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
  269. invertQuda(out->V(), in->V(), &inv_param);
  270. time0 += clock(); // stop the timer
  271. time0 /= CLOCKS_PER_SEC;
  272. #ifdef MULTI_GPU
  273. matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
  274. out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_ODD_PARITY);
  275. #else
  276. matdagmat(ref->V(), qdp_fatlink, qdp_longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_ODD_PARITY);
  277. #endif
  278. mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  279. nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  280. src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
  281. break;
  282. case 2: //full spinor
  283. errorQuda("full spinor not supported\n");
  284. break;
  285. case 3: //multi mass CG, even
  286. case 4:
  287. #define NUM_OFFSETS 12
  288. {
  289. double masses[NUM_OFFSETS] ={0.002, 0.0021, 0.0064, 0.070, 0.077, 0.081, 0.1, 0.11, 0.12, 0.13, 0.14, 0.205};
  290. inv_param.num_offset = NUM_OFFSETS;
  291. // these can be set independently
  292. for (int i=0; i<inv_param.num_offset; i++) {
  293. inv_param.tol_offset[i] = inv_param.tol;
  294. inv_param.tol_hq_offset[i] = inv_param.tol_hq;
  295. }
  296. void* outArray[NUM_OFFSETS];
  297. int len;
  298. cpuColorSpinorField* spinorOutArray[NUM_OFFSETS];
  299. spinorOutArray[0] = out;
  300. for(int i=1;i < inv_param.num_offset; i++){
  301. spinorOutArray[i] = new cpuColorSpinorField(csParam);
  302. }
  303. for(int i=0;i < inv_param.num_offset; i++){
  304. outArray[i] = spinorOutArray[i]->V();
  305. inv_param.offset[i] = 4*masses[i]*masses[i];
  306. }
  307. len=Vh;
  308. if (test_type == 3) {
  309. inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
  310. } else {
  311. inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
  312. }
  313. invertMultiShiftQuda(outArray, in->V(), &inv_param);
  314. cudaDeviceSynchronize();
  315. time0 += clock(); // stop the timer
  316. time0 /= CLOCKS_PER_SEC;
  317. printfQuda("done: total time = %g secs, compute time = %g, %i iter / %g secs = %g gflops\n",
  318. time0, inv_param.secs, inv_param.iter, inv_param.secs,
  319. inv_param.gflops/inv_param.secs);
  320. printfQuda("checking the solution\n");
  321. QudaParity parity = QUDA_INVALID_PARITY;
  322. if (inv_param.solve_type == QUDA_NORMOP_SOLVE){
  323. //parity = QUDA_EVENODD_PARITY;
  324. errorQuda("full parity not supported\n");
  325. }else if (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN){
  326. parity = QUDA_EVEN_PARITY;
  327. }else if (inv_param.matpc_type == QUDA_MATPC_ODD_ODD){
  328. parity = QUDA_ODD_PARITY;
  329. }else{
  330. errorQuda("ERROR: invalid spinor parity \n");
  331. exit(1);
  332. }
  333. for(int i=0;i < inv_param.num_offset;i++){
  334. printfQuda("%dth solution: mass=%f, ", i, masses[i]);
  335. #ifdef MULTI_GPU
  336. matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
  337. spinorOutArray[i], masses[i], 0, inv_param.cpu_prec,
  338. gaugeParam.cpu_prec, tmp, parity);
  339. #else
  340. matdagmat(ref->V(), qdp_fatlink, qdp_longlink, outArray[i], masses[i], 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), parity);
  341. #endif
  342. mxpy(in->V(), ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
  343. double nrm2 = norm_2(ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
  344. double src2 = norm_2(in->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
  345. double hqr = sqrt(HeavyQuarkResidualNormCpu(*spinorOutArray[i], *ref).z);
  346. double l2r = sqrt(nrm2/src2);
  347. printfQuda("Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
  348. i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r,
  349. inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i], hqr);
  350. //emperical, if the cpu residue is more than 1 order the target accuracy, the it fails to converge
  351. if (sqrt(nrm2/src2) > 10*inv_param.tol_offset[i]){
  352. ret |=1;
  353. }
  354. }
  355. for(int i=1; i < inv_param.num_offset;i++) delete spinorOutArray[i];
  356. }
  357. break;
  358. default:
  359. errorQuda("Unsupported test type");
  360. }//switch
  361. if (test_type <=2){
  362. double hqr = sqrt(HeavyQuarkResidualNormCpu(*out, *ref).z);
  363. double l2r = sqrt(nrm2/src2);
  364. printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
  365. inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq, hqr);
  366. printfQuda("done: total time = %g secs, compute time = %g secs, %i iter / %g secs = %g gflops, \n",
  367. time0, inv_param.secs, inv_param.iter, inv_param.secs,
  368. inv_param.gflops/inv_param.secs);
  369. }
  370. end();
  371. return ret;
  372. }
  373. static void
  374. end(void)
  375. {
  376. for(int i=0;i < 4;i++){
  377. free(qdp_fatlink[i]);
  378. free(qdp_longlink[i]);
  379. }
  380. free(fatlink);
  381. free(longlink);
  382. delete in;
  383. delete out;
  384. delete ref;
  385. delete tmp;
  386. if (cpuFat) delete cpuFat;
  387. if (cpuLong) delete cpuLong;
  388. endQuda();
  389. }
  390. void
  391. display_test_info()
  392. {
  393. printfQuda("running the following test:\n");
  394. printfQuda("prec sloppy_prec link_recon sloppy_link_recon test_type S_dimension T_dimension\n");
  395. printfQuda("%s %s %s %s %s %d/%d/%d %d \n",
  396. get_prec_str(prec),get_prec_str(prec_sloppy),
  397. get_recon_str(link_recon),
  398. get_recon_str(link_recon_sloppy), get_test_type(test_type), xdim, ydim, zdim, tdim);
  399. printfQuda("Grid partition info: X Y Z T\n");
  400. printfQuda(" %d %d %d %d\n",
  401. dimPartitioned(0),
  402. dimPartitioned(1),
  403. dimPartitioned(2),
  404. dimPartitioned(3));
  405. return ;
  406. }
  407. void
  408. usage_extra(char** argv )
  409. {
  410. printfQuda("Extra options:\n");
  411. printfQuda(" --tol <resid_tol> # Set residual tolerance\n");
  412. printfQuda(" --test <0/1> # Test method\n");
  413. printfQuda(" 0: Even even spinor CG inverter\n");
  414. printfQuda(" 1: Odd odd spinor CG inverter\n");
  415. printfQuda(" 3: Even even spinor multishift CG inverter\n");
  416. printfQuda(" 4: Odd odd spinor multishift CG inverter\n");
  417. printfQuda(" --cpu_prec <double/single/half> # Set CPU precision\n");
  418. return ;
  419. }
  420. int main(int argc, char** argv)
  421. {
  422. for (int i = 1; i < argc; i++) {
  423. if(process_command_line_option(argc, argv, &i) == 0){
  424. continue;
  425. }
  426. if( strcmp(argv[i], "--tol") == 0){
  427. float tmpf;
  428. if (i+1 >= argc){
  429. usage(argv);
  430. }
  431. sscanf(argv[i+1], "%f", &tmpf);
  432. if (tmpf <= 0){
  433. printf("ERROR: invalid tol(%f)\n", tmpf);
  434. usage(argv);
  435. }
  436. tol = tmpf;
  437. i++;
  438. continue;
  439. }
  440. if( strcmp(argv[i], "--cpu_prec") == 0){
  441. if (i+1 >= argc){
  442. usage(argv);
  443. }
  444. cpu_prec= get_prec(argv[i+1]);
  445. i++;
  446. continue;
  447. }
  448. printf("ERROR: Invalid option:%s\n", argv[i]);
  449. usage(argv);
  450. }
  451. if (prec_sloppy == QUDA_INVALID_PRECISION){
  452. prec_sloppy = prec;
  453. }
  454. if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID){
  455. link_recon_sloppy = link_recon;
  456. }
  457. // initialize QMP or MPI
  458. #if defined(QMP_COMMS)
  459. QMP_thread_level_t tl;
  460. QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
  461. #elif defined(MPI_COMMS)
  462. MPI_Init(&argc, &argv);
  463. #endif
  464. // call srand() with a rank-dependent seed
  465. initRand();
  466. display_test_info();
  467. int ret = invert_test();
  468. display_test_info();
  469. // finalize the communications layer
  470. #if defined(QMP_COMMS)
  471. QMP_finalize_msg_passing();
  472. #elif defined(MPI_COMMS)
  473. MPI_Finalize();
  474. #endif
  475. return ret;
  476. }