/PG5/newton/direct/direct.c

https://bitbucket.org/kohji/phantom-grape · C · 327 lines · 282 code · 35 blank · 10 comment · 34 complexity · 268f385f2729e322e241b8a0a7a2b5e8 MD5 · raw file

  1. #define ERRORTEST 0
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <math.h>
  5. #include <sys/resource.h>
  6. #include <sys/time.h>
  7. #ifdef ENABLE_OPENMP
  8. #include <omp.h>
  9. #endif
  10. #include "gp5util.h"
  11. #define NJMAX (JMEMSIZE)
  12. // #define NJMAX 65536
  13. #define rdtscll(val) do { \
  14. unsigned int a,d; \
  15. asm volatile("rdtsc" : "=a"(a), "=d"(d)); \
  16. (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
  17. } while(0)
  18. #if 1
  19. #define GIGAHELTZ 3.8
  20. double get_dtime(void){
  21. struct timeval tv;
  22. gettimeofday(&tv, NULL);
  23. return ((double)(tv.tv_sec) + (double)(tv.tv_usec) * 0.001 * 0.001);
  24. }
  25. #endif
  26. void
  27. get_cputime(double *laptime, double *sprittime);
  28. void
  29. readnbody(int *nj, double *mj, double (*xj)[3], double (*vj)[3], char *fname)
  30. {
  31. int i, dummy, fi;
  32. double dummyd;
  33. FILE *fp;
  34. fp = fopen(fname, "r");
  35. if (fp == NULL)
  36. {
  37. perror("readnbody");
  38. exit(1);
  39. }
  40. fi = fscanf(fp, "%d\n", nj);
  41. fi = fscanf(fp, "%d\n", &dummy);
  42. fi = fscanf(fp, "%lf\n", &dummyd);
  43. fi = fprintf(stderr, "nj: %d\n", *nj);
  44. for (i = 0; i < *nj; i++)
  45. {
  46. fi = fscanf(fp, "%lf\n", mj+i);
  47. }
  48. for (i = 0; i < *nj; i++)
  49. {
  50. fi = fscanf(fp, "%lf %lf %lf\n",
  51. xj[i]+0, xj[i]+1, xj[i]+2);
  52. }
  53. for (i = 0; i < *nj; i++)
  54. {
  55. fi = fscanf(fp, "%lf %lf %lf\n",
  56. vj[i]+0, vj[i]+1, vj[i]+2);
  57. }
  58. }
  59. void
  60. writenbody(int nj, double *mj, double (*xj)[3], double (*vj)[3], char *fname)
  61. {
  62. int i;
  63. FILE *fp;
  64. fp = fopen(fname, "w");
  65. fprintf(fp, "%d\n", nj);
  66. fprintf(fp, "%d\n", 3);
  67. fprintf(fp, "%e\n", 0.0);
  68. for (i = 0; i < nj; i++)
  69. {
  70. fprintf(fp, "%e\n", mj[i]);
  71. }
  72. for (i = 0; i < nj; i++)
  73. {
  74. fprintf(fp, "%e %e %e\n",
  75. xj[i][0], xj[i][1], xj[i][2]);
  76. }
  77. for (i = 0; i < nj; i++)
  78. {
  79. fprintf(fp, "%e %e %e\n",
  80. vj[i][0], vj[i][1], vj[i][2]);
  81. }
  82. }
  83. #ifdef SYMMETRIC
  84. void
  85. calc_gravity0(double *mj, double (*xj)[3], double (*vj)[3],
  86. double *epsj2, double (*a)[3], double *p, int nj)
  87. {
  88. double epsinv;
  89. int i;
  90. double cycle;
  91. g5_set_xmj(0, nj, xj, mj, epsj2);
  92. g5_set_n(nj);
  93. double st1 = get_dtime();
  94. g5_calculate_force_on_xe(xj, epsj2, a, p, nj);
  95. double st2 = get_dtime();
  96. cycle = (double)(st2 - st1) * GIGAHELTZ * 1e9 / ((double)nj*(double)nj/4);
  97. #ifdef ENABLE_OPENMP
  98. #pragma omp parallel
  99. #if 1
  100. {
  101. if(omp_get_thread_num() == 0) cycle *= omp_get_num_threads();
  102. }
  103. #else
  104. cycle *= omp_get_num_threads();
  105. #endif
  106. #endif
  107. printf("gravity %f cycle per loop\n", cycle);
  108. for (i = 0; i < nj; i++)
  109. {
  110. p[i] = -p[i];
  111. if (epsj2[i] != 0.0)
  112. {
  113. epsinv = 1.0 / (sqrt(2.0) * sqrt(epsj2[i]));
  114. p[i] = p[i] + mj[i] * epsinv;
  115. }
  116. }
  117. }
  118. #else
  119. void
  120. calc_gravity(double *mj, double (*xj)[3], double (*vj)[3],
  121. double eps, double (*a)[3], double *p, int nj)
  122. {
  123. double epsinv;
  124. int i;
  125. double cycle;
  126. // g5_set_xj(0, nj, xj);
  127. // g5_set_mj(0, nj, mj);
  128. g5_set_xmj(0, nj, xj, mj);
  129. g5_set_eps_to_all(eps);
  130. g5_set_n(nj);
  131. double st1 = get_dtime();
  132. g5_calculate_force_on_x(xj, a, p, nj);
  133. double st2 = get_dtime();
  134. cycle = (double)(st2 - st1) * GIGAHELTZ * 1e9 / ((double)nj*(double)nj/4);
  135. #ifdef ENABLE_OPENMP
  136. #pragma omp parallel
  137. #if 1
  138. {
  139. if(omp_get_thread_num() == 0) cycle *= omp_get_num_threads();
  140. }
  141. #else
  142. cycle *= omp_get_num_threads();
  143. #endif
  144. #endif
  145. printf("gravity %f cycle per loop\n", cycle);
  146. for (i = 0; i < nj; i++)
  147. {
  148. p[i] = -p[i];
  149. }
  150. if (eps != 0.0)
  151. {
  152. epsinv = 1.0/eps;
  153. for (i = 0; i < nj; i++)
  154. {
  155. p[i] = p[i] + mj[i] * epsinv;
  156. }
  157. }
  158. }
  159. #endif
  160. void
  161. push_velocity(double (*vj)[3], double (*a)[3], double dt, int nj)
  162. {
  163. int j, k;
  164. for (j = 0; j < nj; j++)
  165. {
  166. for (k = 0; k < 3; k++)
  167. {
  168. vj[j][k] += dt * a[j][k];
  169. }
  170. }
  171. }
  172. void
  173. push_position(double (*xj)[3], double (*vj)[3], double (*a)[3],
  174. double dt, int nj)
  175. {
  176. int j, k;
  177. for (j = 0; j < nj; j++)
  178. {
  179. for (k = 0; k < 3; k++)
  180. {
  181. xj[j][k] += dt * vj[j][k];
  182. }
  183. }
  184. }
  185. void
  186. energy(double *mj, double (*vj)[3], double *p, int nj, double *ke, double *pe)
  187. {
  188. int i, k;
  189. *pe = 0;
  190. *ke = 0;
  191. for (i = 0; i < nj; i++)
  192. {
  193. *pe += mj[i] * p[i];
  194. for (k = 0; k < 3; k++)
  195. {
  196. *ke += 0.5 * mj[i] * vj[i][k] * vj[i][k];
  197. }
  198. }
  199. *pe /= 2.0;
  200. }
  201. int
  202. main(int argc, char **argv)
  203. {
  204. static double mj[NJMAX], xj[NJMAX][3], vj[NJMAX][3], epsj2[NJMAX];
  205. static double a[NJMAX][3], p[NJMAX];
  206. double time;
  207. // double eps, dt, endt;
  208. double dt, endt;
  209. double e, e0, ke, pe;
  210. double LapTime, SpritTime, IntPerSec, Gflops;
  211. int nj;
  212. int nstep, step;
  213. dt = 0.01;
  214. endt = 10.0;
  215. time = 0.0;
  216. nstep = endt/dt;
  217. if (argc < 3)
  218. {
  219. fprintf(stderr, "usage: %s <infile> <outfile>\n", argv[0]);
  220. exit(1);
  221. }
  222. readnbody(&nj, mj, xj, vj, argv[1]);
  223. #if ERRORTEST == 1
  224. double eps;
  225. eps = 4.0 / (double)nj;
  226. #else
  227. #ifdef SYMMETRIC
  228. int i;
  229. for(i = 0; i < nj; i++)
  230. epsj2[i] = (0.01 + 0.01 * (double)i / (double)nj) * (0.01 + 0.01 * (double)i / (double)nj);
  231. // mj[1021] = 1.0;
  232. // mj[1022] = 1.0;
  233. // mj[1023] = 1.0;
  234. #else
  235. double eps;
  236. eps = 0.02;
  237. #endif
  238. #endif
  239. g5_open();
  240. #ifdef SYMMETRIC
  241. calc_gravity0(mj, xj, vj, epsj2, a, p, nj);
  242. #else
  243. calc_gravity(mj, xj, vj, eps, a, p, nj);
  244. #endif
  245. energy(mj, vj, p, nj, &ke, &pe);
  246. e0 = ke+pe;
  247. #if ERRORTEST == 1
  248. int i;
  249. char out[1024];
  250. FILE *fp;
  251. sprintf(out, "pl%03dk_eps4n_avx.ap", nj / 1024);
  252. fp = fopen(out, "w");
  253. for(i = 0; i < nj; i++)
  254. fprintf(fp, "%5d %+.16e %+.16e\n", i, sqrt(a[i][0]*a[i][0]+a[i][1]*a[i][1]+a[i][2]*a[i][2]), p[i]);
  255. fclose(fp);
  256. exit(0);
  257. #endif
  258. // TimeStart = (double)clock() / CLOCKS_PER_SEC;
  259. get_cputime(&LapTime, &SpritTime);
  260. for (step = 1; step < nstep; step++)
  261. {
  262. push_velocity(vj, a, 0.5*dt, nj);
  263. push_position(xj, vj, a, dt, nj);
  264. time = time + dt;
  265. #ifdef SYMMETRIC
  266. calc_gravity0(mj, xj, vj, epsj2, a, p, nj);
  267. #else
  268. calc_gravity(mj, xj, vj, eps, a, p, nj);
  269. #endif
  270. push_velocity(vj, a, 0.5*dt, nj);
  271. #ifdef ANIM
  272. plot_star(xj, nj, time, 0.3, mj, mj[0]);
  273. #endif /* ANIM */
  274. if (step % (nstep/10) == 0) {
  275. energy(mj, vj, p, nj, &ke, &pe);
  276. e = ke+pe;
  277. // TimeEnd = (double)clock() / CLOCKS_PER_SEC;
  278. get_cputime(&LapTime, &SpritTime);
  279. IntPerSec = ((double)nj * (double)nj * (long)(nstep/10)) / LapTime;
  280. Gflops = IntPerSec * 38. * 1.e-9;
  281. printf("step: %d time: %e\n", step, time);
  282. printf("e: %e de: %e\n", e, e-e0);
  283. printf("ke: %e pe: %e\n", ke, pe);
  284. printf("ke/pe: %e\n\n", ke/pe);
  285. printf("%e interaction per sec, %f Gflops \n", IntPerSec, Gflops);
  286. // TimeStart = TimeEnd;
  287. }
  288. }
  289. g5_close();
  290. writenbody(nj, mj, xj, vj, argv[2]);
  291. return 0;
  292. }