PageRenderTime 35ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/src/temp/lift_w_bitshift.c

https://github.com/tdfischer/fourier
C | 407 lines | 315 code | 55 blank | 37 comment | 47 complexity | 30341c57c7155fa33f4f2cf5eb334412 MD5 | raw file
  1. #include<stdio.h>
  2. #include<stdlib.h>
  3. #include<math.h>
  4. #include<mpi.h>
  5. #define size 32
  6. #define b 4
  7. double temp1[size];
  8. double sum2[size];
  9. double pout[size];
  10. double poutb[size];
  11. double loc1[size];
  12. double sinTable[(size >> 3) + (size >> 2)];
  13. double cosTable[(size >> 3) + (size >> 2)];
  14. double rTable[(size >> 3) + (size >> 2)];
  15. void buildTables() {
  16. double angle;
  17. int i;
  18. //Generate x/(size*2) angles
  19. for(i = 0;i < size >> 3;i++) {
  20. angle = (M_PI*((i << 2)+1))/(size << 1);
  21. sinTable[i] = sin(angle);
  22. cosTable[i] = cos(angle);
  23. rTable[i] = (cosTable[i] - 1) / sinTable[i];
  24. }
  25. //Generate x/size angles
  26. for (i = 0;i<size >> 2;i++) {
  27. angle = (M_PI*((i << 2)+1)) / size;
  28. sinTable[i + size >> 3] = sin(angle);
  29. cosTable[i + size >> 3] = cos(angle);
  30. rTable[i] = (cosTable[i] - 1) / sinTable[i];
  31. }
  32. }
  33. double getSin(int numerator, int denominator) {
  34. //For x/8 and x/16, the only values ever passed in are
  35. //1/8, 1/16, and 5/16.
  36. switch(denominator) {
  37. case 8:
  38. return 0.382683424; //sin(pi/8)
  39. case 16:
  40. return (numerator == 1) ? 0.195090322 : 0.8314696123; //sin(pi/16), sin(5pi/16)
  41. case size:
  42. return sinTable[(numerator - 1) >> 2];
  43. case size << 1:
  44. return sinTable[((numerator -1) >> 2) + (size >> 3)];
  45. }
  46. }
  47. double getCos(int numerator, int denominator) {
  48. //For x/8 and x/16, the only values ever passed in are
  49. //1/8, 1/16, and 5/16.
  50. switch(denominator) {
  51. case 8:
  52. return 0.9238795325; //cos(pi/8)
  53. case 16:
  54. return (numerator == 1) ? 0.9807852804 : 0.555570233; //cos(pi/16), cos(5pi/16)
  55. case size:
  56. return cosTable[(numerator - 1) >> 2];
  57. case size << 1:
  58. return cosTable[((numerator -1) >> 2) + (size >> 3)];
  59. }
  60. }
  61. double getR(int numerator, int denominator) {
  62. //For x/8 and x/16, the only values ever passed in are
  63. //1/8, 1/16, and 5/16.
  64. switch(denominator) {
  65. case 8:
  66. return -0.1989123674; //angle == pi/8
  67. case 16:
  68. return (numerator == 1) ? -0.0984914034 : -0.534511136; //angle == pi/16, angle == 5pi/16)
  69. case size:
  70. return rTable[(numerator - 1) >> 2];
  71. case size << 1:
  72. return rTable[((numerator -1) >> 2) + (size >> 3)];
  73. }
  74. }
  75. main ( int argc, char *argv[] )
  76. {
  77. int L, m2, myrank, pcounter,hin, h5, h6, h7, h16, h8, J1, liftL, mrs, tag =
  78. 1234, j4r[size],count, h1, h3, h4, numprocs, n = 0, i, j, cj, m, k = 1;
  79. double a1, a2, cf, sf, rf, ak, h2[10];
  80. MPI_Status status;
  81. double angnum, ang, mult, C[size], S[size], temp[size], temprs[size],
  82. v[size], S2[size], Rnew[size], Snew[size], bm, cm, p1[size], S4[size],
  83. R4[size], R2[size], temp2rs[size], p2[size], mytime, pair1[size],
  84. pair2[size], R[size], xtemp[size], Rold[size], temp2[size], Sold[size],
  85. temp3[size], temp4[size];
  86. double x[size], x1[size], c, s, r, s1, r1, p1a[size], p2a[size], p[size],
  87. xtemp1[size],fx1[size],fp[size];
  88. MPI_Init ( &argc, &argv );
  89. mytime = MPI_Wtime ();
  90. MPI_Comm_rank ( MPI_COMM_WORLD, &myrank );
  91. MPI_Comm_size ( MPI_COMM_WORLD, &numprocs );
  92. if ( myrank == 0 ) { //procs 0 starts in here
  93. //Input values
  94. do { //taking 8 values in x array
  95. x[n] = n + 1;
  96. n++;
  97. } while ( n < size );
  98. //Build cosine, sine, R table
  99. for ( j = 1; j <= ( n >> 2 ); j++ ) {
  100. angnum = (( j - 1 ) << 2) + 1; //angnum = 4 * ( j - 1 ) + 1;
  101. mult = M_PI / ( n << 2 );
  102. ang = angnum * mult;
  103. C[j] = cos ( ang );
  104. S[j] = sin ( ang );
  105. R[j] = ( C[j] - 1 ) / S[j];
  106. //printf("s value is %lf,R %lf\n", S[j],R[j]);
  107. }
  108. for ( j = 1; j <= ( n >> 2 ); j++ ) {
  109. cj = 2 * ( j - 1 );
  110. m = n - 1;
  111. //Tournament sort
  112. //0,7,4,3 in the first iteration and 2 5 6 1 in the second iteration
  113. pair1[0] = x[cj];
  114. pair1[1] = x[ ( m - cj ) ];
  115. pair2[0] = x[ ( cj + ( n >> 1 ) ) ]; //xtemp(3:4)
  116. pair2[1] = x[ ( m - ( cj + ( n >> 1 ) ) ) ];
  117. lift ( pair1, S[j], R[j] );
  118. v[0] = pout[0];
  119. v[1] = pout[1];
  120. sumdiff ( pair2, b >> 1 );
  121. pair2[0] = temp1[0] * M_SQRT2;
  122. pair2[1] = ( -1 * temp1[1] ) * M_SQRT2;
  123. lift ( pair2, S[j], R[j] );
  124. v[2] = pout[0];
  125. v[3] = pout[1];
  126. sumdiff ( v, b );
  127. p1[ ( 2 * j ) - 2] = temp1[0];
  128. p1[ ( 2 * j ) - 1] = temp1[1];
  129. p2[ ( 2 * j ) - 2] = temp1[2];
  130. p2[ ( 2 * j ) - 1] = temp1[3];
  131. }
  132. //p1 and p2 are now two sets of points, one for each processor
  133. m = size >> 1;
  134. J1 = 3;
  135. mrs = 4;
  136. //Computes the next set of angles
  137. ///TODO: Move to lookup table
  138. for ( i = 0; i < mrs; i++ ) {
  139. double tempR = 1 + ( R[i + 1] * S[i + 1] );
  140. double s = 2 * S[i + 1] * tempR;
  141. tempR = ( -1 * S[i + 1] ) / tempR;
  142. tempR = 1 + ( tempR * s );
  143. S[i] = 2 * s * tempR;
  144. R[i] = ( -1 * s ) / tempR;
  145. }
  146. //This only executes twice
  147. pcounter = 1;
  148. //Loop one is first processor, two is the other
  149. while ( pcounter <= 2 ) {
  150. if ( pcounter == 1 ) {
  151. //Copy in first set of points
  152. for ( i = 0; i < n >> 1; i++ ) {
  153. p[i] = p1[i];
  154. }
  155. } else {
  156. m = size >> 1;
  157. //do magic on second set of points
  158. lift90sr ( p2, S4, R4, m );
  159. //Copy return values into first half of p[]
  160. for ( i = 0; i < n >> 1; i++ ) {
  161. p[i] = poutb[i];
  162. }
  163. }
  164. //Each processor rejoins here.
  165. liftL = 4;
  166. for ( i = 0; i < liftL; i++ ) {
  167. Rold[i] = R[i];
  168. Sold[i] = S[i];
  169. }
  170. //Also only executed twice
  171. for ( j = 1; j <= 2; j++ ) {
  172. //1 2 3
  173. //4 5 6 on second loop
  174. m2 = pow ( 2, ( j - 1 ) );
  175. L = 16 / m2;
  176. ///TODO: Use lookup table instead
  177. for ( i = 0; i < liftL; i++ ) {
  178. Rold[i] = -1 / (1 + Rold[i]);
  179. Sold[i] = 2 * Sold[i] * (Sold[i] * Rold[i] + 1); //Double angle
  180. }
  181. //Splittin up the work happens in here
  182. for ( k = 1; k <= m2; k++ ) {
  183. h1 = L * ( k - 1 );
  184. h3 = ( L * k ) - 1;
  185. h6 = L * ( k - 1 );
  186. h16 = 0;
  187. for ( h1; h1 <= h3; h1++ ) {
  188. temp[h1] = p[h1];
  189. h16++;
  190. }
  191. sumdiff2 ( temp, h6, h3 + 1 );
  192. for ( i = 0; i < h16; i++ ) {
  193. temp2[i] = sum2[i];
  194. }
  195. h4 = L >> 1;
  196. for ( i = 0, h4; h4 < L; h4++, i++ ) {
  197. temp3[i] = temp2[h4]; //8-1a5
  198. }
  199. lift90sr ( temp3, Sold, Rold, i + 1 );
  200. for ( i = 0, h5 = L >> 1; h5 < L; i++, h5++ ) {
  201. temp2[h5] = poutb[i];
  202. }
  203. h7 = L * ( k - 1 );
  204. h8 = ( L * k ) - 1;
  205. for ( h7, i = 0; h7 <= h8; i++, h7++ ) {
  206. p[h7] = temp2[i];
  207. }
  208. } //kloop ends here
  209. liftL = liftL >> 1;
  210. } //end of for loop....
  211. // printf ("\n****************\n");
  212. // for (i = 0; i < 16; i++)
  213. // printf ("hi %lf\n", p[i]);
  214. j = J1;
  215. m2 = m >> 2;
  216. L = 4;
  217. for ( k = 1; k <= m2; k++ ) {
  218. h1 = L * ( k - 1 );
  219. h3 = ( L * k ) - 1;
  220. h6 = L * ( k - 1 );
  221. h16 = 0;
  222. for ( h1; h1 <= h3; h1++ ) {
  223. temp[h1] = p[h1];
  224. h16++;
  225. // printf("temp is here%lf\n",temp[h1]);
  226. }
  227. sumdiff2 ( temp, h6, h3 + 1 );
  228. for ( i = 0; i < h16; i++ ) {
  229. temp2[i] = sum2[i];
  230. //printf("Temp2 is here %lf\n",temp2[i]);
  231. }
  232. h4 = L >> 1;
  233. for ( i = 0, h4; h4 < L; h4++, i++ ) {
  234. temp3[i] = temp2[h4]; //8-1a5
  235. }
  236. sumdiff2 ( temp3,0,i );
  237. for ( i = 0, h5 = L >> 1; h5 < L; i++, h5++ ) {
  238. temp2[h5] = sum2[i]*M_SQRT2;
  239. // printf("sumdiff result is %lf\n",temp2[h5]);
  240. }
  241. h7 = L * ( k - 1 );
  242. h8 = ( L * k ) - 1;
  243. for ( h7, i = 0; h7 <= h8; i++, h7++ ) {
  244. p[h7] = temp2[i];
  245. }
  246. // printf("\n*****************\n");
  247. locations ( n,pcounter );
  248. j4r[k-1] = loc1[k];
  249. //Final result vector
  250. fx1[ j4r[k-1] ] = temp2[0];
  251. fx1[ ( n-1 )-j4r[k-1] ] = temp2[1];
  252. fx1[ ( n >> 1)-1-j4r[k-1] ] = temp2[2];
  253. fx1[ j4r[k-1]+ ( n >> 1 ) ] = temp2[3];
  254. }//k ends here
  255. pcounter++; //have to end after the program
  256. }
  257. mytime = MPI_Wtime () - mytime;
  258. for ( i=0;i<32;i++ )
  259. printf ( "fx is %lf\n",fx1[i] );
  260. mytime = mytime * 1000000;
  261. printf ( "Timing from rank %d is %lfus.\n", myrank, mytime );
  262. } //end of rank 0
  263. MPI_Finalize ();
  264. return 0;
  265. }
  266. //Descrambles after the tournament sort and DCT
  267. //The "unsort"
  268. locations ( int n1,int pcounter )
  269. {
  270. int dc =4;
  271. int mc = n1 >> 4;
  272. int noterms = 1;
  273. int fcount = n1 >> 3;
  274. int nparts = n1 >> 3;
  275. double Js = log ( nparts ) /log ( 2 );
  276. int j1,nj,j2,nj2;
  277. loc1[1]= pcounter-1;
  278. for ( j1=1;j1<=Js;j1++ ) {
  279. for ( j2=1;j2<=noterms;j2++ ) {
  280. nj = mc + 1 + ( (( j2-1 ) << 1) * mc );
  281. nj2= loc1[1 + (( j2-1 ) << 1) * mc];
  282. loc1[nj] = ( dc-1 )-nj2;
  283. }
  284. dc = dc << 1;
  285. mc = mc >> 1;
  286. noterms= noterms << 1;
  287. fcount = fcount >> 1;
  288. }
  289. }
  290. lift ( double x[size], double s, double R )
  291. {
  292. double L1;
  293. L1 = x[0] - R * x[1];
  294. pout[1] = s * L1 - x[1];
  295. pout[0] = L1 + R * pout[1];
  296. }
  297. lift90sr ( double pin[size], double Sm[size], double Rm[size], int m )
  298. {
  299. int j, mj, i, N;
  300. double s[size], kemp[size], r[size], pout1[size], pout2[size], temp[size];
  301. for ( j = 1; j <= m >> 2; j++ ) {
  302. s[j - 1] = Sm[j - 1];
  303. r[j - 1] = Rm[j - 1];
  304. mj = (j << 1) - 1;
  305. kemp[0] = pin[mj - 1];
  306. kemp[1] = pin[mj];
  307. lift ( kemp, s[j - 1], r[j - 1] );
  308. pout1[mj - 1] = pout[0];
  309. pout1[mj] = pout[1];
  310. temp[0] = pin[ ( m >> 1 ) + mj - 1];
  311. temp[1] = pin[ ( m >> 1 ) + mj];
  312. lift ( temp, s[j - 1], r[j - 1] );
  313. pout2[mj - 1] = ( -1 ) * pout[1];
  314. pout2[mj] = pout[0];
  315. }
  316. for ( i = 0; i < m >> 1; i++ )
  317. poutb[i] = pout1[i];
  318. for ( i = m >> 1, j = 0; i < m; i++, j++ )
  319. poutb[i] = pout2[j];
  320. }
  321. sumdiff2 ( double xy3[size], int r1, int r2 )
  322. {
  323. int f1, r, i, j, h, k, l;
  324. double x1[size], x2[size], xy2[size];
  325. r = r2 - r1;
  326. f1 = ( r >> 1 );
  327. for ( i = r1, k = 0; i < r2; i++, k++ )
  328. xy2[k] = xy3[i];
  329. for ( i = 0, k = 0; i < f1; i++, k++ )
  330. x1[k] = xy2[i];
  331. for ( h = 0, j = f1; j < r; j++, h++ )
  332. x2[h] = xy2[j];
  333. for ( k = 0; k < f1; k++ )
  334. sum2[k] = x1[k] + x2[k];
  335. for ( l = f1, k = 0; l < r; l++, k++ )
  336. sum2[l] = x1[k] - x2[k];
  337. }
  338. sumdiff ( double xy[size], int r )
  339. {
  340. int f1, f2, i, j, h, k, l;
  341. double x1[size], x2[size];
  342. f1 = ( r >> 1 );
  343. f2 = ( r >> 1 ) + 1;
  344. for ( i = 0; i < f1; i++ )
  345. x1[i] = xy[i];
  346. for ( h = 0, j = f1; j < r; j++, h++ )
  347. x2[h] = xy[j];
  348. for ( k = 0; k < f1; k++ )
  349. temp1[k] = x1[k] + x2[k];
  350. for ( l = f1, k = 0; l < r; l++, k++ )
  351. temp1[l] = x1[k] - x2[k];
  352. }