/test_program/matrix_ad_vs_adolc_vs_tapenade.cpp

http://github.com/b45ch1/hpsc_hanoi_2009_walter · C++ · 552 lines · 360 code · 83 blank · 109 comment · 52 complexity · b7ed7e7809a790309718e96aef11bf82 MD5 · raw file

  1. #include <iostream>
  2. #include <fstream>
  3. // for memory consumption
  4. #include <sys/types.h>
  5. #include <unistd.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include "adolc/adolc.h"
  9. #include "dense_inverse.hpp"
  10. #include "dot.hpp"
  11. extern "C"{
  12. #include "clapack.h"
  13. #include "cblas.h"
  14. #include "dense_inverse.h"
  15. #include "inv_b.h"
  16. #include "utpm.h"
  17. }
  18. #include <sys/time.h>
  19. struct timeval tv;
  20. int mtime(void)
  21. {
  22. gettimeofday(&tv,NULL);
  23. return (int)(tv.tv_sec*1000 + (tv.tv_usec / 1000));
  24. }
  25. using namespace std;
  26. /** computes y = trace(A) */
  27. template <class Tdouble>
  28. int trace(Tdouble *A, Tdouble *Ainv, Tdouble *R, Tdouble *y, int N){
  29. *y = 0;
  30. inv(A, Ainv, R, N);
  31. for(int n = 0; n < N; ++n){
  32. (*y) += Ainv[id(n,n,N)];
  33. }
  34. }
  35. int main(int argc, char* argv[]){
  36. int N = 100;
  37. if( argc >= 2){
  38. int tmp = atoi(argv[1]);
  39. if(tmp <= 300){
  40. N = tmp;
  41. }
  42. }
  43. cout<<endl<<endl;
  44. cout<<"================================================"<<endl;
  45. cout<<" RUNNING TESTS "<<endl;
  46. cout<<"================================================"<<endl;
  47. cout<<endl<<endl;
  48. cout<<"Setting N = "<<N<<endl;
  49. cout<<"PID of this process = "<< getpid() <<endl;
  50. char mycmd[255];
  51. snprintf (mycmd, (size_t)255, "echo [N=%d]>> mem_consumption.txt", N);
  52. system(mycmd);
  53. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  54. system(mycmd);
  55. /* write output to runtimes.txt */
  56. ofstream runtimes_file;
  57. runtimes_file.open("runtimes.txt", fstream::in | fstream::out | fstream::app);
  58. runtimes_file<<N<<"\t";
  59. double *A = new double[N*N];
  60. double *Ainv = new double[N*N];
  61. double *Ainv2 = new double[N*N];
  62. double *QT = new double[N*N];
  63. double *Q = new double[N*N];
  64. double *B = new double[N*N];
  65. double *R = new double[N*N];
  66. double *Id = new double[N*N];
  67. double *Abar = new double[N*N];
  68. double *WORK = new double[N*N];
  69. int start_time,end_time;
  70. /* fill A with random numbers */
  71. for(int n = 0; n!=N; ++n){
  72. for(int m = 0; m!=N; ++m){
  73. A[id(n,m,N)] = rand()/100000000000.;
  74. Ainv[id(n,m,N)] = A[id(n,m,N)];
  75. }
  76. }
  77. /*
  78. RUNNING TESTS:
  79. ==============
  80. The tests compare:
  81. * Lapack VS my implementation to compute VS taped version of my implementation to compute A^-1
  82. * It turns out that my implementation is a less than a factor 2 slower than LAPACK.
  83. * The taped function evaluation is much slower though.
  84. * Compare the perfomance to compute the gradient:
  85. 1) Matrix AD
  86. 2) ADOL-C
  87. 3) TAPENADE
  88. * Check the obtained results for consistency, i.e. check that there are no bugs in the code!
  89. */
  90. cout<<endl<<endl;
  91. cout<<"================================================"<<endl;
  92. cout<<" TESTING FUNCTION EVALUATION "<<endl;
  93. cout<<"================================================"<<endl;
  94. cout<<endl<<endl;
  95. /* TIMING LAPACK IMPLEMENTATION INVERSE COMPUTATION */
  96. /* =============================================== */
  97. /* compute the inverse by combination of dgetrf and dgetri */
  98. int *ipiv = new int[N]; int info; int LWORK = N*N;
  99. start_time = mtime();
  100. clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv);
  101. // cout<<"info="<<info<<endl;
  102. clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv);
  103. // cout<<"info="<<info<<endl;
  104. end_time = mtime();
  105. printf("normal LAPACK function evaluation of inv needs %d ms.\n",(end_time-start_time));
  106. runtimes_file<<end_time-start_time<<"\t";
  107. /* check that dot(A,Ainv) = I */
  108. double one = 1;
  109. double zero = 0;
  110. char notrans = 'n';
  111. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  112. CblasNoTrans, N, N, N, 1., A,
  113. N, Ainv, N,
  114. 0., Id, N);
  115. {
  116. double sum = 0;
  117. for (int n = 0; n != N; ++n){
  118. for (int m = 0; m != N; ++m){
  119. sum += Id[id(n,m,N)];
  120. }
  121. }
  122. cout<<sum<<endl;
  123. cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
  124. }
  125. /* TIMING MY C-IMPLEMENTATION INVERSE COMPUTATION */
  126. /* ============================================== */
  127. start_time = mtime();
  128. inv(A, Ainv, R, N);
  129. end_time = mtime();
  130. printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time));
  131. runtimes_file<<end_time-start_time<<"\t";
  132. /* check that dot(A,Ainv) = I */
  133. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  134. CblasNoTrans, N, N,
  135. N, 1., A,
  136. N, Ainv, N,
  137. 0., Id, N);
  138. {
  139. double sum = 0;
  140. for (int n = 0; n != N; ++n){
  141. for (int m = 0; m != N; ++m){
  142. sum += Id[id(n,m,N)];
  143. }
  144. }
  145. cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
  146. }
  147. /* TIMING MY F77-IMPLEMENTATION INVERSE COMPUTATION */
  148. /* ================================================ */
  149. start_time = mtime();
  150. {
  151. int Ntmp = N;
  152. inv(A,Ainv,R, Ntmp);
  153. }
  154. end_time = mtime();
  155. printf("normal selfmade function evaluation in F77 of inv needs %d ms.\n",(end_time-start_time));
  156. runtimes_file<<end_time-start_time<<"\t";
  157. /* check that dot(A,Ainv) = I */
  158. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  159. CblasNoTrans, N, N,
  160. N, 1., A,
  161. N, Ainv, N,
  162. 0., Id, N);
  163. {
  164. double sum = 0;
  165. for (int n = 0; n != N; ++n){
  166. for (int m = 0; m != N; ++m){
  167. sum += Id[id(n,m,N)];
  168. }
  169. }
  170. cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
  171. }
  172. /* TIMING MY C++-IMPLEMENTATION INVERSE COMPUTATION */
  173. /* ================================================ */
  174. int Ntmp = N;
  175. start_time = mtime();
  176. inv(A,Ainv,R, N);
  177. end_time = mtime();
  178. printf("normal selfmade function evaluation in C of inv needs %d ms.\n",(end_time-start_time));
  179. runtimes_file<<end_time-start_time<<"\t";
  180. /* check that dot(A,Ainv) = I */
  181. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  182. CblasNoTrans, N, N,
  183. N, 1., A,
  184. N, Ainv, N,
  185. 0., Id, N);
  186. {
  187. double sum = 0;
  188. for (int n = 0; n != N; ++n){
  189. for (int m = 0; m != N; ++m){
  190. sum += Id[id(n,m,N)];
  191. }
  192. }
  193. cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
  194. }
  195. /* taping inv */
  196. cout<<"start tracing my implementation of inv with ADOL-C"<<endl;
  197. adouble *aA = new adouble[N*N];
  198. adouble *aR = new adouble[N*N];
  199. adouble *aAinv = new adouble[N*N];
  200. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  201. system(mycmd);
  202. start_time = mtime();
  203. trace_on(0);
  204. for(int n = 0; n!=N; ++n){
  205. for(int m = 0; m!=N; ++m){
  206. aA[id(n,m,N)]<<= A[id(n,m,N)];
  207. }
  208. }
  209. inv(aA, aAinv, aR, N);
  210. for(int n = 0; n!=N; ++n){
  211. for(int m = 0; m!=N; ++m){
  212. aAinv[id(n,m,N)]>>= Ainv[id(n,m,N)];
  213. }
  214. }
  215. trace_off();
  216. end_time = mtime();
  217. printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time));
  218. runtimes_file<<end_time-start_time<<"\t";
  219. cout<<"finished tracing"<<endl;
  220. /* ================================ */
  221. /* TIMING TAPED INVERSE COMPUTATION */
  222. /* ================================ */
  223. start_time = mtime();
  224. function(0,N*N,N*N,A, Ainv);
  225. end_time = mtime();
  226. printf("taped function evaluation of inv needs %d ms.\n",(end_time-start_time));
  227. runtimes_file<<end_time-start_time<<"\t";
  228. /* check that dot(A,Ainv) = I */
  229. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  230. CblasNoTrans, N, N,
  231. N, 1., A,
  232. N, Ainv, N,
  233. 0., Id, N);
  234. {
  235. double sum = 0;
  236. for (int n = 0; n != N; ++n){
  237. for (int m = 0; m != N; ++m){
  238. sum += Id[id(n,m,N)];
  239. }
  240. }
  241. cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
  242. }
  243. // /* TIMING MY C++-IMPLEMENTATION f COMPUTATION */
  244. // /* ======================================== */
  245. // start_time = mtime();
  246. // f(A,Ainv);
  247. // end_time = mtime();
  248. // printf("normal function evaluation of f needs %d ms.\n",(end_time-start_time));
  249. // runtimes_file<<end_time-start_time<<"\t";
  250. // /* TIMING TAPED f COMPUTATION */
  251. // /* ========================== */
  252. // start_time = mtime();
  253. // function(1,1,N*N,A.data, &y);
  254. // end_time = mtime();
  255. // printf("taped function evaluation of f needs %d ms.\n",(end_time-start_time));
  256. // runtimes_file<<end_time-start_time<<"\t";
  257. cout<<endl<<endl;
  258. cout<<"================================================"<<endl;
  259. cout<<" TESTING GRADIENT EVALUATION "<<endl;
  260. cout<<"================================================"<<endl;
  261. cout<<endl<<endl;
  262. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  263. system(mycmd);
  264. /* TIMING TAPED JACOBIAN COMPUTATION OF INV */
  265. if(N<=30){
  266. double **J;
  267. J=myalloc2(N*N,N*N);
  268. start_time = mtime();
  269. jacobian(0,N*N,N*N,A, J);
  270. end_time = mtime();
  271. printf("taped jacobian of inv needs %d ms.\n",(end_time-start_time));
  272. }
  273. /* taping inv */
  274. cout<<"start tracing my implementation of trace(inv(.)) with ADOL-C"<<endl;
  275. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  276. system(mycmd);
  277. double y;
  278. trace_on(1);
  279. for(int n = 0; n!=N; ++n){
  280. for(int m = 0; m!=N; ++m){
  281. aA[id(n,m,N)]<<= A[id(n,m,N)];
  282. }
  283. }
  284. adouble ay;
  285. trace(aA, aAinv, aR, &ay, N);
  286. ay >>= y;
  287. trace_off();
  288. cout<<"finished tracing"<<endl;
  289. /* ====================================== */
  290. /* TIMING TAPED GRADIENT COMPUTATION OF f */
  291. /* ====================================== */
  292. double *g;
  293. g = myalloc(N*N);
  294. start_time = mtime();
  295. gradient(1,N*N,A, g);
  296. end_time = mtime();
  297. printf("ADOL-C gradient of f needs %d ms.\n",(end_time-start_time));
  298. runtimes_file<<end_time-start_time<<"\t";
  299. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  300. system(mycmd);
  301. /* ===================================================== */
  302. /* COMPUTING THE GRADIENT OF f WITH MATRIX AD */
  303. /* ===================================================== */
  304. /* need to compute
  305. // d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore
  306. // \nabla tr(A^-1) = - ((A^-1)^2)^T
  307. */
  308. start_time = mtime();
  309. /* Ainv2 == A here */
  310. inv(A, Ainv, R, N);
  311. /* compute Abar = - (A^{-1} A^{-1})^T */
  312. cblas_dgemm(CblasRowMajor, CblasTrans,
  313. CblasTrans, N, N,
  314. N, -1., Ainv,
  315. N, Ainv, N,
  316. 0., Abar, N);
  317. end_time = mtime();
  318. printf("UTPM gradient evaluation of f needs %d ms.\n",(end_time-start_time));
  319. runtimes_file<<end_time-start_time<<"\t";
  320. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  321. system(mycmd);
  322. /* check that dot(A,Ainv) = I */
  323. cblas_dgemm(CblasRowMajor, CblasNoTrans,
  324. CblasNoTrans, N, N,
  325. N, 1., A,
  326. N, Ainv, N,
  327. 0., Id, N);
  328. {
  329. double sum = 0;
  330. for (int n = 0; n != N; ++n){
  331. for (int m = 0; m != N; ++m){
  332. sum += abs( (n==m) - Id[id(n,m,N)]);
  333. }
  334. }
  335. cout<< "Computation correct? "<< (bool) (sum< N*1e-8 ) <<endl;
  336. }
  337. /* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */
  338. double difference = 0.;
  339. for(int n = 0; n != N; ++n){
  340. for(int m = 0; m != N; ++m){
  341. difference += abs(g[id(n,m,N)] - Abar[id(n,m,N)]);
  342. // cout<< g[id(n,m,N)]<<" "<<Abar[id(n,m,N)]<<endl;
  343. }
  344. }
  345. cout<< "Computation correct? "<< (bool) (difference< N*1e-6 ) <<endl;
  346. /* ===================================================== */
  347. /* COMPUTING THE GRADIENT OF f WITH MATRIX AD via LAPACK */
  348. /* ===================================================== */
  349. /* need to compute
  350. // d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore
  351. // \nabla tr(A^-1) = - ((A^-1)^2)^T
  352. */
  353. for(int n = 0; n != N; ++n){
  354. for(int m = 0; m != N; ++m){
  355. Ainv[id(n,m,N)] = A[id(n,m,N)];
  356. }
  357. }
  358. start_time = mtime();
  359. /* compute the inverse by combination of dgetrf and dgetri */
  360. clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv);
  361. clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv);
  362. /* compute Abar = - (A^{-1} A^{-1})^T */
  363. cblas_dgemm(CblasRowMajor, CblasTrans,
  364. CblasTrans, N, N,
  365. N, -1., Ainv,
  366. N, Ainv, N,
  367. 0., Abar, N);
  368. end_time = mtime();
  369. printf("UTPM lapack gradient evaluation of f needs %d ms.\n",(end_time-start_time));
  370. runtimes_file<<end_time-start_time<<"\t";
  371. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  372. system(mycmd);
  373. // /* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */
  374. // double difference = 0.;
  375. // for(int n = 0; n != N; ++n){
  376. // for(int m = 0; m != N; ++m){
  377. // difference += abs(g[n*N + m] - Abar[n][m]);
  378. // }
  379. // }
  380. // assert(difference/(N*N)<=1e-6);
  381. /* ============================================= */
  382. /* COMPUTING THE GRADIENT OF trace WITH TAPENADE */
  383. /* ============================================= */
  384. /* we have to compute
  385. fbar df = fbar d tr C
  386. = tr ( fbar Id d C )
  387. */
  388. /* since fbar = 1, fbar Id = Id */
  389. for(int n = 0; n!=N; ++n){
  390. for(int m = 0; m!=N; ++m){
  391. Id[id(n,m,N)] = (n == m);
  392. }
  393. }
  394. /* now compute Abar */
  395. double *Ainvbar = new double[N*N];
  396. double *Rbar = new double[N*N];
  397. start_time = mtime();
  398. inv_b(A, Abar, Ainv, Ainvbar, R, Rbar, N);
  399. end_time = mtime();
  400. printf("TAPENADE gradient evaluation of f using the c code needs %d ms.\n",(end_time-start_time));
  401. runtimes_file<<end_time-start_time<<"\t";
  402. snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
  403. system(mycmd);
  404. // // fortran version
  405. // start_time = mtime();
  406. // int Ntmp2 = N;
  407. // inv_b_(A, Abar, Ainv, Id, R, Ntmp2);
  408. // end_time = mtime();
  409. // printf("TAPENADE gradient evaluation of f using the fortrang code needs %d ms.\n",(end_time-start_time));
  410. // runtimes_file<<end_time-start_time<<endl;
  411. /* ============================================= */
  412. /* TESTING HIGHER-ORDER DERIVATIVES */
  413. /* ============================================= */
  414. int P = 1; int D = 5;
  415. double *utpm_A = new double[(1+(D-1)*P)*(N*N)];
  416. double *utpm_B = new double[(1+(D-1)*P)*(N*N)];
  417. double *utpm_C = new double[(1+(D-1)*P)*(N*N)];
  418. int *utpm_strides = new int[3];
  419. utpm_strides[0] = N*N*sizeof(double);
  420. utpm_strides[1] = sizeof(double);
  421. utpm_strides[2] = N*sizeof(double);
  422. int *utpm_shape = new int[2];
  423. utpm_shape[0] = N; utpm_shape[1] = N;
  424. /* fill utpm_A and utpm_B with random numbers */
  425. for(int i = 0; i!=N*N + (D-1)*P*(N*N); ++i){
  426. utpm_A[i] = rand()/100000000000.;
  427. utpm_B[i] = rand()/100000000000.;
  428. }
  429. // print_utpm(P, D, 2, utpm_shape, &utpm_strides[1], utpm_A);
  430. start_time = mtime();
  431. utpm_dot(P, D, N, N, N, 1., utpm_A, utpm_strides, utpm_B, utpm_strides, 0., utpm_C, utpm_strides);
  432. end_time = mtime();
  433. runtimes_file<<end_time-start_time<<"\t";
  434. printf("taylorpoly UTPM needs %d ms.\n",(end_time-start_time));
  435. /* trace the operations with ADOL-C */
  436. adouble *aA2 = new adouble[(N*N)];
  437. adouble *aB2 = new adouble[(N*N)];
  438. adouble *aC2 = new adouble[(N*N)];
  439. start_time = mtime();
  440. trace_on(2);
  441. for(int n = 0; n!=N; ++n){
  442. for(int m = 0; m!=N; ++m){
  443. aA2[id(n,m,N)]<<= utpm_A[id(n,m,N)];
  444. aB2[id(n,m,N)]<<= utpm_B[id(n,m,N)];
  445. }
  446. }
  447. dot(aA2, aB2, aC2, N);
  448. for(int n = 0; n!=N; ++n){
  449. for(int m = 0; m!=N; ++m){
  450. aC2[id(n,m,N)]>>= utpm_C[id(n,m,N)];
  451. }
  452. }
  453. trace_off();
  454. end_time = mtime();
  455. double *x2 = new double[2*N*N];
  456. double *y2 = new double[N*N];
  457. double ***V2 = myalloc3(2*N*N,P, D-1);
  458. double ***W2 = myalloc3(N*N,P, D-1);
  459. // start_time = mtime();
  460. start_time = mtime();
  461. hov_forward(2,N*N,2*N*N,D-1, P, x2, V2, y2, W2);
  462. end_time = mtime();
  463. runtimes_file<<end_time-start_time<<"\t";
  464. printf("ADOL-C hov_forward needs %d ms.\n",(end_time-start_time));
  465. runtimes_file<<endl;
  466. runtimes_file.close();
  467. return 0;
  468. }