/test_program/matrix_ad_vs_adolc_vs_tapenade.cpp
C++ | 552 lines | 360 code | 83 blank | 109 comment | 52 complexity | b7ed7e7809a790309718e96aef11bf82 MD5 | raw file
1#include <iostream> 2#include <fstream> 3 4// for memory consumption 5#include <sys/types.h> 6#include <unistd.h> 7#include <stdio.h> 8#include <stdlib.h> 9 10#include "adolc/adolc.h" 11#include "dense_inverse.hpp" 12#include "dot.hpp" 13 14extern "C"{ 15#include "clapack.h" 16#include "cblas.h" 17#include "dense_inverse.h" 18#include "inv_b.h" 19#include "utpm.h" 20 21} 22 23#include <sys/time.h> 24struct timeval tv; 25int mtime(void) 26{ 27gettimeofday(&tv,NULL); 28return (int)(tv.tv_sec*1000 + (tv.tv_usec / 1000)); 29} 30 31using namespace std; 32/** computes y = trace(A) */ 33template <class Tdouble> 34int trace(Tdouble *A, Tdouble *Ainv, Tdouble *R, Tdouble *y, int N){ 35 *y = 0; 36 inv(A, Ainv, R, N); 37 for(int n = 0; n < N; ++n){ 38 (*y) += Ainv[id(n,n,N)]; 39 } 40} 41 42 43 44int main(int argc, char* argv[]){ 45 int N = 100; 46 if( argc >= 2){ 47 int tmp = atoi(argv[1]); 48 if(tmp <= 300){ 49 N = tmp; 50 } 51 } 52 53 54 cout<<endl<<endl; 55 cout<<"================================================"<<endl; 56 cout<<" RUNNING TESTS "<<endl; 57 cout<<"================================================"<<endl; 58 cout<<endl<<endl; 59 60 cout<<"Setting N = "<<N<<endl; 61 cout<<"PID of this process = "<< getpid() <<endl; 62 char mycmd[255]; 63 snprintf (mycmd, (size_t)255, "echo [N=%d]>> mem_consumption.txt", N); 64 system(mycmd); 65 66 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 67 system(mycmd); 68 69 70 /* write output to runtimes.txt */ 71 ofstream runtimes_file; 72 runtimes_file.open("runtimes.txt", fstream::in | fstream::out | fstream::app); 73 runtimes_file<<N<<"\t"; 74 75 double *A = new double[N*N]; 76 double *Ainv = new double[N*N]; 77 double *Ainv2 = new double[N*N]; 78 double *QT = new double[N*N]; 79 double *Q = new double[N*N]; 80 double *B = new double[N*N]; 81 double *R = new double[N*N]; 82 double *Id = new double[N*N]; 83 double *Abar = new double[N*N]; 84 double *WORK = new double[N*N]; 85 86 87 int start_time,end_time; 88 89 /* fill A with random numbers */ 90 for(int n = 0; n!=N; ++n){ 91 for(int m = 0; m!=N; ++m){ 92 A[id(n,m,N)] = rand()/100000000000.; 93 Ainv[id(n,m,N)] = A[id(n,m,N)]; 94 } 95 } 96 /* 97 RUNNING TESTS: 98 ============== 99 100 The tests compare: 101 * Lapack VS my implementation to compute VS taped version of my implementation to compute A^-1 102 * It turns out that my implementation is a less than a factor 2 slower than LAPACK. 103 * The taped function evaluation is much slower though. 104 * Compare the perfomance to compute the gradient: 105 1) Matrix AD 106 2) ADOL-C 107 3) TAPENADE 108 * Check the obtained results for consistency, i.e. check that there are no bugs in the code! 109 */ 110 111 cout<<endl<<endl; 112 cout<<"================================================"<<endl; 113 cout<<" TESTING FUNCTION EVALUATION "<<endl; 114 cout<<"================================================"<<endl; 115 cout<<endl<<endl; 116 117 118 /* TIMING LAPACK IMPLEMENTATION INVERSE COMPUTATION */ 119 /* =============================================== */ 120 /* compute the inverse by combination of dgetrf and dgetri */ 121 int *ipiv = new int[N]; int info; int LWORK = N*N; 122 start_time = mtime(); 123 clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv); 124 // cout<<"info="<<info<<endl; 125 clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv); 126 // cout<<"info="<<info<<endl; 127 end_time = mtime(); 128 printf("normal LAPACK function evaluation of inv needs %d ms.\n",(end_time-start_time)); 129 runtimes_file<<end_time-start_time<<"\t"; 130 131 132 /* check that dot(A,Ainv) = I */ 133 double one = 1; 134 double zero = 0; 135 char notrans = 'n'; 136 cblas_dgemm(CblasRowMajor, CblasNoTrans, 137 CblasNoTrans, N, N, N, 1., A, 138 N, Ainv, N, 139 0., Id, N); 140 { 141 double sum = 0; 142 for (int n = 0; n != N; ++n){ 143 for (int m = 0; m != N; ++m){ 144 sum += Id[id(n,m,N)]; 145 } 146 } 147 cout<<sum<<endl; 148 cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl; 149 } 150 151 152 /* TIMING MY C-IMPLEMENTATION INVERSE COMPUTATION */ 153 /* ============================================== */ 154 155 start_time = mtime(); 156 inv(A, Ainv, R, N); 157 end_time = mtime(); 158 printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time)); 159 runtimes_file<<end_time-start_time<<"\t"; 160 161 /* check that dot(A,Ainv) = I */ 162 cblas_dgemm(CblasRowMajor, CblasNoTrans, 163 CblasNoTrans, N, N, 164 N, 1., A, 165 N, Ainv, N, 166 0., Id, N); 167 { 168 double sum = 0; 169 for (int n = 0; n != N; ++n){ 170 for (int m = 0; m != N; ++m){ 171 sum += Id[id(n,m,N)]; 172 } 173 } 174 cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl; 175 } 176 177 178 /* TIMING MY F77-IMPLEMENTATION INVERSE COMPUTATION */ 179 /* ================================================ */ 180 start_time = mtime(); 181 { 182 int Ntmp = N; 183 inv(A,Ainv,R, Ntmp); 184 } 185 end_time = mtime(); 186 printf("normal selfmade function evaluation in F77 of inv needs %d ms.\n",(end_time-start_time)); 187 runtimes_file<<end_time-start_time<<"\t"; 188 189 190 /* check that dot(A,Ainv) = I */ 191 cblas_dgemm(CblasRowMajor, CblasNoTrans, 192 CblasNoTrans, N, N, 193 N, 1., A, 194 N, Ainv, N, 195 0., Id, N); 196 { 197 double sum = 0; 198 for (int n = 0; n != N; ++n){ 199 for (int m = 0; m != N; ++m){ 200 sum += Id[id(n,m,N)]; 201 } 202 } 203 cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl; 204 } 205 206 207 /* TIMING MY C++-IMPLEMENTATION INVERSE COMPUTATION */ 208 /* ================================================ */ 209 int Ntmp = N; 210 start_time = mtime(); 211 inv(A,Ainv,R, N); 212 end_time = mtime(); 213 printf("normal selfmade function evaluation in C of inv needs %d ms.\n",(end_time-start_time)); 214 runtimes_file<<end_time-start_time<<"\t"; 215 216 217 /* check that dot(A,Ainv) = I */ 218 cblas_dgemm(CblasRowMajor, CblasNoTrans, 219 CblasNoTrans, N, N, 220 N, 1., A, 221 N, Ainv, N, 222 0., Id, N); 223 { 224 double sum = 0; 225 for (int n = 0; n != N; ++n){ 226 for (int m = 0; m != N; ++m){ 227 sum += Id[id(n,m,N)]; 228 } 229 } 230 cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl; 231 } 232 233 /* taping inv */ 234 cout<<"start tracing my implementation of inv with ADOL-C"<<endl; 235 adouble *aA = new adouble[N*N]; 236 adouble *aR = new adouble[N*N]; 237 adouble *aAinv = new adouble[N*N]; 238 239 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 240 system(mycmd); 241 242 start_time = mtime(); 243 trace_on(0); 244 for(int n = 0; n!=N; ++n){ 245 for(int m = 0; m!=N; ++m){ 246 aA[id(n,m,N)]<<= A[id(n,m,N)]; 247 } 248 } 249 inv(aA, aAinv, aR, N); 250 for(int n = 0; n!=N; ++n){ 251 for(int m = 0; m!=N; ++m){ 252 aAinv[id(n,m,N)]>>= Ainv[id(n,m,N)]; 253 } 254 } 255 trace_off(); 256 end_time = mtime(); 257 printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time)); 258 runtimes_file<<end_time-start_time<<"\t"; 259 cout<<"finished tracing"<<endl; 260 261 262 /* ================================ */ 263 /* TIMING TAPED INVERSE COMPUTATION */ 264 /* ================================ */ 265 start_time = mtime(); 266 function(0,N*N,N*N,A, Ainv); 267 end_time = mtime(); 268 printf("taped function evaluation of inv needs %d ms.\n",(end_time-start_time)); 269 runtimes_file<<end_time-start_time<<"\t"; 270 271 272 /* check that dot(A,Ainv) = I */ 273 cblas_dgemm(CblasRowMajor, CblasNoTrans, 274 CblasNoTrans, N, N, 275 N, 1., A, 276 N, Ainv, N, 277 0., Id, N); 278 { 279 double sum = 0; 280 for (int n = 0; n != N; ++n){ 281 for (int m = 0; m != N; ++m){ 282 sum += Id[id(n,m,N)]; 283 } 284 } 285 cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl; 286 } 287 288// /* TIMING MY C++-IMPLEMENTATION f COMPUTATION */ 289// /* ======================================== */ 290// start_time = mtime(); 291// f(A,Ainv); 292// end_time = mtime(); 293// printf("normal function evaluation of f needs %d ms.\n",(end_time-start_time)); 294// runtimes_file<<end_time-start_time<<"\t"; 295 296 297// /* TIMING TAPED f COMPUTATION */ 298// /* ========================== */ 299// start_time = mtime(); 300// function(1,1,N*N,A.data, &y); 301// end_time = mtime(); 302// printf("taped function evaluation of f needs %d ms.\n",(end_time-start_time)); 303// runtimes_file<<end_time-start_time<<"\t"; 304 305 306 cout<<endl<<endl; 307 cout<<"================================================"<<endl; 308 cout<<" TESTING GRADIENT EVALUATION "<<endl; 309 cout<<"================================================"<<endl; 310 cout<<endl<<endl; 311 312 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 313 system(mycmd); 314 315 /* TIMING TAPED JACOBIAN COMPUTATION OF INV */ 316 if(N<=30){ 317 double **J; 318 J=myalloc2(N*N,N*N); 319 start_time = mtime(); 320 jacobian(0,N*N,N*N,A, J); 321 end_time = mtime(); 322 printf("taped jacobian of inv needs %d ms.\n",(end_time-start_time)); 323 324 } 325 326 /* taping inv */ 327 cout<<"start tracing my implementation of trace(inv(.)) with ADOL-C"<<endl; 328 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 329 system(mycmd); 330 double y; 331 trace_on(1); 332 for(int n = 0; n!=N; ++n){ 333 for(int m = 0; m!=N; ++m){ 334 aA[id(n,m,N)]<<= A[id(n,m,N)]; 335 } 336 } 337 adouble ay; 338 trace(aA, aAinv, aR, &ay, N); 339 ay >>= y; 340 trace_off(); 341 cout<<"finished tracing"<<endl; 342 343 /* ====================================== */ 344 /* TIMING TAPED GRADIENT COMPUTATION OF f */ 345 /* ====================================== */ 346 double *g; 347 g = myalloc(N*N); 348 start_time = mtime(); 349 gradient(1,N*N,A, g); 350 end_time = mtime(); 351 printf("ADOL-C gradient of f needs %d ms.\n",(end_time-start_time)); 352 runtimes_file<<end_time-start_time<<"\t"; 353 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 354 system(mycmd); 355 356 357 /* ===================================================== */ 358 /* COMPUTING THE GRADIENT OF f WITH MATRIX AD */ 359 /* ===================================================== */ 360 /* need to compute 361 // d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore 362 // \nabla tr(A^-1) = - ((A^-1)^2)^T 363 */ 364 start_time = mtime(); 365 /* Ainv2 == A here */ 366 inv(A, Ainv, R, N); 367 368 /* compute Abar = - (A^{-1} A^{-1})^T */ 369 cblas_dgemm(CblasRowMajor, CblasTrans, 370 CblasTrans, N, N, 371 N, -1., Ainv, 372 N, Ainv, N, 373 0., Abar, N); 374 end_time = mtime(); 375 printf("UTPM gradient evaluation of f needs %d ms.\n",(end_time-start_time)); 376 runtimes_file<<end_time-start_time<<"\t"; 377 378 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 379 system(mycmd); 380 381 /* check that dot(A,Ainv) = I */ 382 cblas_dgemm(CblasRowMajor, CblasNoTrans, 383 CblasNoTrans, N, N, 384 N, 1., A, 385 N, Ainv, N, 386 0., Id, N); 387 { 388 double sum = 0; 389 for (int n = 0; n != N; ++n){ 390 for (int m = 0; m != N; ++m){ 391 sum += abs( (n==m) - Id[id(n,m,N)]); 392 } 393 } 394 cout<< "Computation correct? "<< (bool) (sum< N*1e-8 ) <<endl; 395 } 396 397 /* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */ 398 double difference = 0.; 399 for(int n = 0; n != N; ++n){ 400 for(int m = 0; m != N; ++m){ 401 difference += abs(g[id(n,m,N)] - Abar[id(n,m,N)]); 402 // cout<< g[id(n,m,N)]<<" "<<Abar[id(n,m,N)]<<endl; 403 } 404 } 405 cout<< "Computation correct? "<< (bool) (difference< N*1e-6 ) <<endl; 406 407 408 /* ===================================================== */ 409 /* COMPUTING THE GRADIENT OF f WITH MATRIX AD via LAPACK */ 410 /* ===================================================== */ 411 /* need to compute 412 // d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore 413 // \nabla tr(A^-1) = - ((A^-1)^2)^T 414 */ 415 for(int n = 0; n != N; ++n){ 416 for(int m = 0; m != N; ++m){ 417 Ainv[id(n,m,N)] = A[id(n,m,N)]; 418 } 419 } 420 421 start_time = mtime(); 422 /* compute the inverse by combination of dgetrf and dgetri */ 423 clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv); 424 clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv); 425 426 /* compute Abar = - (A^{-1} A^{-1})^T */ 427 cblas_dgemm(CblasRowMajor, CblasTrans, 428 CblasTrans, N, N, 429 N, -1., Ainv, 430 N, Ainv, N, 431 0., Abar, N); 432 end_time = mtime(); 433 printf("UTPM lapack gradient evaluation of f needs %d ms.\n",(end_time-start_time)); 434 runtimes_file<<end_time-start_time<<"\t"; 435 436 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 437 system(mycmd); 438 439 440// /* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */ 441// double difference = 0.; 442// for(int n = 0; n != N; ++n){ 443// for(int m = 0; m != N; ++m){ 444// difference += abs(g[n*N + m] - Abar[n][m]); 445// } 446// } 447// assert(difference/(N*N)<=1e-6); 448 449 450 /* ============================================= */ 451 /* COMPUTING THE GRADIENT OF trace WITH TAPENADE */ 452 /* ============================================= */ 453 454 /* we have to compute 455 fbar df = fbar d tr C 456 = tr ( fbar Id d C ) 457 */ 458 459 /* since fbar = 1, fbar Id = Id */ 460 for(int n = 0; n!=N; ++n){ 461 for(int m = 0; m!=N; ++m){ 462 Id[id(n,m,N)] = (n == m); 463 } 464 } 465 466 /* now compute Abar */ 467 double *Ainvbar = new double[N*N]; 468 double *Rbar = new double[N*N]; 469 470 start_time = mtime(); 471 inv_b(A, Abar, Ainv, Ainvbar, R, Rbar, N); 472 end_time = mtime(); 473 printf("TAPENADE gradient evaluation of f using the c code needs %d ms.\n",(end_time-start_time)); 474 runtimes_file<<end_time-start_time<<"\t"; 475 476 snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid()); 477 system(mycmd); 478 479 // // fortran version 480 // start_time = mtime(); 481 // int Ntmp2 = N; 482 // inv_b_(A, Abar, Ainv, Id, R, Ntmp2); 483 // end_time = mtime(); 484 // printf("TAPENADE gradient evaluation of f using the fortrang code needs %d ms.\n",(end_time-start_time)); 485 // runtimes_file<<end_time-start_time<<endl; 486 487 488 /* ============================================= */ 489 /* TESTING HIGHER-ORDER DERIVATIVES */ 490 /* ============================================= */ 491 int P = 1; int D = 5; 492 double *utpm_A = new double[(1+(D-1)*P)*(N*N)]; 493 double *utpm_B = new double[(1+(D-1)*P)*(N*N)]; 494 double *utpm_C = new double[(1+(D-1)*P)*(N*N)]; 495 int *utpm_strides = new int[3]; 496 utpm_strides[0] = N*N*sizeof(double); 497 utpm_strides[1] = sizeof(double); 498 utpm_strides[2] = N*sizeof(double); 499 int *utpm_shape = new int[2]; 500 utpm_shape[0] = N; utpm_shape[1] = N; 501 502 /* fill utpm_A and utpm_B with random numbers */ 503 for(int i = 0; i!=N*N + (D-1)*P*(N*N); ++i){ 504 utpm_A[i] = rand()/100000000000.; 505 utpm_B[i] = rand()/100000000000.; 506 } 507 508 // print_utpm(P, D, 2, utpm_shape, &utpm_strides[1], utpm_A); 509 start_time = mtime(); 510 utpm_dot(P, D, N, N, N, 1., utpm_A, utpm_strides, utpm_B, utpm_strides, 0., utpm_C, utpm_strides); 511 end_time = mtime(); 512 runtimes_file<<end_time-start_time<<"\t"; 513 printf("taylorpoly UTPM needs %d ms.\n",(end_time-start_time)); 514 515 /* trace the operations with ADOL-C */ 516 adouble *aA2 = new adouble[(N*N)]; 517 adouble *aB2 = new adouble[(N*N)]; 518 adouble *aC2 = new adouble[(N*N)]; 519 520 start_time = mtime(); 521 trace_on(2); 522 for(int n = 0; n!=N; ++n){ 523 for(int m = 0; m!=N; ++m){ 524 aA2[id(n,m,N)]<<= utpm_A[id(n,m,N)]; 525 aB2[id(n,m,N)]<<= utpm_B[id(n,m,N)]; 526 } 527 } 528 dot(aA2, aB2, aC2, N); 529 for(int n = 0; n!=N; ++n){ 530 for(int m = 0; m!=N; ++m){ 531 aC2[id(n,m,N)]>>= utpm_C[id(n,m,N)]; 532 } 533 } 534 trace_off(); 535 end_time = mtime(); 536 537 double *x2 = new double[2*N*N]; 538 double *y2 = new double[N*N]; 539 double ***V2 = myalloc3(2*N*N,P, D-1); 540 double ***W2 = myalloc3(N*N,P, D-1); 541 542 // start_time = mtime(); 543 start_time = mtime(); 544 hov_forward(2,N*N,2*N*N,D-1, P, x2, V2, y2, W2); 545 end_time = mtime(); 546 runtimes_file<<end_time-start_time<<"\t"; 547 printf("ADOL-C hov_forward needs %d ms.\n",(end_time-start_time)); 548 runtimes_file<<endl; 549 runtimes_file.close(); 550 return 0; 551} 552