PageRenderTime 87ms CodeModel.GetById 12ms app.highlight 70ms RepoModel.GetById 1ms app.codeStats 0ms

/test_program/matrix_ad_vs_adolc_vs_tapenade.cpp

http://github.com/b45ch1/hpsc_hanoi_2009_walter
C++ | 552 lines | 360 code | 83 blank | 109 comment | 52 complexity | b7ed7e7809a790309718e96aef11bf82 MD5 | raw file
  1#include <iostream>
  2#include <fstream>
  3
  4// for memory consumption
  5#include <sys/types.h>
  6#include <unistd.h>
  7#include <stdio.h>
  8#include <stdlib.h>
  9
 10#include "adolc/adolc.h"
 11#include "dense_inverse.hpp"
 12#include "dot.hpp"
 13
 14extern "C"{
 15#include "clapack.h"
 16#include "cblas.h"
 17#include "dense_inverse.h"
 18#include "inv_b.h"
 19#include "utpm.h"
 20
 21}
 22
 23#include <sys/time.h>
 24struct timeval tv;
 25int mtime(void)
 26{
 27gettimeofday(&tv,NULL);
 28return (int)(tv.tv_sec*1000 + (tv.tv_usec / 1000));
 29}
 30
 31using namespace std;
 32/** computes y = trace(A) */
 33template <class Tdouble>
 34int trace(Tdouble *A, Tdouble *Ainv, Tdouble *R, Tdouble *y, int N){
 35    *y = 0;
 36    inv(A, Ainv, R, N);
 37    for(int n = 0; n < N; ++n){
 38        (*y) += Ainv[id(n,n,N)];
 39    }
 40}
 41
 42
 43
 44int main(int argc, char* argv[]){
 45	int N = 100;
 46	if( argc >= 2){
 47		int tmp = atoi(argv[1]);
 48		if(tmp <= 300){
 49			N = tmp;
 50		}
 51	}
 52
 53
 54	cout<<endl<<endl;
 55	cout<<"================================================"<<endl;
 56	cout<<"                  RUNNING TESTS                 "<<endl;
 57	cout<<"================================================"<<endl;
 58	cout<<endl<<endl;
 59
 60	cout<<"Setting N = "<<N<<endl;
 61	cout<<"PID of this process  = "<< getpid() <<endl;
 62	char mycmd[255];
 63	snprintf (mycmd, (size_t)255, "echo [N=%d]>> mem_consumption.txt", N);
 64	system(mycmd);
 65
 66	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
 67	system(mycmd);
 68
 69
 70	/* write output to runtimes.txt */
 71	ofstream runtimes_file;
 72	runtimes_file.open("runtimes.txt", fstream::in | fstream::out | fstream::app);
 73	runtimes_file<<N<<"\t";
 74	
 75	double *A = new double[N*N];
 76	double *Ainv = new double[N*N];
 77	double *Ainv2 = new double[N*N];
 78	double *QT = new double[N*N];
 79	double *Q = new double[N*N];
 80	double *B = new double[N*N];
 81	double *R = new double[N*N];
 82	double *Id = new double[N*N];
 83	double *Abar = new double[N*N];
 84	double *WORK = new double[N*N];
 85	
 86
 87	int start_time,end_time;
 88
 89	/* fill A with random numbers */
 90	for(int n = 0; n!=N; ++n){
 91		for(int m = 0; m!=N; ++m){
 92			A[id(n,m,N)] = rand()/100000000000.;
 93			Ainv[id(n,m,N)] = A[id(n,m,N)];
 94		}
 95	}
 96	/*
 97	RUNNING TESTS:
 98	==============
 99	
100		The tests compare:
101			* Lapack VS my implementation to compute VS taped version of my implementation to compute A^-1
102				* It turns out that my implementation is a less than a factor 2 slower than LAPACK.
103				* The taped function evaluation is much slower though.
104			* Compare the perfomance to compute the gradient:
105				1) Matrix AD
106				2) ADOL-C
107				3) TAPENADE
108			* Check the obtained results for consistency, i.e. check that there are no bugs in the code!
109	*/
110	
111	cout<<endl<<endl;
112	cout<<"================================================"<<endl;
113	cout<<"           TESTING FUNCTION EVALUATION          "<<endl;
114	cout<<"================================================"<<endl;
115	cout<<endl<<endl;
116	
117	
118	/* TIMING LAPACK IMPLEMENTATION INVERSE COMPUTATION */
119	/* =============================================== */
120	/* compute the inverse by combination of dgetrf and dgetri */
121	int *ipiv = new int[N];	int info; int LWORK = N*N;
122	start_time = mtime();
123	clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv);
124	// cout<<"info="<<info<<endl;
125	clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv);
126	// cout<<"info="<<info<<endl;
127	end_time = mtime();
128	printf("normal LAPACK function evaluation of inv needs %d ms.\n",(end_time-start_time));
129	runtimes_file<<end_time-start_time<<"\t";
130
131
132	/* check that dot(A,Ainv) = I */
133	double one = 1;
134	double zero = 0;
135	char notrans = 'n';
136	cblas_dgemm(CblasRowMajor, CblasNoTrans,
137				 CblasNoTrans, N, N, N, 1., A,
138				 N, Ainv, N,
139				0., Id, N);
140	{
141		double sum = 0;
142		for (int n = 0; n != N; ++n){
143			for (int m = 0; m != N; ++m){
144				sum += Id[id(n,m,N)];
145			}
146		}
147		cout<<sum<<endl;
148		cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
149	}
150	
151
152	/* TIMING MY C-IMPLEMENTATION INVERSE COMPUTATION */
153	/* ============================================== */
154
155	start_time = mtime();
156	inv(A, Ainv, R, N);
157	end_time = mtime();
158	printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time));
159	runtimes_file<<end_time-start_time<<"\t";
160	
161	/* check that dot(A,Ainv) = I */
162	cblas_dgemm(CblasRowMajor, CblasNoTrans,
163				 CblasNoTrans, N, N,
164				N, 1., A,
165				 N, Ainv, N,
166				 0., Id, N);
167	{
168		double sum = 0;
169		for (int n = 0; n != N; ++n){
170			for (int m = 0; m != N; ++m){
171				sum += Id[id(n,m,N)];
172			}
173		}
174		cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
175	}
176	
177	
178	/* TIMING MY F77-IMPLEMENTATION INVERSE COMPUTATION */
179	/* ================================================ */
180	start_time = mtime();
181	{
182	int Ntmp = N;
183	inv(A,Ainv,R, Ntmp);
184	}
185	end_time = mtime();
186	printf("normal selfmade function evaluation in F77 of inv needs %d ms.\n",(end_time-start_time));
187	runtimes_file<<end_time-start_time<<"\t";
188
189	
190	/* check that dot(A,Ainv) = I */
191	cblas_dgemm(CblasRowMajor, CblasNoTrans,
192				 CblasNoTrans, N, N,
193				N, 1., A,
194				 N, Ainv, N,
195				 0., Id, N);
196	{
197		double sum = 0;
198		for (int n = 0; n != N; ++n){
199			for (int m = 0; m != N; ++m){
200				sum += Id[id(n,m,N)];
201			}
202		}
203		cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
204	}
205	
206	
207	/* TIMING MY C++-IMPLEMENTATION INVERSE COMPUTATION */
208	/* ================================================ */
209	int Ntmp = N;
210	start_time = mtime();
211	inv(A,Ainv,R, N);
212	end_time = mtime();
213	printf("normal selfmade function evaluation in C of inv needs %d ms.\n",(end_time-start_time));
214	runtimes_file<<end_time-start_time<<"\t";
215
216	
217	/* check that dot(A,Ainv) = I */
218	cblas_dgemm(CblasRowMajor, CblasNoTrans,
219				 CblasNoTrans, N, N,
220				N, 1., A,
221				 N, Ainv, N,
222				 0., Id, N);
223	{
224		double sum = 0;
225		for (int n = 0; n != N; ++n){
226			for (int m = 0; m != N; ++m){
227				sum += Id[id(n,m,N)];
228			}
229		}
230		cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
231	}
232
233	/* taping inv */
234	cout<<"start tracing my implementation of inv with ADOL-C"<<endl;
235	adouble *aA = new adouble[N*N];
236	adouble *aR = new adouble[N*N];
237	adouble *aAinv = new adouble[N*N];
238
239	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
240	system(mycmd);
241	
242	start_time = mtime();
243	trace_on(0);
244	for(int n = 0; n!=N; ++n){
245		for(int m = 0; m!=N; ++m){
246			aA[id(n,m,N)]<<= A[id(n,m,N)];
247		}
248	}
249	inv(aA, aAinv, aR, N);
250	for(int n = 0; n!=N; ++n){
251		for(int m = 0; m!=N; ++m){
252			aAinv[id(n,m,N)]>>= Ainv[id(n,m,N)];
253		}
254	}
255	trace_off();
256	end_time = mtime();
257	printf("normal selfmade function evaluation in C++ of inv needs %d ms.\n",(end_time-start_time));
258	runtimes_file<<end_time-start_time<<"\t";	
259	cout<<"finished tracing"<<endl;
260	
261	
262	/* ================================ */
263	/* TIMING TAPED INVERSE COMPUTATION */
264	/* ================================ */
265	start_time = mtime();
266	function(0,N*N,N*N,A, Ainv);
267	end_time = mtime();
268	printf("taped function evaluation of inv needs %d ms.\n",(end_time-start_time));
269	runtimes_file<<end_time-start_time<<"\t";
270
271	
272	/* check that dot(A,Ainv) = I */
273	cblas_dgemm(CblasRowMajor, CblasNoTrans,
274				 CblasNoTrans, N, N,
275				N, 1., A,
276				 N, Ainv, N,
277				 0., Id, N);
278	{
279		double sum = 0;
280		for (int n = 0; n != N; ++n){
281			for (int m = 0; m != N; ++m){
282				sum += Id[id(n,m,N)];
283			}
284		}
285		cout<< "Computation correct? "<< (bool) (abs(sum/N - 1) < N*1e-8 ) <<endl;
286	}
287	
288// 	/* TIMING MY C++-IMPLEMENTATION f COMPUTATION */
289// 	/* ======================================== */
290// 	start_time = mtime();
291// 	f(A,Ainv);
292// 	end_time = mtime();
293// 	printf("normal function evaluation of f needs %d ms.\n",(end_time-start_time));
294// 	runtimes_file<<end_time-start_time<<"\t";
295
296
297// 	/* TIMING TAPED f COMPUTATION */
298// 	/* ========================== */
299// 	start_time = mtime();
300// 	function(1,1,N*N,A.data, &y);
301// 	end_time = mtime();
302// 	printf("taped function evaluation of f needs %d ms.\n",(end_time-start_time));
303// 	runtimes_file<<end_time-start_time<<"\t";
304
305
306	cout<<endl<<endl;
307	cout<<"================================================"<<endl;
308	cout<<"           TESTING GRADIENT EVALUATION          "<<endl;
309	cout<<"================================================"<<endl;
310	cout<<endl<<endl;
311
312	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
313	system(mycmd);
314	
315	/* TIMING TAPED JACOBIAN COMPUTATION OF INV */
316	if(N<=30){
317        double **J;
318        J=myalloc2(N*N,N*N);
319        start_time = mtime();
320        jacobian(0,N*N,N*N,A, J);
321        end_time = mtime();
322        printf("taped jacobian of inv needs %d ms.\n",(end_time-start_time));
323    
324    }
325    
326	/* taping inv */
327	cout<<"start tracing my implementation of trace(inv(.)) with ADOL-C"<<endl;
328	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
329	system(mycmd);
330	double y;
331	trace_on(1);
332	for(int n = 0; n!=N; ++n){
333		for(int m = 0; m!=N; ++m){
334			aA[id(n,m,N)]<<= A[id(n,m,N)];
335		}
336	}
337	adouble ay;
338	trace(aA, aAinv, aR, &ay, N);
339	ay >>= y;
340	trace_off();
341	cout<<"finished tracing"<<endl;
342	
343	/* ====================================== */
344	/* TIMING TAPED GRADIENT COMPUTATION OF f */
345	/* ====================================== */
346	double *g;
347	g = myalloc(N*N);
348	start_time = mtime();
349	gradient(1,N*N,A, g);
350	end_time = mtime();
351	printf("ADOL-C gradient of f needs %d ms.\n",(end_time-start_time));
352	runtimes_file<<end_time-start_time<<"\t";
353	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
354	system(mycmd);
355	
356	
357	/* ===================================================== */
358	/* COMPUTING THE GRADIENT OF f WITH MATRIX AD            */
359	/* ===================================================== */
360	/*              need to compute
361	//              d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore
362	//              \nabla tr(A^-1) = - ((A^-1)^2)^T
363	*/
364	start_time = mtime();
365	/* Ainv2 == A here */
366	inv(A, Ainv, R, N);
367
368	 /* compute Abar = - (A^{-1} A^{-1})^T */
369	cblas_dgemm(CblasRowMajor, CblasTrans,
370				 CblasTrans, N, N,
371				N, -1., Ainv,
372				 N, Ainv, N,
373				 0., Abar, N);
374	end_time = mtime();
375	printf("UTPM gradient evaluation of f needs %d ms.\n",(end_time-start_time));
376	runtimes_file<<end_time-start_time<<"\t";
377	
378	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
379	system(mycmd);
380	
381	/* check that dot(A,Ainv) = I */
382	cblas_dgemm(CblasRowMajor, CblasNoTrans,
383				 CblasNoTrans, N, N,
384				N, 1., A,
385				 N, Ainv, N,
386				 0., Id, N);
387	{
388		double sum = 0;
389		for (int n = 0; n != N; ++n){
390			for (int m = 0; m != N; ++m){
391				sum += abs( (n==m) - Id[id(n,m,N)]);
392			}
393		}
394		cout<< "Computation correct? "<< (bool) (sum< N*1e-8 ) <<endl;
395	}
396
397	/* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */
398	double difference = 0.;
399	for(int n = 0; n != N; ++n){
400		for(int m = 0; m != N; ++m){
401			difference += abs(g[id(n,m,N)] - Abar[id(n,m,N)]);
402			// cout<< g[id(n,m,N)]<<" "<<Abar[id(n,m,N)]<<endl;
403		}
404	} 
405    cout<< "Computation correct? "<< (bool) (difference< N*1e-6 ) <<endl;
406
407	
408	/* ===================================================== */
409	/* COMPUTING THE GRADIENT OF f WITH MATRIX AD via LAPACK */
410	/* ===================================================== */
411	/*              need to compute
412	//              d tr(A^-1) = tr( - (A^-1)^2 dA) and therefore
413	//              \nabla tr(A^-1) = - ((A^-1)^2)^T
414	*/
415	for(int n = 0; n != N; ++n){
416		for(int m = 0; m != N; ++m){
417		    Ainv[id(n,m,N)] = A[id(n,m,N)];
418		}
419	}
420	
421	start_time = mtime();
422	/* compute the inverse by combination of dgetrf and dgetri */
423	clapack_dgetrf(CblasRowMajor, N, N, Ainv, N, ipiv);
424	clapack_dgetri(CblasRowMajor, N, Ainv, N, ipiv);
425	
426	 /* compute Abar = - (A^{-1} A^{-1})^T */
427	cblas_dgemm(CblasRowMajor, CblasTrans,
428				 CblasTrans, N, N,
429				N, -1., Ainv,
430				 N, Ainv, N,
431				 0., Abar, N);
432	end_time = mtime();
433	printf("UTPM lapack gradient evaluation of f needs %d ms.\n",(end_time-start_time));
434	runtimes_file<<end_time-start_time<<"\t";
435
436	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
437	system(mycmd);
438	
439
440// 	/* CHECK THAT TAPED GRADIENT AND LAPACK GRADIENT EVALUATION ARE THE SAME */
441// 	double difference = 0.;
442// 	for(int n = 0; n != N; ++n){
443// 		for(int m = 0; m != N; ++m){
444// 			difference += abs(g[n*N + m] - Abar[n][m]);
445// 		}
446// 	} 
447// 	assert(difference/(N*N)<=1e-6);
448	
449	
450	/* ============================================= */
451	/* COMPUTING THE GRADIENT OF trace WITH TAPENADE */
452	/* ============================================= */
453	
454	/* we have to compute
455		fbar df = fbar d tr C
456				= tr ( fbar Id d C )
457	*/
458	
459	/* since fbar = 1, fbar Id = Id */
460	for(int n = 0; n!=N; ++n){
461		for(int m = 0; m!=N; ++m){
462			Id[id(n,m,N)] = (n == m);
463		}
464	}
465	
466	/* now compute Abar */
467	double *Ainvbar = new double[N*N];
468	double *Rbar = new double[N*N];
469	
470	start_time = mtime();
471	inv_b(A, Abar, Ainv, Ainvbar, R, Rbar, N);
472	end_time = mtime();
473	printf("TAPENADE gradient evaluation of f using the c code needs %d ms.\n",(end_time-start_time));
474	runtimes_file<<end_time-start_time<<"\t";
475	
476	snprintf (mycmd, (size_t)255, "cat /proc/%d/status | grep VmPeak >> mem_consumption.txt", getpid());
477	system(mycmd);
478	
479	// // fortran version
480	// start_time = mtime();
481	// int Ntmp2 = N;
482	// inv_b_(A, Abar, Ainv, Id, R, Ntmp2);
483	// end_time = mtime();
484	// printf("TAPENADE gradient evaluation of f using the fortrang code needs %d ms.\n",(end_time-start_time));
485	// runtimes_file<<end_time-start_time<<endl;
486	
487	
488	/* ============================================= */
489	/* TESTING HIGHER-ORDER DERIVATIVES              */
490	/* ============================================= */
491	int P = 1; int D = 5;
492	double *utpm_A = new double[(1+(D-1)*P)*(N*N)];
493	double *utpm_B = new double[(1+(D-1)*P)*(N*N)];
494	double *utpm_C = new double[(1+(D-1)*P)*(N*N)];
495	int *utpm_strides = new int[3];
496	utpm_strides[0] = N*N*sizeof(double);
497	utpm_strides[1] = sizeof(double);
498	utpm_strides[2] = N*sizeof(double);
499	int *utpm_shape = new int[2];
500	utpm_shape[0] = N; utpm_shape[1] = N;
501	
502	/* fill utpm_A and utpm_B with random numbers */
503	for(int i = 0; i!=N*N + (D-1)*P*(N*N); ++i){
504		utpm_A[i] = rand()/100000000000.;
505		utpm_B[i] = rand()/100000000000.;
506	}
507	
508	// print_utpm(P, D, 2, utpm_shape, &utpm_strides[1], utpm_A);
509	start_time = mtime();
510	utpm_dot(P, D, N, N, N, 1., utpm_A, utpm_strides, utpm_B, utpm_strides, 0., utpm_C, utpm_strides);
511	end_time = mtime();
512	runtimes_file<<end_time-start_time<<"\t";
513	printf("taylorpoly UTPM needs %d ms.\n",(end_time-start_time));	
514	
515	/* trace the operations with ADOL-C */
516	adouble *aA2 = new adouble[(N*N)];
517	adouble *aB2 = new adouble[(N*N)];
518	adouble *aC2 = new adouble[(N*N)];
519	
520	start_time = mtime();
521	trace_on(2);
522	for(int n = 0; n!=N; ++n){
523		for(int m = 0; m!=N; ++m){
524			aA2[id(n,m,N)]<<= utpm_A[id(n,m,N)];
525			aB2[id(n,m,N)]<<= utpm_B[id(n,m,N)];
526		}
527	}
528	dot(aA2, aB2, aC2, N);
529	for(int n = 0; n!=N; ++n){
530		for(int m = 0; m!=N; ++m){
531			aC2[id(n,m,N)]>>= utpm_C[id(n,m,N)];
532		}
533	}
534	trace_off();
535	end_time = mtime();
536	
537	double *x2 = new double[2*N*N];
538	double *y2 = new double[N*N];
539	double ***V2 = myalloc3(2*N*N,P, D-1);
540	double ***W2 = myalloc3(N*N,P, D-1);
541	
542	// start_time = mtime();
543	start_time = mtime();
544	hov_forward(2,N*N,2*N*N,D-1, P, x2, V2, y2, W2);
545	end_time = mtime();
546	runtimes_file<<end_time-start_time<<"\t";
547	printf("ADOL-C hov_forward needs %d ms.\n",(end_time-start_time));
548	runtimes_file<<endl;
549	runtimes_file.close();
550	return 0;
551}
552