/lib/ode/ode_source/ode/src/timer.cpp

http://narutortsproject.googlecode.com/ · C++ · 423 lines · 288 code · 77 blank · 58 comment · 24 complexity · ea84ff575c0d3f13ab4f017208b7c215 MD5 · raw file

  1. /*************************************************************************
  2. * *
  3. * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
  4. * All rights reserved. Email: russ@q12.org Web: www.q12.org *
  5. * *
  6. * This library is free software; you can redistribute it and/or *
  7. * modify it under the terms of EITHER: *
  8. * (1) The GNU Lesser General Public License as published by the Free *
  9. * Software Foundation; either version 2.1 of the License, or (at *
  10. * your option) any later version. The text of the GNU Lesser *
  11. * General Public License is included with this library in the *
  12. * file LICENSE.TXT. *
  13. * (2) The BSD-style license that is included with this library in *
  14. * the file LICENSE-BSD.TXT. *
  15. * *
  16. * This library is distributed in the hope that it will be useful, *
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
  19. * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
  20. * *
  21. *************************************************************************/
  22. /*
  23. TODO
  24. ----
  25. * gettimeofday() and the pentium time stamp counter return the real time,
  26. not the process time. fix this somehow!
  27. */
  28. #include <ode/common.h>
  29. #include <ode/timer.h>
  30. #include "config.h"
  31. #include "util.h"
// misc defines
// ALLOCA is this file's local alias for dALLOCA16, which is defined outside
// this file. dTimerReport() uses it to obtain a temporary array of doubles.
// NOTE(review): presumably a stack allocation - confirm against the ODE headers.
#define ALLOCA dALLOCA16
  34. //****************************************************************************
  35. // implementation for windows based on the multimedia performance counter.
  36. #ifdef WIN32
  37. #include "windows.h"
  38. static inline void getClockCount (unsigned long cc[2])
  39. {
  40. LARGE_INTEGER a;
  41. QueryPerformanceCounter (&a);
  42. cc[0] = a.LowPart;
  43. cc[1] = a.HighPart;
  44. }
// No-op in this implementation. It exists so that the stopwatch and code
// timer functions can call serialize() regardless of which timer
// implementation was compiled in.
static inline void serialize()
{
}
  48. static inline double loadClockCount (unsigned long cc[2])
  49. {
  50. LARGE_INTEGER a;
  51. a.LowPart = cc[0];
  52. a.HighPart = cc[1];
  53. return double(a.QuadPart);
  54. }
  55. double dTimerResolution()
  56. {
  57. return 1.0/dTimerTicksPerSecond();
  58. }
  59. double dTimerTicksPerSecond()
  60. {
  61. static int query=0;
  62. static double hz=0.0;
  63. if (!query) {
  64. LARGE_INTEGER a;
  65. QueryPerformanceFrequency (&a);
  66. hz = double(a.QuadPart);
  67. query = 1;
  68. }
  69. return hz;
  70. }
  71. #endif
  72. //****************************************************************************
  73. // implementation based on the pentium time stamp counter. the timer functions
  74. // can be serializing or non-serializing. serializing will ensure that all
  75. // instructions have executed and data has been written back before the cpu
  76. // time stamp counter is read. the CPUID instruction is used to serialize.
  77. #if defined(PENTIUM) && !defined(WIN32)
// we need to know the clock rate so that the timing functions can report
// accurate times. this number only needs to be set accurately if we're
// doing performance tests and care about real-world time numbers - otherwise,
// just ignore this. i have not worked out how to determine this number
// automatically yet.
// the value is in ticks per second: dTimerTicksPerSecond() returns it
// directly and dTimerResolution() returns its reciprocal.
#define PENTIUM_HZ (500e6)
// Read the CPU time stamp counter with RDTSC and store it through cc.
// The "S" input constraint places the cc pointer in esi/rsi. The asm then
// writes eax to byte offset 0 and edx to byte offset 4 from that pointer.
// The two branches differ only in the width of the address register.
// The "memory" clobber tells the compiler the asm writes memory it was not
// given as an output operand.
// NOTE(review): both stores are 32-bit (movl). If unsigned long is 8 bytes on
// X86_64_SYSTEM builds, the whole count lands in cc[0] and cc[1] is never
// written - confirm; loadClockCount() only reads 64 bits starting at a[0].
static inline void getClockCount (unsigned long cc[2])
{
#ifndef X86_64_SYSTEM
  asm volatile (
    "rdtsc\n"
    "movl %%eax,(%%esi)\n"
    "movl %%edx,4(%%esi)\n"
    : : "S" (cc) : "%eax","%edx","cc","memory");
#else
  asm volatile (
    "rdtsc\n"
    "movl %%eax,(%%rsi)\n"
    "movl %%edx,4(%%rsi)\n"
    : : "S" (cc) : "%eax","%edx","cc","memory");
#endif
}
  100. static inline void serialize()
  101. {
  102. #ifndef X86_64_SYSTEM
  103. asm volatile (
  104. "mov $0,%%eax\n"
  105. "push %%ebx\n"
  106. "cpuid\n"
  107. "pop %%ebx\n"
  108. : : : "%eax","%ecx","%edx","cc","memory");
  109. #else
  110. asm volatile (
  111. "mov $0,%%rax\n"
  112. "push %%rbx\n"
  113. "cpuid\n"
  114. "pop %%rbx\n"
  115. : : : "%rax","%rcx","%rdx","cc","memory");
  116. #endif
  117. }
  118. static inline double loadClockCount (unsigned long a[2])
  119. {
  120. double ret;
  121. #ifndef X86_64_SYSTEM
  122. asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
  123. "cc","memory");
  124. #else
  125. asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
  126. "cc","memory");
  127. #endif
  128. return ret;
  129. }
  130. double dTimerResolution()
  131. {
  132. return 1.0/PENTIUM_HZ;
  133. }
  134. double dTimerTicksPerSecond()
  135. {
  136. return PENTIUM_HZ;
  137. }
  138. #endif
  139. //****************************************************************************
  140. // otherwise, do the implementation based on gettimeofday().
  141. #if !defined(PENTIUM) && !defined(WIN32)
  142. #ifndef macintosh
  143. #include <sys/time.h>
  144. #include <unistd.h>
  145. static inline void getClockCount (unsigned long cc[2])
  146. {
  147. struct timeval tv;
  148. gettimeofday (&tv,0);
  149. cc[0] = tv.tv_usec;
  150. cc[1] = tv.tv_sec;
  151. }
  152. #else // macintosh
  153. #include <CoreServices/CoreServices.h>
  154. #include <ode/Timer.h>
  155. static inline void getClockCount (unsigned long cc[2])
  156. {
  157. UnsignedWide ms;
  158. Microseconds (&ms);
  159. cc[1] = ms.lo / 1000000;
  160. cc[0] = ms.lo - ( cc[1] * 1000000 );
  161. }
  162. #endif
// No-op in this implementation. It exists so that the stopwatch and code
// timer functions can call serialize() regardless of which timer
// implementation was compiled in.
static inline void serialize()
{
}
  166. static inline double loadClockCount (unsigned long a[2])
  167. {
  168. return a[1]*1.0e6 + a[0];
  169. }
  170. double dTimerResolution()
  171. {
  172. unsigned long cc1[2],cc2[2];
  173. getClockCount (cc1);
  174. do {
  175. getClockCount (cc2);
  176. }
  177. while (cc1[0]==cc2[0] && cc1[1]==cc2[1]);
  178. do {
  179. getClockCount (cc1);
  180. }
  181. while (cc1[0]==cc2[0] && cc1[1]==cc2[1]);
  182. double t1 = loadClockCount (cc1);
  183. double t2 = loadClockCount (cc2);
  184. return (t1-t2) / dTimerTicksPerSecond();
  185. }
  186. double dTimerTicksPerSecond()
  187. {
  188. return 1000000;
  189. }
  190. #endif
  191. //****************************************************************************
  192. // stop watches
  193. void dStopwatchReset (dStopwatch *s)
  194. {
  195. s->time = 0;
  196. s->cc[0] = 0;
  197. s->cc[1] = 0;
  198. }
  199. void dStopwatchStart (dStopwatch *s)
  200. {
  201. serialize();
  202. getClockCount (s->cc);
  203. }
  204. void dStopwatchStop (dStopwatch *s)
  205. {
  206. unsigned long cc[2];
  207. serialize();
  208. getClockCount (cc);
  209. double t1 = loadClockCount (s->cc);
  210. double t2 = loadClockCount (cc);
  211. s->time += t2-t1;
  212. }
  213. double dStopwatchTime (dStopwatch *s)
  214. {
  215. return s->time / dTimerTicksPerSecond();
  216. }
  217. //****************************************************************************
  218. // code timers
// maximum number of events to record
#define MAXNUM 100
// `num' is set to 1 by dTimerStart() and incremented by dTimerNow() and
// dTimerEnd() for as long as it is below MAXNUM.
static int num = 0; // number of entries used in event array
// One slot per recorded event. `cc' and `description' are overwritten on
// every timing run; `total_t', `total_p' and `count' are accumulated by
// dTimerReport() across runs and are used for its optional averages.
static struct {
  unsigned long cc[2]; // clock counts
  double total_t; // total clocks used in this slot.
  double total_p; // total percentage points used in this slot.
  int count; // number of times this slot has been updated.
  const char *description; // pointer to static string
} event[MAXNUM];
  229. // make sure all slot totals and counts reset to 0 at start
  230. static void initSlots()
  231. {
  232. static int initialized=0;
  233. if (!initialized) {
  234. for (int i=0; i<MAXNUM; i++) {
  235. event[i].count = 0;
  236. event[i].total_t = 0;
  237. event[i].total_p = 0;
  238. }
  239. initialized = 1;
  240. }
  241. }
  242. void dTimerStart (const char *description)
  243. {
  244. initSlots();
  245. event[0].description = const_cast<char*> (description);
  246. num = 1;
  247. serialize();
  248. getClockCount (event[0].cc);
  249. }
  250. void dTimerNow (const char *description)
  251. {
  252. if (num < MAXNUM) {
  253. // do not serialize
  254. getClockCount (event[num].cc);
  255. event[num].description = const_cast<char*> (description);
  256. num++;
  257. }
  258. }
  259. void dTimerEnd()
  260. {
  261. if (num < MAXNUM) {
  262. serialize();
  263. getClockCount (event[num].cc);
  264. event[num].description = "TOTAL";
  265. num++;
  266. }
  267. }
  268. //****************************************************************************
  269. // print report
  270. static void fprintDoubleWithPrefix (FILE *f, double a, const char *fmt)
  271. {
  272. if (a >= 0.999999) {
  273. fprintf (f,fmt,a);
  274. return;
  275. }
  276. a *= 1000.0;
  277. if (a >= 0.999999) {
  278. fprintf (f,fmt,a);
  279. fprintf (f,"m");
  280. return;
  281. }
  282. a *= 1000.0;
  283. if (a >= 0.999999) {
  284. fprintf (f,fmt,a);
  285. fprintf (f,"u");
  286. return;
  287. }
  288. a *= 1000.0;
  289. fprintf (f,fmt,a);
  290. fprintf (f,"n");
  291. }
  292. void dTimerReport (FILE *fout, int average)
  293. {
  294. int i;
  295. size_t maxl;
  296. double ccunit = 1.0/dTimerTicksPerSecond();
  297. fprintf (fout,"\nTimer Report (");
  298. fprintDoubleWithPrefix (fout,ccunit,"%.2f ");
  299. fprintf (fout,"s resolution)\n------------\n");
  300. if (num < 1) return;
  301. // get maximum description length
  302. maxl = 0;
  303. for (i=0; i<num; i++) {
  304. size_t l = strlen (event[i].description);
  305. if (l > maxl) maxl = l;
  306. }
  307. // calculate total time
  308. double t1 = loadClockCount (event[0].cc);
  309. double t2 = loadClockCount (event[num-1].cc);
  310. double total = t2 - t1;
  311. if (total <= 0) total = 1;
  312. // compute time difference for all slots except the last one. update totals
  313. double *times = (double*) ALLOCA (num * sizeof(double));
  314. for (i=0; i < (num-1); i++) {
  315. double t1 = loadClockCount (event[i].cc);
  316. double t2 = loadClockCount (event[i+1].cc);
  317. times[i] = t2 - t1;
  318. event[i].count++;
  319. event[i].total_t += times[i];
  320. event[i].total_p += times[i]/total * 100.0;
  321. }
  322. // print report (with optional averages)
  323. for (i=0; i<num; i++) {
  324. double t,p;
  325. if (i < (num-1)) {
  326. t = times[i];
  327. p = t/total * 100.0;
  328. }
  329. else {
  330. t = total;
  331. p = 100.0;
  332. }
  333. fprintf (fout,"%-*s %7.2fms %6.2f%%",(int)maxl,event[i].description,
  334. t*ccunit * 1000.0, p);
  335. if (average && i < (num-1)) {
  336. fprintf (fout," (avg %7.2fms %6.2f%%)",
  337. (event[i].total_t / event[i].count)*ccunit * 1000.0,
  338. event[i].total_p / event[i].count);
  339. }
  340. fprintf (fout,"\n");
  341. }
  342. fprintf (fout,"\n");
  343. }