/SDK/bullet/Test/Source/Tests/Test_mindot.cpp

https://bitbucket.org/wlitzlbauer/spacecrafts_old · C++ · 269 lines · 218 code · 41 blank · 10 comment · 45 complexity · 8fcd5bd8b8a1d512e206d7141a3c79ee MD5 · raw file

  1. //
  2. // Test_mindot.cpp
  3. // BulletTest
  4. //
  5. // Copyright (c) 2011 Apple Inc.
  6. //
  7. #include "LinearMath/btScalar.h"
  8. #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
  9. #include "Test_mindot.h"
  10. #include "vector.h"
  11. #include "Utils.h"
  12. #include "main.h"
  13. #include <math.h>
  14. #include <string.h>
  15. #include <LinearMath/btVector3.h>
  16. // reference code for testing purposes
  17. static long mindot_ref( const btSimdFloat4 *vertices,
  18. float *vec,
  19. size_t count,
  20. float *dotResult );
  21. #ifdef __arm__
  22. #define MAX_LOG2_SIZE 9
  23. #else
  24. #define MAX_LOG2_SIZE 9
  25. #endif
  26. #define MAX_SIZE (1U << MAX_LOG2_SIZE)
  27. #define LOOPCOUNT 100
  28. int Test_mindot(void)
  29. {
  30. // Init an array flanked by guard pages
  31. btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
  32. float *fp = (float*) data;
  33. long correct, test;
  34. btVector3 localScaling( 0.1f, 0.2f, 0.3f);
  35. size_t size;
  36. // Init the data
  37. size_t i;
  38. for( i = 0; i < MAX_SIZE; i++ )
  39. {
  40. fp[4*i] = (int32_t) RANDF_16;
  41. fp[4*i+1] = (int32_t) RANDF_16;
  42. fp[4*i+2] = (int32_t) RANDF_16;
  43. fp[4*i+3] = BT_NAN; // w channel NaN
  44. }
  45. float correctDot, testDot;
  46. fp = (float*) localScaling;
  47. float maxRelativeError = 0.f;
  48. for( size = 1; size <= MAX_SIZE; size++ )
  49. {
  50. float *in = (float*)(data + MAX_SIZE - size);
  51. size_t position;
  52. for( position = 0; position < size; position++ )
  53. {
  54. float *biggest = in + position * 4;
  55. float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
  56. biggest[0] -= LARGE_FLOAT17;
  57. biggest[1] -= LARGE_FLOAT17;
  58. biggest[2] -= LARGE_FLOAT17;
  59. biggest[3] -= LARGE_FLOAT17;
  60. correctDot = BT_NAN;
  61. testDot = BT_NAN;
  62. correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
  63. test = localScaling.minDot( (btVector3*) in, size, testDot);
  64. if( test < 0 || test >= size )
  65. {
  66. vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
  67. continue;
  68. }
  69. if( correct != test )
  70. {
  71. vlog( "Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
  72. fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
  73. fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
  74. return 1;
  75. }
  76. if( test != position )
  77. {
  78. vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
  79. fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2],
  80. fp[0] * in[4*position] + fp[1] * in[4*position+1] + fp[2] * in[4*position+2] );
  81. return 1;
  82. }
  83. if( correctDot != testDot )
  84. {
  85. float relativeError = btFabs((testDot - correctDot) / correctDot);
  86. if (relativeError>1e6)
  87. {
  88. vlog( "Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
  89. fp[0] * in[4*correct] + fp[1] * in[4*correct+1] + fp[2] * in[4*correct+2],
  90. fp[0] * in[4*test] + fp[1] * in[4*test+1] + fp[2] * in[4*test+2] );
  91. return 1;
  92. } else
  93. {
  94. if (maxRelativeError < relativeError)
  95. {
  96. maxRelativeError = relativeError;
  97. }
  98. }
  99. }
  100. memcpy( biggest, old, 16 );
  101. }
  102. }
  103. if (maxRelativeError)
  104. {
  105. printf("Warning: relative error = %e\n", maxRelativeError);
  106. }
  107. uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
  108. uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
  109. size_t j, k;
  110. float *in = (float*) data;
  111. for( size = 1; size <= 32; size++ )
  112. {
  113. uint64_t startTime, bestTime, currentTime;
  114. bestTime = -1LL;
  115. scalarTimes[size] = 0;
  116. for (j = 0; j < 100; j++) {
  117. startTime = ReadTicks();
  118. for( k = 0; k < LOOPCOUNT; k++ )
  119. correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
  120. currentTime = ReadTicks() - startTime;
  121. scalarTimes[size] += currentTime;
  122. if( currentTime < bestTime )
  123. bestTime = currentTime;
  124. }
  125. if( 0 == gReportAverageTimes )
  126. scalarTimes[size] = bestTime;
  127. else
  128. scalarTimes[size] /= 100;
  129. }
  130. uint64_t *timep = &scalarTimes[33];
  131. for( size = 64; size <= MAX_SIZE; size *= 2 )
  132. {
  133. uint64_t startTime, bestTime, currentTime;
  134. bestTime = -1LL;
  135. timep[0] =0;
  136. for (j = 0; j < 100; j++) {
  137. startTime = ReadTicks();
  138. for( k = 0; k < LOOPCOUNT; k++ )
  139. correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
  140. currentTime = ReadTicks() - startTime;
  141. timep[0] += currentTime;
  142. if( currentTime < bestTime )
  143. bestTime = currentTime;
  144. }
  145. if( 0 == gReportAverageTimes )
  146. timep[0] = bestTime;
  147. else
  148. timep[0] /= 100;
  149. timep++;
  150. }
  151. for( size = 1; size <= 32; size++ )
  152. {
  153. uint64_t startTime, bestTime, currentTime;
  154. bestTime = -1LL;
  155. vectorTimes[size] = 0;
  156. for (j = 0; j < 100; j++) {
  157. startTime = ReadTicks();
  158. for( k = 0; k < LOOPCOUNT; k++ )
  159. test += localScaling.minDot( (btVector3*) in, size, testDot);
  160. currentTime = ReadTicks() - startTime;
  161. vectorTimes[size] += currentTime;
  162. if( currentTime < bestTime )
  163. bestTime = currentTime;
  164. }
  165. if( 0 == gReportAverageTimes )
  166. vectorTimes[size] = bestTime;
  167. else
  168. vectorTimes[size] /= 100;
  169. }
  170. timep = &vectorTimes[33];
  171. for( size = 64; size <= MAX_SIZE; size *= 2 )
  172. {
  173. uint64_t startTime, bestTime, currentTime;
  174. bestTime = -1LL;
  175. timep[0] =0;
  176. for (j = 0; j < 100; j++) {
  177. startTime = ReadTicks();
  178. for( k = 0; k < LOOPCOUNT; k++ )
  179. test += localScaling.minDot( (btVector3*) in, size, testDot);
  180. currentTime = ReadTicks() - startTime;
  181. timep[0] += currentTime;
  182. if( currentTime < bestTime )
  183. bestTime = currentTime;
  184. }
  185. if( 0 == gReportAverageTimes )
  186. timep[0] = bestTime;
  187. else
  188. timep[0] /= 100;
  189. timep++;
  190. }
  191. vlog( "Timing:\n" );
  192. vlog( " size\t scalar\t vector\n" );
  193. for( size = 1; size <= 32; size++ )
  194. vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
  195. size_t index = 33;
  196. for( size = 64; size <= MAX_SIZE; size *= 2 )
  197. {
  198. vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
  199. index++;
  200. }
  201. // Useless check to make sure that the timing loops are not optimized away
  202. if( test != correct )
  203. vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
  204. GuardFree(data);
  205. return 0;
  206. }
  207. static long mindot_ref( const btSimdFloat4 *vertices,
  208. float *vec,
  209. size_t count,
  210. float *dotResult )
  211. {
  212. const float *dp = (const float*) vertices;
  213. float minDot = BT_INFINITY;
  214. long i = 0;
  215. long ptIndex = -1;
  216. for( i = 0; i < count; i++ )
  217. {
  218. float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2]; dp += 4;
  219. if( dot < minDot )
  220. {
  221. minDot = dot;
  222. ptIndex = i;
  223. }
  224. }
  225. *dotResult = minDot;
  226. return ptIndex;
  227. }
  228. #endif //BT_USE_SSE