/Q_rsqrt_test.c

https://github.com/Necropolis/Q_rsqrt · C · 158 lines · 126 code · 29 blank · 3 comment · 30 complexity · 005631cd5a3f662b19f92c8acedf8f05 MD5 · raw file

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdint.h>
  4. #include <math.h>
  5. #include <assert.h>
  6. #include <unistd.h>
  7. #include <getopt.h>
  8. float Q_rsqrt( float number );
  9. uint64_t r;
  10. uint64_t rdtsc() {
  11. asm("cpuid\n\t"
  12. "rdtsc"
  13. : "=A" (r)
  14. : /* no inputs */ );
  15. return r;
  16. }
  17. typedef enum {
  18. arg_fBegin=0,
  19. arg_fStep,
  20. arg_fEnd,
  21. arg_outfile
  22. } arg_names;
  23. #define INNER_ITERATION 10000
  24. int main ( int argc, char * argv[] ) {
  25. float fBegin, fStep, fEnd, f, r0, r1, rQ, rF;
  26. char * outfile;
  27. int c;
  28. int option_index = 0;
  29. uint64_t begin, end, q_avg, i_avg;
  30. size_t i;
  31. while (1) {
  32. static struct option long_options[] = {
  33. {"fBegin", required_argument, 0, 0},
  34. {"fStep", required_argument, 0, 0},
  35. {"fEnd", required_argument, 0, 0},
  36. {"outfile",required_argument, 0, 0},
  37. {0, 0, 0, 0}
  38. };
  39. c = getopt_long (argc, argv, "", long_options, &option_index);
  40. if (c==-1)
  41. break;
  42. switch(c) {
  43. case 0:
  44. if(long_options[option_index].flag != 0)
  45. break;
  46. if(option_index == arg_fBegin)
  47. fBegin = (float)atof(optarg);
  48. else if(option_index == arg_fStep)
  49. fStep = (float)atof(optarg);
  50. else if(option_index == arg_fEnd)
  51. fEnd = (float)atof(optarg);
  52. else if(option_index == arg_outfile)
  53. outfile = optarg;
  54. break;
  55. case '?':
  56. break;
  57. default:
  58. break;
  59. }
  60. }
  61. if (fBegin > fEnd) {
  62. printf("Start cannot be greater than end!\n");
  63. return 0;
  64. }
  65. printf("Iterating from %f to %f on %f increments. Will perform %d iterations.\n",
  66. fBegin,
  67. fEnd,
  68. fStep,
  69. ((int)((fEnd - fBegin) / fStep)));
  70. for ( float fIter = fBegin;
  71. fIter < fEnd;
  72. fIter += fStep ) {
  73. q_avg = 0;
  74. for ( i = 0;
  75. i < INNER_ITERATION;
  76. ++i ) {
  77. begin = rdtsc();
  78. r0 = Q_rsqrt(fIter);
  79. end = rdtsc();
  80. if (i > 0)
  81. assert(r0 == r1);
  82. r1 = r0;
  83. if (end - begin > 700) // pitch the timings
  84. --i;
  85. else if (i > 0)
  86. q_avg = ((uint64_t)((q_avg + (end - begin)) / 2.0f));
  87. else
  88. q_avg = end - begin;
  89. }
  90. rQ = r0;
  91. i_avg = 0;
  92. for ( i = 0;
  93. i < INNER_ITERATION;
  94. ++i ) {
  95. begin = rdtsc();
  96. r0 = 1.0f / sqrt(fIter);
  97. end = rdtsc();
  98. if (i > 0)
  99. assert(r0 == r1);
  100. r1 = r0;
  101. if (end - begin > 700) // toss the timing
  102. --i;
  103. else if (i > 0)
  104. i_avg = ((uint64_t)((i_avg + (end - begin)) / 2.0f));
  105. else
  106. i_avg = end - begin;
  107. }
  108. rF = r0;
  109. printf("Q_rsqrt result: %f\tAvg: %llu%s\n", rQ, q_avg, (q_avg < i_avg)?" (*)":"");
  110. printf("fsqrt result: %f\tAvg: %llu%s\n", r0, i_avg, (q_avg > i_avg)?" (*)":"");
  111. printf("Error: %f\n\n", rF - rQ);
  112. }
  113. return 0;
  114. }
  115. /*
  116. ** float q_rsqrt( float number )
  117. */
  118. float Q_rsqrt( float number ) {
  119. long i;
  120. float x2, y;
  121. const float threehalfs = 1.5F;
  122. x2 = number * 0.5F;
  123. y = number;
  124. i = * ( long * ) &y; // evil floating point bit level hacking
  125. i = 0x5f3759df - ( i >> 1 ); // what the fuck?
  126. y = * ( float * ) &i;
  127. y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
  128. return y;
  129. }