PageRenderTime 26ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/volk/kernels/volk/volk_32f_invsqrt_32f.h

https://github.com/balister/GNU-Radio
C Header | 77 lines | 50 code | 14 blank | 13 comment | 3 complexity | f5fd8cae54133e93233e5fc43302ed4d MD5 | raw file
  1. #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
  2. #define INCLUDED_volk_32f_invsqrt_32f_a_H
  3. #include <inttypes.h>
  4. #include <stdio.h>
  5. #include <math.h>
  /*!
    \brief Fast approximate reciprocal square root, 1/sqrt(number).

    Reinterprets the IEEE-754 single-precision bit pattern of the input as a
    32-bit integer, derives an initial estimate from it with the well-known
    magic constant 0x5f3759df, and refines that estimate with one
    Newton-Raphson step (relative error is roughly 0.2% for normal inputs).

    The bit reinterpretation goes through a union of exactly 32-bit members.
    Casting the float's address to a long pointer, as earlier revisions did,
    breaks the aliasing rules and reads 8 bytes from a 4-byte object on
    platforms where long is 64 bits wide.

    \param number Value to invert; expected to be a positive, finite float.
                  Zero, negative, or non-finite inputs give meaningless results.
    \return Approximation of 1/sqrt(number)
  */
  static inline float Q_rsqrt( float number )
  {
    union { float f; uint32_t u; } conv;
    const float threehalfs = 1.5F;
    const float x2 = number * 0.5F;
    float y;

    conv.f = number;
    /* Halving the bit pattern roughly halves the exponent (a square root);
       subtracting it from the magic constant negates the exponent and
       corrects the mantissa, giving a first guess at 1/sqrt(number). */
    conv.u = 0x5f3759dfu - ( conv.u >> 1 );
    y = conv.f;

    /* One Newton-Raphson step on f(y) = 1/y^2 - number. Repeating this line
       would add precision at the cost of extra multiplies. */
    y = y * ( threehalfs - ( x2 * y * y ) );
    return y;
  }
  20. #ifdef LV_HAVE_SSE
  21. #include <xmmintrin.h>
  /*!
    \brief Computes an approximate reciprocal square root of every element of aVector and stores the results in cVector
    \param cVector The vector where the results will be stored
    \param aVector The input vector whose elements are to be invsqrted
    \param num_points The number of values in aVector to be invsqrted and stored into cVector

    Elements are processed four at a time with _mm_rsqrt_ps using aligned
    loads and stores, so both pointers must satisfy the alignment required by
    _mm_load_ps / _mm_store_ps. The remaining (num_points % 4) elements are
    handled one by one with Q_rsqrt; the two paths use different
    approximations, so tail results may differ slightly in accuracy from the
    vectorized ones.
  */
  static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
    const unsigned int vecBlocks = num_points / 4;
    const float* src = aVector;
    float* dst = cVector;
    unsigned int idx;

    /* Vector part: one 4-float SSE register per iteration */
    for(idx = 0; idx < vecBlocks; idx++, src += 4, dst += 4){
      const __m128 in = _mm_load_ps(src);
      const __m128 out = _mm_rsqrt_ps(in);
      _mm_store_ps(dst, out);
    }

    /* Scalar tail: whatever did not fill a complete group of four */
    for(idx = vecBlocks * 4; idx < num_points; idx++){
      *dst++ = Q_rsqrt(*src++);
    }
  }
  46. #endif /* LV_HAVE_SSE */
  47. #ifdef LV_HAVE_GENERIC
  /*!
    \brief Computes an approximate reciprocal square root of every element of aVector and stores the results in cVector
    \param cVector The vector where the results will be stored
    \param aVector The input vector whose elements are to be invsqrted
    \param num_points The number of values in aVector to be invsqrted and stored into cVector

    Portable scalar implementation: each element is passed through Q_rsqrt.
  */
  static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
    unsigned int idx;

    for(idx = 0; idx < num_points; idx++){
      cVector[idx] = Q_rsqrt(aVector[idx]);
    }
  }
  62. #endif /* LV_HAVE_GENERIC */
  63. #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */