/src/Utils/SIMD.cpp

https://github.com/rfabbri/Gem · C++ · 220 lines · 169 code · 37 blank · 14 comment · 27 complexity · 5dd3a8f88278e9572f9930d167305dbe MD5 · raw file

  1. #include "SIMD.h"
  2. #include "Thread.h"
  3. #include "Gem/RTE.h"
  4. #include <string>
  5. int GemSIMD::cpuid = GEM_SIMD_NONE;
  6. int GemSIMD::realcpuid = GEM_SIMD_NONE;
  7. GemSIMD :: GemSIMD(void)
  8. {
  9. int compiledarchs=0;
  10. cpuid=simd_runtime_check();
  11. std::string compiledstr;
  12. #ifdef __MMX__
  13. if(compiledarchs>0)compiledstr+="/";
  14. compiledstr+="MMX";
  15. compiledarchs++;
  16. #endif
  17. #ifdef __SSE2__
  18. if(compiledarchs>0)compiledstr+="/";
  19. compiledstr+="SSE2";
  20. compiledarchs++;
  21. #endif
  22. #ifdef __VEC__
  23. if(compiledarchs>0)compiledstr+="/";
  24. compiledstr+="AltiVec";
  25. compiledarchs++;
  26. #endif
  27. if(compiledarchs>0) {
  28. verbose(-1, "GEM: compiled for %s architecture", compiledstr.c_str());
  29. }
  30. if(cpuid){
  31. std::string usingstr;
  32. switch (cpuid){
  33. #ifdef __SSE2__
  34. case GEM_SIMD_SSE2:
  35. usingstr="SSE2";
  36. break;
  37. #endif
  38. #ifdef __MMX__
  39. case GEM_SIMD_MMX:
  40. usingstr="MMX";
  41. break;
  42. #endif
  43. #ifdef __VEC__
  44. case GEM_SIMD_ALTIVEC:
  45. usingstr="AltiVec";
  46. break;
  47. #endif
  48. default:
  49. usingstr="no";
  50. break;
  51. case 0: /* this should never happen but is here for compilers who hate to "switch" with only one "case" */
  52. usingstr="invalid";
  53. }
  54. verbose(-1, "GEM: using %s optimization", usingstr.c_str());
  55. verbose(-1, "GEM: detected %d CPUs", gem::thread::getCPUCount());
  56. }
  57. }
  58. GemSIMD :: ~GemSIMD()
  59. {
  60. }
  61. int GemSIMD :: requestCPU(int req_cpuid)
  62. {
  63. if(GEM_SIMD_ALTIVEC==realcpuid && (GEM_SIMD_SSE2==req_cpuid || GEM_SIMD_MMX==req_cpuid))// invalid selection
  64. return cpuid;
  65. if(GEM_SIMD_ALTIVEC==req_cpuid && (GEM_SIMD_SSE2==realcpuid || GEM_SIMD_MMX==realcpuid))// invalid selection
  66. return cpuid;
  67. if(realcpuid<req_cpuid)
  68. { /* requested too much! */
  69. cpuid=realcpuid;
  70. }
  71. else
  72. { /* fine! */
  73. cpuid=req_cpuid;
  74. }
  75. return cpuid;
  76. }
  77. int GemSIMD :: getCPU()
  78. {
  79. return cpuid;
  80. }
  81. int GemSIMD :: simd_runtime_check(void)
  82. {
  83. unsigned int eax=0, edx=0;
  84. #if defined(_WIN32) && defined(_MSC_VER)
  85. unsigned int feature;
  86. #define _MMX_FEATURE_BIT 0x00800000
  87. /* on w32 we assume that there is only x86 */
  88. /* _MSC_VER and __GNUC__ are different in how you inline assember */
  89. __asm
  90. {
  91. push ebx
  92. push ecx
  93. push edx
  94. xor eax,eax
  95. cpuid
  96. mov eax, 1
  97. cpuid
  98. mov feature,edx
  99. pop ebx
  100. pop ecx
  101. pop edx
  102. }
  103. if(feature & 1<<26) {
  104. realcpuid=GEM_SIMD_SSE2;
  105. return realcpuid;
  106. }
  107. if(feature & 1<<23) {
  108. realcpuid=GEM_SIMD_MMX;
  109. return realcpuid;
  110. }
  111. #elif defined (__GNUC__)
  112. # if defined (__POWERPC__)
  113. /* detecting whether a powerPC supports AltiVec or not seems to be complicated.
  114. * therefore we rely on the compile-time setting (preprocessor)
  115. * see also http://lists.debian.org/debian-powerpc/2004/01/msg00106.html
  116. */
  117. # if defined __VEC__
  118. realcpuid=GEM_SIMD_ALTIVEC;
  119. return realcpuid;
  120. # endif /* __VEC__ */
  121. # elif (defined(_X86_) || defined(__i386__) || defined(__i586__) || defined(__i686__))
  122. __asm__("push %%ebx \n" /* ebx might be used as PIC register :-( */
  123. "cpuid \n"
  124. "pop %%ebx \n"
  125. : "=a"(eax),"=d"(edx) : "a" (1): "cx");
  126. # elif defined (__x86_64__)
  127. /* for x86_64 */
  128. # if 0
  129. __asm__("mov %%bx, %%si \n"
  130. "cpuid \n"
  131. "xchg %%bx, %%si \n"
  132. : "=a"(eax),"=d"(edx)
  133. : "a" (1)
  134. : "cx", "si");
  135. # else
  136. // x86_64 always supports MMX and SSE2
  137. edx |= (1<<23); // MMX
  138. edx |= (1<<25); // SSE
  139. edx |= (1<<26); // SSE2
  140. # endif
  141. # endif
  142. /* AltiVec should have been handled above */
  143. /* now comes the parsing of the cpuid on x86 hardware
  144. * see http://www.sandpile.org/ia32/cpuid.htm for what which bit is
  145. */
  146. # ifdef __SSE2__
  147. if(edx & 1<<26){ // SSE2
  148. realcpuid=GEM_SIMD_SSE2;
  149. return realcpuid;
  150. }
  151. # endif
  152. # ifdef __SSE__
  153. if(edx & 1<<25){ // SSE
  154. }
  155. # endif
  156. # ifdef __MMX__
  157. if(edx & 1<<23){ // MMX
  158. realcpuid=GEM_SIMD_MMX;
  159. return realcpuid;
  160. }
  161. # endif
  162. #endif /* __GNUC__ */
  163. realcpuid=GEM_SIMD_NONE;
  164. return realcpuid;
  165. }
  166. // =========================================================================================================
  167. #if 0
  168. // a collection of CPU detection code...
  169. unsigned int OGPProcessorHasAltivec(void)
  170. {
  171. static int _OGPProcessorHasAltivec = -1;
  172. if (_OGPProcessorHasAltivec < 0) {
  173. int name[] = {CTL_HW, HW_VECTORUNIT};
  174. size_t size;
  175. size = sizeof(_OGPProcessorHasAltivec);
  176. if (sysctl(name, 2, &_OGPProcessorHasAltivec, &size, NULL, 0) < 0) {
  177. perror("sysctl");
  178. _OGPProcessorHasAltivec = 0;
  179. }
  180. }
  181. return _OGPProcessorHasAltivec;
  182. }
  183. #endif