PageRenderTime 53ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/src/core/cpuid.d

http://github.com/ldc-developers/druntime
D | 1150 lines | 784 code | 63 blank | 303 comment | 143 complexity | e1e3b8aa071c03b58c9da5d18c3c147e MD5 | raw file
  1. /**
  2. * Identify the characteristics of the host CPU, providing information
  3. * about cache sizes and assembly optimisation hints. This module is
  4. * provided primarily for assembly language programmers.
  5. *
  6. * References:
  7. * Some of this information was extremely difficult to track down. Some of the
  8. * documents below were found only in cached versions stored by search engines!
  9. * This code relies on information found in:
  10. *
  11. * $(UL
  12. * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
  13. * Volume 2A: Instruction Set Reference, A-M" (2007).
  14. * )
  15. * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
  16. * )
  17. * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
  18. * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
  19. * )
  20. * $(LI "AMD Geode(TM) GX Processors Data Book",
  21. * Advanced Micro Devices, Publication ID 31505E, (2005).
  22. * )
  23. * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
  24. * )
  25. * $(LI "Application note 106: Software Customization for the 6x86 Family",
  26. * Cyrix Corporation, Rev 1.5 (1998)
  27. * )
  28. * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
  29. * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
  30. * National Semiconductor, (2002)
  31. * )
  32. * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
  33. * )
  34. * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
  35. * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
  36. * $(LI "What every programmer should know about memory",
  37. * Ulrich Drepper, Red Hat, Inc., (2007).
  38. * )
  39. * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
  40. * $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
  41. * )
  42. * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
  43. * Note 485" (2009).
  44. * )
  45. * )
  46. *
  47. * Bugs: Currently only works on x86 and Itanium CPUs.
  48. * Many processors have bugs in their microcode for the CPUID instruction,
  49. * so sometimes the cache information may be incorrect.
  50. *
  51. * Copyright: Copyright Don Clugston 2007 - 2009.
  52. * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
  53. * Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
  54. * Source: $(DRUNTIMESRC core/_cpuid.d)
  55. */
  56. module core.cpuid;
  57. version (GNU) version = GNU_OR_LDC;
  58. version (LDC) version = GNU_OR_LDC;
  59. @trusted:
  60. nothrow:
  61. @nogc:
  62. // If optimizing for a particular processor, it is generally better
  63. // to identify based on features rather than model. NOTE: Normally
  64. // it's only worthwhile to optimise for the latest Intel and AMD CPU,
  65. // with a backup for other CPUs.
  66. // Pentium -- preferPentium1()
  67. // PMMX -- + mmx()
  68. // PPro -- default
  69. // PII -- + mmx()
  70. // PIII -- + mmx() + sse()
  71. // PentiumM -- + mmx() + sse() + sse2()
  72. // Pentium4 -- preferPentium4()
  73. // PentiumD -- + isX86_64()
  74. // Core2 -- default + isX86_64()
  75. // AMD K5 -- preferPentium1()
  76. // AMD K6 -- + mmx()
  77. // AMD K6-II -- + mmx() + 3dnow()
  78. // AMD K7 -- preferAthlon()
  79. // AMD K8 -- + sse2()
  80. // AMD K10 -- + isX86_64()
  81. // Cyrix 6x86 -- preferPentium1()
  82. // 6x86MX -- + mmx()
  83. // GDC support uses extended inline assembly:
  84. // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html (general information and hints)
  85. // https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html (binding variables to registers)
  86. // https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
  87. public:
  /// Describes one level of the data cache hierarchy.
  struct CacheInfo
  {
      /// Cache capacity in kilobytes, per CPU.
      /// A unified (data + code) L1 cache is reported at half its physical
      /// size; larger caches are not halved, because in critical loops the
      /// data footprint normally dwarfs the code footprint.
      size_t size;

      /// Associativity, expressed as a number of ways, eg:
      /// $(UL
      /// $(LI 1 = direct mapped)
      /// $(LI 2 = 2-way set associative)
      /// $(LI 3 = 3-way set associative)
      /// $(LI ubyte.max = fully associative)
      /// )
      ubyte associativity;

      /// How many bytes are brought into the cache on a cache miss.
      uint lineSize;
  }
  107. public:
  108. /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
  109. // Note: When we deprecate it, we simply make it private.
  110. __gshared CacheInfo[5] datacache;
  111. @property pure
  112. {
  113. /// The data caches. If there are fewer than 5 physical cache levels,
  114. /// the remaining levels are set to size_t.max (== entire memory space)
  115. const(CacheInfo)[5] dataCaches() { return _dataCaches; }
  116. /// Returns vendor string, for display purposes only.
  117. /// Do NOT use this to determine features!
  118. /// Note that some CPUs have programmable vendorIDs.
  119. string vendor() {return _vendor;}
  120. /// Returns processor string, for display purposes only
  121. string processor() {return _processor;}
  122. /// Does it have an x87 FPU on-chip?
  123. bool x87onChip() {return _x87onChip;}
  124. /// Is MMX supported?
  125. bool mmx() {return _mmx;}
  126. /// Is SSE supported?
  127. bool sse() {return _sse;}
  128. /// Is SSE2 supported?
  129. bool sse2() {return _sse2;}
  130. /// Is SSE3 supported?
  131. bool sse3() {return _sse3;}
  132. /// Is SSSE3 supported?
  133. bool ssse3() {return _ssse3;}
  134. /// Is SSE4.1 supported?
  135. bool sse41() {return _sse41;}
  136. /// Is SSE4.2 supported?
  137. bool sse42() {return _sse42;}
  138. /// Is SSE4a supported?
  139. bool sse4a() {return _sse4a;}
  140. /// Is AES supported
  141. bool aes() {return _aes;}
  142. /// Is pclmulqdq supported
  143. bool hasPclmulqdq() {return _hasPclmulqdq;}
  144. /// Is rdrand supported
  145. bool hasRdrand() {return _hasRdrand;}
  146. /// Is AVX supported
  147. bool avx() {return _avx;}
  148. /// Is VEX-Encoded AES supported
  149. bool vaes() {return _vaes;}
  150. /// Is vpclmulqdq supported
  151. bool hasVpclmulqdq(){return _hasVpclmulqdq; }
  152. /// Is FMA supported
  153. bool fma() {return _fma;}
  154. /// Is FP16C supported
  155. bool fp16c() {return _fp16c;}
  156. /// Is AVX2 supported
  157. bool avx2() {return _avx2;}
  158. /// Is HLE (hardware lock elision) supported
  159. bool hle() {return _hle;}
  160. /// Is RTM (restricted transactional memory) supported
  161. bool rtm() {return _rtm;}
  162. /// Is rdseed supported
  163. bool hasRdseed() {return _hasRdseed;}
  164. /// Is SHA supported
  165. bool hasSha() {return _hasSha;}
  166. /// Is AMD 3DNOW supported?
  167. bool amd3dnow() {return _amd3dnow;}
  168. /// Is AMD 3DNOW Ext supported?
  169. bool amd3dnowExt() {return _amd3dnowExt;}
  170. /// Are AMD extensions to MMX supported?
  171. bool amdMmx() {return _amdMmx;}
  172. /// Is fxsave/fxrstor supported?
  173. bool hasFxsr() {return _hasFxsr;}
  174. /// Is cmov supported?
  175. bool hasCmov() {return _hasCmov;}
  176. /// Is rdtsc supported?
  177. bool hasRdtsc() {return _hasRdtsc;}
  178. /// Is cmpxchg8b supported?
  179. bool hasCmpxchg8b() {return _hasCmpxchg8b;}
  180. /// Is cmpxchg16b supported?
  181. bool hasCmpxchg16b() {return _hasCmpxchg16b;}
  182. /// Is SYSENTER/SYSEXIT supported?
  183. bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
  184. /// Is 3DNow prefetch supported?
  185. bool has3dnowPrefetch() {return _has3dnowPrefetch;}
  186. /// Are LAHF and SAHF supported in 64-bit mode?
  187. bool hasLahfSahf() {return _hasLahfSahf;}
  188. /// Is POPCNT supported?
  189. bool hasPopcnt() {return _hasPopcnt;}
  190. /// Is LZCNT supported?
  191. bool hasLzcnt() {return _hasLzcnt;}
  192. /// Is this an Intel64 or AMD 64?
  193. bool isX86_64() {return _isX86_64;}
  194. /// Is this an IA64 (Itanium) processor?
  195. bool isItanium() { return _isItanium; }
  196. /// Is hyperthreading supported?
  197. bool hyperThreading() { return _hyperThreading; }
  198. /// Returns number of threads per CPU
  199. uint threadsPerCPU() {return _threadsPerCPU;}
  200. /// Returns number of cores in CPU
  201. uint coresPerCPU() {return _coresPerCPU;}
  202. /// Optimisation hints for assembly code.
  203. ///
  204. /// For forward compatibility, the CPU is compared against different
  205. /// microarchitectures. For 32-bit x86, comparisons are made against
  206. /// the Intel PPro/PII/PIII/PM family.
  207. ///
  208. /// The major 32-bit x86 microarchitecture 'dynasties' have been:
  209. ///
  210. /// $(UL
  211. /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
  212. /// $(LI AMD Athlon (K7, K8, K10). )
  213. /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
  214. /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
  215. /// )
  216. ///
  217. /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
  218. /// Cyrix, Rise) were mostly in-order.
  219. ///
  220. /// Some new processors do not fit into the existing categories:
  221. ///
  222. /// $(UL
  223. /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
  224. /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
  225. /// )
  226. ///
  227. /// Within each dynasty, the optimisation techniques are largely
  228. /// identical (eg, use instruction pairing for group 4). Major
  229. /// instruction set improvements occur within each dynasty.
  230. /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
  231. bool preferAthlon() { return _preferAthlon; }
  232. /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
  233. bool preferPentium4() { return _preferPentium4; }
  234. /// Does this CPU perform better on Pentium I code than Pentium Pro code?
  235. bool preferPentium1() { return _preferPentium1; }
  236. }
  237. private immutable
  238. {
  239. /* These exist as immutables so that the query property functions can
  240. * be backwards compatible with code that called them with ().
  241. * Also, immutables can only be set by the static this().
  242. */
  243. const(CacheInfo)[5] _dataCaches;
  244. string _vendor;
  245. string _processor;
  246. bool _x87onChip;
  247. bool _mmx;
  248. bool _sse;
  249. bool _sse2;
  250. bool _sse3;
  251. bool _ssse3;
  252. bool _sse41;
  253. bool _sse42;
  254. bool _sse4a;
  255. bool _aes;
  256. bool _hasPclmulqdq;
  257. bool _hasRdrand;
  258. bool _avx;
  259. bool _vaes;
  260. bool _hasVpclmulqdq;
  261. bool _fma;
  262. bool _fp16c;
  263. bool _avx2;
  264. bool _hle;
  265. bool _rtm;
  266. bool _hasRdseed;
  267. bool _hasSha;
  268. bool _amd3dnow;
  269. bool _amd3dnowExt;
  270. bool _amdMmx;
  271. bool _hasFxsr;
  272. bool _hasCmov;
  273. bool _hasRdtsc;
  274. bool _hasCmpxchg8b;
  275. bool _hasCmpxchg16b;
  276. bool _hasSysEnterSysExit;
  277. bool _has3dnowPrefetch;
  278. bool _hasLahfSahf;
  279. bool _hasPopcnt;
  280. bool _hasLzcnt;
  281. bool _isX86_64;
  282. bool _isItanium;
  283. bool _hyperThreading;
  284. uint _threadsPerCPU;
  285. uint _coresPerCPU;
  286. bool _preferAthlon;
  287. bool _preferPentium4;
  288. bool _preferPentium1;
  289. }
  290. __gshared:
  291. // All these values are set only once, and never subsequently modified.
  292. public:
  293. /// $(RED Warning: This field will be turned into a property in a future release.)
  294. ///
  295. /// Processor type (vendor-dependent).
  296. /// This should be visible ONLY for display purposes.
  297. uint stepping, model, family;
  298. /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
  299. uint numCacheLevels = 1;
  300. /// The number of cache levels in the CPU.
  301. @property uint cacheLevels() { return numCacheLevels; }
  302. private:
  // Raw identification data gathered from the CPUID instruction.
  struct CpuFeatures
  {
      bool probablyIntel;   // true = _probably_ an Intel processor, might be faking
      bool probablyAMD;     // true = _probably_ an AMD or Hygon processor
      string processorName;

      // char.init is 0xFF, so both text buffers are explicitly zero-filled.
      char[12] vendorID = 0;
      char[48] processorNameBuffer = 0;

      // Feature words; all start out as 0 (the integer default).
      uint features;        // mmx, sse, sse2, hyperthreading, etc
      uint miscfeatures;    // sse3, etc.
      uint extfeatures;     // HLE, AVX2, RTM, etc.
      uint amdfeatures;     // 3DNow!, mmxext, etc
      uint amdmiscfeatures; // sse4a, sse5, svm, etc
      ulong xfeatures;      // XFEATURES_ENABLED_MASK

      uint maxCores = 1;
      uint maxThreads = 1;
  }
  319. CpuFeatures cpuFeatures;
  /* Returns the address of the module-level `cpuFeatures` record.
   *
   * The function is deliberately never inlined: callers keep the returned
   * pointer (cf) in a register and the optimizer cannot see where it came
   * from, so cf doesn't get "optimized away". Accessing the global data
   * through cf means fewer fixups are inserted into the executable image.
   */
  CpuFeatures* getCpuFeatures() @nogc nothrow
  {
      pragma(inline, false);
      CpuFeatures* features = &cpuFeatures;
      return features;
  }
  // Tests the HTT flag of the CPUID1_EDX feature word.
  // Note that this may indicate multi-core rather than hyperthreading.
  @property bool hyperThreadingBit()
  {
      immutable uint htt = cpuFeatures.features & HTT_BIT;
      return htt != 0;
  }
  332. // feature flags CPUID1_EDX
  333. enum : uint
  334. {
  335. FPU_BIT = 1,
  336. TIMESTAMP_BIT = 1<<4, // rdtsc
  337. MDSR_BIT = 1<<5, // RDMSR/WRMSR
  338. CMPXCHG8B_BIT = 1<<8,
  339. SYSENTERSYSEXIT_BIT = 1<<11,
  340. CMOV_BIT = 1<<15,
  341. MMX_BIT = 1<<23,
  342. FXSR_BIT = 1<<24,
  343. SSE_BIT = 1<<25,
  344. SSE2_BIT = 1<<26,
  345. HTT_BIT = 1<<28,
  346. IA64_BIT = 1<<30
  347. }
  348. // feature flags misc CPUID1_ECX
  349. enum : uint
  350. {
  351. SSE3_BIT = 1,
  352. PCLMULQDQ_BIT = 1<<1, // from AVX
  353. MWAIT_BIT = 1<<3,
  354. SSSE3_BIT = 1<<9,
  355. FMA_BIT = 1<<12, // from AVX
  356. CMPXCHG16B_BIT = 1<<13,
  357. SSE41_BIT = 1<<19,
  358. SSE42_BIT = 1<<20,
  359. POPCNT_BIT = 1<<23,
  360. AES_BIT = 1<<25, // AES instructions from AVX
  361. OSXSAVE_BIT = 1<<27, // Used for AVX
  362. AVX_BIT = 1<<28,
  363. FP16C_BIT = 1<<29,
  364. RDRAND_BIT = 1<<30,
  365. }
  366. // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
  367. enum : uint
  368. {
  369. FSGSBASE_BIT = 1 << 0,
  370. BMI1_BIT = 1 << 3,
  371. HLE_BIT = 1 << 4,
  372. AVX2_BIT = 1 << 5,
  373. SMEP_BIT = 1 << 7,
  374. BMI2_BIT = 1 << 8,
  375. ERMS_BIT = 1 << 9,
  376. INVPCID_BIT = 1 << 10,
  377. RTM_BIT = 1 << 11,
  378. RDSEED_BIT = 1 << 18,
  379. SHA_BIT = 1 << 29,
  380. }
  381. // feature flags XFEATURES_ENABLED_MASK
  382. enum : ulong
  383. {
  384. XF_FP_BIT = 0x1,
  385. XF_SSE_BIT = 0x2,
  386. XF_YMM_BIT = 0x4,
  387. }
  388. // AMD feature flags CPUID80000001_EDX
  389. enum : uint
  390. {
  391. AMD_MMX_BIT = 1<<22,
  392. // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
  393. FFXSR_BIT = 1<<25,
  394. PAGE1GB_BIT = 1<<26, // support for 1GB pages
  395. RDTSCP_BIT = 1<<27,
  396. AMD64_BIT = 1<<29,
  397. AMD_3DNOW_EXT_BIT = 1<<30,
  398. AMD_3DNOW_BIT = 1<<31
  399. }
  400. // AMD misc feature flags CPUID80000001_ECX
  401. enum : uint
  402. {
  403. LAHFSAHF_BIT = 1,
  404. LZCNT_BIT = 1<<5,
  405. SSE4A_BIT = 1<<6,
  406. AMD_3DNOW_PREFETCH_BIT = 1<<8,
  407. }
  408. version (GNU_OR_LDC) {
  409. version (X86)
  410. enum supportedX86 = true;
  411. else version (X86_64)
  412. enum supportedX86 = true;
  413. else
  414. enum supportedX86 = false;
  415. } else version (D_InlineAsm_X86) {
  416. enum supportedX86 = true;
  417. } else version (D_InlineAsm_X86_64) {
  418. enum supportedX86 = true;
  419. } else {
  420. enum supportedX86 = false;
  421. }
  422. static if (supportedX86) {
  423. // Note that this code will also work for Itanium in x86 mode.
  424. __gshared uint max_cpuid, max_extended_cpuid;
  // CPUID2: "cache and tlb information"
  //
  // Executes CPUID leaf 2 and decodes the one-byte descriptors it returns,
  // storing size (kB), associativity and line size of every recognised data
  // cache in `datacache`. Takes no arguments and returns nothing. The Cyrix
  // MediaGX special case fills in `datacache[0]` only and returns early.
  void getcacheinfoCPUID2()
  {
      // We are only interested in the data caches
      void decipherCpuid2(ubyte x) @nogc nothrow {
          if (x==0) return;
          // Values from http://www.sandpile.org/ia32/cpuid.htm.
          // Includes Itanium and non-Intel CPUs.
          //
          static immutable ubyte [63] ids = [
              // level 1 data cache (9 entries)
              0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
              // level 2 cache (12 + 8 + 9 = 29 entries)
              0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
              0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
              0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
              // level 3 cache
              0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,
              0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
              0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
          ];
          static immutable uint [63] sizes = [
              8, 16, 16, 64, 16, 24, 8, 16, 32,
              128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
              256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
              128, 192, 128, 256, 384, 512, 3072, 512, 128,
              512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,
              512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
              2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
          ];
          // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
          static immutable ubyte [63] ways = [
              2, 4, 4, 8, 8, 6, 4, 4, 4,
              4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
              8, 8, 8, 8, 4, 8, 16, 24,
              4, 6, 2, 4, 6, 4, 12, 8, 8,
              4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
              4, 4, 4, 8, 8, 8, 12, 12, 12,
              16, 16, 16, 24, 24, 24
          ];
          // Table index of the first L2 and of the first L3 descriptor.
          // They must match the row counts above (9 L1 entries, 29 L2 entries),
          // otherwise 0x68 is filed as L2 and 0x81 as L3.
          enum { FIRSTDATA2 = 9, FIRSTDATA3 = 29+9 }
          for (size_t i=0; i< ids.length; ++i) {
              if (x==ids[i]) {
                  // level is an index into datacache: 0 = L1, 1 = L2, 2 = L3.
                  int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                  if (x==0x49 && family==0xF && model==0x6) level=2;
                  datacache[level].size=sizes[i];
                  datacache[level].associativity=ways[i];
                  // Every L3 descriptor in the table has a 64-byte line, as
                  // do the L1/L2 descriptors listed explicitly here
                  // (0x0E = 24kB 6-way L1 with 64-byte lines).
                  if (level == 2 || x==0x2C || x==0x0D || x==0x0E
                      || (x>=0x48 && x<=0x80)
                      || x==0x86 || x==0x87
                      || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                      datacache[level].lineSize = 64;
                  } else datacache[level].lineSize = 32;
              }
          }
      }
      uint[4] a;
      bool firstTime = true;
      // On a multi-core system, this could theoretically fail, but it's only used
      // for old single-core CPUs.
      uint numinfos = 1;
      do {
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
          } else asm pure nothrow @nogc {
              mov EAX, 2;
              cpuid;
              mov a, EAX;
              mov a+4, EBX;
              mov a+8, ECX;
              mov a+12, EDX;
          }
          if (firstTime) {
              if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                  // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                  // These are NOT standard Intel values
                  // (TLB = 32 entry, 4 way associative, 4K pages)
                  // (L1 cache = 16K, 4way, linesize16)
                  datacache[0].size=8;
                  datacache[0].associativity=4;
                  datacache[0].lineSize=16;
                  return;
              }
              // The low byte of EAX is how many times to loop.
              numinfos = a[0] & 0xFF;
              // A count of 0 would make `--numinfos` below wrap around to
              // uint.max and spin for ~4 billion iterations; treat it as a
              // single pass instead.
              if (numinfos == 0) numinfos = 1;
              // and otherwise it should be ignored
              a[0] &= 0xFFFF_FF00;
              firstTime = false;
          }
          for (int c=0; c<4;++c) {
              // high bit set == no info.
              if (a[c] & 0x8000_0000) continue;
              decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
              decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
              decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
              decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
          }
      } while (--numinfos);
  }
  // CPUID4: "Deterministic cache parameters" leaf
  //
  // Walks the sub-leaves of CPUID leaf 4 until one reports cache type 0.
  // For every data or unified cache it stores associativity, line size and
  // size (kB, divided by the number of threads sharing the cache) in
  // `datacache`, and raises `cpuFeatures.maxCores` to the largest core
  // count reported. Takes no arguments and returns nothing.
  void getcacheinfoCPUID4()
  {
      int cachenum = 0;
      for (;;) {
          uint a, b, number_of_sets;
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
          } else asm pure nothrow @nogc {
              mov EAX, 4;
              mov ECX, cachenum;
              cpuid;
              mov a, EAX;
              mov b, EBX;
              mov number_of_sets, ECX;
          }
          ++cachenum;
          // EAX[4:0] is the cache type: 0 = none, 1 = data, 3 = unified.
          immutable uint cacheType = a & 0x1F;
          if (cacheType==0) break; // no more caches
          immutable uint numthreads = ((a>>14) & 0xFFF) + 1;
          immutable uint numcores = ((a>>26) & 0x3F) + 1;
          if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
          if (cacheType!=1 && cacheType!=3) continue; // we only want data & unified caches
          ++number_of_sets;
          // Zero-based level; a reported level of 0 wraps to 255 and is
          // rejected by the range check below.
          immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
          // `>=`, not `>`: level == datacache.length is already out of bounds.
          if (level >= datacache.length) continue; // ignore deep caches
          datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
          datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
          immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
          // Size = number of sets * associativity * cachelinesize * linepartitions
          // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
          immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
              datacache[level].associativity : number_of_sets;
          datacache[level].size = cast(size_t)(
              (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
          if (level == 0 && cacheType==3) {
              // Halve the size for unified L1 caches
              datacache[level].size/=2;
          }
      }
  }
  // CPUID8000_0005 & 6
  //
  // Reads the AMD-style cache leaves. Leaf 0x8000_0005 fills `datacache[0]`
  // (L1 data cache). When leaf 0x8000_0006 is available it fills
  // `datacache[1]` (L2) and `datacache[2]` (L3, divided by the core count),
  // and leaf 0x8000_0008, when available, may raise `cpuFeatures.maxCores`.
  // Takes no arguments and returns nothing.
  void getAMDcacheinfo()
  {
      uint dummy, c5, c6, d6;
      version (GNU_OR_LDC) asm pure nothrow @nogc {
          "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
      } else asm pure nothrow @nogc {
          mov EAX, 0x8000_0005; // L1 cache
          cpuid;
          // EAX has L1_TLB_4M.
          // EBX has L1_TLB_4K
          // EDX has L1 instruction cache
          mov c5, ECX;
      }
      datacache[0].size = ( (c5>>24) & 0xFF);
      datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
      datacache[0].lineSize = c5 & 0xFF;
      if (max_extended_cpuid >= 0x8000_0006) {
          // AMD K6-III or K6-2+ or later.
          // Kept as uint: the low byte of ECX holds (count - 1), so a ubyte
          // would wrap to 0 for a value of 255 and the division below
          // would then divide by zero.
          uint numcores = 1;
          if (max_extended_cpuid >= 0x8000_0008) {
              uint c8;
              version (GNU_OR_LDC) asm pure nothrow @nogc {
                  "cpuid" : "=a" (dummy), "=c" (c8) : "a" (0x8000_0008) : "ebx", "edx";
              } else asm pure nothrow @nogc {
                  mov EAX, 0x8000_0008;
                  cpuid;
                  mov c8, ECX;
              }
              numcores = (c8 & 0xFF) + 1;
              if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
          }
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
          } else asm pure nothrow @nogc {
              mov EAX, 0x8000_0006; // L2/L3 cache
              cpuid;
              mov c6, ECX; // L2 cache info
              mov d6, EDX; // L3 cache info
          }
          // Maps the 4-bit associativity code to a number of ways.
          static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
          datacache[1].size = (c6>>16) & 0xFFFF;
          datacache[1].associativity = assocmap[(c6>>12)&0xF];
          datacache[1].lineSize = c6 & 0xFF;
          // The L3 cache value is TOTAL, not per core.
          datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
          datacache[2].associativity = assocmap[(d6>>12)&0xF];
          datacache[2].lineSize = d6 & 0xFF;
      }
  }
  // For Intel CoreI7 and later, use function 0x0B
  // to determine number of processors.
  //
  // Iterates over the sub-leaves of CPUID leaf 0x0B until both EAX and EBX
  // come back as zero. Sub-leaf 0 supplies the threads-per-core count;
  // sub-leaf 1 supplies `cpuFeatures.maxThreads`, from which
  // `cpuFeatures.maxCores` is derived. Takes no arguments, returns nothing.
  void getCpuInfo0B()
  {
      int level=0;
      int threadsPerCore;
      uint a, b, c, d;
      do {
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
          } else asm pure nothrow @nogc {
              mov EAX, 0x0B;
              mov ECX, level;
              cpuid;
              mov a, EAX;
              mov b, EBX;
              mov c, ECX;
              mov d, EDX;
          }
          if (b!=0) {
              // I'm not sure about this. The docs state that there
              // are 2 hyperthreads per core if HT is factory enabled.
              if (level==0)
                  threadsPerCore = b & 0xFFFF;
              else if (level==1) {
                  cpuFeatures.maxThreads = b & 0xFFFF;
                  // threadsPerCore stays 0 when sub-leaf 0 reported nothing
                  // in EBX[15:0]; skip the division rather than trap, and
                  // leave maxCores at its previous value.
                  if (threadsPerCore > 0)
                      cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
              }
          }
          ++level;
      } while (a!=0 || b!=0);
  }
  // Queries the CPUID instruction and fills in the module-level state:
  // `max_cpuid` / `max_extended_cpuid`, the `cpuFeatures` record (vendor ID,
  // feature words, processor name, core and thread counts),
  // `stepping` / `model` / `family`, and the `datacache` table.
  // Takes no arguments and returns nothing.
  void cpuidX86()
  {
      auto cf = getCpuFeatures();
      uint a, b, c, d;
      uint* venptr = cast(uint*)cf.vendorID.ptr;
      // Leaf 0: highest standard leaf + vendor ID (EBX, EDX, ECX order).
      // Leaf 0x8000_0000: highest extended leaf.
      version (GNU_OR_LDC)
      {
          asm pure nothrow @nogc {
              "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
              "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
          }
      }
      else
      {
          uint a2;
          version (D_InlineAsm_X86)
          {
              asm pure nothrow @nogc {
                  mov EAX, 0;
                  cpuid;
                  mov a, EAX;
                  mov EAX, venptr;
                  mov [EAX], EBX;
                  mov [EAX + 4], EDX;
                  mov [EAX + 8], ECX;
              }
          }
          else version (D_InlineAsm_X86_64)
          {
              asm pure nothrow @nogc {
                  mov EAX, 0;
                  cpuid;
                  mov a, EAX;
                  mov RAX, venptr;
                  mov [RAX], EBX;
                  mov [RAX + 4], EDX;
                  mov [RAX + 8], ECX;
              }
          }
          asm pure nothrow @nogc {
              mov EAX, 0x8000_0000;
              cpuid;
              mov a2, EAX;
          }
          max_cpuid = a;
          max_extended_cpuid = a2;
      }
      cf.probablyIntel = cf.vendorID == "GenuineIntel";
      cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
      uint apic = 0; // brand index, apic id
      version (GNU_OR_LDC) asm pure nothrow @nogc {
          "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
      } else {
          asm pure nothrow @nogc {
              mov EAX, 1; // model, stepping
              cpuid;
              mov a, EAX;
              mov apic, EBX;
              mov c, ECX;
              mov d, EDX;
          }
          cf.features = d;
          cf.miscfeatures = c;
      }
      stepping = a & 0xF;
      immutable uint fbase = (a >> 8) & 0xF;
      immutable uint mbase = (a >> 4) & 0xF;
      // `+` binds tighter than `&`, so the extended-family field has to be
      // masked inside its own parentheses before it is added to the base.
      family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
      model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
          mbase + ((a >> 12) & 0xF0) : mbase;
      if (max_cpuid >= 7)
      {
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
          } else {
              uint ext;
              asm pure nothrow @nogc {
                  mov EAX, 7; // Structured extended feature leaf.
                  mov ECX, 0; // Main leaf.
                  cpuid;
                  mov ext, EBX; // HLE, AVX2, RTM, etc.
              }
              cf.extfeatures = ext;
          }
      }
      if (cf.miscfeatures & OSXSAVE_BIT)
      {
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "xgetbv" : "=a" (a), "=d" (d) : "c" (0);
          } else asm pure nothrow @nogc {
              mov ECX, 0;
              xgetbv;
              mov d, EDX;
              mov a, EAX;
          }
          cf.xfeatures = cast(ulong)d << 32 | a;
      }
      cf.amdfeatures = 0;
      cf.amdmiscfeatures = 0;
      if (max_extended_cpuid >= 0x8000_0001) {
          version (GNU_OR_LDC) asm pure nothrow @nogc {
              "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
          } else {
              asm pure nothrow @nogc {
                  mov EAX, 0x8000_0001;
                  cpuid;
                  mov c, ECX;
                  mov d, EDX;
              }
              cf.amdmiscfeatures = c;
              cf.amdfeatures = d;
          }
      }
      // Try to detect fraudulent vendorIDs
      // The 3DNow! bit is tested on the raw feature word. The public
      // amd3dnow accessor returns a module-level immutable which, according
      // to the comment on those immutables, is only assigned in static
      // this() -- presumably after this function has gathered the raw bits.
      // NOTE(review): confirm against the module constructor.
      if ((cf.amdfeatures & AMD_3DNOW_BIT) != 0) cf.probablyIntel = false;
      if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
          //http://support.amd.com/TechDocs/25481.pdf pg.36
          cf.maxCores = 1;
          if (hyperThreadingBit) {
              // determine max number of cores for AMD
              version (GNU_OR_LDC) asm pure nothrow @nogc {
                  "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
              } else asm pure nothrow @nogc {
                  mov EAX, 0x8000_0008;
                  cpuid;
                  mov c, ECX;
              }
              cf.maxCores += c & 0xFF;
          }
      }
      if (max_extended_cpuid >= 0x8000_0004) {
          // Leaves 0x8000_0002..4 each return 16 bytes of the brand string.
          uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
          version (GNU_OR_LDC)
          {
              asm pure nothrow @nogc {
                  "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
                  "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
                  "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
              }
          }
          else version (D_InlineAsm_X86)
          {
              asm pure nothrow @nogc {
                  push ESI;
                  mov ESI, pnb;
                  mov EAX, 0x8000_0002;
                  cpuid;
                  mov [ESI], EAX;
                  mov [ESI+4], EBX;
                  mov [ESI+8], ECX;
                  mov [ESI+12], EDX;
                  mov EAX, 0x8000_0003;
                  cpuid;
                  mov [ESI+16], EAX;
                  mov [ESI+20], EBX;
                  mov [ESI+24], ECX;
                  mov [ESI+28], EDX;
                  mov EAX, 0x8000_0004;
                  cpuid;
                  mov [ESI+32], EAX;
                  mov [ESI+36], EBX;
                  mov [ESI+40], ECX;
                  mov [ESI+44], EDX;
                  pop ESI;
              }
          }
          else version (D_InlineAsm_X86_64)
          {
              asm pure nothrow @nogc {
                  push RSI;
                  mov RSI, pnb;
                  mov EAX, 0x8000_0002;
                  cpuid;
                  mov [RSI], EAX;
                  mov [RSI+4], EBX;
                  mov [RSI+8], ECX;
                  mov [RSI+12], EDX;
                  mov EAX, 0x8000_0003;
                  cpuid;
                  mov [RSI+16], EAX;
                  mov [RSI+20], EBX;
                  mov [RSI+24], ECX;
                  mov [RSI+28], EDX;
                  mov EAX, 0x8000_0004;
                  cpuid;
                  mov [RSI+32], EAX;
                  mov [RSI+36], EBX;
                  mov [RSI+40], ECX;
                  mov [RSI+44], EDX;
                  pop RSI;
              }
          }
          // Intel P4 and PM pad at front with spaces.
          // Other CPUs pad at end with nulls.
          // Both scans are bounded, so a brand string made up only of
          // spaces or only of nulls cannot index outside the buffer.
          size_t start = 0;
          size_t end = cf.processorNameBuffer.length;
          while (start < end && cf.processorNameBuffer[start] == ' ') { ++start; }
          while (end > start && cf.processorNameBuffer[end-1] == 0) { --end; }
          if (start < end)
              cf.processorName = cast(string)(cf.processorNameBuffer[start..end]);
          else
              cf.processorName = "Unknown CPU"; // nothing left after trimming
      } else {
          cf.processorName = "Unknown CPU";
      }
      // Determine cache sizes
      // Intel docs specify that they return 0 for 0x8000_0005.
      // AMD docs do not specify the behaviour for 0004 and 0002.
      // Centaur/VIA and most other manufacturers use the AMD method,
      // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
      // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
      // for CPUID80000005. But Geode GX uses the AMD method
      // Deal with Geode GX1 - make it same as MediaGX MMX.
      if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
          max_extended_cpuid = 0x8000_0004;
      }
      // Therefore, we try the AMD method unless it's an Intel chip.
      // If we still have no info, try the Intel methods.
      datacache[0].size = 0;
      if (max_cpuid<2 || !cf.probablyIntel) {
          if (max_extended_cpuid >= 0x8000_0005) {
              getAMDcacheinfo();
          } else if (cf.probablyAMD) {
              // According to AMDProcRecognitionAppNote, this means CPU
              // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
              // Am5x86 has 16Kb 4-way unified data & code cache.
              datacache[0].size = 8;
              datacache[0].associativity = 4;
              datacache[0].lineSize = 32;
          } else {
              // Some obscure CPU.
              // Values for Cyrix 6x86MX (family 6, model 0)
              datacache[0].size = 64;
              datacache[0].associativity = 4;
              datacache[0].lineSize = 32;
          }
      }
      if ((datacache[0].size == 0) && max_cpuid>=4) {
          getcacheinfoCPUID4();
      }
      if ((datacache[0].size == 0) && max_cpuid>=2) {
          getcacheinfoCPUID2();
      }
      if (datacache[0].size == 0) {
          // Pentium, PMMX, late model 486, or an obscure CPU
          // The MMX bit is read from the raw CPUID1_EDX word for the same
          // reason as the 3DNow! bit above (accessor immutables are
          // presumably not assigned yet -- NOTE(review): confirm).
          if ((cf.features & MMX_BIT) != 0) { // Pentium MMX. Also has 8kB code cache.
              datacache[0].size = 16;
              datacache[0].associativity = 4;
              datacache[0].lineSize = 32;
          } else { // Pentium 1 (which also has 8kB code cache)
              // or 486.
              // Cyrix 6x86: 16, 4way, 32 linesize
              datacache[0].size = 8;
              datacache[0].associativity = 2;
              datacache[0].lineSize = 32;
          }
      }
      if (cf.probablyIntel && max_cpuid >= 0x0B) {
          // For Intel i7 and later, use function 0x0B to determine
          // cores and hyperthreads.
          getCpuInfo0B();
      } else {
          if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
          else cf.maxThreads = cf.maxCores;
          if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
              version (GNU_OR_LDC) asm pure nothrow @nogc {
                  "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
              } else {
                  asm pure nothrow @nogc {
                      mov EAX, 0x8000_001e;
                      cpuid;
                      mov b, EBX;
                  }
              }
              // EBX[9:8] + 1, used below to turn the thread count into a
              // core count.
              ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
              cf.maxCores = cf.maxThreads / coresPerComputeUnit;
          }
      }
  }
  // Reports whether the CPUID instruction can be used on this CPU.
  // On X86_64 the answer is unconditionally true. Otherwise the function
  // tries to flip the ID bit (bit 21, mask 0x0020_0000) of EFLAGS and checks
  // whether the change stuck. If neither inline-asm flavour below is compiled
  // in, nothing is probed and the result is false.
  // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
  bool hasCPUID()
  {
      version (X86_64)
      {
          return true;
      }
      else
      {
          // Mask of the EFLAGS ID bit.
          enum uint idBit = 0x0020_0000;
          // Receives the EFLAGS bits that differ after the toggle attempt.
          uint changed;

          version (GNU_OR_LDC)
          {
              // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
              asm nothrow @nogc { "
                  pushfl # Save EFLAGS
                  pushfl # Store EFLAGS
                  xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
                  popfl # Load stored EFLAGS (with ID bit inverted)
                  pushfl # Store EFLAGS again (ID bit may or may not be inverted)
                  popl %%eax # eax = modified EFLAGS (ID bit may or may not be inverted)
                  xorl (%%esp), %%eax # eax = whichever bits were changed
                  popfl # Restore original EFLAGS
                  " : "=a" (changed);
              }
          }
          else version (D_InlineAsm_X86)
          {
              asm nothrow @nogc {
                  pushfd;                 // EAX = current EFLAGS
                  pop EAX;
                  mov changed, EAX;       // remember the starting value
                  xor EAX, 0x0020_0000;   // flip the ID bit
                  push EAX;
                  popfd;                  // try to load the flipped value
                  pushfd;                 // read EFLAGS back
                  pop EAX;
                  xor changed, EAX;       // changed = bits that now differ
              }
          }

          return (changed & idBit) != 0;
      }
  }
  959. } else { // supported X86
  // Fallback for the branch where the x86 CPUID code is not compiled in:
  // always reports that CPUID is unavailable.
  bool hasCPUID()
  {
      return false;
  }
  // Fallback cpuidX86(): performs no detection and simply fills the first
  // data-cache entry with fixed values (size 8, 2-way, line size 32).
  void cpuidX86()
  {
      auto l1 = &datacache[0];
      l1.size = 8;
      l1.associativity = 2;
      l1.lineSize = 32;
  }
  967. }
  968. /*
  969. // TODO: Implement this function with OS support
  970. void cpuidPPC()
  971. {
  972. enum :int { PPC601, PPC603, PPC603E, PPC604,
  973. PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
  974. // TODO:
  975. // asm { mfpvr; } returns the CPU version but unfortunately it can
  976. // only be used in kernel mode. So OS support is required.
  977. int cputype = PPC603;
  978. // 601 has an 8KB combined data & code L1 cache.
  979. uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
  980. ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8];
  981. uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512];
  982. uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0];
  983. datacache[0].size = sizes[cputype];
  984. datacache[0].associativity = ways[cputype];
  985. datacache[0].lineSize = (cputype==PPCG5)? 128 :
  986. (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
  987. datacache[1].size = L2size[cputype];
  988. datacache[2].size = L3size[cputype];
  989. datacache[1].lineSize = datacache[0].lineSize;
  990. datacache[2].lineSize = datacache[0].lineSize;
  991. }
  992. // TODO: Implement this function with OS support
  993. void cpuidSparc()
  994. {
  995. // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
  996. // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
  997. // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
  998. // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
  999. // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
  1000. // Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
  1001. }
  1002. */
  // Module constructor. Runs the CPU detection once, normalises the
  // data-cache table, and then copies every detected property into the
  // module's underscore-prefixed result variables.
  shared static this()
  {
      auto cpu = getCpuFeatures();

      // When CPUID is missing it's a 386 or 486, or a Cyrix 6x86
      // (probably still with an external cache); nothing is probed then.
      if (hasCPUID())
          cpuidX86();

      // No first-level information at all: guess same as Pentium 1.
      if (datacache[0].size == 0)
      {
          datacache[0].size = 8;
          datacache[0].associativity = 2;
          datacache[0].lineSize = 32;
      }

      // Count the levels that were detected; every unused level is set
      // equal to the full address space.
      numCacheLevels = 1;
      foreach (level; 1 .. datacache.length)
      {
          if (datacache[level].size != 0)
          {
              ++numCacheLevels;
              continue;
          }
          datacache[level].size = size_t.max/1024;
          datacache[level].associativity = 1;
          datacache[level].lineSize = datacache[level-1].lineSize;
      }

      // Set the immortals
      _dataCaches = datacache;
      _vendor = cast(string)cpu.vendorID;
      _processor = cpu.processorName;

      // Baseline x87 / MMX / SSE family.
      _x87onChip = (cpu.features & FPU_BIT) != 0;
      _mmx = (cpu.features & MMX_BIT) != 0;
      _sse = (cpu.features & SSE_BIT) != 0;
      _sse2 = (cpu.features & SSE2_BIT) != 0;
      _sse3 = (cpu.miscfeatures & SSE3_BIT) != 0;
      _ssse3 = (cpu.miscfeatures & SSSE3_BIT) != 0;
      _sse41 = (cpu.miscfeatures & SSE41_BIT) != 0;
      _sse42 = (cpu.miscfeatures & SSE42_BIT) != 0;
      _sse4a = (cpu.amdmiscfeatures & SSE4A_BIT) != 0;
      _aes = (cpu.miscfeatures & AES_BIT) != 0;
      _hasPclmulqdq = (cpu.miscfeatures & PCLMULQDQ_BIT) != 0;
      _hasRdrand = (cpu.miscfeatures & RDRAND_BIT) != 0;

      // AVX needs both XF_SSE_BIT and XF_YMM_BIT set in xfeatures in
      // addition to the AVX feature bit; the flags below depend on it.
      enum avxStateMask = XF_SSE_BIT | XF_YMM_BIT;
      _avx = (cpu.xfeatures & avxStateMask) == avxStateMask
          && (cpu.miscfeatures & AVX_BIT) != 0;
      _vaes = avx && aes;
      _hasVpclmulqdq = avx && hasPclmulqdq;
      _fma = avx && (cpu.miscfeatures & FMA_BIT) != 0;
      _fp16c = avx && (cpu.miscfeatures & FP16C_BIT) != 0;
      _avx2 = avx && (cpu.extfeatures & AVX2_BIT) != 0;

      // Flags taken from the extfeatures word.
      _hle = (cpu.extfeatures & HLE_BIT) != 0;
      _rtm = (cpu.extfeatures & RTM_BIT) != 0;
      _hasRdseed = (cpu.extfeatures & RDSEED_BIT) != 0;
      _hasSha = (cpu.extfeatures & SHA_BIT) != 0;

      // AMD multimedia extensions.
      _amd3dnow = (cpu.amdfeatures & AMD_3DNOW_BIT) != 0;
      _amd3dnowExt = (cpu.amdfeatures & AMD_3DNOW_EXT_BIT) != 0;
      _amdMmx = (cpu.amdfeatures & AMD_MMX_BIT) != 0;

      // Individual instructions.
      _hasFxsr = (cpu.features & FXSR_BIT) != 0;
      _hasCmov = (cpu.features & CMOV_BIT) != 0;
      _hasRdtsc = (cpu.features & TIMESTAMP_BIT) != 0;
      _hasCmpxchg8b = (cpu.features & CMPXCHG8B_BIT) != 0;
      _hasCmpxchg16b = (cpu.miscfeatures & CMPXCHG16B_BIT) != 0;

      // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
      // (REF: www.geoffchappell.com).
      // For those Intel parts the feature bit is ignored.
      const bool sysenterUnreliable = cpu.probablyIntel
          && (family < 6
              || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
      _hasSysEnterSysExit = !sysenterUnreliable
          && (cpu.features & SYSENTERSYSEXIT_BIT) != 0;

      _has3dnowPrefetch = (cpu.amdmiscfeatures & AMD_3DNOW_PREFETCH_BIT) != 0;
      _hasLahfSahf = (cpu.amdmiscfeatures & LAHFSAHF_BIT) != 0;
      _hasPopcnt = (cpu.miscfeatures & POPCNT_BIT) != 0;
      _hasLzcnt = (cpu.amdmiscfeatures & LZCNT_BIT) != 0;

      // Architecture and topology.
      _isX86_64 = (cpu.amdfeatures & AMD64_BIT) != 0;
      _isItanium = (cpu.features & IA64_BIT) != 0;
      _hyperThreading = cpu.maxThreads > cpu.maxCores;
      _threadsPerCPU = cpu.maxThreads;
      _coresPerCPU = cpu.maxCores;

      // Optimisation hints derived from vendor, family and model.
      _preferAthlon = cpu.probablyAMD && family >= 6;
      _preferPentium4 = cpu.probablyIntel && family == 0xF;
      _preferPentium1 = family < 6
          || (family == 6 && model < 0xF && !cpu.probablyIntel);
  }