PageRenderTime 53ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/tango/core/tools/Cpuid.d

http://github.com/SiegeLord/Tango-D2
D | 788 lines | 498 code | 53 blank | 237 comment | 115 complexity | 1a95384e5aee62da963b133978836d36 MD5 | raw file
  1. /**
  2. Identify the characteristics of the host CPU, providing information
  3. about cache sizes and assembly optimisation hints.
  4. Some of this information was extremely difficult to track down. Some of the
  5. documents below were found only in cached versions stored by search engines!
  6. This code relies on information found in:
  7. $(UL
  8. $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
  9. Volume 2A: Instruction Set Reference, A-M" (2007).)
  10. $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).)
  11. $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
  12. Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).)
  13. $(LI "AMD Geode(TM) GX Processors Data Book",
  14. Advanced Micro Devices, Publication ID 31505E, (2005).)
  15. $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).)
  16. $(LI "Application note 106: Software Customization for the 6x86 Family",
  17. Cyrix Corporation, Rev 1.5 (1998))
  18. $(LI $(LINK http://ftp.intron.ac/pub/document/cpu/cpuid.htm))
  19. $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
  20. National Semiconductor, (2002))
  21. $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).)
  22. $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
  23. $(LI $(LINK http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html))
  24. $(LI "What every programmer should know about memory",
  25. Ulrich Drepper, Red Hat, Inc., (2007).)
  26. $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
  27. $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm))
  28. $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
  29. Note 485" (2009).)
  30. )
  31. AUTHORS: Don Clugston,
  32. Tomas Lindquist Olsen $(EMAIL tomas@famolsen.dk)
  33. COPYRIGHT: Public Domain
  34. BUGS: Currently only works on x86 CPUs.
  35. Many processors have bugs in their microcode for the CPUID instruction,
  36. so sometimes the cache information may be incorrect.
  37. */
  38. module tango.core.tools.Cpuid;
  39. // If optimizing for a particular processor, it is generally better
  40. // to identify based on features rather than model. NOTE: Normally
  41. // it's only worthwhile to optimise for the latest Intel and AMD CPU,
  42. // with a backup for other CPUs.
  43. // Pentium -- preferPentium1()
  44. // PMMX -- + mmx()
  45. // PPro -- default
  46. // PII -- + mmx()
  47. // PIII -- + mmx() + sse()
  48. // PentiumM -- + mmx() + sse() + sse2()
  49. // Pentium4 -- preferPentium4()
  50. // PentiumD -- + isX86_64()
  51. // Core2 -- default + isX86_64()
  52. // AMD K5 -- preferPentium1()
  53. // AMD K6 -- + mmx()
  54. // AMD K6-II -- + mmx() + 3dnow()
  55. // AMD K7 -- preferAthlon()
  56. // AMD K8 -- + sse2()
  57. // AMD K10 -- + isX86_64()
  58. // Cyrix 6x86 -- preferPentium1()
  59. // 6x86MX -- + mmx()
  60. public:
/// Cache size and behaviour.
/// Describes a single level of the data-cache hierarchy.
struct CacheInfo
{
    /// Size of the cache, in kilobytes, per CPU.
    /// For L1 unified (data + code) caches, this size is half the physical size.
    /// (It is not halved for larger caches, since data size is normally
    /// much greater than code size for critical loops.)
    uint size;
    /// Number of ways of associativity, eg:$(BR)
    /// 1 = direct mapped$(BR)
    /// 2 = 2-way set associative$(BR)
    /// 3 = 3-way set associative$(BR)
    /// ubyte.max = fully associative
    ubyte associativity;
    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;
}
  78. public:
  79. /// Returns vendor string, for display purposes only.
  80. /// Do NOT use this to determine features!
  81. /// Note that some CPUs have programmable vendorIDs.
  82. const(char)[] vendor() {return cast(const(char)[]) vendorID;}
  83. /// Returns processor string, for display purposes only
  84. const(char)[] processor() {return processorName;}
  85. /// The data caches. If there are fewer than 5 physical caches levels,
  86. /// the remaining levels are set to uint.max (== entire memory space)
  87. CacheInfo[5] datacache;
  88. /// Does it have an x87 FPU on-chip?
  89. @property bool x87onChip() {return (features&FPU_BIT)!=0;}
  90. /// Is MMX supported?
  91. @property bool mmx() {return (features&MMX_BIT)!=0;}
  92. /// Is SSE supported?
  93. @property bool sse() {return (features&SSE_BIT)!=0;}
  94. /// Is SSE2 supported?
  95. @property bool sse2() {return (features&SSE2_BIT)!=0;}
  96. /// Is SSE3 supported?
  97. @property bool sse3() {return (miscfeatures&SSE3_BIT)!=0;}
  98. /// Is SSSE3 supported?
  99. @property bool ssse3() {return (miscfeatures&SSSE3_BIT)!=0;}
  100. /// Is SSE4.1 supported?
  101. @property bool sse41() {return (miscfeatures&SSE41_BIT)!=0;}
  102. /// Is SSE4.2 supported?
  103. @property bool sse42() {return (miscfeatures&SSE42_BIT)!=0;}
  104. /// Is SSE4a supported?
  105. @property bool sse4a() {return (amdmiscfeatures&SSE4A_BIT)!=0;}
  106. /// Is AMD 3DNOW supported?
  107. @property bool amd3dnow() {return (amdfeatures&AMD_3DNOW_BIT)!=0;}
  108. /// Is AMD 3DNOW Ext supported?
  109. @property bool amd3dnowExt() {return (amdfeatures&AMD_3DNOW_EXT_BIT)!=0;}
  110. /// Are AMD extensions to MMX supported?
  111. @property bool amdMmx() {return (amdfeatures&AMD_MMX_BIT)!=0;}
  112. /// Is fxsave/fxrstor supported?
  113. @property bool hasFxsr() {return (features&FXSR_BIT)!=0;}
  114. /// Is cmov supported?
  115. @property bool hasCmov() {return (features&CMOV_BIT)!=0;}
  116. /// Is rdtsc supported?
  117. @property bool hasRdtsc() {return (features&TIMESTAMP_BIT)!=0;}
  118. /// Is cmpxchg8b supported?
  119. @property bool hasCmpxchg8b() {return (features&CMPXCHG8B_BIT)!=0;}
  120. /// Is cmpxchg8b supported?
  121. @property bool hasCmpxchg16b() {return (miscfeatures&CMPXCHG16B_BIT)!=0;}
  122. /// Is SYSENTER/SYSEXIT supported?
  123. @property bool hasSysEnterSysExit() {
  124. // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
  125. // (REF: www.geoffchappell.com).
  126. if (probablyIntel && (family < 6 || (family==6 && (model< 3 || (model==3 && stepping<3)))))
  127. return false;
  128. return (features & SYSENTERSYSEXIT_BIT)!=0;
  129. }
  130. /// Is 3DNow prefetch supported?
  131. @property bool has3dnowPrefetch()
  132. {return (amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;}
  133. /// Are LAHF and SAHF supported in 64-bit mode?
  134. @property bool hasLahfSahf() {return (amdmiscfeatures&LAHFSAHF_BIT)!=0;}
  135. /// Is POPCNT supported?
  136. @property bool hasPopcnt() {return (miscfeatures&POPCNT_BIT)!=0;}
  137. /// Is LZCNT supported?
  138. @property bool hasLzcnt() {return (amdmiscfeatures&LZCNT_BIT)!=0;}
  139. /// Is this an Intel64 or AMD 64?
  140. @property bool isX86_64() {return (amdfeatures&AMD64_BIT)!=0;}
  141. /// Is this an IA64 (Itanium) processor?
  142. @property bool isItanium() { return (features&IA64_BIT)!=0; }
  143. /// Is hyperthreading supported?
  144. @property bool hyperThreading() { return maxThreads>maxCores; }
  145. /// Returns number of threads per CPU
  146. @property uint threadsPerCPU() {return maxThreads;}
  147. /// Returns number of cores in CPU
  148. @property uint coresPerCPU() {return maxCores;}
  149. /// Optimisation hints for assembly code.
  150. /// For forward compatibility, the CPU is compared against different
  151. /// microarchitectures. For 32-bit X86, comparisons are made against
  152. /// the Intel PPro/PII/PIII/PM family.
  153. ///
  154. /// The major 32-bit x86 microarchitecture 'dynasties' have been:
  155. /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2).
  156. /// (2) AMD Athlon (K7, K8, K10).
  157. /// (3) Intel NetBurst (Pentium 4, Pentium D).
  158. /// (4) In-order Pentium (Pentium1, PMMX, Atom)
  159. /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
  160. /// Cyrix, Rise) were mostly in-order.
  161. /// Some new processors do not fit into the existing categories:
  162. /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core.
  163. /// Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core.
  164. ///
  165. /// Within each dynasty, the optimisation techniques are largely
  166. /// identical (eg, use instruction pairing for group 4). Major
  167. /// instruction set improvements occur within each dynasty.
  168. /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
  169. @property bool preferAthlon() { return probablyAMD && family >=6; }
  170. /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
  171. @property bool preferPentium4() { return probablyIntel && family == 0xF; }
  172. /// Does this CPU perform better on Pentium I code than Pentium Pro code?
  173. @property bool preferPentium1() { return family < 6 || (family==6 && model < 0xF && !probablyIntel); }
  174. public:
  175. /// Processor type (vendor-dependent).
  176. /// This should be visible ONLY for display purposes.
  177. uint stepping, model, family;
  178. uint numCacheLevels = 1;
  179. private:
  180. bool probablyIntel; // true = _probably_ an Intel processor, might be faking
  181. bool probablyAMD; // true = _probably_ an AMD processor
  182. char[12] vendorID;
  183. string processorName;
  184. char[48] processorNameBuffer;
  185. uint features = 0; // mmx, sse, sse2, hyperthreading, etc
  186. uint miscfeatures = 0; // sse3, etc.
  187. uint amdfeatures = 0; // 3DNow!, mmxext, etc
  188. uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
  189. uint maxCores = 1;
  190. uint maxThreads = 1;
  191. // Note that this may indicate multi-core rather than hyperthreading.
  192. bool hyperThreadingBit() { return (features&HTT_BIT)!=0;}
  193. // feature flags CPUID1_EDX
  194. enum : uint
  195. {
  196. FPU_BIT = 1,
  197. TIMESTAMP_BIT = 1<<4, // rdtsc
  198. MDSR_BIT = 1<<5, // RDMSR/WRMSR
  199. CMPXCHG8B_BIT = 1<<8,
  200. SYSENTERSYSEXIT_BIT = 1<<11,
  201. CMOV_BIT = 1<<15,
  202. MMX_BIT = 1<<23,
  203. FXSR_BIT = 1<<24,
  204. SSE_BIT = 1<<25,
  205. SSE2_BIT = 1<<26,
  206. HTT_BIT = 1<<28,
  207. IA64_BIT = 1<<30
  208. }
  209. // feature flags misc CPUID1_ECX
  210. enum : uint
  211. {
  212. SSE3_BIT = 1,
  213. PCLMULQDQ_BIT = 1<<1, // from AVX
  214. MWAIT_BIT = 1<<3,
  215. SSSE3_BIT = 1<<9,
  216. FMA_BIT = 1<<12, // from AVX
  217. CMPXCHG16B_BIT = 1<<13,
  218. SSE41_BIT = 1<<19,
  219. SSE42_BIT = 1<<20,
  220. POPCNT_BIT = 1<<23,
  221. AES_BIT = 1<<25, // AES instructions from AVX
  222. OSXSAVE_BIT = 1<<27, // Used for AVX
  223. AVX_BIT = 1<<28
  224. }
  225. /+
  226. version(X86_64) {
  227. bool hasAVXinHardware() {
  228. // This only indicates hardware support, not OS support.
  229. return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT);
  230. }
  231. // Is AVX supported (in both hardware & OS)?
  232. bool Avx() {
  233. if (!hasAVXinHardware()) return false;
  234. // Check for OS support
  235. uint xfeatures;
  236. asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; }
  237. return (xfeatures&0x6)==6;
  238. }
  239. bool hasAvxFma() {
  240. if (!Avx()) return false;
  241. return (miscfeatures&FMA_BIT)!=0;
  242. }
  243. }
  244. +/
  245. // AMD feature flags CPUID80000001_EDX
  246. enum : uint
  247. {
  248. AMD_MMX_BIT = 1<<22,
  249. // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
  250. FFXSR_BIT = 1<<25,
  251. PAGE1GB_BIT = 1<<26, // support for 1GB pages
  252. RDTSCP_BIT = 1<<27,
  253. AMD64_BIT = 1<<29,
  254. AMD_3DNOW_EXT_BIT = 1<<30,
  255. AMD_3DNOW_BIT = 1<<31
  256. }
  257. // AMD misc feature flags CPUID80000001_ECX
  258. enum : uint
  259. {
  260. LAHFSAHF_BIT = 1,
  261. LZCNT_BIT = 1<<5,
  262. SSE4A_BIT = 1<<6,
  263. AMD_3DNOW_PREFETCH_BIT = 1<<8
  264. }
// Select the inline-assembler implementation only when it is really usable:
// Really_D_InlineAsm_X86 is set when D_InlineAsm_X86 is defined and the
// compiler is not GDC.
version(GNU){
    // Per the original author's note, GDC cannot actually compile this
    // inline asm, so GDC builds deliberately leave the version unset.
} else version(D_InlineAsm_X86) {
    version = Really_D_InlineAsm_X86;
}
  270. version(Really_D_InlineAsm_X86) {
// Note that this code will also work for Itanium in x86 mode.
// Values returned in EAX by CPUID leaf 0 and leaf 0x8000_0000 respectively
// (assigned in cpuidX86); compared against leaf numbers before querying them.
uint max_cpuid, max_extended_cpuid;
  273. // CPUID2: "cache and tlb information"
  274. void getcacheinfoCPUID2()
  275. {
  276. // CPUID2 is a dog's breakfast. What was Intel thinking???
  277. // We are only interested in the data caches
  278. void decipherCpuid2(ubyte x) {
  279. if (x==0) return;
  280. // Values from http://www.sandpile.org/ia32/cpuid.htm.
  281. // Includes Itanium and non-Intel CPUs.
  282. //
  283. static ubyte[63] ids = [
  284. 0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
  285. // level 2 cache
  286. 0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
  287. 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
  288. 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
  289. // level 3 cache
  290. 0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,
  291. 0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
  292. 0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
  293. ];
  294. static uint[63] sizes = [
  295. 8, 16, 16, 64, 16, 24, 8, 16, 32,
  296. 128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
  297. 256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
  298. 128, 192, 128, 256, 384, 512, 3072, 512, 128,
  299. 512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,
  300. 512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
  301. 2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
  302. ];
  303. // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
  304. static ubyte[63] ways = [
  305. 2, 4, 4, 8, 8, 6, 4, 4, 4,
  306. 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
  307. 8, 8, 8, 8, 4, 8, 16, 24,
  308. 4, 6, 2, 4, 6, 4, 12, 8, 8,
  309. 4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
  310. 4, 4, 4, 8, 8, 8, 12, 12, 12,
  311. 16, 16, 16, 24, 24, 24
  312. ];
  313. enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 }
  314. for (int i=0; i< ids.length; ++i) {
  315. if (x==ids[i]) {
  316. int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
  317. if (x==0x49 && family==0xF && model==0x6) level=2;
  318. datacache[level].size=sizes[i];
  319. datacache[level].associativity=ways[i];
  320. if (level == 3 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
  321. || x==0x86 || x==0x87
  322. || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
  323. datacache[level].lineSize = 64;
  324. } else datacache[level].lineSize = 32;
  325. }
  326. }
  327. }
  328. uint[4] a;
  329. bool firstTime = true;
  330. // On a multi-core system, this could theoretically fail, but it's only used
  331. // for old single-core CPUs.
  332. uint numinfos = 1;
  333. do {
  334. asm {
  335. mov EAX, 2;
  336. cpuid;
  337. mov a, EAX;
  338. mov a+4, EBX;
  339. mov a+8, ECX;
  340. mov a+12, EDX;
  341. }
  342. if (firstTime) {
  343. if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
  344. // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
  345. // These are NOT standard Intel values
  346. // (TLB = 32 entry, 4 way associative, 4K pages)
  347. // (L1 cache = 16K, 4way, linesize16)
  348. datacache[0].size=8;
  349. datacache[0].associativity=4;
  350. datacache[0].lineSize=16;
  351. return;
  352. }
  353. // lsb of a is how many times to loop.
  354. numinfos = a[0] & 0xFF;
  355. // and otherwise it should be ignored
  356. a[0] &= 0xFFFF_FF00;
  357. firstTime = false;
  358. }
  359. for (int c=0; c<4;++c) {
  360. // high bit set == no info.
  361. if (a[c] & 0x8000_0000) continue;
  362. decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
  363. decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
  364. decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
  365. decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
  366. }
  367. } while (--numinfos);
  368. }
  369. // CPUID4: "Deterministic cache parameters" leaf
  370. void getcacheinfoCPUID4()
  371. {
  372. int cachenum = 0;
  373. for(;;) {
  374. uint a, b, number_of_sets;
  375. asm {
  376. mov EAX, 4;
  377. mov ECX, cachenum;
  378. cpuid;
  379. mov a, EAX;
  380. mov b, EBX;
  381. mov number_of_sets, ECX;
  382. }
  383. ++cachenum;
  384. if ((a&0x1F)==0) break; // no more caches
  385. uint numthreads = ((a>>14) & 0xFFF) + 1;
  386. uint numcores = ((a>>26) & 0x3F) + 1;
  387. if (numcores > maxCores) maxCores = numcores;
  388. if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches
  389. ++number_of_sets;
  390. ubyte level = cast(ubyte)(((a>>5)&7)-1);
  391. if (level > datacache.length) continue; // ignore deep caches
  392. datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
  393. datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
  394. uint line_partitions = ((b >> 12)& 0x3FF) + 1;
  395. // Size = number of sets * associativity * cachelinesize * linepartitions
  396. // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
  397. ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
  398. datacache[level].associativity : number_of_sets;
  399. datacache[level].size = cast(uint)(
  400. (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
  401. if (level == 0 && (a&0xF)==3) {
  402. // Halve the size for unified L1 caches
  403. datacache[level].size/=2;
  404. }
  405. }
  406. }
  407. // CPUID8000_0005 & 6
  408. void getAMDcacheinfo()
  409. {
  410. uint c5, c6, d6;
  411. asm {
  412. mov EAX, 0x8000_0005; // L1 cache
  413. cpuid;
  414. // EAX has L1_TLB_4M.
  415. // EBX has L1_TLB_4K
  416. // EDX has L1 instruction cache
  417. mov c5, ECX;
  418. }
  419. datacache[0].size = ( (c5>>24) & 0xFF);
  420. datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
  421. datacache[0].lineSize = c5 & 0xFF;
  422. if (max_extended_cpuid >= 0x8000_0006) {
  423. // AMD K6-III or K6-2+ or later.
  424. ubyte numcores = 1;
  425. if (max_extended_cpuid >=0x8000_0008) {
  426. asm {
  427. mov EAX, 0x8000_0008;
  428. cpuid;
  429. mov numcores, CL;
  430. }
  431. ++numcores;
  432. if (numcores>maxCores) maxCores = numcores;
  433. }
  434. asm {
  435. mov EAX, 0x8000_0006; // L2/L3 cache
  436. cpuid;
  437. mov c6, ECX; // L2 cache info
  438. mov d6, EDX; // L3 cache info
  439. }
  440. ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
  441. datacache[1].size = (c6>>16) & 0xFFFF;
  442. datacache[1].associativity = assocmap[(c6>>12)&0xF];
  443. datacache[1].lineSize = c6 & 0xFF;
  444. // The L3 cache value is TOTAL, not per core.
  445. datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
  446. datacache[2].associativity = assocmap[(d6>>12)&0xF];
  447. datacache[2].lineSize = d6 & 0xFF;
  448. }
  449. }
  450. // For Intel CoreI7 and later, use function 0x0B
  451. // to determine number of processors.
  452. void getCpuInfo0B()
  453. {
  454. int level=0;
  455. uint a, b, c, d;
  456. do {
  457. asm {
  458. mov EAX, 0x0B;
  459. mov ECX, level;
  460. cpuid;
  461. mov a, EAX;
  462. mov b, EBX;
  463. mov c, ECX;
  464. mov d, EDX;
  465. }
  466. if (b!=0) {
  467. // I'm not sure about this. The docs state that there
  468. // are 2 hyperthreads per core if HT is factory enabled.
  469. if (level==0) maxThreads = b & 0xFFFF;
  470. else if (level==1) maxCores = b & 0xFFFF;
  471. }
  472. ++level;
  473. } while (a!=0 || b!=0);
  474. }
  475. void cpuidX86()
  476. {
  477. char * venptr = vendorID.ptr;
  478. uint a, b, c, d, a2;
  479. asm {
  480. mov EAX, 0;
  481. cpuid;
  482. mov a, EAX;
  483. mov EAX, venptr;
  484. mov [EAX], EBX;
  485. mov [EAX + 4], EDX;
  486. mov [EAX + 8], ECX;
  487. mov EAX, 0x8000_0000;
  488. cpuid;
  489. mov a2, EAX;
  490. }
  491. max_cpuid = a;
  492. max_extended_cpuid = a2;
  493. probablyIntel = vendorID == "GenuineIntel";
  494. probablyAMD = vendorID == "AuthenticAMD";
  495. uint apic = 0; // brand index, apic id
  496. asm {
  497. mov EAX, 1; // model, stepping
  498. cpuid;
  499. mov a, EAX;
  500. mov apic, EBX;
  501. mov c, ECX;
  502. mov d, EDX;
  503. }
  504. features = d;
  505. miscfeatures = c;
  506. amdfeatures = 0;
  507. amdmiscfeatures = 0;
  508. if (max_extended_cpuid >= 0x8000_0001) {
  509. asm {
  510. mov EAX, 0x8000_0001;
  511. cpuid;
  512. mov c, ECX;
  513. mov d, EDX;
  514. }
  515. amdmiscfeatures = c;
  516. amdfeatures = d;
  517. }
  518. // Try to detect fraudulent vendorIDs
  519. if (amd3dnow) probablyIntel = false;
  520. stepping = a & 0xF;
  521. uint fbase = (a >> 8) & 0xF;
  522. uint mbase = (a >> 4) & 0xF;
  523. family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
  524. model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ?
  525. mbase + ((a >> 12) & 0xF0) : mbase;
  526. if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) {
  527. // determine max number of cores for AMD
  528. asm {
  529. mov EAX, 0x8000_0008;
  530. cpuid;
  531. mov c, ECX;
  532. }
  533. uint apicsize = (c>>12) & 0xF;
  534. if (apicsize == 0) {
  535. // use legacy method
  536. if (hyperThreadingBit()) maxCores = c & 0xFF;
  537. else maxCores = 1;
  538. } else {
  539. // maxcores = 2^ apicsize
  540. maxCores = 1;
  541. while (apicsize) { maxCores<<=1; --apicsize; }
  542. }
  543. }
  544. if (max_extended_cpuid >= 0x8000_0004) {
  545. char *procptr = processorNameBuffer.ptr;
  546. asm {
  547. push ESI;
  548. mov ESI, procptr;
  549. mov EAX, 0x8000_0002;
  550. cpuid;
  551. mov [ESI], EAX;
  552. mov [ESI+4], EBX;
  553. mov [ESI+8], ECX;
  554. mov [ESI+12], EDX;
  555. mov EAX, 0x8000_0003;
  556. cpuid;
  557. mov [ESI+16], EAX;
  558. mov [ESI+20], EBX;
  559. mov [ESI+24], ECX;
  560. mov [ESI+28], EDX;
  561. mov EAX, 0x8000_0004;
  562. cpuid;
  563. mov [ESI+32], EAX;
  564. mov [ESI+36], EBX;
  565. mov [ESI+40], ECX;
  566. mov [ESI+44], EDX;
  567. pop ESI;
  568. }
  569. // Intel P4 and PM pad at front with spaces.
  570. // Other CPUs pad at end with nulls.
  571. int start = 0, end = 0;
  572. while (processorNameBuffer[start] == ' ') { ++start; }
  573. while (processorNameBuffer[$-end-1] == 0) { ++end; }
  574. processorName = processorNameBuffer[start..$-end].idup;
  575. } else {
  576. processorName = "Unknown CPU";
  577. }
  578. // Determine cache sizes
  579. // Intel docs specify that they return 0 for 0x8000_0005.
  580. // AMD docs do not specify the behaviour for 0004 and 0002.
  581. // Centaur/VIA and most other manufacturers use the AMD method,
  582. // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
  583. // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
  584. // for CPUID80000005. But Geode GX uses the AMD method
  585. // Deal with idiotic Geode GX1 - make it same as MediaGX MMX.
  586. if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
  587. max_extended_cpuid = 0x8000_0004;
  588. }
  589. // Therefore, we try the AMD method unless it's an Intel chip.
  590. // If we still have no info, try the Intel methods.
  591. datacache[0].size = 0;
  592. if (max_cpuid<2 || !probablyIntel) {
  593. if (max_extended_cpuid >= 0x8000_0005) {
  594. getAMDcacheinfo();
  595. } else if (probablyAMD) {
  596. // According to AMDProcRecognitionAppNote, this means CPU
  597. // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
  598. // Am5x86 has 16Kb 4-way unified data & code cache.
  599. datacache[0].size = 8;
  600. datacache[0].associativity = 4;
  601. datacache[0].lineSize = 32;
  602. } else {
  603. // Some obscure CPU.
  604. // Values for Cyrix 6x86MX (family 6, model 0)
  605. datacache[0].size = 64;
  606. datacache[0].associativity = 4;
  607. datacache[0].lineSize = 32;
  608. }
  609. }
  610. if ((datacache[0].size == 0) && max_cpuid>=4) {
  611. getcacheinfoCPUID4();
  612. }
  613. if ((datacache[0].size == 0) && max_cpuid>=2) {
  614. getcacheinfoCPUID2();
  615. }
  616. if (datacache[0].size == 0) {
  617. // Pentium, PMMX, late model 486, or an obscure CPU
  618. if (mmx) { // Pentium MMX. Also has 8kB code cache.
  619. datacache[0].size = 16;
  620. datacache[0].associativity = 4;
  621. datacache[0].lineSize = 32;
  622. } else { // Pentium 1 (which also has 8kB code cache)
  623. // or 486.
  624. // Cyrix 6x86: 16, 4way, 32 linesize
  625. datacache[0].size = 8;
  626. datacache[0].associativity = 2;
  627. datacache[0].lineSize = 32;
  628. }
  629. }
  630. if (max_cpuid >=0x0B) {
  631. // For Intel i7 and later, use function 0x0B to determine
  632. // cores and hyperthreads.
  633. getCpuInfo0B();
  634. } else {
  635. if (hyperThreadingBit()) maxThreads = (apic>>>16) & 0xFF;
  636. else maxThreads = maxCores;
  637. }
  638. }
  639. // Return true if the cpuid instruction is supported.
  640. // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
  641. bool hasCPUID()
  642. {
  643. uint flags;
  644. asm {
  645. pushfd;
  646. pop EAX;
  647. mov flags, EAX;
  648. xor EAX, 0x0020_0000;
  649. push EAX;
  650. popfd;
  651. pushfd;
  652. pop EAX;
  653. xor flags, EAX;
  654. }
  655. return (flags & 0x0020_0000) !=0;
  656. }
  657. } else { // inline asm X86
  658. bool hasCPUID() { return false; }
  659. void cpuidX86()
  660. {
  661. datacache[0].size = 8;
  662. datacache[0].associativity = 2;
  663. datacache[0].lineSize = 32;
  664. }
  665. }
  666. // TODO: Implement this function with OS support
  667. void cpuidPPC()
  668. {
  669. enum :int { PPC601, PPC603, PPC603E, PPC604,
  670. PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
  671. // TODO:
  672. // asm { mfpvr; } returns the CPU version but unfortunately it can
  673. // only be used in kernel mode. So OS support is required.
  674. int cputype = PPC603;
  675. // 601 has a 8KB combined data & code L1 cache.
  676. uint[] sizes = [4, 8, 16, 16, 32, 32, 32, 32, 64];
  677. ubyte[] ways = [8, 2, 4, 4, 4, 8, 8, 8, 8];
  678. uint[] L2size= [0, 0, 0, 0, 0, 0, 0, 256, 512];
  679. uint[] L3size= [0, 0, 0, 0, 0, 0, 0, 2048, 0];
  680. datacache[0].size = sizes[cputype];
  681. datacache[0].associativity = ways[cputype];
  682. datacache[0].lineSize = (cputype==PPCG5)? 128 :
  683. (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
  684. datacache[1].size = L2size[cputype];
  685. datacache[2].size = L3size[cputype];
  686. datacache[1].lineSize = datacache[0].lineSize;
  687. datacache[2].lineSize = datacache[0].lineSize;
  688. }
// TODO: Implement this function with OS support
// Placeholder: currently does nothing. The table below records known cache
// configurations (sizes in KB) for a future implementation.
void cpuidSparc()
{
    // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
    // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
    // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
    // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
    // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
    // Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
}
  699. shared static this()
  700. {
  701. if (hasCPUID()) {
  702. cpuidX86();
  703. } else {
  704. // it's a 386 or 486, or a Cyrix 6x86.
  705. //Probably still has an external cache.
  706. }
  707. if (datacache[0].size==0) {
  708. // Guess same as Pentium 1.
  709. datacache[0].size = 8;
  710. datacache[0].associativity = 2;
  711. datacache[0].lineSize = 32;
  712. }
  713. numCacheLevels = 1;
  714. // And now fill up all the unused levels with full memory space.
  715. for (int i=1; i< datacache.length; ++i) {
  716. if (datacache[i].size==0) {
  717. // Set all remaining levels of cache equal to full address space.
  718. datacache[i].size = uint.max/1024;
  719. datacache[i].associativity = 1;
  720. datacache[i].lineSize = datacache[i-1].lineSize;
  721. } else numCacheLevels = i+1;
  722. }
  723. }
// Stand-alone driver, compiled only with -debug=Cpuid: prints the processor
// name together with the detected thread and core counts.
debug (Cpuid)
{
    private import tango.io.Stdout;
    void main()
    {
        Stdout.formatln ("{}, {} threads, {} cores", processor, threadsPerCPU, coresPerCPU);
    }
}