/user/tango/core/Cpuid.d
D | 999 lines | 712 code | 43 blank | 244 comment | 115 complexity | d7a27d3d8d578ed2bbeca7cd34b43d67 MD5 | raw file
Possible License(s): BSD-3-Clause
- /**
- Identify the characteristics of the host CPU, providing information
- about cache sizes and assembly optimisation hints.
-
- Some of this information was extremely difficult to track down. Some of the
- documents below were found only in cached versions stored by search engines!
- This code relies on information found in:
-
- - "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
- Volume 2A: Instruction Set Reference, A-M" (2007).
- - "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
- - "AMD Processor Recognition Application Note For Processors Prior to AMD Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
- - "AMD Geode(TM) GX Processors Data Book", AMD, Publication ID 31505E, (2005).
- - "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
- - "Application note 106: Software Customization for the 6x86 Family", Cyrix Corporation, Rev 1.5 (1998)
- - http://ftp.intron.ac/pub/document/cpu/cpuid.htm
- - "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution", National Semiconductor, (2002)
- - "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
- - http://www.sandpile.org/ia32/cpuid.htm
- - http://grafi.ii.pw.edu.pl/gbm/x86/cpuid.html
- - "What every programmer should know about memory", Ulrich Drepper, Red Hat, Inc.
- (2007).
-
- AUTHORS: Don Clugston,
- Tomas Lindquist Olsen <tomas@famolsen.dk>
- Fawzi Mohamed
- COPYRIGHT: Public Domain
- BUGS: Currently only works on x86 CPUs.
- Many processors have bugs in their microcode for the CPUID instruction,
- so sometimes the cache information may be incorrect.
- */
- module tango.core.Cpuid;
// Inline assembler is only enabled for compilers other than GDC: according to
// the original author GDC cannot actually compile the inline asm used below,
// so Really_D_InlineAsm_X86 is deliberately left undefined for it.
version (GNU)
{
}
else version (D_InlineAsm_X86)
{
    version = Really_D_InlineAsm_X86;
}

// Collapse the compiler-provided architecture identifiers into CPU families.
// ARM and SPARC are accepted but define no family identifier; anything else
// is rejected at compile time.
version (X86)
{
    version = X86_CPU;
}
else version (X86_64)
{
    version = X86_CPU;
}
else version (PPC64)
{
    version = PPC_CPU;
}
else version (PPC)
{
    version = PPC_CPU;
}
else version (ARM)
{
}
else version (SPARC)
{
}
else
{
    static assert(0,"unknown cpu family");
}
/// Size and behaviour of a single cache level.
struct CacheInfo
{
    /// Cache size in kilobytes, per CPU.
    /// A unified (data + code) L1 cache is reported at half its physical
    /// size; larger caches are not halved, because in critical loops the
    /// data footprint is normally much bigger than the code footprint.
    size_t size;

    /// Number of ways of associativity, e.g.:
    ///   1 = direct mapped
    ///   2 = 2-way set associative
    ///   3 = 3-way set associative
    ///   ubyte.max = fully associative
    ubyte associativity;

    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;

    /// How many threads share this cache; 0 = unknown.
    uint nThreadSharing;

    /// Set when the numbers cannot really be trusted.
    bool wildGuess;

    /// Resets every field to its "nothing known yet" value.
    void clear()
    {
        size           = cast(size_t)0;
        associativity  = 1;
        lineSize       = 0;
        nThreadSharing = 0;
        wildGuess      = true;
    }
}
/// Description of the main CPU type of the host; set by the module constructor.
static CpuInfo mainCpu;

/// Module constructor: creates the CpuInfo subclass that matches the CPU
/// family selected at compile time. Only the x86 variant is asked to probe
/// the hardware here; the other families are just instantiated.
static this()
{
    version (X86_CPU)
    {
        mainCpu = new CpuInfoX86();
        mainCpu.getCpuData();
    }
    else version (PPC_CPU)
    {
        mainCpu = new CpuInfoPpc();
    }
    else version (ARM)
    {
        mainCpu = new CpuInfoArm();
    }
    else version (SPARC)
    {
        mainCpu = new CpuInfoSparc();
    }
}
/// Returns the description of the current CPU type (the module-level mainCpu).
CpuInfo currentCpu()
{
    return mainCpu;
}
/// Tells whether the system has only one kind of CPU.
/// At the moment this is hard-coded to true.
bool uniqueCpuType()
{
    return true;
}
/// Architecture-independent information about a CPU: display strings,
/// core/thread counts and the data-cache hierarchy.
///
/// The architecture-specific helpers (x86(), ppc(), arm(), sparc()) are only
/// defined when compiling for that family. If you think they should always be
/// defined, but throw or return null, post a ticket explaining why.
class CpuInfo
{
protected:
    /// The data caches, indexed by level (index 0 is the first-level cache).
    /// If there are fewer than 5 physical cache levels, cacheFixup() sets the
    /// remaining levels to size_t.max/1024 (== entire memory space).
    /// make this a function?
    CacheInfo[5] datacache;
    /// Number of cache levels that hold real (non-sentinel) data.
    uint numCacheLevels;
    /// Vendor name (only for display purposes).
    char[] vendorName;
    /// Name of the processor (only for display purposes).
    char[] processorName;

    /// Tries to get valid data from the current cpu; returns false if it
    /// fails. The base implementation cannot probe anything: it resets the
    /// object, fills in the cache defaults and reports failure.
    bool getCpuData()
    {
        clear();
        cacheFixup();
        return false;
    }

public:
    /// Creates an object in the cleared state (calls the virtual clear()).
    this()
    {
        this.clear();
    }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    char[] vendor() { return vendorName; }

    /// Returns processor string, for display purposes only.
    char[] processor() { return processorName; }

    /// Is hyperthreading supported? (more threads than cores per CPU)
    bool hyperThreading()
    {
        return threadsPerCPU() > coresPerCPU();
    }

    /// Returns number of threads per CPU (1 unless a subclass knows better).
    uint threadsPerCPU()
    {
        return 1;
    }

    /// Returns number of cores in CPU (1 unless a subclass knows better).
    uint coresPerCPU()
    {
        return 1;
    }

    /// Clears info stored in this object.
    void clear()
    {
        foreach (ref el; datacache) {
            el.clear();
        }
        numCacheLevels = 0;
        vendorName = "unkown";
        processorName = "unkown";
    }

    /// Duplicates this object: creates a new instance of the same dynamic
    /// class and copies the data into it via opSliceAssign.
    CpuInfo dup()
    {
        CpuInfo newInfo = cast(CpuInfo)this.classinfo.create();
        newInfo[] = this;
        return newInfo;
    }

    /// Copies data from one object to the other (`a[] = b`).
    /// Both objects must be of exactly the same class.
    CpuInfo opSliceAssign(CpuInfo other)
    {
        assert(other.classinfo is this.classinfo);
        //datacache.length=other.datacache.length;
        datacache[] = other.datacache;
        numCacheLevels = other.numCacheLevels;
        vendorName = other.vendorName;
        processorName = other.processorName;
        return this;
    }

    /// Sets unset values in the cache info and recomputes numCacheLevels.
    ///
    /// Level 0 falls back to a Pentium-1-like guess when nothing is known.
    /// Every higher level whose size is 0 is filled with the "entire address
    /// space" sentinel. A level that already holds the sentinel is treated as
    /// unused too, so calling this method more than once does not inflate
    /// numCacheLevels.
    protected void cacheFixup()
    {
        // Sentinel size (in kB) stored in unused levels.
        const size_t wholeMemory = size_t.max/1024;

        if (datacache[0].size == 0) {
            // Guess same as Pentium 1.
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
            datacache[0].wildGuess = true;
        }
        if (datacache[0].nThreadSharing == 0) {
            // A subclass that failed to determine the core count must not
            // trigger a division by zero here.
            uint cores = coresPerCPU();
            if (cores == 0) cores = 1;
            datacache[0].nThreadSharing = threadsPerCPU()/cores;
        }
        numCacheLevels = 1;
        // And now fill up all the unused levels with full memory space.
        for (size_t i = 1; i < datacache.length; ++i) {
            if (datacache[i].size == 0 || datacache[i].size == wholeMemory) {
                // Set all remaining levels of cache equal to full address space.
                datacache[i].size = wholeMemory;
                datacache[i].associativity = 1;
                datacache[i].lineSize = datacache[i-1].lineSize;
                datacache[i].wildGuess = false;
            } else {
                numCacheLevels = cast(uint)(i + 1);
            }
            if (datacache[i].nThreadSharing == 0) {
                datacache[i].nThreadSharing = threadsPerCPU();
            }
        }
    }

    version (X86_CPU) {
        /// Utility method to get information about x86 processors.
        /// Throws an Exception when this object is not a CpuInfoX86.
        final CpuInfoX86 x86()
        {
            if (auto res = cast(CpuInfoX86)this)
                return res;
            throw new Exception("non x86 cpu",__FILE__,__LINE__);
        }
    }
    // Gated on the family identifier (PPC_CPU) rather than PPC alone, so the
    // helper is also available on PPC64, where the module constructor
    // creates a CpuInfoPpc as well.
    version (PPC_CPU) {
        /// Utility method to get information about PPC processors.
        /// Throws an Exception when this object is not a CpuInfoPpc.
        final CpuInfoPpc ppc()
        {
            if (auto res = cast(CpuInfoPpc)this)
                return res;
            throw new Exception("non ppc cpu",__FILE__,__LINE__);
        }
    }
    version (ARM) {
        /// Utility method to get information about arm processors.
        /// Throws an Exception when this object is not a CpuInfoArm.
        final CpuInfoArm arm()
        {
            if (auto res = cast(CpuInfoArm)this)
                return res;
            throw new Exception("non arm cpu",__FILE__,__LINE__);
        }
    }
    version (SPARC) {
        /// Utility method to get information about sparc processors.
        /// Throws an Exception when this object is not a CpuInfoSparc.
        final CpuInfoSparc sparc()
        {
            if (auto res = cast(CpuInfoSparc)this)
                return res;
            throw new Exception("non sparc cpu",__FILE__,__LINE__);
        }
    }
}
- /// If optimizing for a particular processor, it is generally better
- /// to identify based on features rather than model. NOTE: Normally
- /// it's only worthwhile to optimise for the latest Intel and AMD CPU,
- /// with a backup for other CPUs.
- /// Pentium -- preferPentium1()
- /// PMMX -- + mmx()
- /// PPro -- default
- /// PII -- + mmx()
- /// PIII -- + mmx() + sse()
- /// PentiumM -- + mmx() + sse() + sse2()
- /// Pentium4 -- preferPentium4()
- /// PentiumD -- + isX86_64()
- /// Core2 -- default + isX86_64()
- /// AMD K5 -- preferPentium1()
- /// AMD K6 -- + mmx()
- /// AMD K6-II -- + mmx() + 3dnow()
- /// AMD K7 -- preferAthlon()
- /// AMD K8 -- + sse2()
- /// AMD K10 -- + isX86_64()
- /// Cyrix 6x86 -- preferPentium1()
- /// 6x86MX -- + mmx()
- final class CpuInfoX86: CpuInfo {
private:
    /// true = _probably_ an Intel processor, might be faking
    bool probablyIntel;
    /// true = _probably_ an AMD processor
    bool probablyAMD;
    /// CPUID leaf 1 EDX: mmx, sse, sse2, hyperthreading, etc
    uint features;
    /// CPUID leaf 1 ECX: sse3, etc.
    uint miscfeatures;
    /// CPUID leaf 0x8000_0001 EDX: 3DNow!, mmxext, etc
    uint amdfeatures;
    /// CPUID leaf 0x8000_0001 ECX: sse4a, sse5, svm, etc
    uint amdmiscfeatures;
    /// Number of cores per CPU, as reported by coresPerCPU().
    uint maxCores;
    /// Number of threads per CPU, as reported by threadsPerCPU().
    uint maxThreads;
    /// Highest supported standard CPUID leaf (EAX of CPUID leaf 0).
    uint max_cpuid;
    /// Highest supported extended CPUID leaf (EAX of CPUID leaf 0x8000_0000).
    uint max_extended_cpuid;
public:
    /// State of the HTT feature bit.
    /// Note that this may indicate multi-core rather than hyperthreading.
    bool hyperThreadingBit() { return 0 != (features & HTT_BIT); }

    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;

    /// Does it have an x87 FPU on-chip?
    bool x87onChip()        { return 0 != (features & FPU_BIT); }
    /// Is MMX supported?
    bool mmx()              { return 0 != (features & MMX_BIT); }
    /// Is SSE supported?
    bool sse()              { return 0 != (features & SSE_BIT); }
    /// Is SSE2 supported?
    bool sse2()             { return 0 != (features & SSE2_BIT); }
    /// Is SSE3 supported?
    bool sse3()             { return 0 != (miscfeatures & SSE3_BIT); }
    /// Is SSSE3 supported?
    bool ssse3()            { return 0 != (miscfeatures & SSSE3_BIT); }
    /// Is SSE4.1 supported?
    bool sse41()            { return 0 != (miscfeatures & SSE41_BIT); }
    /// Is SSE4.2 supported?
    bool sse42()            { return 0 != (miscfeatures & SSE42_BIT); }
    /// Is SSE4a supported?
    bool sse4a()            { return 0 != (amdmiscfeatures & SSE4A_BIT); }
    /// Is SSE5 supported?
    bool sse5()             { return 0 != (amdmiscfeatures & SSE5_BIT); }
    /// Is AMD 3DNOW supported?
    bool amd3dnow()         { return 0 != (amdfeatures & AMD_3DNOW_BIT); }
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt()      { return 0 != (amdfeatures & AMD_3DNOW_EXT_BIT); }
    /// Are AMD extensions to MMX supported?
    bool amdMmx()           { return 0 != (amdfeatures & AMD_MMX_BIT); }
    /// Is fxsave/fxrstor supported?
    bool hasFxsr()          { return 0 != (features & FXSR_BIT); }
    /// Is cmov supported?
    bool hasCmov()          { return 0 != (features & CMOV_BIT); }
    /// Is rdtsc supported?
    bool hasRdtsc()         { return 0 != (features & TIMESTAMP_BIT); }
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b()     { return 0 != (features & CMPXCHG8B_BIT); }
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b()    { return 0 != (miscfeatures & CMPXCHG16B_BIT); }
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch() { return 0 != (amdmiscfeatures & AMD_3DNOW_PREFETCH_BIT); }
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf()      { return 0 != (amdmiscfeatures & LAHFSAHF_BIT); }
    /// Is POPCNT supported?
    bool hasPopcnt()        { return 0 != (miscfeatures & POPCNT_BIT); }
    /// Is LZCNT supported?
    bool hasLzcnt()         { return 0 != (amdmiscfeatures & LZCNT_BIT); }
    /// Is this an Intel64 or AMD 64?
    bool isX86_64()         { return 0 != (amdfeatures & AMD64_BIT); }

    /// Is this an IA64 (Itanium) processor?
    bool isItanium()        { return 0 != (features & IA64_BIT); }

    /// Is hyperthreading supported? (more threads than cores)
    bool hyperThreading()
    {
        return maxThreads > maxCores;
    }

    /// Returns number of threads per CPU
    uint threadsPerCPU()
    {
        return maxThreads;
    }

    /// Returns number of cores in CPU
    uint coresPerCPU()
    {
        return maxCores;
    }
-
- /// Optimisation hints for assembly code.
- /// For forward compatibility, the CPU is compared against different
- /// microarchitectures. For 32-bit X86, comparisons are made against
- /// the Intel PPro/PII/PIII/PM family.
- ///
- /// The major 32-bit x86 microarchitecture 'dynasties' have been:
- /// (1) Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2).
- /// (2) AMD Athlon (K7, K8, K10).
- /// (3) Intel NetBurst (Pentium 4, Pentium D).
- /// (4) In-order Pentium (Pentium1, PMMX)
- /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
- /// Cyrix, Rise) were mostly in-order.
- /// Some new processors do not fit into the existing categories:
- /// Intel Atom 230/330 (family 6, model 0x1C) is an in-order core.
- /// Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core.
- ///
- /// Within each dynasty, the optimisation techniques are largely
- /// identical (eg, use instruction pairing for group 4). Major
- /// instruction set improvements occur within each group.
-
/// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
/// True for a probable AMD part of family 6 or later.
bool preferAthlon()
{
    if (!probablyAMD)
        return false;
    return family >= 6;
}

/// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
/// True for a probable Intel part of family 0xF.
bool preferPentium4()
{
    if (!probablyIntel)
        return false;
    return family == 0xF;
}

/// Does this CPU perform better on Pentium I code than Pentium Pro code?
/// True for every family below 6, and for non-Intel family 6 parts whose
/// model is below 0xF.
bool preferPentium1()
{
    if (family < 6)
        return true;
    return family == 6 && model < 0xF && !probablyIntel;
}
/// Creates a cleared object; all initialisation is delegated to the
/// CpuInfo constructor.
this()
{
    super();
}
/// Clears info stored in this object, including every x86-specific field.
///
/// The maximum CPUID leaf numbers are reset as well, so that a cleared
/// object never carries leaf limits from an earlier probe.
override void clear()
{
    super.clear();
    stepping = 0;
    model = 0;
    family = 0;
    probablyIntel = false;
    probablyAMD = false;
    vendorName = "UnknownX86";
    processorName = "UnknownX86";
    features = 0;
    miscfeatures = 0;
    amdmiscfeatures = 0;
    amdfeatures = 0;
    maxCores = 1;
    maxThreads = 1;
    max_cpuid = 0;
    max_extended_cpuid = 0;
}
/// Copies data from one object to the other (`a[] = b`).
///
/// `o` must be a CpuInfoX86. The base class copies the cache table and the
/// vendor/processor strings; this override adds every x86-specific field,
/// including the maximum CPUID leaf numbers, so that dup() yields a complete
/// copy.
override CpuInfoX86 opSliceAssign(CpuInfo o)
{
    auto other = cast(CpuInfoX86)o;
    assert(other !is null);
    // Copies datacache, numCacheLevels, vendorName and processorName.
    super.opSliceAssign(o);
    stepping = other.stepping;
    model = other.model;
    family = other.family;
    probablyIntel = other.probablyIntel;
    probablyAMD = other.probablyAMD;
    features = other.features;
    miscfeatures = other.miscfeatures;
    amdmiscfeatures = other.amdmiscfeatures;
    amdfeatures = other.amdfeatures;
    maxCores = other.maxCores;
    maxThreads = other.maxThreads;
    max_cpuid = other.max_cpuid;
    max_extended_cpuid = other.max_extended_cpuid;
    return this;
}
-
/// Auto-configures this object for the current cpu.
/// Returns true when CPUID was available and has been queried, false when
/// the object had to be reset to defaults.
protected override bool getCpuData()
{
    if (!hasCPUID()) {
        // it's a 386 or 486, or a Cyrix 6x86.
        // Probably still has an external cache.
        clear();
        cacheFixup();
        return false;
    }
    cpuidX86();
    cacheFixup();
    return true;
}
// Feature flags returned in EDX by CPUID leaf 1 (stored in `features`).
enum : uint
{
    FPU_BIT        = 1 << 0,
    TIMESTAMP_BIT  = 1 << 4,   // rdtsc
    MDSR_BIT       = 1 << 5,   // RDMSR/WRMSR
    CMPXCHG8B_BIT  = 1 << 8,
    CMOV_BIT       = 1 << 15,
    MMX_BIT        = 1 << 23,
    FXSR_BIT       = 1 << 24,
    SSE_BIT        = 1 << 25,
    SSE2_BIT       = 1 << 26,
    HTT_BIT        = 1 << 28,
    IA64_BIT       = 1 << 30
}

// Miscellaneous feature flags returned in ECX by CPUID leaf 1
// (stored in `miscfeatures`).
enum : uint
{
    SSE3_BIT       = 1 << 0,
    PCLMULQDQ_BIT  = 1 << 1,   // from AVX
    MWAIT_BIT      = 1 << 3,
    SSSE3_BIT      = 1 << 9,
    FMA_BIT        = 1 << 12,  // from AVX
    CMPXCHG16B_BIT = 1 << 13,
    SSE41_BIT      = 1 << 19,
    SSE42_BIT      = 1 << 20,
    POPCNT_BIT     = 1 << 23,
    AES_BIT        = 1 << 25,  // AES instructions from AVX
    OSXSAVE_BIT    = 1 << 27,  // Used for AVX
    AVX_BIT        = 1 << 28
}
- /+
- version(X86_64) {
- bool hasAVXinHardware() {
- // This only indicates hardware support, not OS support.
- return (miscfeatures&AVX_BIT) && (miscfeatures&OSXSAVE_BIT);
- }
- // Is AVX supported (in both hardware & OS)?
- bool Avx() {
- if (!hasAVXinHardware()) return false;
- // Check for OS support
- uint xfeatures;
- asm {mov ECX, 0; xgetbv; mov xfeatures, EAX; }
- return (xfeatures&0x6)==6;
- }
- bool hasAvxFma() {
- if (!AVX()) return false;
- return (features&FMA_BIT)!=0;
- }
- }
- +/
// AMD feature flags returned in EDX by CPUID leaf 0x8000_0001
// (stored in `amdfeatures`).
enum : uint
{
    AMD_MMX_BIT            = 1 << 22,
    // FXR_OR_CYRIXMMX_BIT = 1 << 24, // Cyrix/NS: 6x86MMX instructions.
    FFXSR_BIT              = 1 << 25,
    PAGE1GB_BIT            = 1 << 26,  // support for 1GB pages
    RDTSCP_BIT             = 1 << 27,
    AMD64_BIT              = 1 << 29,
    AMD_3DNOW_EXT_BIT      = 1 << 30,
    AMD_3DNOW_BIT          = 1<<31
}

// AMD miscellaneous feature flags returned in ECX by CPUID leaf 0x8000_0001
// (stored in `amdmiscfeatures`).
enum : uint
{
    LAHFSAHF_BIT           = 1 << 0,
    LZCNT_BIT              = 1 << 5,
    SSE4A_BIT              = 1 << 6,
    AMD_3DNOW_PREFETCH_BIT = 1 << 8,
    SSE5_BIT               = 1 << 11
}
- version(Really_D_InlineAsm_X86) {
- // Note that this code will also work for Itanium, after changing the
- // register names in the asm code.
// CPUID2: "cache and tlb information"
/// Fills datacache[0..2] from the descriptor bytes returned by CPUID leaf 2.
///
/// Each of the four result registers carries up to four one-byte descriptors.
/// Only descriptors that denote data or unified caches are recognised; all
/// others are ignored. The low byte of EAX on the first call is the number of
/// times the leaf has to be queried.
void getcacheinfoCPUID2()
{
    // CPUID2 is a dog's breakfast. What was Intel thinking???
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables are parallel: ids[i] has size sizes[i] (in kB)
        // and associativity ways[i].
        ubyte [] ids = [
            0x0A, 0x0C, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D
        ];
        uint [] sizes = [
            8, 16, 32, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        ubyte [] ways = [
            2, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16
        ];
        // Index of the first L2 entry and of the first L3 entry in the tables.
        enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 }
        for (size_t i = 0; i < ids.length; ++i) {
            if (x != ids[i]) continue;
            // datacache index: 0 = L1, 1 = L2, 2 = L3
            int level = i < FIRSTDATA2 ? 0 : i < FIRSTDATA3 ? 1 : 2;
            // On family 0xF model 6, descriptor 0x49 denotes an L3 cache.
            if (x==0x49 && family==0xF && model==0x6) level = 2;
            datacache[level].size = sizes[i];
            datacache[level].associativity = ways[i];
            // All L3 descriptors use 64-byte lines. `level` is a zero-based
            // index, so L3 is level 2 (the former test against 3 could
            // never be true). Descriptor 0x0E is a 64-byte-line L1 cache.
            if (level == 2 || x==0x2C || x==0x0E || (x>=0x48 && x<=0x80)
                || x==0x86 || x==0x87
                || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E) ) {
                datacache[level].lineSize = 64;
            } else {
                datacache[level].lineSize = 32;
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        asm {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size = 8;
                datacache[0].associativity = 4;
                datacache[0].lineSize = 16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of 0 would make the `--numinfos` below wrap around and
            // spin for ~4 billion iterations; treat it as a single pass.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c = 0; c < 4; ++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
// CPUID4: "Deterministic cache parameters" leaf
/// Enumerates the caches reported by CPUID leaf 4 and stores the data and
/// unified ones in datacache. Also raises maxCores to the largest core count
/// reported by any sub-leaf.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        asm {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        // EAX[4:0]: cache type
        uint cacheType = a & 0x1F;
        if (cacheType == 0) break; // no more caches
        uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > maxCores) maxCores = numcores;
        if (cacheType != 1 && cacheType != 3) continue; // we only want data & unified caches

        ++number_of_sets; // ECX holds the number of sets minus one
        // EAX[7:5] is the 1-based cache level; a field value of 0 wraps to
        // 255 here and is rejected by the range check below.
        ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // `>=`, not `>`: level == datacache.length would index past the end.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max : cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF) + 1; // system coherency line size
        uint line_partitions = ((b >> 12) & 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of cores.
        ulong sz = (datacache[level].associativity < ubyte.max)
            ? number_of_sets * datacache[level].associativity
            : number_of_sets;
        datacache[level].size = cast(uint)(
            (sz * datacache[level].lineSize * line_partitions) / (numcores * 1024));
        if (level == 0 && cacheType == 3) {
            // Halve the size for unified L1 caches
            datacache[level].size /= 2;
        }
    }
}
// CPUID8000_0005 & 6
/// Fills datacache[0] from extended leaf 0x8000_0005 and, when leaf
/// 0x8000_0006 is available, datacache[1] and datacache[2] as well.
/// May raise maxCores when leaf 0x8000_0008 reports more cores.
void getAMDcacheinfo()
{
    uint c5, c6, d6;
    asm {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }
    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;
    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept in a uint: with a ubyte, a raw count of 255 would wrap to 0
        // after the increment and the L3 division below would divide by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            ubyte coresMinusOne;
            asm {
                mov EAX, 0x8000_0008;
                cpuid;
                mov coresMinusOne, CL;
            }
            numcores = cast(uint)coresMinusOne + 1;
            if (numcores > maxCores) maxCores = numcores;
        }
        asm {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code of leaf 0x8000_0006 to ways.
        ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
/// Queries CPUID and fills in vendor, feature words, family/model/stepping,
/// processor name, core/thread counts and the cache table.
///
/// Must only be called when hasCPUID() returned true. cacheFixup() is not
/// called here; the caller is expected to do that.
void cpuidX86()
{
    char [12] vendorID;
    char [48] processorNameBuffer;
    uint m_1, m_2;
    char * venptr = vendorID.ptr;
    asm {
        mov EAX, 0;
        cpuid;
        mov m_1, EAX;
        mov EAX, venptr;
        mov [EAX], EBX;
        mov [EAX + 4], EDX;
        mov [EAX + 8], ECX;
        mov EAX, 0x8000_0000;
        cpuid;
        mov m_2, EAX;
    }
    max_cpuid = m_1;
    max_extended_cpuid = m_2;

    probablyIntel = vendorID == "GenuineIntel";
    probablyAMD = vendorID == "AuthenticAMD";
    vendorName = vendorID.dup;
    uint a, c;
    uint apic = 0; // brand index, apic id
    asm {
        mov EAX, 1; // model, stepping
        cpuid;
        mov a, EAX;
        mov apic, EBX;
        mov m_1, ECX;
        mov m_2, EDX;
    }
    miscfeatures = m_1;
    features = m_2;
    amdfeatures = 0;
    amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        asm {
            mov EAX, 0x8000_0001;
            cpuid;
            mov m_1, ECX;
            mov m_2, EDX;
        }
        amdmiscfeatures = m_1;
        amdfeatures = m_2;
    }
    // Try to detect fraudulent vendorIDs
    if (amd3dnow()) probablyIntel = false;

    stepping = a & 0xF;
    uint fbase = (a >> 8) & 0xF;
    uint mbase = (a >> 4) & 0xF;
    // The extended family (EAX[27:20]) is added to the base family. The mask
    // must be applied before the addition: `+` binds tighter than `&`.
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
    // The extended model (EAX[19:16]) supplies the high nibble of the model.
    model = ((fbase == 0xF) || (fbase == 6 && probablyIntel) ) ?
        mbase + ((a >> 12) & 0xF0) : mbase;

    if (!probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        // determine max number of cores for AMD
        asm {
            mov EAX, 0x8000_0008;
            cpuid;
            mov c, ECX;
        }
        uint apicsize = (c>>12) & 0xF;
        if (apicsize == 0) {
            // use legacy method: ECX[7:0] holds the number of cores minus
            // one, so add one (and never end up with zero cores).
            if (hyperThreadingBit()) maxCores = (c & 0xFF) + 1;
            else maxCores = 1;
        } else {
            // maxcores = 2^ apicsize
            maxCores = 1;
            while (apicsize) { maxCores <<= 1; --apicsize; }
        }
    }

    if (max_extended_cpuid >= 0x8000_0004) {
        char *procptr = processorNameBuffer.ptr;
        asm {
            push ESI;
            mov ESI, procptr;
            mov EAX, 0x8000_0002;
            cpuid;
            mov [ESI], EAX;
            mov [ESI+4], EBX;
            mov [ESI+8], ECX;
            mov [ESI+12], EDX;
            mov EAX, 0x8000_0003;
            cpuid;
            mov [ESI+16], EAX;
            mov [ESI+20], EBX;
            mov [ESI+24], ECX;
            mov [ESI+28], EDX;
            mov EAX, 0x8000_0004;
            cpuid;
            mov [ESI+32], EAX;
            mov [ESI+36], EBX;
            mov [ESI+40], ECX;
            mov [ESI+44], EDX;
            pop ESI;
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that a buffer consisting only of
        // padding yields an empty name instead of reading out of bounds.
        size_t start = 0, end = 0;
        while (start < processorNameBuffer.length
               && processorNameBuffer[start] == ' ') { ++start; }
        while (end < processorNameBuffer.length - start
               && processorNameBuffer[$-end-1] == 0) { ++end; }
        processorName = processorNameBuffer[start..$-end].dup;
    } else {
        processorName = "Unknown CPU";
    }
    // Determine cache sizes

    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method

    // Deal with idiotic Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    datacache[0].size = 0;
    if (max_cpuid<2 || !probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        if (mmx()) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
            // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (hyperThreadingBit()) {
        // EBX[23:16] of leaf 1: logical processor count.
        maxThreads = (apic>>>16) & 0xFF;
        // A reported count of 0 is unusable; fall back to one thread per core.
        if (maxThreads == 0) maxThreads = maxCores;
    } else {
        maxThreads = maxCores;
    }
}
// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Doesn't work for Cyrix 6x86 and 6x86L.
//
// The test flips bit 21 (0x0020_0000) of EFLAGS, reads EFLAGS back and checks
// whether the bit really changed.
bool hasCPUID()
{
    uint changedBits;
    asm {
        pushfd;
        pop EAX;                // EAX = current EFLAGS
        mov changedBits, EAX;   // remember the original value
        xor EAX, 0x0020_0000;   // toggle bit 21
        push EAX;
        popfd;                  // try to write the modified flags
        pushfd;
        pop EAX;                // read them back
        xor changedBits, EAX;   // bits that differ from the original
    }
    return 0 != (changedBits & 0x0020_0000);
}
- } else { // inline asm X86
/// Without inline assembler CPUID cannot be issued, so it is reported as
/// unsupported.
bool hasCPUID()
{
    return false;
}

/// Fallback used when inline assembler is unavailable: stores a fixed
/// first-level cache description (8 kB, 2-way, 32-byte lines).
void cpuidX86()
{
    datacache[0].lineSize = 32;
    datacache[0].associativity = 2;
    datacache[0].size = 8;
}
- }
- }
/// CPU information for PowerPC processors.
///
/// Nothing is detected automatically: the feature flags stay false and the
/// cache table holds the generic defaults until cpuidPPC() is called with a
/// known processor type.
final class CpuInfoPpc: CpuInfo
{
    bool hasfloatingpoint; // Floating Point Instructions
    bool hasaltivec;       // AltiVec Instructions
    bool hasgraphicsops;   // Graphics Operations
    bool has64bitops;      // 64-bit Instructions
    bool hasfsqrt;         // HW Floating Point Square Root Instruction
    bool hasstfiwx;        // Store Floating Point as Integer Word Indexed Instructions
    bool hasdcba;          // Data Cache Block Allocate Instruction
    bool hasdatastreams;   // Data Streams Instructions
    bool hasdcbtstreams;   // Data Cache Block Touch Steams Instruction Form

    /// Creates a cleared object (the base constructor calls clear()).
    this()
    {
        super();
    }

    /// Clears info stored in this object and immediately applies the cache
    /// defaults via cacheFixup().
    override void clear()
    {
        super.clear();
        vendorName = "Unknown";
        processorName = "UnknownPPC";
        hasfloatingpoint = false;
        hasaltivec = false;
        hasgraphicsops = false;
        has64bitops = false;
        hasfsqrt = false;
        hasstfiwx = false;
        hasdcba = false;
        hasdatastreams = false;
        hasdcbtstreams = false;
        cacheFixup();
    }

    /// Copies data from one object to the other (`a[] = b`);
    /// `o` must be a CpuInfoPpc.
    override CpuInfoPpc opSliceAssign(CpuInfo o)
    {
        auto other = cast(CpuInfoPpc)o;
        assert(other !is null);
        super.opSliceAssign(o);
        hasfloatingpoint = other.hasfloatingpoint;
        hasaltivec = other.hasaltivec;
        hasgraphicsops = other.hasgraphicsops;
        has64bitops = other.has64bitops;
        hasfsqrt = other.hasfsqrt;
        hasstfiwx = other.hasstfiwx;
        hasdcba = other.hasdcba;
        hasdatastreams = other.hasdatastreams;
        hasdcbtstreams = other.hasdcbtstreams;
        return this;
    }

    /// Processor types known to cpuidPPC(); the values index its tables.
    enum PPC_Cputype:int { PPC601, PPC603, PPC603E, PPC604,
        PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }

    // TODO: Implement this function with OS support
    /// Fills the cache table with the hard-coded values for `cputype`.
    void cpuidPPC(PPC_Cputype cputype)
    {
        // TODO:
        // asm { mfpvr; } returns the CPU version but unfortunately it can
        // only be used in kernel mode. So OS support is required.

        // Tables indexed by PPC_Cputype; sizes in kB.
        // 601 has a 8KB combined data & code L1 cache.
        uint[]  sizes  = [4, 8, 16, 16, 32, 32, 32, 32, 64];
        ubyte[] ways   = [8, 2, 4, 4, 4, 8, 8, 8, 8];
        uint[]  L2size = [0, 0, 0, 0, 0, 0, 0, 256, 512];
        uint[]  L3size = [0, 0, 0, 0, 0, 0, 0, 2048, 0];

        // clear() has already run cacheFixup(), which stores the "entire
        // address space" size in every unused level. Zero all sizes first so
        // that the cacheFixup() below sees levels 3 and 4 as unused again and
        // numCacheLevels reflects only the levels set here.
        foreach (ref level; datacache) {
            level.size = 0;
        }

        datacache[0].size = sizes[cputype];
        datacache[0].associativity = ways[cputype];
        datacache[0].lineSize = (cputype==PPC_Cputype.PPCG5)? 128 :
            (cputype == PPC_Cputype.PPC620 || cputype == PPC_Cputype.PPCG3)? 64 : 32;
        datacache[1].size = L2size[cputype];
        datacache[2].size = L3size[cputype];
        datacache[1].lineSize = datacache[0].lineSize;
        datacache[2].lineSize = datacache[0].lineSize;
        cacheFixup();
    }
}
/// CPU information for SPARC processors.
/// This should be expanded by someone using sparc.
final class CpuInfoSparc: CpuInfo
{
    /// Creates a cleared object (the base constructor calls clear()).
    this()
    {
        super();
    }

    /// Clears info stored in this object.
    override void clear()
    {
        super.clear();
        vendorName = "unknown";
        processorName = "unknownSparc";
    }

    /// Copies data from one object to the other (`a[] = b`);
    /// `o` must be a CpuInfoSparc.
    override CpuInfoSparc opSliceAssign(CpuInfo o)
    {
        auto other = cast(CpuInfoSparc)o;
        assert(other !is null);
        super.opSliceAssign(o);
        return this;
    }

    /// Processor types known to cpuidSparc().
    enum Sparc_Cputype:int {
        UltraSparcIIi, UltraSparcIII, UltraSparcIIIi, UltraSparcIV,
        UltraSparcIVplus, Sparc64V
    }

    // TODO: Implement this function with OS support
    /// Fills the cache table with the hard-coded values for `cputype`
    /// (sizes in kB). Throws an Exception for an unknown type, in which case
    /// the object is left unmodified.
    ///
    /// The first-level cache is stored in datacache[0], the second in
    /// datacache[1] and the UltraSparc IV+ third level in datacache[2],
    /// matching the indexing used by cacheFixup() and the other CPU classes.
    void cpuidSparc(Sparc_Cputype cputype)
    {
        size_t l1, l2, l3 = 0;
        ubyte way1, way2;
        switch (cputype) {
        case Sparc_Cputype.UltraSparcIIi:
            l1 = 16; way1 = 2; l2 = 512; way2 = 4;
            break;
        case Sparc_Cputype.UltraSparcIII:
            l1 = 64; way1 = 4; l2 = 4096; way2 = 4; // or l2=8192;
            break;
        case Sparc_Cputype.UltraSparcIIIi:
            l1 = 64; way1 = 4; l2 = 1024; way2 = 4;
            break;
        case Sparc_Cputype.UltraSparcIV:
            l1 = 64; way1 = 4; l2 = 16*1024; way2 = 1;
            break;
        case Sparc_Cputype.UltraSparcIVplus:
            l1 = 64; way1 = 4; l2 = 2048; way2 = 1;
            l3 = 32*1024;
            break;
        case Sparc_Cputype.Sparc64V:
            l1 = 128; way1 = 2; l2 = 4096; way2 = 4;
            break;
        default:
            throw new Exception("invalid cputype",__FILE__,__LINE__);
        }

        // Forget sizes left by an earlier call (including the "entire address
        // space" size that cacheFixup() stores in unused levels), so that
        // cacheFixup() recomputes numCacheLevels from the values set below.
        foreach (ref level; datacache) {
            level.size = 0;
        }

        datacache[0].size = l1;
        datacache[0].associativity = way1;
        datacache[1].size = l2;
        datacache[1].associativity = way2;
        if (l3 != 0) {
            datacache[2].size = l3;
        }
        cacheFixup();
    }
}