PageRenderTime 74ms CodeModel.GetById 36ms RepoModel.GetById 1ms app.codeStats 0ms

/src/libtorque/hardware/x86cpuid.c

https://github.com/dankamongmen/libtorque
C | 1845 lines | 1666 code | 85 blank | 94 comment | 220 complexity | 55d4376b82afbdc3179a898c87b2a6fc MD5 | raw file
Possible License(s): Apache-2.0
  1. #include <stdint.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4. #include <limits.h>
  5. #include <libtorque/internal.h>
  6. #include <libtorque/hardware/arch.h>
  7. #include <libtorque/hardware/x86cpuid.h>
// Probe for CPUID support by attempting to toggle the ID bit (bit 21) of
// EFLAGS. If the flipped value sticks across a popf/pushf round trip, the
// processor supports the CPUID instruction. Returns non-zero iff CPUID is
// available. Original flags are restored before returning.
static int
cpuid_available(void){
	const unsigned long flag = 0x200000;	// EFLAGS ID bit (bit 21)
	unsigned long f1, f2;

	__asm__ volatile(
		"pushf\n\t"	// save the original flags for the final popf
		"pushf\n\t"	// copy flags into f1...
		"pop %0\n\t"
		"mov %0,%1\n\t"	// ...and preserve the unmodified value in f2
		"xor %2,%0\n\t"	// flip the ID bit in f1
		"push %0\n\t"
		"popf\n\t"	// try to install the modified flags
		"pushf\n\t"
		"pop %0\n\t"	// read back what actually took effect
		"popf\n\t"	// restore the original flags
		: "=&r" (f1), "=&r" (f2)
		: "ir" (flag)
	);
	return ((f1 ^ f2) & flag) != 0;	// non-zero iff the ID bit toggled
}
  28. // By far, the best reference here is Intel Application Note 485 (the CPUID
  29. // guide). Also useful are the Intel and AMD Architecture Software Developer's
  30. // Manuals. http://faydoc.tripod.com/cpu/cpuid.htm is pretty useful, as is
  31. // http://www.ee.nuigalway.ie/mirrors/www.sandpile.org/ia32/cpuid.htm.
  32. typedef enum {
  33. CPUID_MAX_SUPPORT = 0x00000000,
  34. CPUID_CPU_VERSION = 0x00000001,
  35. // these are primarily Intel
  36. CPUID_STANDARD_CPUCONF = 0x00000002, // sundry cpu data
  37. CPUID_STANDARD_PSNCONF = 0x00000003, // processor serial
  38. CPUID_STANDARD_CACHECONF = 0x00000004, // cache config
  39. CPUID_STANDARD_MWAIT = 0x00000005, // MWAIT/MONITOR
  40. CPUID_STANDARD_POWERMAN = 0x00000006, // power management
  41. CPUID_STANDARD_DIRECTCACHE = 0x00000009, // DCA access setup
  42. CPUID_STANDARD_PERFMON = 0x0000000a, // performance ctrs
  43. CPUID_STANDARD_TOPOLOGY = 0x0000000b, // topology, x2apic
  44. CPUID_STANDARD_XSTATE = 0x0000000d, // XSAVE/XRSTOR
  45. // these are primarily AMD
  46. CPUID_EXTENDED_MAX_SUPPORT = 0x80000000, // max ext. level
  47. CPUID_EXTENDED_CPU_VERSION = 0x80000001, // amd cpu sundries
  48. CPUID_EXTENDED_CPU_NAME1 = 0x80000002, // proc name part1
  49. CPUID_EXTENDED_CPU_NAME2 = 0x80000003, // proc name part2
  50. CPUID_EXTENDED_CPU_NAME3 = 0x80000004, // proc name part3
  51. CPUID_AMD_L1CACHE_TLB = 0x80000005, // l1, tlb0 (AMD)
  52. CPUID_EXTENDED_L23CACHE_TLB = 0x80000006, // l2,3, tlb1
  53. CPUID_EXTENDED_ENHANCEDPOWER = 0x80000006, // epm support
  54. CPUID_EXTENDED_LMADDRICORE = 0x80000008, // longmode addr info/corecount
  55. CPUID_EXTENDED_GBTLB = 0x80000019, // 1GB tlbs
  56. CPUID_EXTENDED_CACHEPROP = 0x8000001D, // extended cache properties
  57. CPUID_EXTENDED_APICID = 0x8000001E, // extended APIC IC
  58. CPUID_EXTENDED
  59. } cpuid_class;
// Uses all four primary general-purpose 32-bit registers (e[abcd]x), returning
// these in gpregs[0123]. Secondary parameters are assumed to go in ECX.
static inline void
cpuid(cpuid_class level,uint32_t subparam,uint32_t *gpregs){
	__asm__ __volatile__(
#ifdef __x86_64__
		"cpuid\n\t" // serializing instruction
		: "=a" (gpregs[0]), "=b" (gpregs[1]),
		  "=c" (gpregs[2]), "=d" (gpregs[3])
		: "0" (level), "2" (subparam)
#else
		// On 32-bit PIC builds EBX holds the GOT pointer and may not
		// be listed as a clobber/output, so spill CPUID's EBX result
		// through ESI (the [spill] operand) instead.
		"pushl %%ebx\n\t" // can't assume use of ebx on 32-bit with PIC
		"cpuid\n\t" // serializing instruction
		"movl %%ebx,%[spill]\n\t"
		"popl %%ebx\n\t"
		: "=a" (gpregs[0]), [spill] "=S" (gpregs[1]),
		  "=c" (gpregs[2]), "=d" (gpregs[3])
		: "a" (level), "c" (subparam)
#endif
	);
}
// These are largely taken from the Intel document; need check AMD's FIXME
// Feature bits of interest, extracted from CPUID results. The number in
// each comment is the bit position within the indicated result register.
struct feature_flags {
	// CPUID 00000001
	int dca;	// direct cache access (18)
	int x2apic;	// x2apic (21)
	int pse;	// page size extension (3)
	int pae;	// physical address extension (6)
	int pse36;	// 36-bit page size extension (17)
	int ht;		// physical support for hyperthreading (28)
	// CPUID 80000001 EDX
	int gbpt;	// 1GB/4-level page table support (28)
	int lme;	// long mode enable (26)
};
// A CPU vendor we know how to interrogate: the 12-byte CPUID vendor
// signature (stored in EBX|ECX|EDX register order; see lookup_vendor()),
// plus per-vendor callbacks for memory-hierarchy and topology discovery.
// topfxn may be NULL when topology discovery is unimplemented.
typedef struct known_x86_vendor {
	const char *signet;	// 12 signature bytes, EBX|ECX|EDX order
	int (*memfxn)(uint32_t,const struct feature_flags *,torque_cput *);	// caches/TLBs
	int (*topfxn)(uint32_t,const struct feature_flags *,torque_cput *);	// topology
} known_x86_vendor;

// Per-vendor discovery implementations, defined below.
static int id_amd_caches(uint32_t,const struct feature_flags *,torque_cput *);
static int id_via_caches(uint32_t,const struct feature_flags *,torque_cput *);
static int id_intel_caches(uint32_t,const struct feature_flags *,torque_cput *);
static int id_amd_topology(uint32_t,const struct feature_flags *,torque_cput *);
static int id_intel_topology(uint32_t,const struct feature_flags *,torque_cput *);
// There's also: (Collect them all! Impress your friends!)
// " UMC UMC UMC" "CyriteadxIns" "NexGivenenDr"
// "RiseRiseRise" "GenuMx86ineT" "Geod NSCe by"
// Signatures look scrambled because they're stored in EBX|ECX|EDX register
// order rather than the human-readable EBX|EDX|ECX order: "GenuineIntel"
// becomes "Genu"+"ntel"+"ineI".
static const known_x86_vendor vendors[] = {
	{ .signet = "GenuntelineI",	// GenuineIntel
	  .memfxn = id_intel_caches,
	  .topfxn = id_intel_topology,
	},
	{ .signet = "AuthcAMDenti",	// AuthenticAMD
	  .memfxn = id_amd_caches,
	  .topfxn = id_amd_topology,
	},
	{ .signet = "CentaulsaurH",	// CentaurHauls (VIA)
	  .memfxn = id_via_caches,
	  .topfxn = NULL,		// no VIA topology support
	},
};
  121. // vendstr should be 12 bytes corresponding to EBX, ECX, EDX post-CPUID
  122. static const known_x86_vendor *
  123. lookup_vendor(const uint32_t *vendstr){
  124. unsigned z;
  125. for(z = 0 ; z < sizeof(vendors) / sizeof(*vendors) ; ++z){
  126. if(memcmp(vendstr,vendors[z].signet,sizeof(*vendstr) * 3) == 0){
  127. return vendors + z;
  128. }
  129. }
  130. return NULL;
  131. }
// Query extended CPUID leaf 0x80000000 and return its EAX result, the
// maximum supported extended CPUID level.
static inline uint32_t
identify_extended_cpuid(void){
	uint32_t gpregs[4];

	cpuid(CPUID_EXTENDED_MAX_SUPPORT,0,gpregs);
	return gpregs[0];
}
// One decoded entry of Intel's CPUID-function-2 cache descriptor table
// (Table 2-9 of Intel Application Note 485).
typedef struct intel_cache_descriptor {
	unsigned descriptor;	// the one-byte descriptor value
	unsigned linesize;	// cache line size in bytes
	uintmax_t totalsize;	// total cache size in bytes
	unsigned associativity;	// ways of associativity
	unsigned level;		// cache level (1, 2 or 3)
	int memtype;		// MEMTYPE_{CODE,DATA,UNIFIED}
} intel_cache_descriptor;
  146. static const intel_cache_descriptor intel_cache_descriptors[] = {
  147. { .descriptor = 0x06,
  148. .linesize = 32,
  149. .totalsize = 8 * 1024,
  150. .associativity = 4,
  151. .level = 1,
  152. .memtype = MEMTYPE_CODE,
  153. },
  154. { .descriptor = 0x08,
  155. .linesize = 32,
  156. .totalsize = 16 * 1024,
  157. .associativity = 4,
  158. .level = 1,
  159. .memtype = MEMTYPE_CODE,
  160. },
  161. { .descriptor = 0x09,
  162. .linesize = 64,
  163. .totalsize = 32 * 1024,
  164. .associativity = 4,
  165. .level = 1,
  166. .memtype = MEMTYPE_CODE,
  167. },
  168. { .descriptor = 0x0a,
  169. .linesize = 32,
  170. .totalsize = 8 * 1024,
  171. .associativity = 2,
  172. .level = 1,
  173. .memtype = MEMTYPE_DATA,
  174. },
  175. { .descriptor = 0x0c,
  176. .linesize = 32,
  177. .totalsize = 16 * 1024,
  178. .associativity = 4,
  179. .level = 1,
  180. .memtype = MEMTYPE_DATA,
  181. },
  182. { .descriptor = 0x0d, // ECC
  183. .linesize = 64,
  184. .totalsize = 16 * 1024,
  185. .associativity = 4,
  186. .level = 1,
  187. .memtype = MEMTYPE_DATA,
  188. },
  189. // IAN 485 describes this as an MLC cache. This doesn't mean
  190. // Multi-Level Cell (as in NAND flash technology), but
  191. // "Mid-Level Cache". This essentially means to expect an L3.
  192. { .descriptor = 0x21,
  193. .linesize = 64,
  194. .totalsize = 256 * 1024,
  195. .associativity = 8,
  196. .level = 2,
  197. .memtype = MEMTYPE_UNIFIED,
  198. },
  199. { .descriptor = 0x22,
  200. .linesize = 64,
  201. .totalsize = 512 * 1024,
  202. .associativity = 4,
  203. .level = 3,
  204. .memtype = MEMTYPE_UNIFIED, // sectored
  205. },
  206. { .descriptor = 0x23,
  207. .linesize = 64,
  208. .totalsize = 1024 * 1024,
  209. .associativity = 8,
  210. .level = 3,
  211. .memtype = MEMTYPE_UNIFIED, // sectored
  212. },
  213. { .descriptor = 0x25,
  214. .linesize = 64,
  215. .totalsize = 2 * 1024 * 1024,
  216. .associativity = 8,
  217. .level = 3,
  218. .memtype = MEMTYPE_UNIFIED, // sectored
  219. },
  220. { .descriptor = 0x29,
  221. .linesize = 64,
  222. .totalsize = 4 * 1024 * 1024,
  223. .associativity = 8,
  224. .level = 3,
  225. .memtype = MEMTYPE_UNIFIED, // sectored
  226. },
  227. { .descriptor = 0x2c,
  228. .linesize = 64,
  229. .totalsize = 32 * 1024,
  230. .associativity = 8,
  231. .level = 1,
  232. .memtype = MEMTYPE_DATA, // sectored
  233. },
  234. { .descriptor = 0x30,
  235. .linesize = 64,
  236. .totalsize = 32 * 1024,
  237. .associativity = 8,
  238. .level = 1,
  239. .memtype = MEMTYPE_CODE,
  240. },
  241. { .descriptor = 0x39,
  242. .linesize = 64,
  243. .totalsize = 128 * 1024,
  244. .associativity = 4,
  245. .level = 2,
  246. .memtype = MEMTYPE_UNIFIED, // sectored
  247. },
  248. { .descriptor = 0x3a,
  249. .linesize = 64,
  250. .totalsize = 192 * 1024,
  251. .associativity = 6,
  252. .level = 2,
  253. .memtype = MEMTYPE_UNIFIED, // sectored
  254. },
  255. { .descriptor = 0x3b,
  256. .linesize = 64,
  257. .totalsize = 128 * 1024,
  258. .associativity = 2,
  259. .level = 2,
  260. .memtype = MEMTYPE_UNIFIED, // sectored
  261. },
  262. { .descriptor = 0x3c,
  263. .linesize = 64,
  264. .totalsize = 256 * 1024,
  265. .associativity = 4,
  266. .level = 2,
  267. .memtype = MEMTYPE_UNIFIED, // sectored
  268. },
  269. { .descriptor = 0x3d,
  270. .linesize = 64,
  271. .totalsize = 384 * 1024,
  272. .associativity = 6,
  273. .level = 2,
  274. .memtype = MEMTYPE_UNIFIED, // sectored
  275. },
  276. { .descriptor = 0x3e,
  277. .linesize = 64,
  278. .totalsize = 512 * 1024,
  279. .associativity = 4,
  280. .level = 2,
  281. .memtype = MEMTYPE_UNIFIED, // sectored
  282. },
  283. { .descriptor = 0x41,
  284. .linesize = 32,
  285. .totalsize = 128 * 1024,
  286. .associativity = 4,
  287. .level = 2,
  288. .memtype = MEMTYPE_UNIFIED,
  289. },
  290. { .descriptor = 0x42,
  291. .linesize = 32,
  292. .totalsize = 256 * 1024,
  293. .associativity = 4,
  294. .level = 2,
  295. .memtype = MEMTYPE_UNIFIED,
  296. },
  297. { .descriptor = 0x43,
  298. .linesize = 32,
  299. .totalsize = 512 * 1024,
  300. .associativity = 4,
  301. .level = 2,
  302. .memtype = MEMTYPE_UNIFIED,
  303. },
  304. { .descriptor = 0x44,
  305. .linesize = 32,
  306. .totalsize = 1024 * 1024,
  307. .associativity = 4,
  308. .level = 2,
  309. .memtype = MEMTYPE_UNIFIED,
  310. },
  311. { .descriptor = 0x45,
  312. .linesize = 32,
  313. .totalsize = 2 * 1024 * 1024,
  314. .associativity = 4,
  315. .level = 2,
  316. .memtype = MEMTYPE_UNIFIED,
  317. },
  318. { .descriptor = 0x46,
  319. .linesize = 64,
  320. .totalsize = 4 * 1024 * 1024,
  321. .associativity = 4,
  322. .level = 3,
  323. .memtype = MEMTYPE_UNIFIED,
  324. },
  325. { .descriptor = 0x47,
  326. .linesize = 64,
  327. .totalsize = 8 * 1024 * 1024,
  328. .associativity = 8,
  329. .level = 3,
  330. .memtype = MEMTYPE_UNIFIED,
  331. },
  332. { .descriptor = 0x48, // unified on-die
  333. .linesize = 64,
  334. .totalsize = 3 * 1024 * 1024,
  335. .associativity = 12,
  336. .level = 2,
  337. .memtype = MEMTYPE_UNIFIED,
  338. },
  339. { .descriptor = 0x49, // FIXME has two meanings!
  340. .linesize = 64,
  341. .totalsize = 4 * 1024 * 1024,
  342. .associativity = 16,
  343. .level = 2,
  344. .memtype = MEMTYPE_UNIFIED,
  345. },
  346. { .descriptor = 0x4a,
  347. .linesize = 64,
  348. .totalsize = 6 * 1024 * 1024,
  349. .associativity = 12,
  350. .level = 3,
  351. .memtype = MEMTYPE_UNIFIED,
  352. },
  353. { .descriptor = 0x4b,
  354. .linesize = 64,
  355. .totalsize = 8 * 1024 * 1024,
  356. .associativity = 16,
  357. .level = 3,
  358. .memtype = MEMTYPE_UNIFIED,
  359. },
  360. { .descriptor = 0x4c,
  361. .linesize = 64,
  362. .totalsize = 12 * 1024 * 1024,
  363. .associativity = 12,
  364. .level = 3,
  365. .memtype = MEMTYPE_UNIFIED,
  366. },
  367. { .descriptor = 0x4d,
  368. .linesize = 64,
  369. .totalsize = 16 * 1024 * 1024,
  370. .associativity = 16,
  371. .level = 3,
  372. .memtype = MEMTYPE_UNIFIED,
  373. },
  374. { .descriptor = 0x4e,
  375. .linesize = 64,
  376. .totalsize = 6 * 1024 * 1024,
  377. .associativity = 24,
  378. .level = 2,
  379. .memtype = MEMTYPE_UNIFIED,
  380. },
  381. { .descriptor = 0x60,
  382. .linesize = 64,
  383. .totalsize = 16 * 1024,
  384. .associativity = 8,
  385. .level = 1,
  386. .memtype = MEMTYPE_DATA,
  387. },
  388. { .descriptor = 0x66,
  389. .linesize = 64,
  390. .totalsize = 8 * 1024,
  391. .associativity = 4,
  392. .level = 1,
  393. .memtype = MEMTYPE_DATA,
  394. },
  395. { .descriptor = 0x67,
  396. .linesize = 64,
  397. .totalsize = 16 * 1024,
  398. .associativity = 4,
  399. .level = 1,
  400. .memtype = MEMTYPE_DATA,
  401. },
  402. { .descriptor = 0x68,
  403. .linesize = 64,
  404. .totalsize = 32 * 1024,
  405. .associativity = 4,
  406. .level = 1,
  407. .memtype = MEMTYPE_DATA,
  408. },
  409. { .descriptor = 0x76, // FIXME identified as TLB(?)!
  410. .linesize = 64,
  411. .totalsize = 1024 * 1024,
  412. .associativity = 4,
  413. .level = 2,
  414. .memtype = MEMTYPE_UNIFIED,
  415. },
  416. { .descriptor = 0x78,
  417. .linesize = 64,
  418. .totalsize = 1024 * 1024,
  419. .associativity = 4,
  420. .level = 2,
  421. .memtype = MEMTYPE_UNIFIED,
  422. },
  423. { .descriptor = 0x79, // sectored
  424. .linesize = 64,
  425. .totalsize = 128 * 1024,
  426. .associativity = 8,
  427. .level = 2,
  428. .memtype = MEMTYPE_UNIFIED,
  429. },
  430. { .descriptor = 0x7a, // sectored
  431. .linesize = 64,
  432. .totalsize = 256 * 1024,
  433. .associativity = 8,
  434. .level = 2,
  435. .memtype = MEMTYPE_UNIFIED,
  436. },
  437. { .descriptor = 0x7b, // sectored
  438. .linesize = 64,
  439. .totalsize = 512 * 1024,
  440. .associativity = 8,
  441. .level = 2,
  442. .memtype = MEMTYPE_UNIFIED,
  443. },
  444. { .descriptor = 0x7c, // sectored
  445. .linesize = 64,
  446. .totalsize = 1 * 1024 * 1024,
  447. .associativity = 8,
  448. .level = 2,
  449. .memtype = MEMTYPE_UNIFIED,
  450. },
  451. { .descriptor = 0x7d,
  452. .linesize = 64,
  453. .totalsize = 2 * 1024 * 1024,
  454. .associativity = 8,
  455. .level = 2,
  456. .memtype = MEMTYPE_UNIFIED,
  457. },
  458. { .descriptor = 0x7f,
  459. .linesize = 64,
  460. .totalsize = 512 * 1024,
  461. .associativity = 2,
  462. .level = 2,
  463. .memtype = MEMTYPE_UNIFIED,
  464. },
  465. { .descriptor = 0x80,
  466. .linesize = 64,
  467. .totalsize = 512 * 1024,
  468. .associativity = 8,
  469. .level = 2,
  470. .memtype = MEMTYPE_UNIFIED,
  471. },
  472. { .descriptor = 0x82,
  473. .linesize = 32,
  474. .totalsize = 256 * 1024,
  475. .associativity = 8,
  476. .level = 2,
  477. .memtype = MEMTYPE_UNIFIED,
  478. },
  479. { .descriptor = 0x83,
  480. .linesize = 32,
  481. .totalsize = 512 * 1024,
  482. .associativity = 8,
  483. .level = 2,
  484. .memtype = MEMTYPE_UNIFIED,
  485. },
  486. { .descriptor = 0x84,
  487. .linesize = 32,
  488. .totalsize = 1 * 1024 * 1024,
  489. .associativity = 8,
  490. .level = 2,
  491. .memtype = MEMTYPE_UNIFIED,
  492. },
  493. { .descriptor = 0x85,
  494. .linesize = 32,
  495. .totalsize = 2 * 1024 * 1024,
  496. .associativity = 8,
  497. .level = 2,
  498. .memtype = MEMTYPE_UNIFIED,
  499. },
  500. { .descriptor = 0x86,
  501. .linesize = 64,
  502. .totalsize = 512 * 1024,
  503. .associativity = 4,
  504. .level = 2,
  505. .memtype = MEMTYPE_UNIFIED,
  506. },
  507. { .descriptor = 0x87,
  508. .linesize = 64,
  509. .totalsize = 1 * 1024,
  510. .associativity = 8,
  511. .level = 2,
  512. .memtype = MEMTYPE_UNIFIED,
  513. },
  514. { .descriptor = 0xd0,
  515. .linesize = 64,
  516. .totalsize = 512 * 1024,
  517. .associativity = 4,
  518. .level = 3,
  519. .memtype = MEMTYPE_UNIFIED,
  520. },
  521. { .descriptor = 0xd1,
  522. .linesize = 64,
  523. .totalsize = 1024 * 1024,
  524. .associativity = 4,
  525. .level = 3,
  526. .memtype = MEMTYPE_UNIFIED,
  527. },
  528. { .descriptor = 0xd2,
  529. .linesize = 64,
  530. .totalsize = 2 * 1024 * 1024,
  531. .associativity = 4,
  532. .level = 3,
  533. .memtype = MEMTYPE_UNIFIED,
  534. },
  535. { .descriptor = 0xd6,
  536. .linesize = 64,
  537. .totalsize = 12 * 1024 * 1024,
  538. .associativity = 8,
  539. .level = 3,
  540. .memtype = MEMTYPE_UNIFIED,
  541. },
  542. { .descriptor = 0xd7,
  543. .linesize = 64,
  544. .totalsize = 18 * 1024 * 1024,
  545. .associativity = 8,
  546. .level = 3,
  547. .memtype = MEMTYPE_UNIFIED,
  548. },
  549. { .descriptor = 0xd8,
  550. .linesize = 64,
  551. .totalsize = 24 * 1024 * 1024,
  552. .associativity = 8,
  553. .level = 3,
  554. .memtype = MEMTYPE_UNIFIED,
  555. },
  556. { .descriptor = 0xdc,
  557. .linesize = 64,
  558. .totalsize = (1024 + 512) * 1024,
  559. .associativity = 12,
  560. .level = 3,
  561. .memtype = MEMTYPE_UNIFIED,
  562. },
  563. { .descriptor = 0xdd,
  564. .linesize = 64,
  565. .totalsize = 3 * 1024 * 1024,
  566. .associativity = 12,
  567. .level = 3,
  568. .memtype = MEMTYPE_UNIFIED,
  569. },
  570. { .descriptor = 0xde,
  571. .linesize = 64,
  572. .totalsize = 6 * 1024 * 1024,
  573. .associativity = 12,
  574. .level = 3,
  575. .memtype = MEMTYPE_UNIFIED,
  576. },
  577. { .descriptor = 0xe2,
  578. .linesize = 64,
  579. .totalsize = 2 * 1024 * 1024,
  580. .associativity = 16,
  581. .level = 3,
  582. .memtype = MEMTYPE_UNIFIED,
  583. },
  584. { .descriptor = 0xe3,
  585. .linesize = 64,
  586. .totalsize = 4 * 1024 * 1024,
  587. .associativity = 16,
  588. .level = 3,
  589. .memtype = MEMTYPE_UNIFIED,
  590. },
  591. { .descriptor = 0xe4,
  592. .linesize = 64,
  593. .totalsize = 8 * 1024 * 1024,
  594. .associativity = 16,
  595. .level = 3,
  596. .memtype = MEMTYPE_UNIFIED,
  597. },
  598. { .descriptor = 0xea,
  599. .linesize = 64,
  600. .totalsize = 12 * 1024 * 1024,
  601. .associativity = 24,
  602. .level = 3,
  603. .memtype = MEMTYPE_UNIFIED,
  604. },
  605. { .descriptor = 0xeb,
  606. .linesize = 64,
  607. .totalsize = 18 * 1024 * 1024,
  608. .associativity = 24,
  609. .level = 3,
  610. .memtype = MEMTYPE_UNIFIED,
  611. },
  612. { .descriptor = 0xec,
  613. .linesize = 64,
  614. .totalsize = 24 * 1024 * 1024,
  615. .associativity = 24,
  616. .level = 3,
  617. .memtype = MEMTYPE_UNIFIED,
  618. },
  619. };
// One decoded TLB entry of Intel's CPUID-function-2 descriptor table
// (Table 2-9 of Intel Application Note 485).
typedef struct intel_tlb_descriptor {
	unsigned descriptor;	// the one-byte descriptor value
	unsigned pagesize;	// page size covered, in bytes
	unsigned entries;	// number of TLB entries
	unsigned associativity;	// ways (== entries for fully-associative)
	unsigned level;		// TLB level
	int tlbtype;		// MEMTYPE_{CODE,DATA,UNIFIED}
} intel_tlb_descriptor;
// TLB descriptors from Intel CPUID function 2 (Table 2-9, IAN 485). Page
// sizes are in bytes; several descriptors cover multiple page sizes, and
// only one is recorded here (see the FIXMEs).
static const intel_tlb_descriptor intel_tlb_descriptors[] = {
	{ .descriptor = 0x01,
		.pagesize = 4 * 1024,
		.entries = 32,
		.associativity = 4,
		.level = 2,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x02,
		.pagesize = 4 * 1024 * 1024,
		.entries = 2,
		.associativity = 2,
		.level = 2,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x03,
		.pagesize = 4 * 1024,
		.entries = 64,
		.associativity = 4,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x04,
		.pagesize = 4 * 1024 * 1024,
		.entries = 8,
		.associativity = 4,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x05,
		.pagesize = 4 * 1024 * 1024,
		.entries = 32,
		.associativity = 4,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x50,
		.pagesize = 4 * 1024 * 1024, // FIXME 4K, 2M or 4M
		.entries = 64,
		.associativity = 64,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x51,
		.pagesize = 4 * 1024, // FIXME 4K, 2M or 4M
		.entries = 128,
		.associativity = 128,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x52,
		.pagesize = 4 * 1024 * 1024, // FIXME 4K, 2M or 4M
		.entries = 256,
		.associativity = 256,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x55,
		.pagesize = 4 * 1024, // FIXME 2M or 4M
		.entries = 7,
		.associativity = 7,
		.level = 2,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x56,
		.pagesize = 4 * 1024 * 1024,
		.entries = 16,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x57,
		.pagesize = 4 * 1024,
		.entries = 16,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x5a, // FIXME 2M or 4M
		.pagesize = 4 * 1024 * 1024,
		.entries = 32,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x5b,
		.pagesize = 4 * 1024, // FIXME 4k or 4M
		.entries = 64,
		.associativity = 64,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x5c,
		.pagesize = 4 * 1024, // FIXME 4k or 4M
		.entries = 128,
		.associativity = 128,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x5d,
		.pagesize = 4 * 1024, // FIXME 4k or 4M
		.entries = 256,
		.associativity = 256,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0x61,
		.pagesize = 4 * 1024,
		.entries = 48,
		.associativity = 48,
		.level = 2,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0x63,
		.pagesize = 1 * 1024 * 1024 * 1024,
		.entries = 4,
		.associativity = 4,
		.level = 3,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xb0,
		.pagesize = 4 * 1024,
		.entries = 128,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0xb1, // FIXME 8x2M or 4x4M
		.pagesize = 2 * 1024 * 1024,
		.entries = 8,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0xb2,
		.pagesize = 4 * 1024,
		.entries = 64,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0xb3,
		.pagesize = 4 * 1024,
		.entries = 128,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xb4,
		.pagesize = 4 * 1024,
		.entries = 256,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xb5,
		.pagesize = 4 * 1024,
		.entries = 64,
		.associativity = 8,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0xb6,
		.pagesize = 4 * 1024,
		.entries = 128,
		.associativity = 8,
		.level = 1,
		.tlbtype = MEMTYPE_CODE,
	},
	{ .descriptor = 0xba,
		.pagesize = 4 * 1024,
		.entries = 64,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xc0, // FIXME 4KB/4MB pages
		.pagesize = 4 * 1024,
		.entries = 8,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xc1, // FIXME 4KB/2MB pages
		.pagesize = 4 * 1024,
		.entries = 1024,
		.associativity = 8,
		.level = 2,
		.tlbtype = MEMTYPE_UNIFIED,
	},
	{ .descriptor = 0xc2, // FIXME 4KB/2MB pages
		.pagesize = 4 * 1024,
		.entries = 16,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	// 0xc3 notes "also 1GByte pages, 4-way, 16 entries"
	{ .descriptor = 0xc3, // FIXME 4KB/2MB pages
		.pagesize = 4 * 1024,
		.entries = 1536,
		.associativity = 6,
		.level = 2,
		.tlbtype = MEMTYPE_UNIFIED,
	},
	{ .descriptor = 0xc4, // FIXME 4KB/2MB pages
		.pagesize = 4 * 1024,
		.entries = 32,
		.associativity = 4,
		.level = 1,
		.tlbtype = MEMTYPE_DATA,
	},
	{ .descriptor = 0xca,
		.pagesize = 4 * 1024,
		.entries = 512,
		.associativity = 4,
		.level = 2,
		.tlbtype = MEMTYPE_DATA,
	},
};
// Trace-cache descriptors from CPUID function 2. These are recognized (so
// they aren't flagged as unknown) but otherwise ignored; see
// get_intel_trace().
static const unsigned intel_trace_descriptors[] = {
	0x70, // 12k uops, 8-way
	0x71, // 16k uops, 8-way
	0x72, // 32k uops, 8-way
	0x73, // 64K uops, 8-way
};
  854. static inline int
  855. compare_memdetails(const torque_memt * restrict a,
  856. const torque_memt * restrict b){
  857. #define CMP(a,b,field) do { if((a)->field < (b)->field){ return -1; } \
  858. else if((a)->field > (b)->field){ return 1; } } \
  859. while(0)
  860. // See match_memtype(); do not evaluate sharing for equality! The order
  861. // in which we compare will give rise to the memory ordering.
  862. CMP(a,b,level);
  863. CMP(a,b,memtype);
  864. CMP(a,b,totalsize);
  865. CMP(a,b,linesize);
  866. CMP(a,b,associativity);
  867. return 0;
  868. #undef CMP
  869. }
// Returns the slot we just added, or NULL on failure. See compare_memdetails()
// for ordering. Grows *mems by one element and inserts *amem in sorted
// position, bumping *memories on success. On allocation failure the
// original array is untouched.
static inline torque_memt *
add_hwmem(unsigned *memories,torque_memt **mems,
		const torque_memt *amem){
	size_t s = (*memories + 1) * sizeof(**mems);
	typeof(**mems) *tmp;

	// Check the realloc() result before overwriting *mems, so failure
	// doesn't leak/lose the existing array.
	if((tmp = realloc(*mems,s)) == NULL){
		return NULL;
	}
	*mems = tmp;
	// Walk forward to the first element ordered after *amem, then shift
	// the tail up a slot to open the insertion point. If no such element
	// exists, tmp ends up at the (new) final slot.
	while((unsigned)(tmp - *mems) < *memories){
		if(compare_memdetails(tmp,amem) > 0){
			memmove(tmp + 1,tmp,sizeof(*tmp) *
				(*memories - (unsigned)(tmp - *mems)));
			break;
		}
		++tmp;
	}
	// Needed due to memcmp()-based cpu compare
	memset(tmp,0,sizeof(*tmp));
	*tmp = *amem;
	(*memories)++;
	return tmp;
}
  895. static inline int
  896. compare_tlbdetails(const torque_tlbt * restrict a,
  897. const torque_tlbt * restrict b){
  898. #define CMP(a,b,field) do { if((a)->field < (b)->field){ return -1; } \
  899. else if((a)->field > (b)->field){ return 1; } } \
  900. while(0)
  901. // The order in which we compare will give rise to the TLB ordering.
  902. CMP(a,b,level);
  903. CMP(a,b,tlbtype);
  904. CMP(a,b,pagesize);
  905. CMP(a,b,entries);
  906. CMP(a,b,associativity);
  907. return 0;
  908. #undef CMP
  909. }
// Grow *tlbdescs by one element and insert *tlb in the sorted position
// given by compare_tlbdetails(), bumping *tlbs on success. Returns the
// slot just added, or NULL on allocation failure (array untouched).
static torque_tlbt *
add_tlb(unsigned *tlbs,torque_tlbt **tlbdescs,const torque_tlbt *tlb){
	size_t s = (*tlbs + 1) * sizeof(**tlbdescs);
	typeof(**tlbdescs) *tmp;

	// Check the realloc() result before overwriting *tlbdescs.
	if((tmp = realloc(*tlbdescs,s)) == NULL){
		return NULL;
	}
	*tlbdescs = tmp;
	// Find the insertion point and shift the tail up one slot; if no
	// later-ordered element exists, tmp lands on the new final slot.
	while((unsigned)(tmp - *tlbdescs) < *tlbs){
		if(compare_tlbdetails(tmp,tlb) > 0){
			memmove(tmp + 1,tmp,sizeof(*tmp) *
				(*tlbs - (unsigned)(tmp - *tlbdescs)));
			break;
		}
		++tmp;
	}
	// Needed due to memcmp()-based cpu compare
	memset(tmp,0,sizeof(*tmp));
	*tmp = *tlb;
	(*tlbs)++;
	return tmp;
}
  932. static int
  933. get_intel_cache(unsigned descriptor,torque_memt *mem,unsigned sharedways){
  934. unsigned n;
  935. // FIXME convert this to a table indexed by (8-bit) descriptor
  936. for(n = 0 ; n < sizeof(intel_cache_descriptors) / sizeof(*intel_cache_descriptors) ; ++n){
  937. if(intel_cache_descriptors[n].descriptor == descriptor){
  938. mem->memtype = intel_cache_descriptors[n].memtype;
  939. mem->linesize = intel_cache_descriptors[n].linesize;
  940. mem->totalsize = intel_cache_descriptors[n].totalsize;
  941. mem->associativity = intel_cache_descriptors[n].associativity;
  942. mem->level = intel_cache_descriptors[n].level;
  943. mem->sharedways = sharedways;
  944. return 0;
  945. }
  946. }
  947. return -1;
  948. }
  949. static int
  950. get_intel_tlb(unsigned descriptor,torque_tlbt *tlb,unsigned sharedways){
  951. unsigned n;
  952. for(n = 0 ; n < sizeof(intel_tlb_descriptors) / sizeof(*intel_tlb_descriptors) ; ++n){
  953. if(intel_tlb_descriptors[n].descriptor == descriptor){
  954. tlb->pagesize = intel_tlb_descriptors[n].pagesize;
  955. tlb->entries = intel_tlb_descriptors[n].entries;
  956. tlb->associativity = intel_tlb_descriptors[n].associativity;
  957. tlb->tlbtype = intel_tlb_descriptors[n].tlbtype;
  958. tlb->level = intel_tlb_descriptors[n].level;
  959. tlb->sharedways = sharedways;
  960. return 0;
  961. }
  962. }
  963. return -1;
  964. }
  965. static int
  966. get_intel_trace(unsigned descriptor){
  967. unsigned n;
  968. for(n = 0 ; n < sizeof(intel_trace_descriptors) / sizeof(*intel_trace_descriptors) ; ++n){
  969. if(intel_trace_descriptors[n] == descriptor){
  970. return 0;
  971. }
  972. }
  973. return -1;
  974. }
  975. // *DOES NOT* compare sharing values, since that isn't yet generally detected
  976. // at memory detection time.
  977. static torque_memt *
  978. match_memtype(unsigned memtc,torque_memt *types,
  979. const torque_memt *amem){
  980. unsigned n;
  981. for(n = 0 ; n < memtc ; ++n){
  982. if(compare_memdetails(types + n,amem) == 0){
  983. return types + n;
  984. }
  985. }
  986. return NULL;
  987. }
// Decode the packed one-byte descriptors returned by CPUID function 2 into
// cpu's memory and TLB tables. Caches, TLBs, trace caches and prefetch
// hints share one descriptor namespace. Returns 0 on success, -1 on an
// unknown descriptor or allocation failure.
static int
decode_intel_func2(torque_cput *cpu,uint32_t *gpregs){
	uint32_t mask;
	unsigned z;

	// Each GP register will set its MSB to 0 if it contains valid 1-byte
	// descriptors in each byte (save AL, the required number of calls).
	for(z = 0 ; z < 4 ; ++z){
		unsigned y;

		if(gpregs[z] & 0x80000000u){
			continue;	// register holds no valid descriptors
		}
		mask = 0xff000000;	// extract most-significant byte first
		for(y = 0 ; y < 4 ; ++y){
			unsigned descriptor;

			// A zero byte is the null descriptor; skip it.
			if( (descriptor = (gpregs[z] & mask) >> ((3u - y) * 8u)) ){
				torque_memt mem;
				torque_tlbt tlb;

				// Physical resources are shared at least by
				// the logical cores, but also further FIXME
				if(get_intel_cache(descriptor,&mem,cpu->threadspercore) == 0){
					// Don't add duplicates from CPUID Fxn4
					if(!match_memtype(cpu->memories,cpu->memdescs,&mem)){
						if(add_hwmem(&cpu->memories,&cpu->memdescs,&mem) == NULL){
							return -1;
						}
					}
				}else if(get_intel_tlb(descriptor,&tlb,cpu->threadspercore) == 0){
					if(add_tlb(&cpu->tlbs,&cpu->tlbdescs,&tlb) == NULL){
						return -1;
					}
				}else if(get_intel_trace(descriptor) == 0){
					// no one cares
				}else if(descriptor == 0xf0){
					// FIXME 64-byte prefetching
				}else if(descriptor == 0xf1){
					// FIXME 128-byte prefetching
				}else if(descriptor == 0x40){
					// Means "no higher(?)-level cache"
				}else if(descriptor == 0xff){
					// Means "call with leaf 4"
				}else{
					// NOTE(review): fprintf() needs <stdio.h>,
					// which isn't included directly here --
					// presumably pulled in via internal.h; confirm.
					fprintf(stderr, "UNKNOWN DESCRIPTOR %x\n",descriptor);
					return -1;
				}
			}
			// Don't interpret bits 0..7 of EAX (AL in old notation)
			if((mask >>= 8) == 0x000000ff && z == 0){
				break;
			}
		}
	}
	return 0;
}
  1041. // Function 2 of Intel's CPUID -- See 3.1.3 of the CPUID Application Note
  1042. static int
  1043. id_intel_caches_old(uint32_t maxlevel,torque_cput *cpu){
  1044. uint32_t gpregs[4],callreps;
  1045. int ret;
  1046. if(maxlevel < CPUID_STANDARD_CPUCONF){
  1047. return -1;
  1048. }
  1049. cpuid(CPUID_STANDARD_CPUCONF,0,gpregs);
  1050. if((callreps = gpregs[0] & 0x000000ffu) != 1){
  1051. return -1;
  1052. }
  1053. while(!(ret = decode_intel_func2(cpu,gpregs))){
  1054. if(--callreps == 0){
  1055. break;
  1056. }
  1057. cpuid(CPUID_STANDARD_CPUCONF,0,gpregs);
  1058. }
  1059. return ret;
  1060. }
  1061. static int
  1062. id_intel_caches(uint32_t maxlevel,const struct feature_flags *ff __attribute__ ((unused)),
  1063. torque_cput *cpu){
  1064. unsigned n,level,maxdc;
  1065. uint32_t gpregs[4];
  1066. if(maxlevel < CPUID_STANDARD_CACHECONF){
  1067. // We determine the number of cores per package using the
  1068. // deterministic cache function (for some reason). Thankfully,
  1069. // all multicore processors support said function.
  1070. cpu->coresperpackage = 1;
  1071. return id_intel_caches_old(maxlevel,cpu);
  1072. }
  1073. maxdc = level = 1;
  1074. do{
  1075. enum { // Table 2.9, IAN 485
  1076. NULLCACHE = 0,
  1077. DATACACHE = 1,
  1078. CODECACHE = 2,
  1079. UNIFIEDCACHE = 3,
  1080. } cachet;
  1081. n = 0;
  1082. do{
  1083. torque_memt mem;
  1084. unsigned lev,cpp;
  1085. cpuid(CPUID_STANDARD_CACHECONF,n++,gpregs);
  1086. lev = (gpregs[0] >> 5u) & 0x7u; // AX[7..5]
  1087. cachet = gpregs[0] & 0x1fu; // AX[4..0]
  1088. if(cachet == DATACACHE){ // Memory type is in AX[4..0]
  1089. mem.memtype = MEMTYPE_DATA;
  1090. }else if(cachet == CODECACHE){
  1091. mem.memtype = MEMTYPE_CODE;
  1092. }else if(cachet == UNIFIEDCACHE){
  1093. mem.memtype = MEMTYPE_UNIFIED;
  1094. }else if(cachet == NULLCACHE){
  1095. continue;
  1096. }else{
  1097. return -1;
  1098. }
  1099. if(lev > maxdc){
  1100. maxdc = lev;
  1101. }
  1102. if(lev != level){
  1103. continue;
  1104. }
  1105. // Linesize is EBX[11:0] + 1
  1106. mem.linesize = (gpregs[1] & 0xfffu) + 1;
  1107. // EAX[9]: direct, else (EBX[31..22] + 1)-assoc
  1108. mem.associativity = (gpregs[0] & 0x200u) ? 1 :
  1109. (((gpregs[1] >> 22u) & 0x3ffu) + 1);
  1110. // Partitions = EBX[21:12] + 1, sets = ECX + 1
  1111. mem.totalsize = mem.associativity *
  1112. (((gpregs[1] >> 12u) & 0x1ffu) + 1) *
  1113. mem.linesize * (gpregs[2] + 1);
  1114. // Maximum number of logical processors in a physical
  1115. // package sharing the cache is EAX[25:14] + 1
  1116. mem.sharedways = ((gpregs[0] >> 14u) & 0xfffu) + 1;
  1117. mem.level = lev;
  1118. // Cores per package = EAX[31:26] + 1. Maximum
  1119. // possible, not necessarily installed.
  1120. if((cpp = ((gpregs[0] >> 26u) & 0x3fu) + 1) == 0){
  1121. return -1;
  1122. }
  1123. if(cpu->coresperpackage == 0){
  1124. if(maxlevel < CPUID_STANDARD_TOPOLOGY){
  1125. // See comments within x86_getprocsig()
  1126. if((cpu->threadspercore /= cpp) == 0){
  1127. return -1;
  1128. }
  1129. }
  1130. cpu->coresperpackage = cpp;
  1131. }else if(cpu->coresperpackage / cpu->threadspercore > cpp){
  1132. return -1;
  1133. }
  1134. if(mem.sharedways < cpu->threadspercore){
  1135. mem.sharedways = cpu->threadspercore;
  1136. }
  1137. if(add_hwmem(&cpu->memories,&cpu->memdescs,&mem) == NULL){
  1138. return -1;
  1139. }
  1140. }while(cachet != NULLCACHE);
  1141. }while(++level <= maxdc);
  1142. return id_intel_caches_old(maxlevel,cpu);
  1143. }
  1144. static inline int
  1145. amd_dtlb_presentp(uint32_t reg){
  1146. // Must have non-zero associativity and entry count
  1147. return (reg & 0xf0000000) && (reg & 0x0fff0000);
  1148. }
  1149. static inline int
  1150. amd_itlb_presentp(uint32_t reg){
  1151. return (reg & 0x0000f000) && (reg & 0x00000fff);
  1152. }
  1153. static inline int
  1154. amd_cache_presentp(uint32_t reg){
  1155. return !!((reg >> 12u) & 0xfu);
  1156. }
  1157. static unsigned
  1158. amd_l23assoc(unsigned idx,unsigned lines){
  1159. switch(idx){
  1160. case 0xf: return lines; // fully associative
  1161. case 0xe: return 128;
  1162. case 0xd: return 96;
  1163. case 0xc: return 64;
  1164. case 0xb: return 48;
  1165. case 0xa: return 32;
  1166. case 0x8: return 16;
  1167. case 0x6: return 8;
  1168. case 0x4: return 4;
  1169. case 0x2: return 2;
  1170. case 0x1: return 1;
  1171. // 0 == explicitly disabled. all other values reserved.
  1172. }
  1173. return 0;
  1174. }
  1175. static inline unsigned
  1176. scale_amd_l23tlb(unsigned dents,unsigned psize){
  1177. if(psize == 4u * 1024 * 1024){
  1178. return dents / 2;
  1179. }else if(psize == 2u * 1024 * 1024){
  1180. return dents;
  1181. }else if(psize == 1u * 1024 * 1024 * 1024){
  1182. return dents;
  1183. }else if(psize == 4u * 1024){
  1184. return dents;
  1185. }
  1186. return 0;
  1187. }
  1188. static int
  1189. decode_amd_l23dtlb(uint32_t reg,unsigned *dassoc,unsigned *dents,unsigned psize){
  1190. *dents = (reg >> 16u) & 0xfffu;
  1191. *dents = scale_amd_l23tlb(*dents,psize);
  1192. *dassoc = amd_l23assoc(reg >> 28u,*dents);
  1193. return (*dents && *dassoc) ? 0 : -1;
  1194. }
  1195. static int
  1196. decode_amd_l23itlb(uint32_t reg,unsigned *iassoc,unsigned *ients,unsigned psize){
  1197. *ients = reg & 0xfffu;
  1198. *ients = scale_amd_l23tlb(*ients,psize);
  1199. *iassoc = amd_l23assoc((reg >> 12u) & 0xf,*ients);
  1200. return (*ients && *iassoc) ? 0 : -1;
  1201. }
  1202. static int
  1203. decode_amd_l23cache(uint32_t reg,uintmax_t *size,unsigned *assoc,unsigned *lsize,
  1204. unsigned shift,unsigned mul){
  1205. unsigned lines;
  1206. *size = (reg >> shift) * 1024 * mul;
  1207. *lsize = reg & 0xffu;
  1208. if(*size / *lsize > UINT_MAX){
  1209. return -1;
  1210. }
  1211. lines = (unsigned)(*size / *lsize);
  1212. *assoc = amd_l23assoc((reg >> 12u) & 0xf,lines);
  1213. return (*size && *assoc && *lsize) ? 0 : -1;
  1214. }
// Decode AMD's 1GB-page TLB leaf (0x80000019): EAX describes the L1
// data/instruction 1GB TLBs, EBX the L2 ones. Only runs when the 1GB
// page feature flag was detected; silently succeeds when the leaf is
// unavailable. gpregs is caller-provided scratch. Returns 0 on
// success (including "nothing to do"), -1 on decode/allocation failure.
static int
id_amd_gbtlbs(uint32_t maxexlevel,const struct feature_flags *ff,uint32_t *gpregs,torque_cput *cpud){
	torque_tlbt tlb,tlb2;
	torque_tlbt itlb,itlb2;
	if(ff->gbpt == 0){ // Check the 1GB Page Table Entries feature flag
		return 0;
	}
	if(maxexlevel < CPUID_EXTENDED_GBTLB){
		return 0; // leaf not implemented; nothing to report
	}
	cpuid(CPUID_EXTENDED_GBTLB,0,gpregs);
	// All four candidate TLBs cover 1GB pages and are shared among the
	// core's hardware threads. Levels: tlb/itlb are L1, tlb2/itlb2 L2.
	itlb.pagesize = itlb2.pagesize = tlb.pagesize =
		tlb2.pagesize = 1024 * 1024 * 1024;
	itlb.sharedways = itlb2.sharedways = tlb.sharedways =
		tlb2.sharedways = cpud->threadspercore;
	itlb.level = tlb.level = 1;
	itlb2.level = tlb2.level = 2;
	tlb.tlbtype = tlb2.tlbtype = MEMTYPE_DATA;
	itlb.tlbtype = itlb2.tlbtype = MEMTYPE_CODE;
	// EAX: L1 1GB TLBs (data in the high half, instruction in the low)
	if(amd_dtlb_presentp(gpregs[0])){
		if(decode_amd_l23dtlb(gpregs[0],&tlb.associativity,&tlb.entries,tlb.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb) == NULL){
			return -1;
		}
	}
	if(amd_itlb_presentp(gpregs[0])){
		if(decode_amd_l23itlb(gpregs[0],&itlb.associativity,&itlb.entries,itlb.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb) == NULL){
			return -1;
		}
	}
	// EBX: L2 1GB TLBs, same layout as EAX
	if(amd_dtlb_presentp(gpregs[1])){
		if(decode_amd_l23dtlb(gpregs[1],&tlb2.associativity,&tlb2.entries,tlb2.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb2) == NULL){
			return -1;
		}
	}
	if(amd_itlb_presentp(gpregs[1])){
		if(decode_amd_l23itlb(gpregs[1],&itlb2.associativity,&itlb2.entries,itlb2.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb2) == NULL){
			return -1;
		}
	}
	return 0;
}
  1268. // For TLBs which can be either unused, 2M or 4M, Long Mode always uses 4M,
  1269. // Legacy Mode+PAE uses 2M, +PSE uses 4M, and otherwise only 4K pages.
  1270. static size_t
  1271. determine_amd_pagesize(const struct feature_flags *ff){
  1272. if(ff->lme == 1){
  1273. return 1024 * 1024 * 4; // FIXME must also check MSR for LMER!
  1274. }
  1275. return 1024 * 1024 * 2; // FIXME can be 4MB or unused, see above
  1276. }
// Decode AMD's L2/L3 cache and L2 TLB leaf (0x80000006), then chain into
// the 1GB TLB decoder. gpregs is caller scratch. Returns 0 on success
// (including leaf-unsupported), -1 on decode/allocation failure.
static int
id_amd_23caches(uint32_t maxexlevel,const struct feature_flags *ff,uint32_t *gpregs,torque_cput *cpud){
	torque_tlbt tlb,tlb24,itlb,itlb24;
	torque_memt l2cache,l3cache;
	if(maxexlevel < CPUID_EXTENDED_L23CACHE_TLB){
		return 0; // leaf unsupported; no L2/L3 information available
	}
	cpuid(CPUID_EXTENDED_L23CACHE_TLB,0,gpregs);
	// L2 is shared by a core's threads; L3 by the package's cores.
	l2cache.sharedways = cpud->threadspercore;
	l3cache.sharedways = cpud->coresperpackage;
	l2cache.level = 2;
	l3cache.level = 3;
	l2cache.memtype = l3cache.memtype = MEMTYPE_UNIFIED;
	// ECX: L2 cache descriptor (size at bit 16, in 1KB units)
	if(amd_cache_presentp(gpregs[2])){
		if(decode_amd_l23cache(gpregs[2],&l2cache.totalsize,&l2cache.associativity,
					&l2cache.linesize,16,1)){
			return -1;
		}
		if(add_hwmem(&cpud->memories,&cpud->memdescs,&l2cache) == NULL){
			return -1;
		}
	}
	// EDX: L3 cache descriptor (size at bit 18, in 512KB units)
	if(amd_cache_presentp(gpregs[3])){
		if(decode_amd_l23cache(gpregs[3],&l3cache.totalsize,&l3cache.associativity,
					&l3cache.linesize,18,512)){
			return -1;
		}
		if(add_hwmem(&cpud->memories,&cpud->memdescs,&l3cache) == NULL){
			return -1;
		}
	}
	// L2 TLBs: EAX describes the 2MB/4MB-page TLBs (tlb24/itlb24),
	// EBX the 4KB-page TLBs (tlb/itlb).
	tlb.pagesize = itlb.pagesize = 4096;
	tlb24.pagesize = itlb24.pagesize = determine_amd_pagesize(ff);
	tlb.sharedways = itlb.sharedways = cpud->threadspercore;
	tlb24.sharedways = itlb24.sharedways = cpud->threadspercore;
	tlb.level = itlb.level = tlb24.level = itlb24.level = 2;
	tlb.tlbtype = tlb24.tlbtype = MEMTYPE_DATA;
	itlb.tlbtype = itlb24.tlbtype = MEMTYPE_CODE;
	if(amd_dtlb_presentp(gpregs[0])){
		if(decode_amd_l23dtlb(gpregs[0],&tlb24.associativity,&tlb24.entries,tlb24.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb24) == NULL){
			return -1;
		}
	}
	if(amd_itlb_presentp(gpregs[0])){
		if(decode_amd_l23itlb(gpregs[0],&itlb24.associativity,&itlb24.entries,itlb24.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb24) == NULL){
			return -1;
		}
	}
	if(amd_dtlb_presentp(gpregs[1])){
		if(decode_amd_l23dtlb(gpregs[1],&tlb.associativity,&tlb.entries,tlb.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb) == NULL){
			return -1;
		}
	}
	if(amd_itlb_presentp(gpregs[1])){
		if(decode_amd_l23itlb(gpregs[1],&itlb.associativity,&itlb.entries,itlb.pagesize)){
			return -1;
		}
		if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb) == NULL){
			return -1;
		}
	}
	return id_amd_gbtlbs(maxexlevel,ff,gpregs,cpud);
}
  1349. static inline unsigned
  1350. amd_l1assoc(unsigned idx,unsigned lines){
  1351. // 0xff is fully associative. 0 is reserved. otherwise, direct encode
  1352. return idx == 0xffu ? lines : idx;
  1353. }
  1354. static int
  1355. decode_amd_l1tlb(uint32_t reg,unsigned *dassoc,unsigned *iassoc,unsigned *dents,
  1356. unsigned *ients){
  1357. *dents = (reg >> 16u) & 0xffu;
  1358. *ients = reg & 0xffu;
  1359. *dassoc = amd_l1assoc(reg >> 24u,*dents);
  1360. *iassoc = amd_l1assoc((reg >> 8u) & 0xffu,*ients);
  1361. return (*dassoc && *iassoc && *dents && *ients) ? 0 : -1;
  1362. }
  1363. static int
  1364. decode_amd_l1cache(uint32_t reg,uintmax_t *size,unsigned *assoc,unsigned *lsize){
  1365. *size = (reg >> 24u) * 1024u;
  1366. *lsize = reg & 0xffu;
  1367. *assoc = amd_l1assoc((reg >> 16u) & 0xffu,(unsigned)(*size / *lsize));
  1368. return (*size && *assoc && *lsize) ? 0 : -1;
  1369. }
  1370. static int
  1371. id_amd_caches(uint32_t maxlevel __attribute__ ((unused)),const struct feature_flags *ff,torque_cput *cpud){
  1372. torque_tlbt tlb,tlb24,itlb,itlb24;
  1373. torque_memt l1dcache,l1icache;
  1374. uint32_t maxex,gpregs[4];
  1375. if((maxex = identify_extended_cpuid()) < CPUID_AMD_L1CACHE_TLB){
  1376. return -1;
  1377. }
  1378. // EAX/EBX: 2/4MB / 4KB TLB descriptors ECX: DL1 EDX: CL1
  1379. cpuid(CPUID_AMD_L1CACHE_TLB,0,gpregs);
  1380. if(decode_amd_l1cache(gpregs[2],&l1icache.totalsize,&l1icache.associativity,
  1381. &l1icache.linesize)){
  1382. return -1;
  1383. }
  1384. if(decode_amd_l1cache(gpregs[3],&l1dcache.totalsize,&l1dcache.associativity,
  1385. &l1dcache.linesize)){
  1386. return -1;
  1387. }
  1388. l1icache.sharedways = l1dcache.sharedways = cpud->threadspercore;
  1389. l1icache.level = l1dcache.level = 1;
  1390. l1icache.memtype = MEMTYPE_CODE;
  1391. l1dcache.memtype = MEMTYPE_DATA;
  1392. if(decode_amd_l1tlb(gpregs[0],&tlb24.associativity,&itlb24.associativity,
  1393. &tlb24.entries,&itlb24.entries)){
  1394. return -1;
  1395. }
  1396. if(decode_amd_l1tlb(gpregs[1],&tlb.associativity,&itlb.associativity,
  1397. &tlb.entries,&itlb.entries)){
  1398. return -1;
  1399. }
  1400. tlb.pagesize = itlb.pagesize = 4096;
  1401. tlb24.pagesize = itlb24.pagesize = determine_amd_pagesize(ff);
  1402. tlb.sharedways = itlb.sharedways = cpud->threadspercore;
  1403. tlb24.sharedways = itlb24.sharedways = cpud->threadspercore;
  1404. tlb.level = itlb.level = tlb24.level = itlb24.level = 1;
  1405. tlb.tlbtype = tlb24.tlbtype = MEMTYPE_DATA;
  1406. itlb.tlbtype = itlb24.tlbtype = MEMTYPE_CODE;
  1407. if(add_hwmem(&cpud->memories,&cpud->memdescs,&l1icache) == NULL){
  1408. return -1;
  1409. }
  1410. if(add_hwmem(&cpud->memories,&cpud->memdescs,&l1dcache) == NULL){
  1411. return -1;
  1412. }
  1413. if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb) == NULL){
  1414. return -1;
  1415. }
  1416. if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&tlb24) == NULL){
  1417. return -1;
  1418. }
  1419. if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb) == NULL){
  1420. return -1;
  1421. }
  1422. if(add_tlb(&cpud->tlbs,&cpud->tlbdescs,&itlb24) == NULL){
  1423. return -1;
  1424. }
  1425. if(id_amd_23caches(maxex,ff,gpregs,cpud)){
  1426. return -1;
  1427. }
  1428. return 0;
  1429. }
  1430. static int
  1431. id_via_caches(uint32_t maxlevel __attribute__ ((unused)),
  1432. const struct feature_flags *ff __attribute__ ((unused)),
  1433. torque_cput *cpu){
  1434. // FIXME What a cheap piece of garbage, yeargh! VIA doesn't supply
  1435. // cache line info via CPUID. VIA C3 Antaur/Centaur both use 32b. The
  1436. // proof is by method of esoteric reference:
  1437. // http://www.digit-life.com/articles2/rmma/rmma-via-c3.html
  1438. torque_memt l1via = {
  1439. .level = 1,
  1440. .linesize = 32, // FIXME
  1441. .associativity = 0, // FIXME
  1442. .totalsize = 0, // FIXME
  1443. .sharedways = 0, // FIXME
  1444. .memtype = MEMTYPE_UNKNOWN, // FIXME
  1445. }; // FIXME handle other levels of cache
  1446. if(add_hwmem(&cpu->memories,&cpu->memdescs,&l1via) == NULL){
  1447. return -1;
  1448. }
  1449. return 0;
  1450. }
  1451. static int
  1452. x86_getbrandname(torque_cput *cpudesc){
  1453. char *aname,brandname[16 * 3 + 1]; // _NAMEx functions return E[BCD]X
  1454. cpuid_class ops[] = { CPUID_EXTENDED_CPU_NAME1,
  1455. CPUID_EXTENDED_CPU_NAME2,
  1456. CPUID_EXTENDED_CPU_NAME3 };
  1457. uint32_t maxlevel;
  1458. int hadspace;
  1459. unsigned z;
  1460. if((maxlevel = identify_extended_cpuid()) < CPUID_EXTENDED_CPU_NAME3){
  1461. return -1;
  1462. }
  1463. // We want to remove duplicate and leading spaces (not localized white-
  1464. // space, but ASCII 0x20 (SP). Do *not* use isspace(), etc).
  1465. hadspace = 1;
  1466. aname = brandname;
  1467. for(z = 0 ; z < sizeof(ops) / sizeof(*ops) ; ++z){
  1468. uint32_t gpregs[4];
  1469. unsigned y;
  1470. cpuid(ops[z],0,gpregs);
  1471. for(y = 0 ; y < sizeof(gpregs) / sizeof(*gpregs) ; ++y){
  1472. unsigned x;
  1473. for(x = 0 ; x < 4 ; ++x){
  1474. char c = ((const char *)(gpregs + y))[x];
  1475. if(c != ' ' || !hadspace){
  1476. *aname++ = c;
  1477. hadspace = c == ' ';
  1478. }
  1479. }
  1480. }
  1481. }
  1482. *aname = '\0';
  1483. if((cpudesc->strdescription = strdup(brandname)) == NULL){
  1484. return -1;
  1485. }
  1486. return 0;
  1487. }
  1488. static int
  1489. id_x86_topology(uint32_t maxfunc,const struct feature_flags *ff,torque_cput *cpu){
  1490. uint32_t gpregs[4];
  1491. if(maxfunc < CPUID_CPU_VERSION){
  1492. return -1;
  1493. }
  1494. cpuid(CPUID_CPU_VERSION,0,gpregs);
  1495. // http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
  1496. // EBX[23..16] is "the maximum number of addressable ID's that can be
  1497. // assigned to logical processors in a physical package." Also: "For
  1498. // processors that report CPUID.1:EBX[23:16] as reserved (i.e. 0), the
  1499. // processor supports only one level of topology." EBX[23:16] on AMD is
  1500. // "LogicalProcessorCount", *iff* CPUID1.EDX[HTT] is 1 FIXME.
  1501. if(ff->ht){
  1502. if((cpu->threadspercore = (gpregs[1] >> 16u) & 0xffu) == 0){
  1503. return -1;
  1504. }
  1505. }else{
  1506. cpu->threadspercore = 1; // can't have 0 threads
  1507. }
  1508. // Round it to the nearest >= power of 2...
  1509. while(cpu->threadspercore & (cpu->threadspercore - 1)){
  1510. ++cpu->threadspercore;
  1511. }
  1512. // ...then divide by EAX[31:26] with CPUID_STANDARD_CACHECONF. We
  1513. // do this in id_intel_caches(), if it's available.
  1514. return 0;
  1515. }
// CPUID function 80000001 feature flags
// ECX results
#define CPUID_FMA4		0x00010000u // bit 16, 4-operand FMA
#define CPUID_WDT		0x00001000u // bit 12, watchdog timer
#define CPUID_XOP		0x00000800u // bit 11, XOP instructions
#define CPUID_IBS		0x00000400u // bit 10
#define CPUID_OSVW		0x00000200u // bit 9
#define CPUID_3DNOWPREFETCH	0x00000100u // bit 8
#define CPUID_MISALIGNSSE	0x00000080u // bit 7
#define CPUID_SSE4A		0x00000040u // bit 6
#define CPUID_ABM		0x00000020u // bit 5
#define CPUID_ALTMOVCR8		0x00000010u // bit 4
#define CPUID_EXTAPICSPACE	0x00000008u // bit 3
#define CPUID_SVM		0x00000004u // bit 2, secure virtual machine
#define CPUID_CMPLEGACY		0x00000002u // bit 1
#define CPUID_CLAHFSAHF		0x00000001u // bit 0
// EDX results (taken from AMD manual, need crosschecking with Intel FIXME)
#define CPUID_3DNOW		0x80000000u // bit 31, 3D-Now
#define CPUID_3DNOWEXT		0x40000000u // bit 30, 3D-Now extensions
#define CPUID_LME		0x20000000u // bit 29, long-mode enable
#define CPUID_RDTSCP		0x08000000u // bit 27, read tscp
#define CPUID_1GB		0x04000000u // bit 26, 1GB/4-level pages
  1538. static inline int
  1539. x86apic(unsigned maxlevel,uint32_t *apic){
  1540. uint32_t gpregs[4],lev;
  1541. // CPUID1.EBX[31:24] is the local APIC on Intel and AMD
  1542. if(maxlevel < CPUID_CPU_VERSION){
  1543. return -1; // FIXME any other way to get local APIC?
  1544. }
  1545. cpuid(CPUID_CPU_VERSION,0,gpregs);
  1546. *apic = (gpregs[1] >> 24u) & 0xffu; // 8-bit legacy APIC
  1547. // AMD doesn't have extended APIC as of 25481-Revision 2.28 FIXME
  1548. // (but does by the time of 25481 Rev 2.34, 2010-09)
  1549. if(maxlevel < CPUID_STANDARD_TOPOLOGY){ // We only have legacy APIC
  1550. return 0;
  1551. }
  1552. cpuid(CPUID_STANDARD_TOPOLOGY,0,gpregs);
  1553. // EDX holds the 32-bit Extended APIC. Last 8 bits ought equal legacy.
  1554. if((gpregs[3] & 0xff) != *apic){
  1555. fprintf(stderr, "apic nonmatch %u != %u\n", gpregs[3], *apic);
  1556. return -1;
  1557. }
  1558. fprintf(stderr, "apic nmatch %u == %u\n", gpregs[3], *apic);
  1559. *apic = gpregs[3];
  1560. // ECX[15..8] holds "Level type": 0 == invalid, 1 == thread, 2 == core
  1561. while( (lev = (gpregs[2] >> 8u) & 0xffu) ){
  1562. switch(lev){
  1563. case 0x2: // core
  1564. break;
  1565. case 0x1: // thread
  1566. break;
  1567. default:
  1568. return -1;
  1569. }
  1570. cpuid(CPUID_STANDARD_TOPOLOGY,++lev,gpregs);
  1571. if(gpregs[3] != *apic){
  1572. return -1;
  1573. }
  1574. }
  1575. return 0;
  1576. }
  1577. static int
  1578. id_amd_topology(uint32_t maxfunc, const struct feature_flags *ff,
  1579. torque_cput *cpu){
  1580. unsigned apiccorebits;
  1581. uint32_t gpregs[4];
  1582. cpu->coresperpackage = 1;
  1583. if(maxfunc < CPUID_EXTENDED_LMADDRICORE){
  1584. return id_x86_topology(maxfunc,ff,cpu);
  1585. }
  1586. cpuid(CPUID_EXTENDED_LMADDRICORE,0,gpregs);
  1587. if( (apiccorebits = ((gpregs[2] >> 12u) & 0xf)) ){
  1588. unsigned z = apiccorebits;
  1589. do{
  1590. cpu->coresperpackage <<= 1u;
  1591. }while(--z);
  1592. }else{
  1593. // cores per processor = ECX[7:0] + 1 ("NC")
  1594. cpu->coresperpackage = (gpregs[2] & 0xff) + 1;
  1595. }
  1596. // CPUID.80000001 ECX[1] is CmpLegacy. LogicalProcessorCount is
  1597. // reserved when CmpLegacy is set, or HTT is 0.
  1598. if(!(gpregs[3] & CPUID_CMPLEGACY)){
  1599. return id_x86_topology(maxfunc,ff,cpu);
  1600. }
  1601. cpu->threadspercore = 1;
  1602. if(x86apic(maxfunc, &cpu->spec.x86.apic)){
  1603. return -1;
  1604. }
  1605. return 0;
  1606. }
  1607. static int
  1608. id_intel_topology(uint32_t maxfunc,const struct feature_flags *ff,torque_cput *cpu){
  1609. uint32_t gpregs[4];
  1610. if(maxfunc < CPUID_STANDARD_TOPOLOGY){
  1611. return id_x86_topology(maxfunc,ff,cpu);
  1612. }
  1613. cpuid(CPUID_STANDARD_TOPOLOGY,0,gpregs);
  1614. if(((gpregs[2] >> 8u) & 0xffu) != 1u){
  1615. return -1;
  1616. }
  1617. if((cpu->threadspercore = (gpregs[1] & 0xffffu)) == 0){
  1618. return -1;
  1619. }
  1620. cpuid(CPUID_STANDARD_TOPOLOGY,1,gpregs);
  1621. if(((gpregs[2] >> 8u) & 0xffu) != 2u){
  1622. return -1;
  1623. }
  1624. if((cpu->coresperpackage = (gpregs[1] & 0xffffu)) == 0){
  1625. return -1;
  1626. }
  1627. if(x86apic(maxfunc, &cpu->spec.x86.apic)){
  1628. return -1;
  1629. }
  1630. return 0;
  1631. }
// CPUID function 00000001 feature flags
// ECX feature flags
#define FFLAG_SSE3	0x00000001u // bit 0
#define FFLAG_SSE4A	0x00000040u // bit 6
#define FFLAG_SSSE3	0x00000200u // bit 9
#define FFLAG_DCA	0x00040000u // bit 18, direct cache access
#define FFLAG_SSE41	0x00080000u // bit 19
#define FFLAG_SSE42	0x00100000u // bit 20
#define FFLAG_X2APIC	0x00200000u // bit 21
#define FFLAG_MOVBE	0x00400000u // bit 22, MOVBE instruction
#define FFLAG_POPCNT	0x00800000u // bit 23, POPCNT instruction
#define FFLAG_AES	0x02000000u // bit 25, AESni instructions
#define FFLAG_XSAVE	0x04000000u // bit 26, XSAVE/XRSTOR/X[SG]ETBV
#define FFLAG_AVX	0x10000000u // bit 28, AVX
#define FFLAG_RDRAND	0x40000000u // bit 30, RDRAND
// EDX feature flags
#define FFLAG_PSE	0x00000008u // bit 3, page size extension
#define FFLAG_PAE	0x00000040u // bit 6, physical address extension
#define FFLAG_PSE36	0x00020000u // bit 17, 36-bit page size extension
#define FFLAG_MMX	0x00800000u // bit 23
#define FFLAG_SSE	0x02000000u // bit 25
#define FFLAG_SSE2	0x04000000u // bit 26
#define FFLAG_HT	0x10000000u // bit 28, hyper-threading
  1655. static int
  1656. x86_getprocsig(uint32_t maxfunc,x86_details *cpu,struct feature_flags *ff){
  1657. uint32_t gpregs[4],maxex;
  1658. if(maxfunc < CPUID_CPU_VERSION){
  1659. return -1;
  1660. } // CPUID1.EAX is the same on Intel and AMD
  1661. cpuid(CPUID_CPU_VERSION,0,gpregs);
  1662. cpu->stepping = gpregs[0] & 0xfu; // Stepping: EAX[3..0]
  1663. cpu->x86type = (gpregs[0] >> 12u) & 0x2u; // Processor type: EAX[13..12]
  1664. // Extended model is EAX[19..16]. Model is EAX[7..4].
  1665. cpu->model = ((gpregs[0] >> 12u) & 0xf0u) | ((gpregs[0] >> 4u) & 0xfu);
  1666. // Extended family is EAX[27..20]. Family is EAX[11..8].
  1667. cpu->family = ((gpregs[0] >> 17u) & 0x7f8u) | ((gpregs[0] >> 8u) & 0xfu);
  1668. memset(ff,0,sizeof(*ff));
  1669. cpu->features.sse3 = !!(gpregs[2] & FFLAG_SSE3);
  1670. cpu->features.ssse3 = !!(gpregs[2] & FFLAG_SSSE3);
  1671. cpu->features.sse41 = !!(gpregs[2] & FFLAG_SSE41);
  1672. cpu->features.sse42 = !!(gpregs[2] & FFLAG_SSE42);
  1673. cpu->features.sse4a = !!(gpregs[2] & FFLAG_SSE4A);
  1674. ff->dca = !!(gpregs[2] & FFLAG_DCA);
  1675. ff->x2apic = !!(gpregs[2] & FFLAG_X2APIC);
  1676. ff->pse = !!(gpregs[3] & FFLAG_PSE);
  1677. ff->pae = !!(gpregs[3] & FFLAG_PAE);
  1678. ff->pse36 = !!(gpregs[3] & FFLAG_PSE36);
  1679. ff->ht = !!(gpregs[3] & FFLAG_HT);
  1680. cpu->features.mmx = !!(gpregs[3] & FFLAG_MMX);
  1681. cpu->features.sse = !!(gpregs[3] & FFLAG_SSE);
  1682. cpu->features.sse2 = !!(gpregs[3] & FFLAG_SSE2);
  1683. if((maxex = identify_extended_cpuid()) >= CPUID_EXTENDED_CPU_VERSION){
  1684. cpuid(CPUID_EXTENDED_CPU_VERSION,0,gpregs);
  1685. cpu->features.xop = !!(gpregs[2] & CPUID_XOP);
  1686. cpu->features.fma4 = !!(gpregs[2] & CPUID_FMA4);
  1687. ff->lme = !!(gpregs[3] & CPUID_LME);
  1688. ff->gbpt = !!(gpregs[3] & CPUID_1GB);
  1689. }
  1690. /*printf("LME: %d GBPT: %d\n",ff->lme,ff->gbpt);
  1691. printf("DCA: %d X2APIC: %d\n",ff->dca,ff->x2apic);
  1692. printf("PSE: %d PAE: %d PSE36: %d HT: %d\n",ff->pse,ff->pae,ff->pse36,ff->ht);*/
  1693. return 0;
  1694. }
  1695. static int
  1696. x86topology(const torque_cput *cpu, unsigned *thread,
  1697. unsigned *core, unsigned *pkg){
  1698. unsigned tpc, cpp;
  1699. uint32_t apic;
  1700. *core = 0;
  1701. *thread = 0;
  1702. if((tpc = cpu->threadspercore) == 0){
  1703. return -1;
  1704. }
  1705. if((cpp = cpu->coresperpackage) == 0){
  1706. return -1;
  1707. }
  1708. apic = cpu->spec.x86.apic;
  1709. *thread = apic & (tpc - 1);
  1710. while( (tpc /= 2) ){
  1711. apic >>= 1;
  1712. }
  1713. *core = apic & (cpp - 1);
  1714. while( (cpp /= 2) ){
  1715. apic >>= 1;
  1716. }
  1717. *pkg = apic;
  1718. return 0;
  1719. }
// Before this is called, pin to the desired processor (FIXME enforce?). Relies
// on the caller to free data upon error.
// Top-level x86 CPU identification: fills *cpudesc (caches, TLBs, brand
// string, topology counts) and writes this processor's thread/core/package
// coordinates. Returns 0 on success, -1 on any failure (with *cpudesc
// possibly partially populated; caller frees).
int x86cpuid(torque_cput *cpudesc, unsigned *thread, unsigned *core, unsigned *pkg){
	const known_x86_vendor *vendor;
	struct feature_flags ff;
	uint32_t gpregs[4];
	unsigned maxlevel;
	// Reset the description to a known-empty state up front.
	cpudesc->elements = 0;
	cpudesc->tlbdescs = NULL;
	cpudesc->memdescs = NULL;
	cpudesc->strdescription = NULL;
	cpudesc->tlbs = cpudesc->memories = 0;
	memset(&cpudesc->spec, 0, sizeof(cpudesc->spec));
	cpudesc->spec.x86.x86type = PROCESSOR_X86_UNKNOWN;
	cpudesc->threadspercore = cpudesc->coresperpackage = 0;
	if(!cpuid_available()){
		return -1; // EFLAGS.ID untoggleable; no CPUID instruction
	}
	// Leaf 0: maximum supported standard leaf in EAX, 12-byte vendor
	// string in EBX/EDX/ECX (passed as-is to the vendor lookup).
	cpuid(CPUID_MAX_SUPPORT, 0, gpregs);
	maxlevel = gpregs[0];
	if((vendor = lookup_vendor(gpregs + 1)) == NULL){
		return -1; // unrecognized vendor string
	}
	if(x86_getprocsig(maxlevel, &cpudesc->spec.x86, &ff)){
		return -1;
	}
	// Vendor-specific topology (optional) and memory hierarchy (required).
	if(vendor->topfxn && vendor->topfxn(maxlevel, &ff, cpudesc)){
		return -1;
	}
	if(vendor->memfxn(maxlevel, &ff, cpudesc)){
		return -1;
	}
	if(x86_getbrandname(cpudesc)){
		return -1;
	}
	// Finally, decompose our APIC ID into thread/core/package.
	if(x86topology(cpudesc, thread, core, pkg)){
		return -1;
	}
	cpudesc->isa = TORQUE_ISA_X86;
	return 0;
}