PageRenderTime 24ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/atlas/src/atlas-3.8.3/CONFIG/src/backend/archinfo_x86.c

https://github.com/numpy/vendor
C | 495 lines | 362 code | 24 blank | 109 comment | 56 complexity | a9467f69fdb1552a88114200c83765a1 MD5 | raw file
  1. /*
  2. * Automatically Tuned Linear Algebra Software v@(ver)
  3. * (C) Copyright 2006 R. Clint Whaley
  4. *
  5. * Code contributers : R. Clint Whaley, Dean Gaudet
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions, and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. The name of the ATLAS group or the names of its contributers may
  16. * not be used to endorse or promote products derived from this
  17. * software without specific written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  21. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
  23. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. * POSSIBILITY OF SUCH DAMAGE.
  30. *
  31. */
  32. /*
  33. * This code written for ATLAS use by R. Clint Whaley based on code and info
  34. * submitted by Dean Gaudet, with the later help of the following websites:
  35. * http://www.sandpile.org/ia32/cpuid.htm
  36. * http://en.wikipedia.org/wiki/CPUID
  37. */
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include <stdlib.h>
  41. #include "atlconf.h"
  42. #define uint unsigned int
  43. /*
  44. * This routine returns the contents of registers set by the cpuid instruction
  45. * in the array res:
  46. * res[0] : eax
  47. * res[1] : ebx
  48. * res[2] : ecx
  49. * res[3] : edx
  50. */
  51. void do_cpuid(uint *res, uint level);
  52. /* result defines */
  53. #define EAX 0
  54. #define EBX 1
  55. #define ECX 2
  56. #define EDX 3
  57. /* My driver, based on Dean's */
  58. int ProbeArch(char *vendor, unsigned *family, unsigned *model, int *x86_64)
  59. /*
  60. * Returns 0 on success, non-zero on error
  61. */
  62. {
  63. uint r[4];
  64. uint max_level;
  65. uint *vp = (uint*) vendor;
  66. *x86_64 = 0;
  67. /*
  68. * In this call, we ask for max supported cpuid support, and return if
  69. * we can't get any usuable info. Also sets ebx,edx and ecx (16 chars of data)
  70. * to vendor ID string
  71. */
  72. do_cpuid(r, 0);
  73. max_level = r[EAX];
  74. if (!max_level)
  75. return(1);
  76. /*
  77. * Copy vendor string as 3 ints rather than 16 char, then null-term at 12
  78. */
  79. *vp = r[EBX];
  80. vp[1] = r[EDX];
  81. vp[2] = r[ECX];
  82. vendor[12] = '\0';
  83. /*
  84. * Find processor family and model, ouput EAX
  85. * According to latest docs, extended family and model should always be
  86. * added in, not just in the cases shown in the commented-out if statements
  87. * below. The original "only do it in certain cases" was from the official
  88. * IA32 ISA, but doing this causes problems on Xeons, so now we do like the
  89. * newer docs indicate and always add the extended values in
  90. */
  91. do_cpuid(r, 1);
  92. *family = (r[EAX] >> 8) & 0xf; /* base family in bits 11-8 */
  93. /* if (*family == 0xf || *family == 0) */ /* extended family is added in */
  94. *family += ((r[EAX] >> 20) & 0xff);
  95. *model = (r[0] >> 4) & 0xf; /* model in bits 7-4 */
  96. /* if (*model == 0xf) */ /* extended model is concatenated */
  97. *model |= ((r[0] >> 12) & 0xf0);
  98. /*
  99. * Find out if we have extended cpuid level, and if so, see if we've got
  100. * x86-64 capability or not
  101. */
  102. do_cpuid(r, 0x80000000);
  103. if (r[0] >= 0x80000001)
  104. {
  105. do_cpuid(r, 0x80000001);
  106. *x86_64 = (r[EDX] & (1<<29)) != 0; /* x86-64 in bit 29 */
  107. }
  108. return(0);
  109. }
  110. /*
  111. * constants used to check family + extended family
  112. */
  113. #define EF_486 4 /* also AMD 5x86 and Cyrix 5x86 */
  114. #define EF_P5 5 /* P5, K5 and K6 */
  115. #define EF_P6 6 /* P6, Core and K7 (athlon) */
  116. #define EF_ITAN0 7 /* Itanium */
  117. #define EF_K8_P4_EFF 0x00F /* P4, Hammer, Efficien */
  118. #define EF_K8_ITAN 0x01F /* Hammer, Itanium */
  119. #define EF_K8 0x02F /* Hammer */
  120. #define EF_ITAN 0x020 /* Itanium */
  121. #define EF_K8b 16 /* 3rd gen opteron */
  122. enum FAM {ERR, /* cannot decipher */
  123. i486, /* 486 & AMD 5x86 and Cyrix 5x86 */
  124. P5, /* Original Pentium and AMD K5 & K6 */
  125. P6, /* Intel PIII, Core and AMD K7 (orig athlon) */
  126. P7, /* Intel P4, AMD hammer, Efficeon */
  127. P8B, /* 3rd generation hammer */
  128. ITAN}; /* Intel Itanium */
  129. enum FAM GetFamily(int efam) /* efam = (family+ext fam) from cpuid */
  130. /*
  131. * Translates CPUID (family+extended family) to FAM enum type
  132. */
  133. {
  134. enum FAM iret;
  135. switch (efam)
  136. {
  137. case EF_486: /* also AMD 5x86 and Cyrix 5x86 */
  138. iret = i486;
  139. break;
  140. case EF_P5: /* P5, K5 and K6 */
  141. iret = P5;
  142. break;
  143. case EF_P6: /* P6, Core and K7 (athlon) */
  144. iret = P6;
  145. break;
  146. case EF_K8_P4_EFF: /* P4, Hammer, Efficien */
  147. iret = P7;
  148. break;
  149. case EF_K8_ITAN: /* Hammer, Itanium */
  150. case EF_K8: /* Hammer */
  151. iret = P7;
  152. break;
  153. case EF_K8b:
  154. iret = P8B;
  155. break;
  156. case EF_ITAN: /* Itanium */
  157. case EF_ITAN0: /* Itanium */
  158. iret = ITAN;
  159. break;
  160. default:
  161. iret = ERR;
  162. }
  163. return (iret);
  164. }
  165. enum VEND {VERR, Intel, AMD, TM};
  166. enum VEND str2vend(char *vendor)
  167. /*
  168. * Translates vendor string to enum type
  169. */
  170. {
  171. enum VEND iret;
  172. if (strstr(vendor, "GenuineIntel") != NULL)
  173. iret = Intel;
  174. else if (strstr(vendor, "AuthenticAMD") != NULL)
  175. iret = AMD;
  176. else if (strstr(vendor, "GenuineTMx86") != NULL)
  177. iret = TM;
  178. else
  179. iret = VERR;
  180. return(iret);
  181. }
  182. /*
  183. * Specific chip (family, but disambiguated using vendor string
  184. */
  185. enum CHIP {CERR, Pentium, IntP6, Pentium4, Itanium, K7, Hammer, HammerB,
  186. Crusoe, Efficeon};
  187. enum CHIP Family2Chip(char *vendor, enum FAM family)
  188. /*
  189. * Disambiguates family based on vendor string
  190. */
  191. {
  192. enum CHIP iret=CERR;
  193. enum VEND ivend;
  194. /*
  195. * Figure out the vendor
  196. */
  197. ivend = str2vend(vendor);
  198. if (ivend == VERR)
  199. return(CERR);
  200. switch(family)
  201. {
  202. case i486: /* 486 & AMD 5x86 and Cyrix 5x86; unsupported */
  203. break;
  204. case P5: /* Original Pentium and AMD K5 & K6 */
  205. if (ivend == Intel)
  206. iret = Pentium;
  207. break;
  208. case P6: /* Intel PIII, Core and AMD K7 (orig athlon) */
  209. if (ivend == Intel)
  210. iret = IntP6;
  211. else if (ivend == AMD)
  212. iret = K7;
  213. else if (ivend == TM)
  214. iret = Crusoe;
  215. break;
  216. case P7: /* Intel P4, AMD hammer, Efficeon */
  217. if (ivend == Intel)
  218. iret = Pentium4;
  219. else if (ivend == AMD)
  220. iret = Hammer;
  221. else if (ivend == TM)
  222. iret = Efficeon;
  223. break;
  224. case P8B:
  225. if (ivend == AMD)
  226. iret = HammerB;
  227. break;
  228. case ITAN: /* Intel Itanium */
  229. iret = Itanium;
  230. break;
  231. default:
  232. iret = CERR;
  233. }
  234. return(iret);
  235. }
  236. enum MACHTYPE Chip2Mach(enum CHIP chip, int model, int x8664)
  237. /*
  238. * translates chip and cpuid's model to config's machine enum
  239. */
  240. {
  241. enum MACHTYPE iret=MACHOther;
  242. switch(chip)
  243. {
  244. case Pentium:
  245. switch(model)
  246. {
  247. case 1:
  248. iret = IntP5;
  249. break;
  250. case 4:
  251. case 8:
  252. iret = IntP5MMX;
  253. break;
  254. default:
  255. iret = MACHOther;
  256. }
  257. break;
  258. case IntP6: /* includes PPRO, PII, PIII, Core and Pentium-M */
  259. switch(model)
  260. {
  261. case 0:
  262. case 1:
  263. iret = IntPPRO;
  264. break;
  265. case 3:
  266. case 5:
  267. case 6:
  268. iret = IntPII;
  269. break;
  270. case 7:
  271. case 8:
  272. case 10:
  273. case 11:
  274. iret = IntPIII;
  275. break;
  276. case 9:
  277. case 13:
  278. iret = IntPM;
  279. break;
  280. case 14:
  281. iret = IntCoreDuo;
  282. break;
  283. case 15:
  284. case 23:
  285. case 29:
  286. iret = IntCore2;
  287. break;
  288. case 26:
  289. iret = IntCorei7;
  290. break;
  291. default:
  292. iret = MACHOther;
  293. }
  294. break;
  295. case Pentium4:
  296. switch(model)
  297. {
  298. case 0:
  299. case 1:
  300. case 2:
  301. iret = IntP4;
  302. break;
  303. case 3:
  304. case 4:
  305. iret = IntP4E;
  306. break;
  307. default:
  308. iret = MACHOther;
  309. }
  310. break;
  311. case Itanium:
  312. switch(model)
  313. {
  314. case 7:
  315. iret = IA64Itan;
  316. break;
  317. case 0x1F:
  318. iret = IA64Itan2;
  319. break;
  320. default:
  321. iret = MACHOther;
  322. }
  323. break;
  324. case K7:
  325. switch(model)
  326. {
  327. case 4:
  328. case 6:
  329. case 8:
  330. case 10:
  331. iret = AmdAthlon;
  332. break;
  333. default:
  334. iret = MACHOther;
  335. }
  336. break;
  337. case Hammer:
  338. iret = AmdHammer;
  339. break;
  340. case HammerB:
  341. iret = Amd64K10h;
  342. break;
  343. case Efficeon:
  344. iret = TMEff;
  345. break;
  346. case Crusoe: /* unsupported */
  347. default:
  348. iret = MACHOther;
  349. }
  350. return(iret);
  351. }
  352. void PrintUsage(char *name, int i)
  353. {
  354. fprintf(stderr, "USAGE: %s -v (verb) -b (@ bits) -a (arch) -n (ncpu) -c <ncache> -C <lvl> (cache size) -m (Mhz) -t (cpu throttling)\n", name);
  355. exit(i);
  356. }
  357. int GetFlags(int nargs, char **args, int *CacheLevel)
  358. {
  359. int i, flag = 0;
  360. *CacheLevel = 0;
  361. for (i=1; i < nargs; i++)
  362. {
  363. if (args[i][0] != '-') PrintUsage(args[0], i);
  364. switch(args[i][1])
  365. {
  366. case 'n':
  367. flag |= Pncpu;
  368. break;
  369. case 'c':
  370. flag |= Pncache;
  371. break;
  372. case 'C':
  373. if (++i > nargs)
  374. PrintUsage(args[0], i);
  375. *CacheLevel = atoi(args[i]);
  376. break;
  377. case 'v':
  378. flag |= Pverb;
  379. break;
  380. case 'm':
  381. flag |= PMhz;
  382. break;
  383. case 'a':
  384. flag |= Parch;
  385. break;
  386. case 'b':
  387. flag |= P64;
  388. break;
  389. case 't':
  390. flag |= Pthrottle;
  391. break;
  392. default:
  393. PrintUsage(args[0], i);
  394. }
  395. }
  396. if (!flag)
  397. flag = Parch | P64;
  398. return(flag);
  399. }
  400. main(int nargs, char **args)
  401. {
  402. int ierr, x86_64, flags, CacheLevel;
  403. unsigned family, model;
  404. char *cpu="UNKNOWN", vendor[13];
  405. enum FAM fam;
  406. enum CHIP chip;
  407. enum MACHTYPE mach;
  408. flags = GetFlags(nargs, args, &CacheLevel);
  409. cpu = NULL;
  410. vendor[0] = '\0';
  411. ierr = ProbeArch(vendor, &family, &model, &x86_64);
  412. /*
  413. * If ProbeArch worked, translate vendor+family+model to ATLAS config-name
  414. */
  415. if (!ierr)
  416. {
  417. fam = GetFamily(family);
  418. if (fam)
  419. {
  420. chip = Family2Chip(vendor, fam);
  421. if (chip)
  422. {
  423. mach = Chip2Mach(chip, model, x86_64);
  424. if (!mach) ierr = 300;
  425. }
  426. else ierr = 200;
  427. }
  428. else ierr = 100;
  429. }
  430. if (ierr)
  431. {
  432. fprintf(stderr, "ERROR: enum fam=%d, chip=%d, mach=%d\n",
  433. fam, chip, mach);
  434. printf("ERROR %d: vendor='%s', family=%d, model=%d, x86_64=%d\n",
  435. ierr, vendor, family, model, x86_64);
  436. }
  437. else
  438. {
  439. /*
  440. * If verbatim set, print strings as well as enums
  441. */
  442. if (flags & Parch)
  443. {
  444. if (flags & Pverb)
  445. printf("cpu: %s\n", machnam[mach]);
  446. printf("MACHTYPE=%d\n", mach);
  447. }
  448. if (flags & P64)
  449. printf("PTR BITS=%d\n", x86_64 ? 64 : 32);
  450. /*
  451. * Not sure how to detect this. cpuid has some features that might work,
  452. * will need to experiment later
  453. */
  454. if (flags & Pthrottle)
  455. printf("CPU THROTTLE=0\n");
  456. /*
  457. * These guys can't be supported by cpuid, AFAIK
  458. */
  459. if ((flags & PMhz) || (flags & Pncpu))
  460. printf("Mhz/ncpu=0\n");
  461. /*
  462. * Cache info could be returned, but I'm lazy, so don't
  463. */
  464. if ((flags & Pncache) || (flags & PCacheSize))
  465. printf("ncache/CacheSize=0\n");
  466. if ((flags & (~Pverb)) == 0)
  467. printf("family=%d, model=%d, cpu='%s', Ptr bits=%d, arch#=%d\n",
  468. family, model, machnam[mach], x86_64?64:32, mach);
  469. }
  470. exit(ierr);
  471. }