/drivers/idle/intel_idle.c

http://github.com/mirrors/linux · C · 1695 lines · 1331 code · 178 blank · 186 comment · 80 complexity · 7418e14309f87367c81f21d4387d3d8a MD5 · raw file

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * intel_idle.c - native hardware idle loop for modern Intel processors
  4. *
  5. * Copyright (c) 2013 - 2020, Intel Corporation.
  6. * Len Brown <len.brown@intel.com>
  7. * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  8. */
  9. /*
  10. * intel_idle is a cpuidle driver that loads on specific Intel processors
  11. * in lieu of the legacy ACPI processor_idle driver. The intent is to
  12. * make Linux more efficient on these processors, as intel_idle knows
  13. * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
  14. */
  15. /*
  16. * Design Assumptions
  17. *
  18. * All CPUs have same idle states as boot CPU
  19. *
  20. * Chipset BM_STS (bus master status) bit is a NOP
 * for preventing entry into deep C-states
  22. */
  23. /*
  24. * Known limitations
  25. *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
  27. * to avoid complications with the lapic timer workaround.
  28. * Have not seen issues with suspend, but may need same workaround here.
  29. *
  30. */
/* comment out the DEBUG define below to disable pr_debug() statements */
  32. #define DEBUG
  33. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  34. #include <linux/acpi.h>
  35. #include <linux/kernel.h>
  36. #include <linux/cpuidle.h>
  37. #include <linux/tick.h>
  38. #include <trace/events/power.h>
  39. #include <linux/sched.h>
  40. #include <linux/notifier.h>
  41. #include <linux/cpu.h>
  42. #include <linux/moduleparam.h>
  43. #include <asm/cpu_device_id.h>
  44. #include <asm/intel-family.h>
  45. #include <asm/mwait.h>
  46. #include <asm/msr.h>
  47. #define INTEL_IDLE_VERSION "0.5.1"
/* The cpuidle driver object this module provides to the cpuidle core. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bitmask of state indices to start out disabled — presumably a module parameter; confirm against the module_param() declarations below. */
static unsigned int disabled_states_mask;

/* Per-CPU cpuidle device objects allocated by this driver. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/* Auto-demotion MSR bits to clear, copied from the matched idle_cpu entry. */
static unsigned long auto_demotion_disable_flags;
/* Whether to clear the C1E promotion MSR bit, per the matched idle_cpu entry. */
static bool disable_promotion_to_c1e;
/* When true, intel_idle() skips tick broadcast even without X86_FEATURE_ARAT. */
static bool lapic_timer_always_reliable;
/*
 * Per-CPU-model description: which C-state table to use and which hardware
 * tuning quirks to apply when this driver takes over from acpi_idle.
 */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Baytrail/Cherrytrail-specific auto-demotion disable quirk. */
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably means ACPI _CST is also consulted for this
	 * model — confirm against the ACPI handling code later in the file. */
	bool use_acpi;
};

/* Set during init from the matched CPU id entry; discarded after boot. */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;
static unsigned int mwait_substates __initdata;
/*
 * Driver-private state flags — presumably chosen above the generic
 * CPUIDLE_FLAG_* bits; confirm against linux/cpuidle.h.
 */
/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED BIT(16)
  84. /*
  85. * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  86. * the C-state (top nibble) and sub-state (bottom nibble)
  87. * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
  88. *
  89. * We store the hint at the top of our "flags" for each state.
  90. */
  91. #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
  92. #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
  93. /**
  94. * intel_idle - Ask the processor to enter the given idle state.
  95. * @dev: cpuidle device of the target CPU.
  96. * @drv: cpuidle driver (assumed to point to intel_idle_driver).
  97. * @index: Target idle state index.
  98. *
  99. * Use the MWAIT instruction to notify the processor that the CPU represented by
  100. * @dev is idle and it can try to enter the idle state corresponding to @index.
  101. *
  102. * If the local APIC timer is not known to be reliable in the target idle state,
  103. * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
  104. *
  105. * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
  106. * flushing user TLBs.
  107. *
  108. * Must be called under local_irq_disable().
  109. */
  110. static __cpuidle int intel_idle(struct cpuidle_device *dev,
  111. struct cpuidle_driver *drv, int index)
  112. {
  113. struct cpuidle_state *state = &drv->states[index];
  114. unsigned long eax = flg2MWAIT(state->flags);
  115. unsigned long ecx = 1; /* break on interrupt flag */
  116. bool uninitialized_var(tick);
  117. int cpu = smp_processor_id();
  118. /*
  119. * leave_mm() to avoid costly and often unnecessary wakeups
  120. * for flushing the user TLB's associated with the active mm.
  121. */
  122. if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
  123. leave_mm(cpu);
  124. if (!static_cpu_has(X86_FEATURE_ARAT) && !lapic_timer_always_reliable) {
  125. /*
  126. * Switch over to one-shot tick broadcast if the target C-state
  127. * is deeper than C1.
  128. */
  129. if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
  130. tick = true;
  131. tick_broadcast_enter();
  132. } else {
  133. tick = false;
  134. }
  135. }
  136. mwait_idle_with_hints(eax, ecx);
  137. if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
  138. tick_broadcast_exit();
  139. return index;
  140. }
  141. /**
  142. * intel_idle_s2idle - Ask the processor to enter the given idle state.
  143. * @dev: cpuidle device of the target CPU.
  144. * @drv: cpuidle driver (assumed to point to intel_idle_driver).
  145. * @index: Target idle state index.
  146. *
  147. * Use the MWAIT instruction to notify the processor that the CPU represented by
  148. * @dev is idle and it can try to enter the idle state corresponding to @index.
  149. *
  150. * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
  151. * scheduler tick and suspended scheduler clock on the target CPU.
  152. */
  153. static __cpuidle void intel_idle_s2idle(struct cpuidle_device *dev,
  154. struct cpuidle_driver *drv, int index)
  155. {
  156. unsigned long eax = flg2MWAIT(drv->states[index].flags);
  157. unsigned long ecx = 1; /* break on interrupt flag */
  158. mwait_idle_with_hints(eax, ecx);
  159. }
/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/* Nehalem/Westmere C-states; latency/residency in usec (cpuidle convention). */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Sandy Bridge C-states; latency/residency in usec (cpuidle convention). */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Bay Trail C-states; note module-level (0x5x/0x6x) MWAIT hints. */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Cherry Trail C-states; same hints as Bay Trail, different latencies. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Ivy Bridge (client) C-states. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Ivy Town (Xeon) C-states, 1-2 socket variant. */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Ivy Town 4-socket variant: same hints, larger target residencies. */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Ivy Town 8-socket variant: largest target residencies of the three. */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Haswell C-states, including the deep C8-C10 package states. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Broadwell C-states; mirrors Haswell except for the C3 exit latency. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Skylake (client) C-states; note the 0x33 sub-state hint for C7s. */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Skylake-X (server) C-states: only C1/C1E/C6 are exposed. */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Atom (Bonnell) C-states; hint 0x00 is named C1E on this core. */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Merrifield/Tangier C-states. */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Avoton/Rangeley (Atom server) C-states: only C1 and C6. */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Knights Landing C-states; note hint 0x10 maps to the state named C6 here. */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Broxton/Apollo Lake C-states. */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/* Denverton C-states: C1/C1E/C6 only. */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL } /* end-of-table marker */
};
/*
 * Per-model quirk descriptors referenced by the CPU-id match table below.
 * The "*x" variants (nhx, snx, hsx, ...) differ from their client siblings
 * only by setting .use_acpi.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};
/*
 * CPU model match table: .driver_data points at the idle_cpu configuration
 * to use for that model (consumed by intel_idle_init()).
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
	{}
};
/*
 * Fallback match for models not in intel_idle_ids: any family 6 Intel CPU
 * with MWAIT.  Such CPUs take the ACPI _CST path in intel_idle_init().
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};
  1063. static bool __init intel_idle_max_cstate_reached(int cstate)
  1064. {
  1065. if (cstate + 1 > max_cstate) {
  1066. pr_info("max_cstate %d reached\n", max_cstate);
  1067. return true;
  1068. }
  1069. return false;
  1070. }
#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

/* "intel_idle.no_acpi=1": never consult ACPI _CST. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/* "intel_idle.use_acpi=1": consult _CST even for models without .use_acpi. */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* C-state data from _CST; filled by intel_idle_acpi_cst_extract(). */
static struct acpi_processor_power acpi_state_table __initdata;
  1080. /**
  1081. * intel_idle_cst_usable - Check if the _CST information can be used.
  1082. *
  1083. * Check if all of the C-states listed by _CST in the max_cstate range are
  1084. * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
  1085. */
  1086. static bool __init intel_idle_cst_usable(void)
  1087. {
  1088. int cstate, limit;
  1089. limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
  1090. acpi_state_table.count);
  1091. for (cstate = 1; cstate < limit; cstate++) {
  1092. struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
  1093. if (cx->entry_method != ACPI_CSTATE_FFH)
  1094. return false;
  1095. }
  1096. return true;
  1097. }
/**
 * intel_idle_acpi_cst_extract - Cache the ACPI _CST C-state list.
 *
 * Unless "no_acpi" was passed, walk the possible CPUs until one yields a
 * usable _CST, cache its C-state list in acpi_state_table and claim _CST
 * control from the platform firmware.
 *
 * Return: true if a usable _CST was found and claimed, false otherwise.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		/* Non-zero return: this CPU has no usable _CST; try the next. */
		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/*
		 * States are consumed starting at index 1 (see the loops in
		 * intel_idle_cst_usable() and intel_idle_init_cstates_acpi());
		 * bump the count so it covers that indexing scheme.
		 */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		/* If firmware keeps _CST control, invalidate the cached table. */
		if (!acpi_processor_claim_cst_control()) {
			acpi_state_table.count = 0;
			return false;
		}

		return true;
	}

	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}
/**
 * intel_idle_init_cstates_acpi - Build the idle states list from ACPI _CST.
 * @drv: cpuidle driver to append the states to.
 *
 * Appends one cpuidle state per cached _CST entry (starting at index 1,
 * i.e. skipping C0), up to max_cstate and CPUIDLE_STATE_MAX.
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* _CST stores the raw MWAIT hint in cx->address for FFH states. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		/* Honor the "states_off" mask from the command line. */
		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		state->enter = intel_idle;
		state->enter_s2idle = intel_idle_s2idle;
	}
}
  1161. static bool __init intel_idle_off_by_default(u32 mwait_hint)
  1162. {
  1163. int cstate, limit;
  1164. /*
  1165. * If there are no _CST C-states, do not disable any C-states by
  1166. * default.
  1167. */
  1168. if (!acpi_state_table.count)
  1169. return false;
  1170. limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
  1171. /*
  1172. * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
  1173. * the interesting states are ACPI_CSTATE_FFH.
  1174. */
  1175. for (cstate = 1; cstate < limit; cstate++) {
  1176. if (acpi_state_table.states[cstate].address == mwait_hint)
  1177. return false;
  1178. }
  1179. return true;
  1180. }
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* Without ACPI C-state support the _CST helpers degenerate to no-ops. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
  1187. /**
  1188. * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
  1189. *
  1190. * Tune IVT multi-socket targets.
  1191. * Assumption: num_sockets == (max_package_num + 1).
  1192. */
  1193. static void __init ivt_idle_state_table_update(void)
  1194. {
  1195. /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
  1196. int cpu, package_num, num_sockets = 1;
  1197. for_each_online_cpu(cpu) {
  1198. package_num = topology_physical_package_id(cpu);
  1199. if (package_num + 1 > num_sockets) {
  1200. num_sockets = package_num + 1;
  1201. if (num_sockets > 4) {
  1202. cpuidle_state_table = ivt_cstates_8s;
  1203. return;
  1204. }
  1205. }
  1206. }
  1207. if (num_sockets > 2)
  1208. cpuidle_state_table = ivt_cstates_4s;
  1209. /* else, 1 and 2 socket systems use default ivt_cstates */
  1210. }
  1211. /**
  1212. * irtl_2_usec - IRTL to microseconds conversion.
  1213. * @irtl: IRTL MSR value.
  1214. *
  1215. * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
  1216. */
  1217. static unsigned long long __init irtl_2_usec(unsigned long long irtl)
  1218. {
  1219. static const unsigned int irtl_ns_units[] __initconst = {
  1220. 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
  1221. };
  1222. unsigned long long ns;
  1223. if (!irtl)
  1224. return 0;
  1225. ns = irtl_ns_units[(irtl >> 10) & 0x7];
  1226. return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
  1227. }
  1228. /**
  1229. * bxt_idle_state_table_update - Fix up the Broxton idle states table.
  1230. *
  1231. * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
  1232. * definitive maximum latency and use the same value for target_residency.
  1233. */
  1234. static void __init bxt_idle_state_table_update(void)
  1235. {
  1236. unsigned long long msr;
  1237. unsigned int usec;
  1238. rdmsrl(MSR_PKGC6_IRTL, msr);
  1239. usec = irtl_2_usec(msr);
  1240. if (usec) {
  1241. bxt_cstates[2].exit_latency = usec;
  1242. bxt_cstates[2].target_residency = usec;
  1243. }
  1244. rdmsrl(MSR_PKGC7_IRTL, msr);
  1245. usec = irtl_2_usec(msr);
  1246. if (usec) {
  1247. bxt_cstates[3].exit_latency = usec;
  1248. bxt_cstates[3].target_residency = usec;
  1249. }
  1250. rdmsrl(MSR_PKGC8_IRTL, msr);
  1251. usec = irtl_2_usec(msr);
  1252. if (usec) {
  1253. bxt_cstates[4].exit_latency = usec;
  1254. bxt_cstates[4].target_residency = usec;
  1255. }
  1256. rdmsrl(MSR_PKGC9_IRTL, msr);
  1257. usec = irtl_2_usec(msr);
  1258. if (usec) {
  1259. bxt_cstates[5].exit_latency = usec;
  1260. bxt_cstates[5].target_residency = usec;
  1261. }
  1262. rdmsrl(MSR_PKGC10_IRTL, msr);
  1263. usec = irtl_2_usec(msr);
  1264. if (usec) {
  1265. bxt_cstates[6].exit_latency = usec;
  1266. bxt_cstates[6].target_residency = usec;
  1267. }
  1268. }
/**
 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
 *
 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;

	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit (low nibble must be 8) */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present (CPUID.7.EBX bit 2) */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled (FEAT_CTL bit 18) */
		if (msr & (1 << 18))
			return;
	}

	/* All checks passed: hide C8/C9 so only C10 remains as deep state. */
	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}
  1300. static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
  1301. {
  1302. unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
  1303. unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
  1304. MWAIT_SUBSTATE_MASK;
  1305. /* Ignore the C-state if there are NO sub-states in CPUID for it. */
  1306. if (num_substates == 0)
  1307. return false;
  1308. if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
  1309. mark_tsc_unstable("TSC halts in idle states deeper than C2");
  1310. return true;
  1311. }
/**
 * intel_idle_init_cstates_icpu - Build the idle states list from the
 *	model-specific static table.
 * @drv: cpuidle driver to append the states to.
 *
 * Applies model-specific table fixups first, then copies every usable table
 * entry into @drv, honoring max_cstate, CPUID sub-state availability, the
 * "states_off" mask and (when _CST is in use) ACPI-driven default-off.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific fixups of the static C-state tables. */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* The table is terminated by an entry without callbacks. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		/* Skip states whose MWAIT sub-state CPUID does not advertise. */
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/*
		 * Disable the state by default if requested via "states_off",
		 * or if _CST is consulted and does not list it — unless the
		 * state is flagged ALWAYS_ENABLE.
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		drv->state_count++;
	}

	/* Bay Trail / Cherry Trail: turn off CC6/MC6 auto-demotion. */
	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}
  1357. /**
  1358. * intel_idle_cpuidle_driver_init - Create the list of available idle states.
  1359. * @drv: cpuidle driver structure to initialize.
  1360. */
  1361. static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
  1362. {
  1363. cpuidle_poll_state_init(drv);
  1364. if (disabled_states_mask & BIT(0))
  1365. drv->states[0].flags |= CPUIDLE_FLAG_OFF;
  1366. drv->state_count = 1;
  1367. if (icpu)
  1368. intel_idle_init_cstates_icpu(drv);
  1369. else
  1370. intel_idle_init_cstates_acpi(drv);
  1371. }
  1372. static void auto_demotion_disable(void)
  1373. {
  1374. unsigned long long msr_bits;
  1375. rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
  1376. msr_bits &= ~auto_demotion_disable_flags;
  1377. wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
  1378. }
  1379. static void c1e_promotion_disable(void)
  1380. {
  1381. unsigned long long msr_bits;
  1382. rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
  1383. msr_bits &= ~0x2;
  1384. wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
  1385. }
/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.
 *
 * Return: 0 on success, -EIO if device registration fails.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	/* Apply model-specific MSR tweaks captured in intel_idle_init(). */
	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}
/**
 * intel_idle_cpu_online - CPU hotplug "online" callback.
 * @cpu: CPU that just came online.
 *
 * Enables the broadcast tick when the local APIC timer may stop in deep
 * C-states, and registers the per-CPU cpuidle device if not done yet.
 */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!lapic_timer_always_reliable)
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}
  1423. /**
  1424. * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
  1425. */
  1426. static void __init intel_idle_cpuidle_devices_uninit(void)
  1427. {
  1428. int i;
  1429. for_each_online_cpu(i)
  1430. cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
  1431. }
/**
 * intel_idle_init - Driver entry point.
 *
 * Validates the CPU (known model or generic MWAIT + usable _CST), builds
 * the idle states list, registers the cpuidle driver and installs the CPU
 * hotplug "online" callback.  Uses goto-based unwind on failure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: fall back to any family-6 CPU with MWAIT. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* EDX of the MWAIT leaf enumerates the available sub-states. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		/* Known model: use the static table plus optional _CST data. */
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* Generic MWAIT path requires a usable _CST. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
		lapic_timer_always_reliable = true;

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 lapic_timer_always_reliable ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;
}
/* Built-in only: register at device initcall time, no module exit path. */
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that. Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");