/arch/i386/kernel/nmi.c

https://bitbucket.org/evzijst/gittest · C · 570 lines · 395 code · 79 blank · 96 comment · 77 complexity · 273347a8f8b58eb973dcf1653dafb745 MD5 · raw file

  1. /*
  2. * linux/arch/i386/nmi.c
  3. *
  4. * NMI watchdog support on APIC systems
  5. *
  6. * Started by Ingo Molnar <mingo@redhat.com>
  7. *
  8. * Fixes:
  9. * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
  10. * Mikael Pettersson : Power Management for local APIC NMI watchdog.
  11. * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
  12. * Pavel Machek and
  13. * Mikael Pettersson : PM converted to driver model. Disable/enable API.
  14. */
  15. #include <linux/config.h>
  16. #include <linux/mm.h>
  17. #include <linux/irq.h>
  18. #include <linux/delay.h>
  19. #include <linux/bootmem.h>
  20. #include <linux/smp_lock.h>
  21. #include <linux/interrupt.h>
  22. #include <linux/mc146818rtc.h>
  23. #include <linux/kernel_stat.h>
  24. #include <linux/module.h>
  25. #include <linux/nmi.h>
  26. #include <linux/sysdev.h>
  27. #include <linux/sysctl.h>
  28. #include <asm/smp.h>
  29. #include <asm/mtrr.h>
  30. #include <asm/mpspec.h>
  31. #include <asm/nmi.h>
  32. #include "mach_traps.h"
/* Currently selected watchdog mode; starts out as NMI_NONE. */
unsigned int nmi_watchdog = NMI_NONE;
/* Defined outside this file; consulted by proc_unknown_nmi_panic(). */
extern int unknown_nmi_panic;
/*
 * Watchdog NMI rate used when programming the performance counters and
 * when timing the boot self-test; starts at HZ and is lowered to 1 by
 * check_nmi_watchdog() for the local APIC watchdog.
 */
static unsigned int nmi_hz = HZ;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
/* CCCR value that nmi_watchdog_tick() rewrites on every P4 tick. */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
  39. /*
  40. * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
  41. * - it may be reserved by some other driver, or not
  42. * - when not reserved by some other driver, it may be used for
  43. * the NMI watchdog, or not
  44. *
  45. * This is maintained separately from nmi_active because the NMI
  46. * watchdog may also be driven from the I/O APIC timer.
  47. */
/* Taken around updates of lapic_nmi_owner in {reserve,release}_lapic_nmi(). */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
/* Bitmask built from the LAPIC_NMI_* flags below. */
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG (1<<0)	/* set by setup_apic_nmi_watchdog() */
#define LAPIC_NMI_RESERVED (1<<1)	/* set by reserve_lapic_nmi() */
/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 * 0: the lapic NMI watchdog has not been set up, and cannot
 * be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 *
 * The I/O APIC timer watchdog code in this file stores the same
 * +1 / -1 values here (see setup_nmi_watchdog() and
 * {enable,disable}_timer_nmi_watchdog()).
 */
int nmi_active;
/* AMD K7: bits OR-ed into the value written to MSR_K7_EVNTSEL0 */
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
#define K7_EVNTSEL_OS (1 << 17)
#define K7_EVNTSEL_USR (1 << 16)
/* Event number the K7 watchdog counter is programmed with */
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
/* Intel P6: bits OR-ed into the value written to MSR_P6_EVNTSEL0 */
#define P6_EVNTSEL0_ENABLE (1 << 22)
#define P6_EVNTSEL_INT (1 << 20)
#define P6_EVNTSEL_OS (1 << 17)
#define P6_EVNTSEL_USR (1 << 16)
/* Event number the P6 watchdog counter is programmed with */
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
/* Intel P4: MISC_ENABLE MSR and the two bits setup_p4_watchdog() tests */
#define MSR_P4_MISC_ENABLE 0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
/* Bases of the 18-register ranges that setup_p4_watchdog() clears */
#define MSR_P4_PERFCTR0 0x300
#define MSR_P4_CCCR0 0x360
/* Fields used to build the ESCR value (P4_NMI_CRU_ESCR0) */
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_ESCR_OS (1<<3)
#define P4_ESCR_USR (1<<2)
/*
 * Fields used to build the CCCR value (P4_NMI_IQ_CCCR0);
 * P4_CCCR_OVF_PMI1 is added when smp_num_siblings == 2.
 */
#define P4_CCCR_OVF_PMI0 (1<<26)
#define P4_CCCR_OVF_PMI1 (1<<27)
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
#define P4_CCCR_COMPLEMENT (1<<19)
#define P4_CCCR_COMPARE (1<<18)
#define P4_CCCR_REQUIRED (3<<16)
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
#define P4_CCCR_ENABLE (1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0 0x30C
#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0 \
	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
  95. int __init check_nmi_watchdog (void)
  96. {
  97. unsigned int prev_nmi_count[NR_CPUS];
  98. int cpu;
  99. printk(KERN_INFO "testing NMI watchdog ... ");
  100. for (cpu = 0; cpu < NR_CPUS; cpu++)
  101. prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
  102. local_irq_enable();
  103. mdelay((10*1000)/nmi_hz); // wait 10 ticks
  104. /* FIXME: Only boot CPU is online at this stage. Check CPUs
  105. as they come up. */
  106. for (cpu = 0; cpu < NR_CPUS; cpu++) {
  107. #ifdef CONFIG_SMP
  108. /* Check cpu_callin_map here because that is set
  109. after the timer is started. */
  110. if (!cpu_isset(cpu, cpu_callin_map))
  111. continue;
  112. #endif
  113. if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
  114. printk("CPU#%d: NMI appears to be stuck!\n", cpu);
  115. nmi_active = 0;
  116. lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
  117. return -1;
  118. }
  119. }
  120. printk("OK.\n");
  121. /* now that we know it works we can reduce NMI frequency to
  122. something more reasonable; makes a difference in some configs */
  123. if (nmi_watchdog == NMI_LOCAL_APIC)
  124. nmi_hz = 1;
  125. return 0;
  126. }
  127. static int __init setup_nmi_watchdog(char *str)
  128. {
  129. int nmi;
  130. get_option(&str, &nmi);
  131. if (nmi >= NMI_INVALID)
  132. return 0;
  133. if (nmi == NMI_NONE)
  134. nmi_watchdog = nmi;
  135. /*
  136. * If any other x86 CPU has a local APIC, then
  137. * please test the NMI stuff there and send me the
  138. * missing bits. Right now Intel P6/P4 and AMD K7 only.
  139. */
  140. if ((nmi == NMI_LOCAL_APIC) &&
  141. (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
  142. (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
  143. nmi_watchdog = nmi;
  144. if ((nmi == NMI_LOCAL_APIC) &&
  145. (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
  146. (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
  147. nmi_watchdog = nmi;
  148. /*
  149. * We can enable the IO-APIC watchdog
  150. * unconditionally.
  151. */
  152. if (nmi == NMI_IO_APIC) {
  153. nmi_active = 1;
  154. nmi_watchdog = nmi;
  155. }
  156. return 1;
  157. }
  158. __setup("nmi_watchdog=", setup_nmi_watchdog);
  159. static void disable_lapic_nmi_watchdog(void)
  160. {
  161. if (nmi_active <= 0)
  162. return;
  163. switch (boot_cpu_data.x86_vendor) {
  164. case X86_VENDOR_AMD:
  165. wrmsr(MSR_K7_EVNTSEL0, 0, 0);
  166. break;
  167. case X86_VENDOR_INTEL:
  168. switch (boot_cpu_data.x86) {
  169. case 6:
  170. if (boot_cpu_data.x86_model > 0xd)
  171. break;
  172. wrmsr(MSR_P6_EVNTSEL0, 0, 0);
  173. break;
  174. case 15:
  175. if (boot_cpu_data.x86_model > 0x3)
  176. break;
  177. wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
  178. wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
  179. break;
  180. }
  181. break;
  182. }
  183. nmi_active = -1;
  184. /* tell do_nmi() and others that we're not active any more */
  185. nmi_watchdog = 0;
  186. }
  187. static void enable_lapic_nmi_watchdog(void)
  188. {
  189. if (nmi_active < 0) {
  190. nmi_watchdog = NMI_LOCAL_APIC;
  191. setup_apic_nmi_watchdog();
  192. }
  193. }
  194. int reserve_lapic_nmi(void)
  195. {
  196. unsigned int old_owner;
  197. spin_lock(&lapic_nmi_owner_lock);
  198. old_owner = lapic_nmi_owner;
  199. lapic_nmi_owner |= LAPIC_NMI_RESERVED;
  200. spin_unlock(&lapic_nmi_owner_lock);
  201. if (old_owner & LAPIC_NMI_RESERVED)
  202. return -EBUSY;
  203. if (old_owner & LAPIC_NMI_WATCHDOG)
  204. disable_lapic_nmi_watchdog();
  205. return 0;
  206. }
  207. void release_lapic_nmi(void)
  208. {
  209. unsigned int new_owner;
  210. spin_lock(&lapic_nmi_owner_lock);
  211. new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
  212. lapic_nmi_owner = new_owner;
  213. spin_unlock(&lapic_nmi_owner_lock);
  214. if (new_owner & LAPIC_NMI_WATCHDOG)
  215. enable_lapic_nmi_watchdog();
  216. }
  217. void disable_timer_nmi_watchdog(void)
  218. {
  219. if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
  220. return;
  221. unset_nmi_callback();
  222. nmi_active = -1;
  223. nmi_watchdog = NMI_NONE;
  224. }
  225. void enable_timer_nmi_watchdog(void)
  226. {
  227. if (nmi_active < 0) {
  228. nmi_watchdog = NMI_IO_APIC;
  229. touch_nmi_watchdog();
  230. nmi_active = 1;
  231. }
  232. }
  233. #ifdef CONFIG_PM
  234. static int nmi_pm_active; /* nmi_active before suspend */
  235. static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
  236. {
  237. nmi_pm_active = nmi_active;
  238. disable_lapic_nmi_watchdog();
  239. return 0;
  240. }
  241. static int lapic_nmi_resume(struct sys_device *dev)
  242. {
  243. if (nmi_pm_active > 0)
  244. enable_lapic_nmi_watchdog();
  245. return 0;
  246. }
/* sysdev class "lapic_nmi"; wires up the suspend/resume hooks above. */
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume = lapic_nmi_resume,
	.suspend = lapic_nmi_suspend,
};
/* The single device (id 0) registered in that class. */
static struct sys_device device_lapic_nmi = {
	.id = 0,
	.cls = &nmi_sysclass,
};
  256. static int __init init_lapic_nmi_sysfs(void)
  257. {
  258. int error;
  259. if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
  260. return 0;
  261. error = sysdev_class_register(&nmi_sysclass);
  262. if (!error)
  263. error = sysdev_register(&device_lapic_nmi);
  264. return error;
  265. }
  266. /* must come after the local APIC's device_initcall() */
  267. late_initcall(init_lapic_nmi_sysfs);
  268. #endif /* CONFIG_PM */
  269. /*
  270. * Activate the NMI watchdog via the local APIC.
  271. * Original code written by Keith Owens.
  272. */
  273. static void clear_msr_range(unsigned int base, unsigned int n)
  274. {
  275. unsigned int i;
  276. for(i = 0; i < n; ++i)
  277. wrmsr(base+i, 0, 0);
  278. }
  279. static void setup_k7_watchdog(void)
  280. {
  281. unsigned int evntsel;
  282. nmi_perfctr_msr = MSR_K7_PERFCTR0;
  283. clear_msr_range(MSR_K7_EVNTSEL0, 4);
  284. clear_msr_range(MSR_K7_PERFCTR0, 4);
  285. evntsel = K7_EVNTSEL_INT
  286. | K7_EVNTSEL_OS
  287. | K7_EVNTSEL_USR
  288. | K7_NMI_EVENT;
  289. wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
  290. Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
  291. wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
  292. apic_write(APIC_LVTPC, APIC_DM_NMI);
  293. evntsel |= K7_EVNTSEL_ENABLE;
  294. wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
  295. }
  296. static void setup_p6_watchdog(void)
  297. {
  298. unsigned int evntsel;
  299. nmi_perfctr_msr = MSR_P6_PERFCTR0;
  300. clear_msr_range(MSR_P6_EVNTSEL0, 2);
  301. clear_msr_range(MSR_P6_PERFCTR0, 2);
  302. evntsel = P6_EVNTSEL_INT
  303. | P6_EVNTSEL_OS
  304. | P6_EVNTSEL_USR
  305. | P6_NMI_EVENT;
  306. wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
  307. Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
  308. wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
  309. apic_write(APIC_LVTPC, APIC_DM_NMI);
  310. evntsel |= P6_EVNTSEL0_ENABLE;
  311. wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
  312. }
  313. static int setup_p4_watchdog(void)
  314. {
  315. unsigned int misc_enable, dummy;
  316. rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
  317. if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
  318. return 0;
  319. nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
  320. nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
  321. #ifdef CONFIG_SMP
  322. if (smp_num_siblings == 2)
  323. nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
  324. #endif
  325. if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
  326. clear_msr_range(0x3F1, 2);
  327. /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
  328. docs doesn't fully define it, so leave it alone for now. */
  329. if (boot_cpu_data.x86_model >= 0x3) {
  330. /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
  331. clear_msr_range(0x3A0, 26);
  332. clear_msr_range(0x3BC, 3);
  333. } else {
  334. clear_msr_range(0x3A0, 31);
  335. }
  336. clear_msr_range(0x3C0, 6);
  337. clear_msr_range(0x3C8, 6);
  338. clear_msr_range(0x3E0, 2);
  339. clear_msr_range(MSR_P4_CCCR0, 18);
  340. clear_msr_range(MSR_P4_PERFCTR0, 18);
  341. wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
  342. wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
  343. Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
  344. wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
  345. apic_write(APIC_LVTPC, APIC_DM_NMI);
  346. wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
  347. return 1;
  348. }
  349. void setup_apic_nmi_watchdog (void)
  350. {
  351. switch (boot_cpu_data.x86_vendor) {
  352. case X86_VENDOR_AMD:
  353. if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
  354. return;
  355. setup_k7_watchdog();
  356. break;
  357. case X86_VENDOR_INTEL:
  358. switch (boot_cpu_data.x86) {
  359. case 6:
  360. if (boot_cpu_data.x86_model > 0xd)
  361. return;
  362. setup_p6_watchdog();
  363. break;
  364. case 15:
  365. if (boot_cpu_data.x86_model > 0x3)
  366. return;
  367. if (!setup_p4_watchdog())
  368. return;
  369. break;
  370. default:
  371. return;
  372. }
  373. break;
  374. default:
  375. return;
  376. }
  377. lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
  378. nmi_active = 1;
  379. }
  380. /*
  381. * the best way to detect whether a CPU has a 'hard lockup' problem
  382. * is to check its local APIC timer IRQ counts. If they are not
  383. * changing then that CPU has some problem.
  384. *
  385. * as these watchdog NMI IRQs are generated on every CPU, we only
  386. * have to check the current processor.
  387. *
  388. * since NMIs don't listen to _any_ locks, we have to be extremely
  389. * careful not to rely on unsafe variables. The printk might lock
  390. * up though, so we have to break up any console locks first ...
  391. * [when there will be more tty-related locks, break them up
  392. * here too!]
  393. */
/*
 * Per-CPU watchdog state, indexed by CPU number:
 * last_irq_sums - apic_timer_irqs value seen at the previous watchdog tick
 * alert_counter - consecutive ticks during which that value did not change
 */
static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
  397. void touch_nmi_watchdog (void)
  398. {
  399. int i;
  400. /*
  401. * Just reset the alert counters, (other CPUs might be
  402. * spinning on locks we hold):
  403. */
  404. for (i = 0; i < NR_CPUS; i++)
  405. alert_counter[i] = 0;
  406. }
  407. extern void die_nmi(struct pt_regs *, const char *msg);
  408. void nmi_watchdog_tick (struct pt_regs * regs)
  409. {
  410. /*
  411. * Since current_thread_info()-> is always on the stack, and we
  412. * always switch the stack NMI-atomically, it's safe to use
  413. * smp_processor_id().
  414. */
  415. int sum, cpu = smp_processor_id();
  416. sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
  417. if (last_irq_sums[cpu] == sum) {
  418. /*
  419. * Ayiee, looks like this CPU is stuck ...
  420. * wait a few IRQs (5 seconds) before doing the oops ...
  421. */
  422. alert_counter[cpu]++;
  423. if (alert_counter[cpu] == 5*nmi_hz)
  424. die_nmi(regs, "NMI Watchdog detected LOCKUP");
  425. } else {
  426. last_irq_sums[cpu] = sum;
  427. alert_counter[cpu] = 0;
  428. }
  429. if (nmi_perfctr_msr) {
  430. if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
  431. /*
  432. * P4 quirks:
  433. * - An overflown perfctr will assert its interrupt
  434. * until the OVF flag in its CCCR is cleared.
  435. * - LVTPC is masked on interrupt and must be
  436. * unmasked by the LVTPC handler.
  437. */
  438. wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
  439. apic_write(APIC_LVTPC, APIC_DM_NMI);
  440. }
  441. else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
  442. /* Only P6 based Pentium M need to re-unmask
  443. * the apic vector but it doesn't hurt
  444. * other P6 variant */
  445. apic_write(APIC_LVTPC, APIC_DM_NMI);
  446. }
  447. wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
  448. }
  449. }
  450. #ifdef CONFIG_SYSCTL
  451. static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
  452. {
  453. unsigned char reason = get_nmi_reason();
  454. char buf[64];
  455. if (!(reason & 0xc0)) {
  456. sprintf(buf, "NMI received for unknown reason %02x\n", reason);
  457. die_nmi(regs, buf);
  458. }
  459. return 0;
  460. }
  461. /*
  462. * proc handler for /proc/sys/kernel/unknown_nmi_panic
  463. */
  464. int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
  465. void __user *buffer, size_t *length, loff_t *ppos)
  466. {
  467. int old_state;
  468. old_state = unknown_nmi_panic;
  469. proc_dointvec(table, write, file, buffer, length, ppos);
  470. if (!!old_state == !!unknown_nmi_panic)
  471. return 0;
  472. if (unknown_nmi_panic) {
  473. if (reserve_lapic_nmi() < 0) {
  474. unknown_nmi_panic = 0;
  475. return -EBUSY;
  476. } else {
  477. set_nmi_callback(unknown_nmi_panic_callback);
  478. }
  479. } else {
  480. release_lapic_nmi();
  481. unset_nmi_callback();
  482. }
  483. return 0;
  484. }
  485. #endif
/* Watchdog state and the reserve/release and timer enable/disable entry points, exported via EXPORT_SYMBOL. */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);