
/arch/i386/kernel/nmi.c

https://bitbucket.org/evzijst/gittest
/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Pavel Machek and
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/nmi.h>

#include "mach_traps.h"

unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
static unsigned int nmi_hz = HZ;
static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);

/*
 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
 * - it may be reserved by some other driver, or not
 * - when not reserved by some other driver, it may be used for
 *   the NMI watchdog, or not
 *
 * This is maintained separately from nmi_active because the NMI
 * watchdog may also be driven from the I/O APIC timer.
 */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG	(1<<0)
#define LAPIC_NMI_RESERVED	(1<<1)

/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 *  0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 */
int nmi_active;

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

#define MSR_P4_MISC_ENABLE	0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
#define MSR_P4_PERFCTR0		0x300
#define MSR_P4_CCCR0		0x360
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0	0x30C
#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0	\
	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
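/*
 * How the "clock" works: with COMPARE and COMPLEMENT set and THRESHOLD
 * at its maximum (15), the threshold test becomes "per-cycle event
 * count <= 15", which holds on every cycle for any non-null event, so
 * the counter ticks once per cycle regardless of what the event counts
 * and raises a PMI (delivered as an NMI via LVTPC) on overflow.
 */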

int __init check_nmi_watchdog (void)
{
	unsigned int prev_nmi_count[NR_CPUS];
	int cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	/* FIXME: Only boot CPU is online at this stage.  Check CPUs
	   as they come up. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			nmi_active = 0;
			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}

static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if (nmi >= NMI_INVALID)
		return 0;
	if (nmi == NMI_NONE)
		nmi_watchdog = nmi;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	/*
	 * We can enable the IO-APIC watchdog
	 * unconditionally.
	 */
	if (nmi == NMI_IO_APIC) {
		nmi_active = 1;
		nmi_watchdog = nmi;
	}
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
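/*
 * Usage: pass "nmi_watchdog=N" on the kernel command line. Assuming
 * the usual <asm/nmi.h> definitions of this era (NMI_IO_APIC == 1,
 * NMI_LOCAL_APIC == 2), "nmi_watchdog=1" selects the IO-APIC timer
 * driven watchdog and "nmi_watchdog=2" the local APIC one.
 */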

static void disable_lapic_nmi_watchdog(void)
{
	if (nmi_active <= 0)
		return;
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				break;

			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x3)
				break;

			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
			break;
		}
		break;
	}
	nmi_active = -1;
	/* tell do_nmi() and others that we're not active any more */
	nmi_watchdog = 0;
}

static void enable_lapic_nmi_watchdog(void)
{
	if (nmi_active < 0) {
		nmi_watchdog = NMI_LOCAL_APIC;
		setup_apic_nmi_watchdog();
	}
}

int reserve_lapic_nmi(void)
{
	unsigned int old_owner;

	spin_lock(&lapic_nmi_owner_lock);
	old_owner = lapic_nmi_owner;
	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
	spin_unlock(&lapic_nmi_owner_lock);
	if (old_owner & LAPIC_NMI_RESERVED)
		return -EBUSY;
	if (old_owner & LAPIC_NMI_WATCHDOG)
		disable_lapic_nmi_watchdog();
	return 0;
}

void release_lapic_nmi(void)
{
	unsigned int new_owner;

	spin_lock(&lapic_nmi_owner_lock);
	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
	lapic_nmi_owner = new_owner;
	spin_unlock(&lapic_nmi_owner_lock);
	if (new_owner & LAPIC_NMI_WATCHDOG)
		enable_lapic_nmi_watchdog();
}
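/*
 * Reserve/release protocol for other users of the lapic NMI hardware
 * (e.g. a profiler): reserve_lapic_nmi() takes exclusive ownership,
 * temporarily disabling the watchdog if it was running, and fails with
 * -EBUSY if someone else already holds the reservation;
 * release_lapic_nmi() hands the hardware back and re-enables the
 * watchdog if it previously owned it.
 */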

void disable_timer_nmi_watchdog(void)
{
	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
		return;

	unset_nmi_callback();
	nmi_active = -1;
	nmi_watchdog = NMI_NONE;
}

void enable_timer_nmi_watchdog(void)
{
	if (nmi_active < 0) {
		nmi_watchdog = NMI_IO_APIC;
		touch_nmi_watchdog();
		nmi_active = 1;
	}
}

#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
{
	nmi_pm_active = nmi_active;
	disable_lapic_nmi_watchdog();
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	if (nmi_pm_active > 0)
		enable_lapic_nmi_watchdog();
	return 0;
}


static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void clear_msr_range(unsigned int base, unsigned int n)
{
	unsigned int i;

	for(i = 0; i < n; ++i)
		wrmsr(base+i, 0, 0);
}

static void setup_k7_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_K7_PERFCTR0;

	clear_msr_range(MSR_K7_EVNTSEL0, 4);
	clear_msr_range(MSR_K7_PERFCTR0, 4);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
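	/*
	 * Program the counter with a negative count so that it
	 * overflows (raising the PMI, delivered as an NMI via LVTPC)
	 * after cpu_khz/nmi_hz*1000 cycles, i.e. roughly nmi_hz times
	 * per second of unhalted execution.
	 */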
	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}

static void setup_p6_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_P6_PERFCTR0;

	clear_msr_range(MSR_P6_EVNTSEL0, 2);
	clear_msr_range(MSR_P6_PERFCTR0, 2);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
	Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}

static int setup_p4_watchdog(void)
{
	unsigned int misc_enable, dummy;

	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
#ifdef CONFIG_SMP
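	/*
	 * With hyperthreading the two logical CPUs share this counter,
	 * so also raise PMI1 on overflow; the sibling logical CPU then
	 * receives the watchdog NMI as well.
	 */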
	if (smp_num_siblings == 2)
		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
#endif

	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
		clear_msr_range(0x3F1, 2);
	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
	   docs don't fully define it, so leave it alone for now. */
	if (boot_cpu_data.x86_model >= 0x3) {
		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
		clear_msr_range(0x3A0, 26);
		clear_msr_range(0x3BC, 3);
	} else {
		clear_msr_range(0x3A0, 31);
	}
	clear_msr_range(0x3C0, 6);
	clear_msr_range(0x3C8, 6);
	clear_msr_range(0x3E0, 2);
	clear_msr_range(MSR_P4_CCCR0, 18);
	clear_msr_range(MSR_P4_PERFCTR0, 18);

	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
	return 1;
}

void setup_apic_nmi_watchdog (void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
			return;
		setup_k7_watchdog();
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				return;

			setup_p6_watchdog();
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x3)
				return;

			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
		break;
	default:
		return;
	}
	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
	nmi_active = 1;
}

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters, (other CPUs might be
	 * spinning on locks we hold):
	 */
	for (i = 0; i < NR_CPUS; i++)
		alert_counter[i] = 0;
}
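/*
 * Code that legitimately keeps a CPU busy for a long time with
 * interrupts off (e.g. a slow flash write or an in-kernel debugger
 * loop) should call touch_nmi_watchdog() periodically to avoid a
 * false lockup report.
 */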

extern void die_nmi(struct pt_regs *, const char *msg);

void nmi_watchdog_tick (struct pt_regs * regs)
{

	/*
	 * Since current_thread_info() always lives on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	int sum, cpu = smp_processor_id();

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	if (last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			die_nmi(regs, "NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	if (nmi_perfctr_msr) {
		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
			/*
			 * P4 quirks:
			 * - An overflown perfctr will assert its interrupt
			 *   until the OVF flag in its CCCR is cleared.
			 * - LVTPC is masked on interrupt and must be
			 *   unmasked by the LVTPC handler.
			 */
			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
			/* Only P6-based Pentium M needs to re-unmask
			 * the apic vector, but it doesn't hurt
			 * other P6 variants */
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
	}
}

#ifdef CONFIG_SYSCTL

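/*
 * Panic on NMIs of unknown origin: get_nmi_reason() reads the system
 * control port (0x61), where bits 7 and 6 flag the known hardware
 * sources (memory parity/SERR and I/O channel check, hence the 0xc0
 * mask below). If neither is set, the NMI came from an unknown source
 * and we oops.
 */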
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	if (!(reason & 0xc0)) {
		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
		die_nmi(regs, buf);
	}
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/unknown_nmi_panic
 */
int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	old_state = unknown_nmi_panic;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!unknown_nmi_panic)
		return 0;

	if (unknown_nmi_panic) {
		if (reserve_lapic_nmi() < 0) {
			unknown_nmi_panic = 0;
			return -EBUSY;
		} else {
			set_nmi_callback(unknown_nmi_panic_callback);
		}
	} else {
		release_lapic_nmi();
		unset_nmi_callback();
	}
	return 0;
}
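/*
 * Example (userspace):
 *   echo 1 > /proc/sys/kernel/unknown_nmi_panic
 * Enabling this reserves the lapic NMI, which also disables the lapic
 * NMI watchdog for as long as the option stays set; -EBUSY is returned
 * if another user already holds the reservation.
 */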

#endif

EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);