
/arch/powerpc/kernel/traps.c

http://github.com/mirrors/linux


   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
   4 *  Copyright 2007-2010 Freescale Semiconductor, Inc.
   5 *
   6 *  Modified by Cort Dougan (cort@cs.nmt.edu)
   7 *  and Paul Mackerras (paulus@samba.org)
   8 */
   9
  10/*
  11 * This file handles the architecture-dependent parts of hardware exceptions
  12 */
  13
  14#include <linux/errno.h>
  15#include <linux/sched.h>
  16#include <linux/sched/debug.h>
  17#include <linux/kernel.h>
  18#include <linux/mm.h>
  19#include <linux/pkeys.h>
  20#include <linux/stddef.h>
  21#include <linux/unistd.h>
  22#include <linux/ptrace.h>
  23#include <linux/user.h>
  24#include <linux/interrupt.h>
  25#include <linux/init.h>
  26#include <linux/extable.h>
  27#include <linux/module.h>	/* print_modules */
  28#include <linux/prctl.h>
  29#include <linux/delay.h>
  30#include <linux/kprobes.h>
  31#include <linux/kexec.h>
  32#include <linux/backlight.h>
  33#include <linux/bug.h>
  34#include <linux/kdebug.h>
  35#include <linux/ratelimit.h>
  36#include <linux/context_tracking.h>
  37#include <linux/smp.h>
  38#include <linux/console.h>
  39#include <linux/kmsg_dump.h>
  40
  41#include <asm/emulated_ops.h>
  42#include <asm/pgtable.h>
  43#include <linux/uaccess.h>
  44#include <asm/debugfs.h>
  45#include <asm/io.h>
  46#include <asm/machdep.h>
  47#include <asm/rtas.h>
  48#include <asm/pmc.h>
  49#include <asm/reg.h>
  50#ifdef CONFIG_PMAC_BACKLIGHT
  51#include <asm/backlight.h>
  52#endif
  53#ifdef CONFIG_PPC64
  54#include <asm/firmware.h>
  55#include <asm/processor.h>
  56#include <asm/tm.h>
  57#endif
  58#include <asm/kexec.h>
  59#include <asm/ppc-opcode.h>
  60#include <asm/rio.h>
  61#include <asm/fadump.h>
  62#include <asm/switch_to.h>
  63#include <asm/tm.h>
  64#include <asm/debug.h>
  65#include <asm/asm-prototypes.h>
  66#include <asm/hmi.h>
  67#include <sysdev/fsl_pci.h>
  68#include <asm/kprobes.h>
  69#include <asm/stacktrace.h>
  70#include <asm/nmi.h>
  71
  72#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
  73int (*__debugger)(struct pt_regs *regs) __read_mostly;
  74int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
  75int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
  76int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
  77int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
  78int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
  79int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;
  80
  81EXPORT_SYMBOL(__debugger);
  82EXPORT_SYMBOL(__debugger_ipi);
  83EXPORT_SYMBOL(__debugger_bpt);
  84EXPORT_SYMBOL(__debugger_sstep);
  85EXPORT_SYMBOL(__debugger_iabr_match);
  86EXPORT_SYMBOL(__debugger_break_match);
  87EXPORT_SYMBOL(__debugger_fault_handler);
  88#endif
  89
  90/* Transactional Memory trap debug */
  91#ifdef TM_DEBUG_SW
  92#define TM_DEBUG(x...) printk(KERN_INFO x)
  93#else
  94#define TM_DEBUG(x...) do { } while(0)
  95#endif
  96
  97static const char *signame(int signr)
  98{
  99	switch (signr) {
 100	case SIGBUS:	return "bus error";
 101	case SIGFPE:	return "floating point exception";
 102	case SIGILL:	return "illegal instruction";
 103	case SIGSEGV:	return "segfault";
 104	case SIGTRAP:	return "unhandled trap";
 105	}
 106
 107	return "unknown signal";
 108}
 109
 110/*
 111 * Trap & Exception support
 112 */
 113
 114#ifdef CONFIG_PMAC_BACKLIGHT
 115static void pmac_backlight_unblank(void)
 116{
 117	mutex_lock(&pmac_backlight_mutex);
 118	if (pmac_backlight) {
 119		struct backlight_properties *props;
 120
 121		props = &pmac_backlight->props;
 122		props->brightness = props->max_brightness;
 123		props->power = FB_BLANK_UNBLANK;
 124		backlight_update_status(pmac_backlight);
 125	}
 126	mutex_unlock(&pmac_backlight_mutex);
 127}
 128#else
 129static inline void pmac_backlight_unblank(void) { }
 130#endif
 131
 132/*
 133 * If oops/die is expected to crash the machine, return true here.
 134 *
 135 * This should not be expected to be 100% accurate, there may be
 136 * notifiers registered or other unexpected conditions that may bring
 137 * down the kernel. Or if the current process in the kernel is holding
 138 * locks or has other critical state, the kernel may become effectively
 139 * unusable anyway.
 140 */
 141bool die_will_crash(void)
 142{
 143	if (should_fadump_crash())
 144		return true;
 145	if (kexec_should_crash(current))
 146		return true;
 147	if (in_interrupt() || panic_on_oops ||
 148			!current->pid || is_global_init(current))
 149		return true;
 150
 151	return false;
 152}
 153
 154static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 155static int die_owner = -1;
 156static unsigned int die_nest_count;
 157static int die_counter;
 158
 159extern void panic_flush_kmsg_start(void)
 160{
 161	/*
  162	 * These are mostly taken from kernel/panic.c, but try to do
 163	 * relatively minimal work. Don't use delay functions (TB may
 164	 * be broken), don't crash dump (need to set a firmware log),
 165	 * don't run notifiers. We do want to get some information to
 166	 * Linux console.
 167	 */
 168	console_verbose();
 169	bust_spinlocks(1);
 170}
 171
 172extern void panic_flush_kmsg_end(void)
 173{
 174	printk_safe_flush_on_panic();
 175	kmsg_dump(KMSG_DUMP_PANIC);
 176	bust_spinlocks(0);
 177	debug_locks_off();
 178	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
 179}
 180
 181static unsigned long oops_begin(struct pt_regs *regs)
 182{
 183	int cpu;
 184	unsigned long flags;
 185
 186	oops_enter();
 187
 188	/* racy, but better than risking deadlock. */
 189	raw_local_irq_save(flags);
 190	cpu = smp_processor_id();
 191	if (!arch_spin_trylock(&die_lock)) {
 192		if (cpu == die_owner)
 193			/* nested oops. should stop eventually */;
 194		else
 195			arch_spin_lock(&die_lock);
 196	}
 197	die_nest_count++;
 198	die_owner = cpu;
 199	console_verbose();
 200	bust_spinlocks(1);
 201	if (machine_is(powermac))
 202		pmac_backlight_unblank();
 203	return flags;
 204}
 205NOKPROBE_SYMBOL(oops_begin);
 206
 207static void oops_end(unsigned long flags, struct pt_regs *regs,
 208			       int signr)
 209{
 210	bust_spinlocks(0);
 211	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 212	die_nest_count--;
 213	oops_exit();
 214	printk("\n");
 215	if (!die_nest_count) {
 216		/* Nest count reaches zero, release the lock. */
 217		die_owner = -1;
 218		arch_spin_unlock(&die_lock);
 219	}
 220	raw_local_irq_restore(flags);
 221
 222	/*
  223	 * system_reset_exception handles debugger, crash dump, panic, for 0x100
 224	 */
 225	if (TRAP(regs) == 0x100)
 226		return;
 227
 228	crash_fadump(regs, "die oops");
 229
 230	if (kexec_should_crash(current))
 231		crash_kexec(regs);
 232
 233	if (!signr)
 234		return;
 235
 236	/*
 237	 * While our oops output is serialised by a spinlock, output
 238	 * from panic() called below can race and corrupt it. If we
 239	 * know we are going to panic, delay for 1 second so we have a
 240	 * chance to get clean backtraces from all CPUs that are oopsing.
 241	 */
 242	if (in_interrupt() || panic_on_oops || !current->pid ||
 243	    is_global_init(current)) {
 244		mdelay(MSEC_PER_SEC);
 245	}
 246
 247	if (panic_on_oops)
 248		panic("Fatal exception");
 249	do_exit(signr);
 250}
 251NOKPROBE_SYMBOL(oops_end);
 252
 253static char *get_mmu_str(void)
 254{
 255	if (early_radix_enabled())
 256		return " MMU=Radix";
 257	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
 258		return " MMU=Hash";
 259	return "";
 260}
 261
 262static int __die(const char *str, struct pt_regs *regs, long err)
 263{
 264	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 265
 266	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
 267	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
 268	       PAGE_SIZE / 1024, get_mmu_str(),
 269	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
 270	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
 271	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
 272	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
 273	       IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
 274	       ppc_md.name ? ppc_md.name : "");
 275
 276	if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
 277		return 1;
 278
 279	print_modules();
 280	show_regs(regs);
 281
 282	return 0;
 283}
 284NOKPROBE_SYMBOL(__die);
 285
 286void die(const char *str, struct pt_regs *regs, long err)
 287{
 288	unsigned long flags;
 289
 290	/*
  291	 * system_reset_exception handles debugger, crash dump, panic, for 0x100
 292	 */
 293	if (TRAP(regs) != 0x100) {
 294		if (debugger(regs))
 295			return;
 296	}
 297
 298	flags = oops_begin(regs);
 299	if (__die(str, regs, err))
 300		err = 0;
 301	oops_end(flags, regs, err);
 302}
 303NOKPROBE_SYMBOL(die);
 304
 305void user_single_step_report(struct pt_regs *regs)
 306{
 307	force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
 308}
 309
 310static void show_signal_msg(int signr, struct pt_regs *regs, int code,
 311			    unsigned long addr)
 312{
 313	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
 314				      DEFAULT_RATELIMIT_BURST);
 315
 316	if (!show_unhandled_signals)
 317		return;
 318
 319	if (!unhandled_signal(current, signr))
 320		return;
 321
 322	if (!__ratelimit(&rs))
 323		return;
 324
 325	pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
 326		current->comm, current->pid, signame(signr), signr,
 327		addr, regs->nip, regs->link, code);
 328
 329	print_vma_addr(KERN_CONT " in ", regs->nip);
 330
 331	pr_cont("\n");
 332
 333	show_user_instructions(regs);
 334}
 335
 336static bool exception_common(int signr, struct pt_regs *regs, int code,
 337			      unsigned long addr)
 338{
 339	if (!user_mode(regs)) {
 340		die("Exception in kernel mode", regs, signr);
 341		return false;
 342	}
 343
 344	show_signal_msg(signr, regs, code, addr);
 345
 346	if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
 347		local_irq_enable();
 348
 349	current->thread.trap_nr = code;
 350
 351	/*
 352	 * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
 353	 * to capture the content, if the task gets killed.
 354	 */
 355	thread_pkey_regs_save(&current->thread);
 356
 357	return true;
 358}
 359
 360void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
 361{
 362	if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
 363		return;
 364
 365	force_sig_pkuerr((void __user *) addr, key);
 366}
 367
 368void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 369{
 370	if (!exception_common(signr, regs, code, addr))
 371		return;
 372
 373	force_sig_fault(signr, code, (void __user *)addr);
 374}
 375
 376/*
 377 * The interrupt architecture has a quirk in that the HV interrupts excluding
 378 * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
 379 * that an interrupt handler must do is save off a GPR into a scratch register,
 380 * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
 381 * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
 382 * that it is non-reentrant, which leads to random data corruption.
 383 *
 384 * The solution is for NMI interrupts in HV mode to check if they originated
 385 * from these critical HV interrupt regions. If so, then mark them not
 386 * recoverable.
 387 *
 388 * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
 389 * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
  390 * guests should always have MSR[RI]=0 when their scratch SPRG is in use, so
 391 * that would work. However any other guest OS that may have the SPRG live
 392 * and MSR[RI]=1 could encounter silent corruption.
 393 *
 394 * Builds that do not support KVM could take this second option to increase
 395 * the recoverability of NMIs.
 396 */
 397void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
 398{
 399#ifdef CONFIG_PPC_POWERNV
 400	unsigned long kbase = (unsigned long)_stext;
 401	unsigned long nip = regs->nip;
 402
 403	if (!(regs->msr & MSR_RI))
 404		return;
 405	if (!(regs->msr & MSR_HV))
 406		return;
 407	if (regs->msr & MSR_PR)
 408		return;
 409
 410	/*
 411	 * Now test if the interrupt has hit a range that may be using
 412	 * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
 413	 * problem ranges all run un-relocated. Test real and virt modes
  414	 * at the same time by dropping the high bit of the nip (virt mode
 415	 * entry points still have the +0x4000 offset).
 416	 */
 417	nip &= ~0xc000000000000000ULL;
 418	if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
 419		goto nonrecoverable;
 420	if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
 421		goto nonrecoverable;
 422	if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
 423		goto nonrecoverable;
 424	if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
 425		goto nonrecoverable;
 426
 427	/* Trampoline code runs un-relocated so subtract kbase. */
 428	if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
 429			nip < (unsigned long)(end_real_trampolines - kbase))
 430		goto nonrecoverable;
 431	if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
 432			nip < (unsigned long)(end_virt_trampolines - kbase))
 433		goto nonrecoverable;
 434	return;
 435
 436nonrecoverable:
 437	regs->msr &= ~MSR_RI;
 438#endif
 439}
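/*
 * Illustrative note (editorial, not in the kernel source): clearing the
 * top two address bits folds the 0xc000000000000000-based virt-mode entry
 * addresses onto small offsets, so each problem range only needs to be
 * listed twice. For example, an interrupt taken at 0xc000000000004500
 * masks down to 0x4500 and matches the second half of the first test,
 * while the same vector taken in real mode at 0x500 matches the first
 * half.
 */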
 440
 441void system_reset_exception(struct pt_regs *regs)
 442{
 443	unsigned long hsrr0, hsrr1;
 444	bool nested = in_nmi();
 445	bool saved_hsrrs = false;
 446
 447	/*
 448	 * Avoid crashes in case of nested NMI exceptions. Recoverability
 449	 * is determined by RI and in_nmi
 450	 */
 451	if (!nested)
 452		nmi_enter();
 453
 454	/*
 455	 * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
 456	 * The system reset interrupt itself may clobber HSRRs (e.g., to call
 457	 * OPAL), so save them here and restore them before returning.
 458	 *
 459	 * Machine checks don't need to save HSRRs, as the real mode handler
 460	 * is careful to avoid them, and the regular handler is not delivered
 461	 * as an NMI.
 462	 */
 463	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 464		hsrr0 = mfspr(SPRN_HSRR0);
 465		hsrr1 = mfspr(SPRN_HSRR1);
 466		saved_hsrrs = true;
 467	}
 468
 469	hv_nmi_check_nonrecoverable(regs);
 470
 471	__this_cpu_inc(irq_stat.sreset_irqs);
 472
 473	/* See if any machine dependent calls */
 474	if (ppc_md.system_reset_exception) {
 475		if (ppc_md.system_reset_exception(regs))
 476			goto out;
 477	}
 478
 479	if (debugger(regs))
 480		goto out;
 481
 482	kmsg_dump(KMSG_DUMP_OOPS);
 483	/*
 484	 * A system reset is a request to dump, so we always send
 485	 * it through the crashdump code (if fadump or kdump are
 486	 * registered).
 487	 */
 488	crash_fadump(regs, "System Reset");
 489
 490	crash_kexec(regs);
 491
 492	/*
 493	 * We aren't the primary crash CPU. We need to send it
 494	 * to a holding pattern to avoid it ending up in the panic
 495	 * code.
 496	 */
 497	crash_kexec_secondary(regs);
 498
 499	/*
 500	 * No debugger or crash dump registered, print logs then
 501	 * panic.
 502	 */
 503	die("System Reset", regs, SIGABRT);
 504
 505	mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
 506	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 507	nmi_panic(regs, "System Reset");
 508
 509out:
 510#ifdef CONFIG_PPC_BOOK3S_64
 511	BUG_ON(get_paca()->in_nmi == 0);
 512	if (get_paca()->in_nmi > 1)
 513		nmi_panic(regs, "Unrecoverable nested System Reset");
 514#endif
 515	/* Must die if the interrupt is not recoverable */
 516	if (!(regs->msr & MSR_RI))
 517		nmi_panic(regs, "Unrecoverable System Reset");
 518
 519	if (saved_hsrrs) {
 520		mtspr(SPRN_HSRR0, hsrr0);
 521		mtspr(SPRN_HSRR1, hsrr1);
 522	}
 523
 524	if (!nested)
 525		nmi_exit();
 526
 527	/* What should we do here? We could issue a shutdown or hard reset. */
 528}
 529
 530/*
 531 * I/O accesses can cause machine checks on powermacs.
 532 * Check if the NIP corresponds to the address of a sync
 533 * instruction for which there is an entry in the exception
 534 * table.
 535 * Note that the 601 only takes a machine check on TEA
 536 * (transfer error ack) signal assertion, and does not
 537 * set any of the top 16 bits of SRR1.
 538 *  -- paulus.
 539 */
 540static inline int check_io_access(struct pt_regs *regs)
 541{
 542#ifdef CONFIG_PPC32
 543	unsigned long msr = regs->msr;
 544	const struct exception_table_entry *entry;
 545	unsigned int *nip = (unsigned int *)regs->nip;
 546
 547	if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
 548	    && (entry = search_exception_tables(regs->nip)) != NULL) {
 549		/*
 550		 * Check that it's a sync instruction, or somewhere
 551		 * in the twi; isync; nop sequence that inb/inw/inl uses.
 552		 * As the address is in the exception table
 553		 * we should be able to read the instr there.
 554		 * For the debug message, we look at the preceding
 555		 * load or store.
 556		 */
 557		if (*nip == PPC_INST_NOP)
 558			nip -= 2;
 559		else if (*nip == PPC_INST_ISYNC)
 560			--nip;
 561		if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
 562			unsigned int rb;
 563
 564			--nip;
 565			rb = (*nip >> 11) & 0x1f;
 566			printk(KERN_DEBUG "%s bad port %lx at %p\n",
 567			       (*nip & 0x100)? "OUT to": "IN from",
 568			       regs->gpr[rb] - _IO_BASE, nip);
 569			regs->msr |= MSR_RI;
 570			regs->nip = extable_fixup(entry);
 571			return 1;
 572		}
 573	}
 574#endif /* CONFIG_PPC32 */
 575	return 0;
 576}
 577
 578#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 579/* On 4xx, the reason for the machine check or program exception
 580   is in the ESR. */
 581#define get_reason(regs)	((regs)->dsisr)
 582#define REASON_FP		ESR_FP
 583#define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
 584#define REASON_PRIVILEGED	ESR_PPR
 585#define REASON_TRAP		ESR_PTR
 586
 587/* single-step stuff */
 588#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
 589#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
 590#define clear_br_trace(regs)	do {} while(0)
 591#else
 592/* On non-4xx, the reason for the machine check or program
 593   exception is in the MSR. */
 594#define get_reason(regs)	((regs)->msr)
 595#define REASON_TM		SRR1_PROGTM
 596#define REASON_FP		SRR1_PROGFPE
 597#define REASON_ILLEGAL		SRR1_PROGILL
 598#define REASON_PRIVILEGED	SRR1_PROGPRIV
 599#define REASON_TRAP		SRR1_PROGTRAP
 600
 601#define single_stepping(regs)	((regs)->msr & MSR_SE)
 602#define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
 603#define clear_br_trace(regs)	((regs)->msr &= ~MSR_BE)
 604#endif
 605
 606#if defined(CONFIG_E500)
 607int machine_check_e500mc(struct pt_regs *regs)
 608{
 609	unsigned long mcsr = mfspr(SPRN_MCSR);
 610	unsigned long pvr = mfspr(SPRN_PVR);
 611	unsigned long reason = mcsr;
 612	int recoverable = 1;
 613
 614	if (reason & MCSR_LD) {
 615		recoverable = fsl_rio_mcheck_exception(regs);
 616		if (recoverable == 1)
 617			goto silent_out;
 618	}
 619
 620	printk("Machine check in kernel mode.\n");
 621	printk("Caused by (from MCSR=%lx): ", reason);
 622
 623	if (reason & MCSR_MCP)
 624		pr_cont("Machine Check Signal\n");
 625
 626	if (reason & MCSR_ICPERR) {
 627		pr_cont("Instruction Cache Parity Error\n");
 628
 629		/*
 630		 * This is recoverable by invalidating the i-cache.
 631		 */
 632		mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
 633		while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
 634			;
 635
 636		/*
 637		 * This will generally be accompanied by an instruction
 638		 * fetch error report -- only treat MCSR_IF as fatal
 639		 * if it wasn't due to an L1 parity error.
 640		 */
 641		reason &= ~MCSR_IF;
 642	}
 643
 644	if (reason & MCSR_DCPERR_MC) {
 645		pr_cont("Data Cache Parity Error\n");
 646
 647		/*
 648		 * In write shadow mode we auto-recover from the error, but it
 649		 * may still get logged and cause a machine check.  We should
 650		 * only treat the non-write shadow case as non-recoverable.
 651		 */
 652		/* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
 653		 * is not implemented but L1 data cache always runs in write
 654		 * shadow mode. Hence on data cache parity errors HW will
 655		 * automatically invalidate the L1 Data Cache.
 656		 */
 657		if (PVR_VER(pvr) != PVR_VER_E6500) {
 658			if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
 659				recoverable = 0;
 660		}
 661	}
 662
 663	if (reason & MCSR_L2MMU_MHIT) {
 664		pr_cont("Hit on multiple TLB entries\n");
 665		recoverable = 0;
 666	}
 667
 668	if (reason & MCSR_NMI)
 669		pr_cont("Non-maskable interrupt\n");
 670
 671	if (reason & MCSR_IF) {
 672		pr_cont("Instruction Fetch Error Report\n");
 673		recoverable = 0;
 674	}
 675
 676	if (reason & MCSR_LD) {
 677		pr_cont("Load Error Report\n");
 678		recoverable = 0;
 679	}
 680
 681	if (reason & MCSR_ST) {
 682		pr_cont("Store Error Report\n");
 683		recoverable = 0;
 684	}
 685
 686	if (reason & MCSR_LDG) {
 687		pr_cont("Guarded Load Error Report\n");
 688		recoverable = 0;
 689	}
 690
 691	if (reason & MCSR_TLBSYNC)
 692		pr_cont("Simultaneous tlbsync operations\n");
 693
 694	if (reason & MCSR_BSL2_ERR) {
 695		pr_cont("Level 2 Cache Error\n");
 696		recoverable = 0;
 697	}
 698
 699	if (reason & MCSR_MAV) {
 700		u64 addr;
 701
 702		addr = mfspr(SPRN_MCAR);
 703		addr |= (u64)mfspr(SPRN_MCARU) << 32;
 704
 705		pr_cont("Machine Check %s Address: %#llx\n",
 706		       reason & MCSR_MEA ? "Effective" : "Physical", addr);
 707	}
 708
 709silent_out:
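	/*
	 * Editorial note: MCSR bits on these cores are write-one-to-clear,
	 * so writing back the value read above acknowledges exactly the
	 * events just handled; a non-zero readback in the return below
	 * means another machine check source asserted in the meantime.
	 */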
 710	mtspr(SPRN_MCSR, mcsr);
 711	return mfspr(SPRN_MCSR) == 0 && recoverable;
 712}
 713
 714int machine_check_e500(struct pt_regs *regs)
 715{
 716	unsigned long reason = mfspr(SPRN_MCSR);
 717
 718	if (reason & MCSR_BUS_RBERR) {
 719		if (fsl_rio_mcheck_exception(regs))
 720			return 1;
 721		if (fsl_pci_mcheck_exception(regs))
 722			return 1;
 723	}
 724
 725	printk("Machine check in kernel mode.\n");
 726	printk("Caused by (from MCSR=%lx): ", reason);
 727
 728	if (reason & MCSR_MCP)
 729		pr_cont("Machine Check Signal\n");
 730	if (reason & MCSR_ICPERR)
 731		pr_cont("Instruction Cache Parity Error\n");
 732	if (reason & MCSR_DCP_PERR)
 733		pr_cont("Data Cache Push Parity Error\n");
 734	if (reason & MCSR_DCPERR)
 735		pr_cont("Data Cache Parity Error\n");
 736	if (reason & MCSR_BUS_IAERR)
 737		pr_cont("Bus - Instruction Address Error\n");
 738	if (reason & MCSR_BUS_RAERR)
 739		pr_cont("Bus - Read Address Error\n");
 740	if (reason & MCSR_BUS_WAERR)
 741		pr_cont("Bus - Write Address Error\n");
 742	if (reason & MCSR_BUS_IBERR)
 743		pr_cont("Bus - Instruction Data Error\n");
 744	if (reason & MCSR_BUS_RBERR)
 745		pr_cont("Bus - Read Data Bus Error\n");
 746	if (reason & MCSR_BUS_WBERR)
 747		pr_cont("Bus - Write Data Bus Error\n");
 748	if (reason & MCSR_BUS_IPERR)
 749		pr_cont("Bus - Instruction Parity Error\n");
 750	if (reason & MCSR_BUS_RPERR)
 751		pr_cont("Bus - Read Parity Error\n");
 752
 753	return 0;
 754}
 755
 756int machine_check_generic(struct pt_regs *regs)
 757{
 758	return 0;
 759}
 760#elif defined(CONFIG_E200)
 761int machine_check_e200(struct pt_regs *regs)
 762{
 763	unsigned long reason = mfspr(SPRN_MCSR);
 764
 765	printk("Machine check in kernel mode.\n");
 766	printk("Caused by (from MCSR=%lx): ", reason);
 767
 768	if (reason & MCSR_MCP)
 769		pr_cont("Machine Check Signal\n");
 770	if (reason & MCSR_CP_PERR)
 771		pr_cont("Cache Push Parity Error\n");
 772	if (reason & MCSR_CPERR)
 773		pr_cont("Cache Parity Error\n");
 774	if (reason & MCSR_EXCP_ERR)
 775		pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
 776	if (reason & MCSR_BUS_IRERR)
 777		pr_cont("Bus - Read Bus Error on instruction fetch\n");
 778	if (reason & MCSR_BUS_DRERR)
 779		pr_cont("Bus - Read Bus Error on data load\n");
 780	if (reason & MCSR_BUS_WRERR)
 781		pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
 782
 783	return 0;
 784}
 785#elif defined(CONFIG_PPC32)
 786int machine_check_generic(struct pt_regs *regs)
 787{
 788	unsigned long reason = regs->msr;
 789
 790	printk("Machine check in kernel mode.\n");
 791	printk("Caused by (from SRR1=%lx): ", reason);
 792	switch (reason & 0x601F0000) {
 793	case 0x80000:
 794		pr_cont("Machine check signal\n");
 795		break;
 796	case 0:		/* for 601 */
 797	case 0x40000:
 798	case 0x140000:	/* 7450 MSS error and TEA */
 799		pr_cont("Transfer error ack signal\n");
 800		break;
 801	case 0x20000:
 802		pr_cont("Data parity error signal\n");
 803		break;
 804	case 0x10000:
 805		pr_cont("Address parity error signal\n");
 806		break;
 807	case 0x20000000:
 808		pr_cont("L1 Data Cache error\n");
 809		break;
 810	case 0x40000000:
 811		pr_cont("L1 Instruction Cache error\n");
 812		break;
 813	case 0x00100000:
 814		pr_cont("L2 data cache parity error\n");
 815		break;
 816	default:
 817		pr_cont("Unknown values in msr\n");
 818	}
 819	return 0;
 820}
 821#endif /* everything else */
 822
 823void machine_check_exception(struct pt_regs *regs)
 824{
 825	int recover = 0;
 826	bool nested = in_nmi();
 827	if (!nested)
 828		nmi_enter();
 829
 830	__this_cpu_inc(irq_stat.mce_exceptions);
 831
 832	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 833
 834	/* See if any machine dependent calls. In theory, we would want
 835	 * to call the CPU first, and call the ppc_md. one if the CPU
 836	 * one returns a positive number. However there is existing code
 837	 * that assumes the board gets a first chance, so let's keep it
 838	 * that way for now and fix things later. --BenH.
 839	 */
 840	if (ppc_md.machine_check_exception)
 841		recover = ppc_md.machine_check_exception(regs);
 842	else if (cur_cpu_spec->machine_check)
 843		recover = cur_cpu_spec->machine_check(regs);
 844
 845	if (recover > 0)
 846		goto bail;
 847
 848	if (debugger_fault_handler(regs))
 849		goto bail;
 850
 851	if (check_io_access(regs))
 852		goto bail;
 853
 854	if (!nested)
 855		nmi_exit();
 856
 857	die("Machine check", regs, SIGBUS);
 858
 859	/* Must die if the interrupt is not recoverable */
 860	if (!(regs->msr & MSR_RI))
 861		nmi_panic(regs, "Unrecoverable Machine check");
 862
 863	return;
 864
 865bail:
 866	if (!nested)
 867		nmi_exit();
 868}
 869
 870void SMIException(struct pt_regs *regs)
 871{
 872	die("System Management Interrupt", regs, SIGABRT);
 873}
 874
 875#ifdef CONFIG_VSX
 876static void p9_hmi_special_emu(struct pt_regs *regs)
 877{
 878	unsigned int ra, rb, t, i, sel, instr, rc;
 879	const void __user *addr;
 880	u8 vbuf[16], *vdst;
 881	unsigned long ea, msr, msr_mask;
 882	bool swap;
 883
 884	if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
 885		return;
 886
 887	/*
 888	 * lxvb16x	opcode: 0x7c0006d8
 889	 * lxvd2x	opcode: 0x7c000698
 890	 * lxvh8x	opcode: 0x7c000658
 891	 * lxvw4x	opcode: 0x7c000618
 892	 */
 893	if ((instr & 0xfc00073e) != 0x7c000618) {
 894		pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
 895			 " instr=%08x\n",
 896			 smp_processor_id(), current->comm, current->pid,
 897			 regs->nip, instr);
 898		return;
 899	}
 900
 901	/* Grab vector registers into the task struct */
 902	msr = regs->msr; /* Grab msr before we flush the bits */
 903	flush_vsx_to_thread(current);
 904	enable_kernel_altivec();
 905
 906	/*
 907	 * Is userspace running with a different endian (this is rare but
 908	 * not impossible)
 909	 */
 910	swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
 911
 912	/* Decode the instruction */
 913	ra = (instr >> 16) & 0x1f;
 914	rb = (instr >> 11) & 0x1f;
 915	t = (instr >> 21) & 0x1f;
 916	if (instr & 1)
 917		vdst = (u8 *)&current->thread.vr_state.vr[t];
 918	else
 919		vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
 920
 921	/* Grab the vector address */
 922	ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
 923	if (is_32bit_task())
 924		ea &= 0xfffffffful;
 925	addr = (__force const void __user *)ea;
 926
 927	/* Check it */
 928	if (!access_ok(addr, 16)) {
 929		pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
 930			 " instr=%08x addr=%016lx\n",
 931			 smp_processor_id(), current->comm, current->pid,
 932			 regs->nip, instr, (unsigned long)addr);
 933		return;
 934	}
 935
 936	/* Read the vector */
 937	rc = 0;
 938	if ((unsigned long)addr & 0xfUL)
 939		/* unaligned case */
 940		rc = __copy_from_user_inatomic(vbuf, addr, 16);
 941	else
 942		__get_user_atomic_128_aligned(vbuf, addr, rc);
 943	if (rc) {
 944		pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
 945			 " instr=%08x addr=%016lx\n",
 946			 smp_processor_id(), current->comm, current->pid,
 947			 regs->nip, instr, (unsigned long)addr);
 948		return;
 949	}
 950
 951	pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
 952		 " instr=%08x addr=%016lx\n",
 953		 smp_processor_id(), current->comm, current->pid, regs->nip,
 954		 instr, (unsigned long) addr);
 955
 956	/* Grab instruction "selector" */
 957	sel = (instr >> 6) & 3;
 958
 959	/*
 960	 * Check to make sure the facility is actually enabled. This
 961	 * could happen if we get a false positive hit.
 962	 *
 963	 * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
 964	 * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
 965	 */
 966	msr_mask = MSR_VSX;
 967	if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
 968		msr_mask = MSR_VEC;
 969	if (!(msr & msr_mask)) {
 970		pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
 971			 " instr=%08x msr:%016lx\n",
 972			 smp_processor_id(), current->comm, current->pid,
 973			 regs->nip, instr, msr);
 974		return;
 975	}
 976
 977	/* Do logging here before we modify sel based on endian */
 978	switch (sel) {
 979	case 0:	/* lxvw4x */
 980		PPC_WARN_EMULATED(lxvw4x, regs);
 981		break;
 982	case 1: /* lxvh8x */
 983		PPC_WARN_EMULATED(lxvh8x, regs);
 984		break;
 985	case 2: /* lxvd2x */
 986		PPC_WARN_EMULATED(lxvd2x, regs);
 987		break;
 988	case 3: /* lxvb16x */
 989		PPC_WARN_EMULATED(lxvb16x, regs);
 990		break;
 991	}
 992
 993#ifdef __LITTLE_ENDIAN__
 994	/*
 995	 * An LE kernel stores the vector in the task struct as an LE
 996	 * byte array (effectively swapping both the components and
 997	 * the content of the components). Those instructions expect
 998	 * the components to remain in ascending address order, so we
 999	 * swap them back.
1000	 *
1001	 * If we are running a BE user space, the expectation is that
1002	 * of a simple memcpy, so forcing the emulation to look like
1003	 * a lxvb16x should do the trick.
1004	 */
1005	if (swap)
1006		sel = 3;
1007
1008	switch (sel) {
1009	case 0:	/* lxvw4x */
1010		for (i = 0; i < 4; i++)
1011			((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
1012		break;
1013	case 1: /* lxvh8x */
1014		for (i = 0; i < 8; i++)
1015			((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
1016		break;
1017	case 2: /* lxvd2x */
1018		for (i = 0; i < 2; i++)
1019			((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
1020		break;
1021	case 3: /* lxvb16x */
1022		for (i = 0; i < 16; i++)
1023			vdst[i] = vbuf[15-i];
1024		break;
1025	}
1026#else /* __LITTLE_ENDIAN__ */
1027	/* On a big endian kernel, a BE userspace only needs a memcpy */
1028	if (!swap)
1029		sel = 3;
1030
1031	/* Otherwise, we need to swap the content of the components */
1032	switch (sel) {
1033	case 0:	/* lxvw4x */
1034		for (i = 0; i < 4; i++)
1035			((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
1036		break;
1037	case 1: /* lxvh8x */
1038		for (i = 0; i < 8; i++)
1039			((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
1040		break;
1041	case 2: /* lxvd2x */
1042		for (i = 0; i < 2; i++)
1043			((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
1044		break;
1045	case 3: /* lxvb16x */
1046		memcpy(vdst, vbuf, 16);
1047		break;
1048	}
1049#endif /* !__LITTLE_ENDIAN__ */
1050
1051	/* Go to next instruction */
1052	regs->nip += 4;
1053}
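/*
 * Worked example (editorial, not from the kernel source): the mask
 * 0xfc00073e keeps exactly the opcode bits in which the four vector
 * loads agree, so the single compare above covers all of them:
 *
 *     lxvw4x  0x7c000618 & 0xfc00073e == 0x7c000618
 *     lxvh8x  0x7c000658 & 0xfc00073e == 0x7c000618
 *     lxvd2x  0x7c000698 & 0xfc00073e == 0x7c000618
 *     lxvb16x 0x7c0006d8 & 0xfc00073e == 0x7c000618
 *
 * The opcode bits the mask drops at positions 6-7 are read back later as
 * "sel" via (instr >> 6) & 3, and the low bit (instr & 1) picks the upper
 * or lower half of the VSR file.
 */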
1054#endif /* CONFIG_VSX */
1055
1056void handle_hmi_exception(struct pt_regs *regs)
1057{
1058	struct pt_regs *old_regs;
1059
1060	old_regs = set_irq_regs(regs);
1061	irq_enter();
1062
1063#ifdef CONFIG_VSX
1064	/* Real mode flagged P9 special emu is needed */
1065	if (local_paca->hmi_p9_special_emu) {
1066		local_paca->hmi_p9_special_emu = 0;
1067
1068		/*
1069		 * We don't want to take page faults while doing the
1070		 * emulation, we just replay the instruction if necessary.
1071		 */
1072		pagefault_disable();
1073		p9_hmi_special_emu(regs);
1074		pagefault_enable();
1075	}
1076#endif /* CONFIG_VSX */
1077
1078	if (ppc_md.handle_hmi_exception)
1079		ppc_md.handle_hmi_exception(regs);
1080
1081	irq_exit();
1082	set_irq_regs(old_regs);
1083}
1084
1085void unknown_exception(struct pt_regs *regs)
1086{
1087	enum ctx_state prev_state = exception_enter();
1088
1089	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1090	       regs->nip, regs->msr, regs->trap);
1091
1092	_exception(SIGTRAP, regs, TRAP_UNK, 0);
1093
1094	exception_exit(prev_state);
1095}
1096
1097void instruction_breakpoint_exception(struct pt_regs *regs)
1098{
1099	enum ctx_state prev_state = exception_enter();
1100
1101	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
1102					5, SIGTRAP) == NOTIFY_STOP)
1103		goto bail;
1104	if (debugger_iabr_match(regs))
1105		goto bail;
1106	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1107
1108bail:
1109	exception_exit(prev_state);
1110}
1111
1112void RunModeException(struct pt_regs *regs)
1113{
1114	_exception(SIGTRAP, regs, TRAP_UNK, 0);
1115}
1116
1117void single_step_exception(struct pt_regs *regs)
1118{
1119	enum ctx_state prev_state = exception_enter();
1120
1121	clear_single_step(regs);
1122	clear_br_trace(regs);
1123
1124	if (kprobe_post_handler(regs))
1125		return;
1126
1127	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
1128					5, SIGTRAP) == NOTIFY_STOP)
1129		goto bail;
1130	if (debugger_sstep(regs))
1131		goto bail;
1132
1133	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1134
1135bail:
1136	exception_exit(prev_state);
1137}
1138NOKPROBE_SYMBOL(single_step_exception);
1139
1140/*
1141 * After we have successfully emulated an instruction, we have to
1142 * check if the instruction was being single-stepped, and if so,
1143 * pretend we got a single-step exception.  This was pointed out
1144 * by Kumar Gala.  -- paulus
1145 */
1146static void emulate_single_step(struct pt_regs *regs)
1147{
1148	if (single_stepping(regs))
1149		single_step_exception(regs);
1150}
1151
1152static inline int __parse_fpscr(unsigned long fpscr)
1153{
1154	int ret = FPE_FLTUNK;
1155
1156	/* Invalid operation */
1157	if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
1158		ret = FPE_FLTINV;
1159
1160	/* Overflow */
1161	else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
1162		ret = FPE_FLTOVF;
1163
1164	/* Underflow */
1165	else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
1166		ret = FPE_FLTUND;
1167
1168	/* Divide by zero */
1169	else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
1170		ret = FPE_FLTDIV;
1171
1172	/* Inexact result */
1173	else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
1174		ret = FPE_FLTRES;
1175
1176	return ret;
1177}
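/*
 * Example mapping (editorial): a divide-by-zero taken with FPSCR[ZE]
 * enabled leaves both FPSCR_ZE and FPSCR_ZX set, so __parse_fpscr()
 * returns FPE_FLTDIV and parse_fpe() below delivers SIGFPE with that
 * si_code; when no enabled/status pair matches, FPE_FLTUNK is reported.
 */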
1178
1179static void parse_fpe(struct pt_regs *regs)
1180{
1181	int code = 0;
1182
1183	flush_fp_to_thread(current);
1184
1185	code = __parse_fpscr(current->thread.fp_state.fpscr);
1186
1187	_exception(SIGFPE, regs, code, regs->nip);
1188}
1189
1190/*
1191 * Illegal instruction emulation support.  Originally written to
1192 * provide the PVR to user applications using the mfspr rd, PVR.
1193 * Return non-zero if we can't emulate, or -EFAULT if the associated
1194 * memory access caused an access fault.  Return zero on success.
1195 *
1196 * There are a couple of ways to do this, either "decode" the instruction
1197 * or directly match lots of bits.  In this case, matching lots of
1198 * bits is faster and easier.
1199 *
1200 */
1201static int emulate_string_inst(struct pt_regs *regs, u32 instword)
1202{
1203	u8 rT = (instword >> 21) & 0x1f;
1204	u8 rA = (instword >> 16) & 0x1f;
1205	u8 NB_RB = (instword >> 11) & 0x1f;
1206	u32 num_bytes;
1207	unsigned long EA;
1208	int pos = 0;
1209
1210	/* Early out if we are an invalid form of lswx */
1211	if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
1212		if ((rT == rA) || (rT == NB_RB))
1213			return -EINVAL;
1214
1215	EA = (rA == 0) ? 0 : regs->gpr[rA];
1216
1217	switch (instword & PPC_INST_STRING_MASK) {
1218		case PPC_INST_LSWX:
1219		case PPC_INST_STSWX:
1220			EA += NB_RB;
1221			num_bytes = regs->xer & 0x7f;
1222			break;
1223		case PPC_INST_LSWI:
1224		case PPC_INST_STSWI:
1225			num_bytes = (NB_RB == 0) ? 32 : NB_RB;
1226			break;
1227		default:
1228			return -EINVAL;
1229	}
1230
1231	while (num_bytes != 0)
1232	{
1233		u8 val;
1234		u32 shift = 8 * (3 - (pos & 0x3));
1235
1236		/* if process is 32-bit, clear upper 32 bits of EA */
1237		if ((regs->msr & MSR_64BIT) == 0)
1238			EA &= 0xFFFFFFFF;
1239
1240		switch ((instword & PPC_INST_STRING_MASK)) {
1241			case PPC_INST_LSWX:
1242			case PPC_INST_LSWI:
1243				if (get_user(val, (u8 __user *)EA))
1244					return -EFAULT;
1245				/* first time updating this reg,
1246				 * zero it out */
1247				if (pos == 0)
1248					regs->gpr[rT] = 0;
1249				regs->gpr[rT] |= val << shift;
1250				break;
1251			case PPC_INST_STSWI:
1252			case PPC_INST_STSWX:
1253				val = regs->gpr[rT] >> shift;
1254				if (put_user(val, (u8 __user *)EA))
1255					return -EFAULT;
1256				break;
1257		}
1258		/* move EA to next address */
1259		EA += 1;
1260		num_bytes--;
1261
1262		/* manage our position within the register */
1263		if (++pos == 4) {
1264			pos = 0;
1265			if (++rT == 32)
1266				rT = 0;
1267		}
1268	}
1269
1270	return 0;
1271}
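/*
 * Worked example (editorial, not from the kernel source): for
 * "lswi r5,r4,6" with bytes AA BB CC DD EE FF at the address in r4, the
 * loop above yields r5 = 0xAABBCCDD and r6 = 0xEEFF0000: each destination
 * register is zeroed when its first byte arrives (pos == 0) and bytes are
 * packed from the most significant end (shift = 8 * (3 - pos)).
 */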
1272
1273static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
1274{
1275	u32 ra,rs;
1276	unsigned long tmp;
1277
1278	ra = (instword >> 16) & 0x1f;
1279	rs = (instword >> 21) & 0x1f;
1280
1281	tmp = regs->gpr[rs];
1282	tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
1283	tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
1284	tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
1285	regs->gpr[ra] = tmp;
1286
1287	return 0;
1288}
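/*
 * Worked example (editorial): the three steps above are the usual
 * parallel bit count, stopped at byte granularity as popcntb requires
 * (each byte of ra receives the population count of the matching byte
 * of rs). For the byte 0xf3 (0b11110011, six bits set):
 *
 *     0xf3 - ((0xf3 >> 1) & 0x55)          = 0xa2   pair counts
 *     (0xa2 & 0x33) + ((0xa2 >> 2) & 0x33) = 0x42   nibble counts 4 and 2
 *     (0x42 + (0x42 >> 4)) & 0x0f          = 0x06   per-byte total
 */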
1289
1290static int emulate_isel(struct pt_regs *regs, u32 instword)
1291{
1292	u8 rT = (instword >> 21) & 0x1f;
1293	u8 rA = (instword >> 16) & 0x1f;
1294	u8 rB = (instword >> 11) & 0x1f;
1295	u8 BC = (instword >> 6) & 0x1f;
1296	u8 bit;
1297	unsigned long tmp;
1298
1299	tmp = (rA == 0) ? 0 : regs->gpr[rA];
1300	bit = (regs->ccr >> (31 - BC)) & 0x1;
1301
1302	regs->gpr[rT] = bit ? tmp : regs->gpr[rB];
1303
1304	return 0;
1305}
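/*
 * Semantics illustrated (editorial): "isel rT,rA,rB,BC" copies rA into rT
 * when CR bit BC is set and rB otherwise, with rA == 0 meaning the literal
 * value 0 rather than GPR0, hence the special case above. For example,
 * "isel r9,r3,r4,2" tests CR0[EQ] (CR bit 2): r9 = CR0.EQ ? r3 : r4.
 */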
1306
1307#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1308static inline bool tm_abort_check(struct pt_regs *regs, int cause)
1309{
1310        /* If we're emulating a load/store in an active transaction, we cannot
1311         * emulate it as the kernel operates in transaction suspended context.
1312         * We need to abort the transaction.  This creates a persistent TM
1313         * abort so tell the user what caused it with a new code.
1314	 */
1315	if (MSR_TM_TRANSACTIONAL(regs->msr)) {
1316		tm_enable();
1317		tm_abort(cause);
1318		return true;
1319	}
1320	return false;
1321}
1322#else
1323static inline bool tm_abort_check(struct pt_regs *regs, int reason)
1324{
1325	return false;
1326}
1327#endif
1328
1329static int emulate_instruction(struct pt_regs *regs)
1330{
1331	u32 instword;
1332	u32 rd;
1333
1334	if (!user_mode(regs))
1335		return -EINVAL;
1336	CHECK_FULL_REGS(regs);
1337
1338	if (get_user(instword, (u32 __user *)(regs->nip)))
1339		return -EFAULT;
1340
1341	/* Emulate the mfspr rD, PVR. */
1342	if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
1343		PPC_WARN_EMULATED(mfpvr, regs);
1344		rd = (instword >> 21) & 0x1f;
1345		regs->gpr[rd] = mfspr(SPRN_PVR);
1346		return 0;
1347	}
1348
1349	/* Emulating the dcba insn is just a no-op.  */
1350	if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
1351		PPC_WARN_EMULATED(dcba, regs);
1352		return 0;
1353	}
1354
1355	/* Emulate the mcrxr insn.  */
1356	if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
1357		int shift = (instword >> 21) & 0x1c;
1358		unsigned long msk = 0xf0000000UL >> shift;
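		/*
		 * Editorial note: the BF field sits at instruction bits
		 * 23-25, so shifting right by only 21 leaves it multiplied
		 * by 4; msk then selects the 4-bit CR field named by BF and
		 * XER[SO,OV,CA] is shifted down to line up with it below.
		 */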
1359
1360		PPC_WARN_EMULATED(mcrxr, regs);
1361		regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
1362		regs->xer &= ~0xf0000000UL;
1363		return 0;
1364	}
1365
1366	/* Emulate load/store string insn. */
1367	if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
1368		if (tm_abort_check(regs,
1369				   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
1370			return -EINVAL;
1371		PPC_WARN_EMULATED(string, regs);
1372		return emulate_string_inst(regs, instword);
1373	}
1374
1375	/* Emulate the popcntb (Population Count Bytes) instruction. */
1376	if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
1377		PPC_WARN_EMULATED(popcntb, regs);
1378		return emulate_popcntb_inst(regs, instword);
1379	}
1380
1381	/* Emulate isel (Integer Select) instruction */
1382	if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
1383		PPC_WARN_EMULATED(isel, regs);
1384		return emulate_isel(regs, instword);
1385	}
1386
1387	/* Emulate sync instruction variants */
1388	if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
1389		PPC_WARN_EMULATED(sync, regs);
1390		asm volatile("sync");
1391		return 0;
1392	}
1393
1394#ifdef CONFIG_PPC64
1395	/* Emulate the mfspr rD, DSCR. */
1396	if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
1397		PPC_INST_MFSPR_DSCR_USER) ||
1398	     ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
1399		PPC_INST_MFSPR_DSCR)) &&
1400			cpu_has_feature(CPU_FTR_DSCR)) {
1401		PPC_WARN_EMULATED(mfdscr, regs);
1402		rd = (instword >> 21) & 0x1f;
1403		regs->gpr[rd] = mfspr(SPRN_DSCR);
1404		return 0;
1405	}
1406	/* Emulate the mtspr DSCR, rD. */
1407	if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
1408		PPC_INST_MTSPR_DSCR_USER) ||
1409	     ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
1410		PPC_INST_MTSPR_DSCR)) &&
1411			cpu_has_feature(CPU_FTR_DSCR)) {
1412		PPC_WARN_EMULATED(mtdscr, regs);
1413		rd = (instword >> 21) & 0x1f;
1414		current->thread.dscr = regs->gpr[rd];
1415		current->thread.dscr_inherit = 1;
1416		mtspr(SPRN_DSCR, current->thread.dscr);
1417		return 0;
1418	}
1419#endif
1420
1421	return -EINVAL;
1422}
1423
1424int is_valid_bugaddr(unsigned long addr)
1425{
1426	return is_kernel_addr(addr);
1427}
1428
1429#ifdef CONFIG_MATH_EMULATION
1430static int emulate_math(struct pt_regs *regs)
1431{
1432	int ret;
1433	extern int do_mathemu(struct pt_regs *regs);
1434
1435	ret = do_mathemu(regs);
1436	if (ret >= 0)
1437		PPC_WARN_EMULATED(math, regs);
1438
1439	switch (ret) {
1440	case 0:
1441		emulate_single_step(regs);
1442		return 0;
1443	case 1: {
1444			int code = 0;
1445			code = __parse_fpscr(current->thread.fp_state.fpscr);
1446			_exception(SIGFPE, regs, code, regs->nip);
1447			return 0;
1448		}
1449	case -EFAULT:
1450		_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1451		return 0;
1452	}
1453
1454	return -1;
1455}
1456#else
1457static inline int emulate_math(struct pt_regs *regs) { return -1; }
1458#endif
1459
1460void program_check_exception(struct pt_regs *regs)
1461{
1462	enum ctx_state prev_state = exception_enter();
1463	unsigned int reason = get_reason(regs);
1464
1465	/* We can now get here via a FP Unavailable exception if the core
1466	 * has no FPU, in that case the reason flags will be 0 */
1467
1468	if (reason & REASON_FP) {
1469		/* IEEE FP exception */
1470		parse_fpe(regs);
1471		goto bail;
1472	}
1473	if (reason & REASON_TRAP) {
1474		unsigned long bugaddr;
1475		/* Debugger is first in line to stop recursive faults in
1476		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
1477		if (debugger_bpt(regs))
1478			goto bail;
1479
1480		if (kprobe_handler(regs))
1481			goto bail;
1482
1483		/* trap exception */
1484		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
1485				== NOTIFY_STOP)
1486			goto bail;
1487
1488		bugaddr = regs->nip;
1489		/*
1490		 * Fixup bugaddr for BUG_ON() in real mode
1491		 */
1492		if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
1493			bugaddr += PAGE_OFFSET;
1494
1495		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
1496		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
1497			regs->nip += 4;
1498			goto bail;
1499		}
1500		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1501		goto bail;
1502	}
1503#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1504	if (reason & REASON_TM) {
1505		/* This is a TM "Bad Thing Exception" program check.
1506		 * This occurs when:
1507		 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
1508		 *    transition in TM states.
1509		 * -  A trechkpt is attempted when transactional.
1510		 * -  A treclaim is attempted when non transactional.
1511		 * -  A tend is illegally attempted.
1512		 * -  writing a TM SPR when transactional.
1513		 *
1514		 * If usermode caused this, it's done something illegal and
1515		 * gets a SIGILL slap on the wrist.  We call it an illegal
1516		 * operand to distinguish from the instruction just being bad
1517		 * (e.g. executing a 'tend' on a CPU without TM!); it's an
1518		 * illegal /placement/ of a valid instruction.
1519		 */
1520		if (user_mode(regs)) {
1521			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
1522			goto bail;
1523		} else {
1524			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1525			       "at %lx (msr 0x%lx) tm_scratch=%llx\n",
1526			       regs->nip, regs->msr, get_paca()->tm_scratch);
1527			die("Unrecoverable exception", regs, SIGABRT);
1528		}
1529	}
1530#endif
1531
1532	/*
1533	 * If we took the program check in the kernel skip down to sending a
1534	 * SIGILL. The subsequent cases all relate to emulating instructions
1535	 * which we should only do for userspace. We also do not want to enable
1536	 * interrupts for kernel faults because that might lead to further
 1537	 * faults, and lose the context of the original exception.
1538	 */
1539	if (!user_mode(regs))
1540		goto sigill;
1541
1542	/* We restore the interrupt state now */
1543	if (!arch_irq_disabled_regs(regs))
1544		local_irq_enable();
1545
1546	/* (reason & REASON_ILLEGAL) would be the obvious thing here,
1547	 * but there seems to be a hardware bug on the 405GP (RevD)
1548	 * that means ESR is sometimes set incorrectly - either to
1549	 * ESR_DST (!?) or 0.  In the process of chasing this with the
1550	 * hardware people - not sure if it can happen on any illegal
1551	 * instruction or only on FP instructions, whether there is a
1552	 * pattern to occurrences etc. -dgibson 31/Mar/2003
1553	 */
1554	if (!emulate_math(regs))
1555		goto bail;
1556
1557	/* Try to emulate it if we should. */
1558	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
1559		switch (emulate_instruction(regs)) {
1560		case 0:
1561			regs->nip += 4;
1562			emulate_single_step(regs);
1563			goto bail;
1564		case -EFAULT:
1565			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1566			goto bail;
1567		}
1568	}
1569
1570sigill:
1571	if (reason & REASON_PRIVILEGED)
1572		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
1573	else
1574		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1575
1576bail:
1577	exception_exit(prev_state);
1578}
1579NOKPROBE_SYMBOL(program_check_exception);
1580
1581/*
1582 * This occurs when running in hypervisor mode on POWER6 or later
1583 * and an illegal instruction is encountered.
1584 */
1585void emulation_assist_interrupt(struct pt_regs *regs)
1586{
1587	regs->msr |= REASON_ILLEGAL;
1588	program_check_exception(regs);
1589}
1590NOKPROBE_SYMBOL(emulation_assist_interrupt);
1591
1592void alignment_exception(struct pt_regs *regs)
1593{
1594	enum ctx_state prev_state = exception_enter();
1595	int sig, code, fixed = 0;
1596
1597	/* We restore the interrupt state now */
1598	if (!arch_irq_disabled_regs(regs))
1599		local_irq_enable();
1600
1601	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
1602		goto bail;
1603
1604	/* we don't implement logging of alignment exceptions */
1605	if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
1606		fixed = fix_alignment(regs);
1607
1608	if (fixed == 1) {
1609		regs->nip += 4;	/* skip over emulated instruction */
1610		emulate_single_step(regs);
1611		goto bail;
1612	}
1613
1614	/* Operand address was bad */
1615	if (fixed == -EFAULT) {
1616		sig = SIGSEGV;
1617		code = SEGV_ACCERR;
1618	} else {
1619		sig = SIGBUS;
1620		code = BUS_ADRALN;
1621	}
1622	if (user_mode(regs))
1623		_exception(sig, regs, code, regs->dar);
1624	else
1625		bad_page_fault(regs, regs->dar, sig);
1626
1627bail:
1628	exception_exit(prev_state);
1629}
1630
1631void StackOverflow(struct pt_regs *regs)
1632{
1633	pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
1634		current->comm, task_pid_nr(current), regs->gpr[1]);
1635	debugger(regs);
1636	show_regs(regs);
1637	panic("kernel stack overflow");
1638}
1639
1640void stack_overflow_exception(struct pt_regs *regs)
1641{
1642	enum ctx_state prev_state = exception_enter();
1643
1644	die("Kernel stack overflow", regs, SIGSEGV);
1645
1646	exception_exit(prev_state);
1647}
1648
1649void kernel_fp_unavailable_exception(struct pt_regs *regs)
1650{
1651	enum ctx_state prev_state = exception_enter();
1652
1653	printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
1654			  "%lx at %lx\n", regs->trap, regs->nip);
1655	die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
1656
1657	exception_exit(prev_state);
1658}
1659
1660void altivec_unavailable_exception(struct pt_regs *regs)
1661{
1662	enum ctx_state prev_state = exception_enter();
1663
1664	if (user_mode(regs)) {
1665		/* A user program has executed an altivec instruction,
1666		   but this kernel doesn't support altivec. */
1667		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1668		goto bail;
1669	}
1670
1671	printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
1672			"%lx at %lx\n", regs->trap, regs->nip);
1673	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
1674
1675bail:
1676	exception_exit(prev_state);
1677}
1678
1679void vsx_unavailable_exception(struct pt_regs *regs)
1680{
1681	if (user_mode(regs)) {
 1682	/* A user program has executed a vsx instruction,
1683		   but this kernel doesn't support vsx. */
1684		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1685		return;
1686	}
1687
1688	printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
1689			"%lx at %lx\n", regs->trap, regs->nip);
1690	die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1691}
1692
1693#ifdef CONFIG_PPC64
1694static void tm_unavailable(struct pt_regs *regs)
1695{
1696#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1697	if (user_mode(regs)) {
1698		current->thread.load_tm++;
1699		regs->msr |= MSR_TM;
1700		tm_enable();
1701		tm_restore_sprs(&current->thread);
1702		return;
1703	}
1704#endif
1705	pr_emerg("Unrecoverable TM Unavailable Exception "
1706			"%lx at %lx\n", regs->trap, regs->nip);
1707	die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
1708}
1709
1710void facility_unavailable_exception(struct pt_regs *regs)
1711{
1712	static char *facility_strings[] = {
1713		[FSCR_FP_LG] = "FPU",
1714		[FSCR_VECVSX_LG] = "VMX/VSX",
1715		[FSCR_DSCR_LG] = "DSCR",
1716		[FSCR_PM_LG] = "PMU SPRs",
1717		[FSCR_BHRB_LG] = "BHRB",
1718		[FSCR_TM_LG] = "TM",
1719		[FSCR_EBB_LG] = "EBB",
1720		[FSCR_TAR_LG] = "TAR",
1721		[FSCR_MSGP_LG] = "MSGP",
1722		[FSCR_SCV_LG] = "SCV",
1723	};
1724	char *facility = "unknown";
1725	u64 value;
1726	u32 instword, rd;
1727	u8 status;
1728	bool hv;
1729
1730	hv = (TRAP(regs) == 0xf80);
1731	if (hv)
1732		value = mfspr(SPRN_HFSCR);
1733	else
1734		value = mfspr(SPRN_FSCR);
1735
1736	status = value >> 56;
1737	if ((hv || status >= 2) &&
1738	    (status < ARRAY_SIZE(facility_strings)) &&
1739	    facility_strings[status])
1740		facility = facility_strings[status];
1741
1742	/* We should not have taken this interrupt in kernel */
1743	if (!user_mode(regs)) {
1744		pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
1745			 facility, status, regs->nip);
1746		die("Unexpected facility unavailable exception", regs, SIGABRT);
1747	}
1748
1749	/* We restore the interrupt state now */
1750	if (!arch_irq_disabled_regs(regs))
1751		local_irq_enable();
1752
1753	if (status == FSCR_DSCR_LG) {
1754		/*
1755		 * User is accessing the DSCR register using the problem
1756		 * state only SPR number (0x03) either through a mfspr or
1757		 * a mtspr instruction. If it is a write attempt through
1758		 * a mtspr, then we set the inherit bit. This also allows
1759		 * the user to write or read the register directly in the
1760		 * future by setting via the FSCR DSCR bit. But in case it
1761		 * is a read DSCR attempt through a mfspr instruction, we
1762		 * just emulate the instruction instead. This code path will
1763		 * always emulate all the mfspr instructions till the user
1764		 * has attempted at least one mtspr instruction. This way it
1765		 * preserves the same behaviour when the user is accessing
1766		 * the DSCR through privilege level only SPR number (0x11)
1767		 * which is emulated through illegal instruction exception.
1768		 * We always leave HFSCR DSCR set.
1769		 */
1770		if (get_user(instword, (u32 __user *)(regs->nip))) {
1771			pr_err("Failed to fetch the user instruction\n");
1772			return;
1773		}
1774
1775		/* Write into DSCR (mtspr 0x03, RS) */
1776		if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
1777				== PPC_INST_MTSPR_DSCR_USER) {
1778			rd = (instword >> 21) & 0x1f;
1779			current->thread.dscr = regs->gpr[rd];
1780			current->thread.dscr_inherit = 1;
1781			current->thread.fscr |= FSCR_DSCR;
1782			mtspr(SPRN_FSCR, current->thread.fscr);
1783		}
1784
1785		/* Read from DSCR (mfspr RT, 0x03) */
1786		if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
1787				== PPC_INST_MFSPR_DSCR_USER) {
1788			if (emulate_instruction(regs)) {
1789				pr_err("DSCR based mfspr emulation failed\n");
1790				return;
1791			}
1792			regs->nip += 4;
1793			emulate_single_step(regs);
1794		}
1795		return;
1796	}
1797
1798	if (status == FSCR_TM_LG) {
1799		/*
1800		 * If we're here then the hardware is TM aware because it
 1801		 * generated an exception with FSCR_TM set.
1802		 *
1803		 * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
1804		 * told us not to do TM, or the kernel is not built with TM
1805		 * support.
1806		 *
1807		 * If both of those things are true, then userspace can spam the
1808		 * console by triggering the printk() below just by continually
1809		 * doing tbegin (or any TM instruction). So in that case just
1810		 * send the process a SIGILL immediately.
1811		 */
1812		if (!cpu_has_feature(CPU_FTR_TM))
1813			goto out;
1814
1815		tm_unavailable(regs);
1816		return;
1817	}
1818
1819	pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
1820		hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
1821
1822out:
1823	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1824}
1825#endif
1826
1827#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1828
1829void fp_unavailable_tm(struct pt_regs *regs)
1830{
1831	/* Note:  This does not handle any kind of FP laziness. */
1832
1833	TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1834		 regs->nip, regs->msr);
1835
1836        /* We can only have got here if the task started using FP after
1837         * beginning the transaction.  So, the transactional regs are just a
1838         * copy of the checkpointed ones.  But, we still need to recheckpoint
1839         * as we're enabling FP for the process; it will return, abort the
1840         * transaction, and probably retry but now with FP enabled.  So the
1841         * checkpointed FP registers need to be loaded.
1842	 */
1843	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1844
1845	/*
 1846	 * Reclaim initially saved the bogus (lazy) FPRs out to ckfp_state, and
 1847	 * that was then overwritten with thr->fp_state by tm_reclaim_thread().
1848	 *
1849	 * At this point, ck{fp,vr}_state contains the exact values we want to
1850	 * recheckpoint.
1851	 */
1852
1853	/* Enable FP for the task: */
1854	current->thread.load_fp = 1;
1855
1856	/*
1857	 * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
1858	 */
1859	tm_recheckpoint(&current->thread);
1860}
1861
1862void altivec_unavailable_tm(struct pt_regs *regs)
1863{
1864	/* See the comments in fp_unavailable_tm().  This function operates
1865	 * the same way.
1866	 */
1867
1868	TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
1869		 "MSR=%lx\n",
1870		 regs->nip, regs->msr);
1871	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1872	current->thread.load_vec = 1;
1873	tm_recheckpoint(&current->thread);
1874	current->thread.used_vr = 1;
1875}
1876
1877void vsx_unavailable_tm(struct pt_regs *regs)
1878{
1879	/* See the comments in fp_unavailable_tm().  This works similarly,
1880	 * though we're loading both FP and VEC registers in here.
1881	 *
1882	 * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
1883	 * regs.  Either way, set MSR_VSX.
1884	 */
1885
1886	TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx,"
1887		 "MSR=%lx\n",
1888		 regs->nip, regs->msr);
1889
1890	current->thread.used_vsr = 1;
1891
1892	/* This reclaims FP and/or VR regs if they're already enabled */
1893	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1894
1895	current->thread.load_vec = 1;
1896	current->thread.load_fp = 1;
1897
1898	tm_recheckpoint(&current->thread);
1899}
1900#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1901
1902void performance_monitor_exception(st
