
/arch/ia64/kernel/unaligned.c

https://bitbucket.org/evzijst/gittest
   1/*
   2 * Architecture-specific unaligned trap handling.
   3 *
   4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
   5 *	Stephane Eranian <eranian@hpl.hp.com>
   6 *	David Mosberger-Tang <davidm@hpl.hp.com>
   7 *
   8 * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
   9 *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  10 *		stacked register returns an undefined value; it does NOT trigger a
  11 *		"rsvd register fault").
  12 * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
  13 * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
   14 * 2001/01/17	Add support for emulating unaligned kernel accesses.
  15 */
  16#include <linux/kernel.h>
  17#include <linux/sched.h>
  18#include <linux/smp_lock.h>
  19#include <linux/tty.h>
  20
  21#include <asm/intrinsics.h>
  22#include <asm/processor.h>
  23#include <asm/rse.h>
  24#include <asm/uaccess.h>
  25#include <asm/unaligned.h>
  26
  27extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
  28
  29#undef DEBUG_UNALIGNED_TRAP
  30
  31#ifdef DEBUG_UNALIGNED_TRAP
  32# define DPRINT(a...)	do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
  33# define DDUMP(str,vp,len)	dump(str, vp, len)
  34
  35static void
  36dump (const char *str, void *vp, size_t len)
  37{
  38	unsigned char *cp = vp;
  39	int i;
  40
  41	printk("%s", str);
  42	for (i = 0; i < len; ++i)
  43		printk (" %02x", *cp++);
  44	printk("\n");
  45}
  46#else
  47# define DPRINT(a...)
  48# define DDUMP(str,vp,len)
  49#endif
  50
  51#define IA64_FIRST_STACKED_GR	32
  52#define IA64_FIRST_ROTATING_FR	32
  53#define SIGN_EXT9		0xffffffffffffff00ul
  54
  55/*
  56 * For M-unit:
  57 *
  58 *  opcode |   m  |   x6    |
  59 * --------|------|---------|
  60 * [40-37] | [36] | [35:30] |
  61 * --------|------|---------|
  62 *     4   |   1  |    6    | = 11 bits
  63 * --------------------------
  64 * However bits [31:30] are not directly useful to distinguish between
  65 * load/store so we can use [35:32] instead, which gives the following
  66 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
  67 * checking the m-bit until later in the load/store emulation.
  68 */
  69#define IA64_OPCODE_MASK	0x1ef
  70#define IA64_OPCODE_SHIFT	32
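/*
 * Example: the handler below computes
 *
 *	opcode = (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
 *
 * Since 0x1ef has bit 4 clear (instruction bit 36, the m bit), e.g.
 * "ld4 r1=[r3]" and "ld4 r1=[r3],r2" both dispatch to LD_OP; the m bit
 * is examined later to tell the +Reg form apart (see Table C-29 below).
 */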
  71
  72/*
  73 * Table C-28 Integer Load/Store
  74 *
  75 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  76 *
  77 * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
   78 * the address (bits [8:3]), so we must fail.
  79 */
  80#define LD_OP            0x080
  81#define LDS_OP           0x081
  82#define LDA_OP           0x082
  83#define LDSA_OP          0x083
  84#define LDBIAS_OP        0x084
  85#define LDACQ_OP         0x085
  86/* 0x086, 0x087 are not relevant */
  87#define LDCCLR_OP        0x088
  88#define LDCNC_OP         0x089
  89#define LDCCLRACQ_OP     0x08a
  90#define ST_OP            0x08c
  91#define STREL_OP         0x08d
  92/* 0x08e,0x8f are not relevant */
  93
  94/*
  95 * Table C-29 Integer Load +Reg
  96 *
  97 * we use the ld->m (bit [36:36]) field to determine whether or not we have
  98 * a load/store of this form.
  99 */
 100
 101/*
 102 * Table C-30 Integer Load/Store +Imm
 103 *
 104 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 105 *
  106 * ld8.fill, st8.fill  must be aligned because the NaT bits are based on
 107 * the address, so we must fail and the program must be fixed.
 108 */
 109#define LD_IMM_OP            0x0a0
 110#define LDS_IMM_OP           0x0a1
 111#define LDA_IMM_OP           0x0a2
 112#define LDSA_IMM_OP          0x0a3
 113#define LDBIAS_IMM_OP        0x0a4
 114#define LDACQ_IMM_OP         0x0a5
 115/* 0x0a6, 0xa7 are not relevant */
 116#define LDCCLR_IMM_OP        0x0a8
 117#define LDCNC_IMM_OP         0x0a9
 118#define LDCCLRACQ_IMM_OP     0x0aa
 119#define ST_IMM_OP            0x0ac
 120#define STREL_IMM_OP         0x0ad
 121/* 0x0ae,0xaf are not relevant */
 122
 123/*
 124 * Table C-32 Floating-point Load/Store
 125 */
 126#define LDF_OP           0x0c0
 127#define LDFS_OP          0x0c1
 128#define LDFA_OP          0x0c2
 129#define LDFSA_OP         0x0c3
 130/* 0x0c6 is irrelevant */
 131#define LDFCCLR_OP       0x0c8
 132#define LDFCNC_OP        0x0c9
 133/* 0x0cb is irrelevant  */
 134#define STF_OP           0x0cc
 135
 136/*
 137 * Table C-33 Floating-point Load +Reg
 138 *
 139 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 140 * a load/store of this form.
 141 */
 142
 143/*
 144 * Table C-34 Floating-point Load/Store +Imm
 145 */
 146#define LDF_IMM_OP       0x0e0
 147#define LDFS_IMM_OP      0x0e1
 148#define LDFA_IMM_OP      0x0e2
 149#define LDFSA_IMM_OP     0x0e3
 150/* 0x0e6 is irrelevant */
 151#define LDFCCLR_IMM_OP   0x0e8
 152#define LDFCNC_IMM_OP    0x0e9
 153#define STF_IMM_OP       0x0ec
 154
 155typedef struct {
 156	unsigned long	 qp:6;	/* [0:5]   */
 157	unsigned long    r1:7;	/* [6:12]  */
 158	unsigned long   imm:7;	/* [13:19] */
 159	unsigned long    r3:7;	/* [20:26] */
 160	unsigned long     x:1;  /* [27:27] */
 161	unsigned long  hint:2;	/* [28:29] */
 162	unsigned long x6_sz:2;	/* [30:31] */
 163	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
 164	unsigned long     m:1;	/* [36:36] */
 165	unsigned long    op:4;	/* [37:40] */
 166	unsigned long   pad:23; /* [41:63] */
 167} load_store_t;
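/*
 * The fault handler extracts the 41-bit instruction slot into an unsigned
 * long and overlays this layout on it via a union (see ia64_handle_unaligned()),
 * so fields such as x6_op and x6_sz can be read directly from the raw bits.
 */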
 168
 169
 170typedef enum {
 171	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
 172	UPD_REG		/* ldXZ r1=[r3],r2     */
 173} update_t;
 174
 175/*
 176 * We use tables to keep track of the offsets of registers in the saved state.
  177 * This way we avoid big switch/case statements.
 178 *
 179 * We use bit 0 to indicate switch_stack or pt_regs.
 180 * The offset is simply shifted by 1 bit.
 181 * A 2-byte value should be enough to hold any kind of offset
 182 *
 183 * In case the calling convention changes (and thus pt_regs/switch_stack)
 184 * simply use RSW instead of RPT or vice-versa.
 185 */
 186
 187#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
 188#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)
 189
 190#define RPT(x)		(RPO(x) << 1)
 191#define RSW(x)		(1| RSO(x)<<1)
 192
 193#define GR_OFFS(x)	(gr_info[x]>>1)
 194#define GR_IN_SW(x)	(gr_info[x] & 0x1)
 195
 196#define FR_OFFS(x)	(fr_info[x]>>1)
 197#define FR_IN_SW(x)	(fr_info[x] & 0x1)
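/*
 * Example: gr_info[8] == RPT(r8), i.e. the byte offset of r8 within
 * struct pt_regs shifted left by one with bit 0 clear.  GR_IN_SW(8) is
 * therefore 0 (the value lives in pt_regs) and GR_OFFS(8) recovers the
 * offset that gets added to the pt_regs base in getreg()/setreg().
 */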
 198
 199static u16 gr_info[32]={
 200	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
 201
 202	RPT(r1), RPT(r2), RPT(r3),
 203
 204	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
 205
 206	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
 207	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
 208
 209	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
 210	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
 211	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
 212	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
 213};
 214
 215static u16 fr_info[32]={
 216	0,			/* constant : WE SHOULD NEVER GET THIS */
 217	0,			/* constant : WE SHOULD NEVER GET THIS */
 218
 219	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
 220
 221	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
 222	RPT(f10), RPT(f11),
 223
 224	RSW(f12), RSW(f13), RSW(f14),
 225	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
 226	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
 227	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
 228	RSW(f30), RSW(f31)
 229};
 230
 231/* Invalidate ALAT entry for integer register REGNO.  */
 232static void
 233invala_gr (int regno)
 234{
 235#	define F(reg)	case reg: ia64_invala_gr(reg); break
 236
 237	switch (regno) {
 238		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 239		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 240		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 241		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 242		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 243		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 244		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 245		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 246		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 247		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 248		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 249		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 250		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 251		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 252		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 253		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 254	}
 255#	undef F
 256}
 257
 258/* Invalidate ALAT entry for floating-point register REGNO.  */
 259static void
 260invala_fr (int regno)
 261{
 262#	define F(reg)	case reg: ia64_invala_fr(reg); break
 263
 264	switch (regno) {
 265		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
 266		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
 267		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
 268		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
 269		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
 270		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
 271		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
 272		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
 273		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
 274		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
 275		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
 276		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
 277		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
 278		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
 279		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
 280		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
 281	}
 282#	undef F
 283}
 284
 285static inline unsigned long
 286rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
 287{
 288	reg += rrb;
 289	if (reg >= sor)
 290		reg -= sor;
 291	return reg;
 292}
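/*
 * Example: with a rotating region of 16 registers (sor=16) and rrb=3,
 * rotate_reg(16, 3, 14) yields (14+3)-16 = 1.
 */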
 293
 294static void
 295set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
 296{
 297	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 298	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
 299	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 300	unsigned long rnats, nat_mask;
 301	unsigned long on_kbs;
 302	long sof = (regs->cr_ifs) & 0x7f;
 303	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 304	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 305	long ridx = r1 - 32;
 306
 307	if (ridx >= sof) {
 308		/* this should never happen, as the "rsvd register fault" has higher priority */
 309		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
 310		return;
 311	}
 312
 313	if (ridx < sor)
 314		ridx = rotate_reg(sor, rrb_gr, ridx);
 315
 316	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 317	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 318
 319	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 320	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 321	if (addr >= kbs) {
 322		/* the register is on the kernel backing store: easy... */
 323		rnat_addr = ia64_rse_rnat_addr(addr);
 324		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 325			rnat_addr = &sw->ar_rnat;
 326		nat_mask = 1UL << ia64_rse_slot_num(addr);
 327
 328		*addr = val;
 329		if (nat)
 330			*rnat_addr |=  nat_mask;
 331		else
 332			*rnat_addr &= ~nat_mask;
 333		return;
 334	}
 335
 336	if (!user_stack(current, regs)) {
 337		DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
 338		return;
 339	}
 340
 341	bspstore = (unsigned long *)regs->ar_bspstore;
 342	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 343	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 344	addr    = ia64_rse_skip_regs(bsp, ridx);
 345
 346	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 347
 348	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 349
 350	rnat_addr = ia64_rse_rnat_addr(addr);
 351
 352	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 353	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
 354	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
 355
 356	nat_mask = 1UL << ia64_rse_slot_num(addr);
 357	if (nat)
 358		rnats |=  nat_mask;
 359	else
 360		rnats &= ~nat_mask;
 361	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
 362
 363	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 364}
 365
 366
 367static void
 368get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
 369{
 370	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 371	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
 372	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
 373	unsigned long rnats, nat_mask;
 374	unsigned long on_kbs;
 375	long sof = (regs->cr_ifs) & 0x7f;
 376	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
 377	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
 378	long ridx = r1 - 32;
 379
 380	if (ridx >= sof) {
 381		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
 382		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
 383		goto fail;
 384	}
 385
 386	if (ridx < sor)
 387		ridx = rotate_reg(sor, rrb_gr, ridx);
 388
 389	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
 390	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
 391
 392	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
 393	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
 394	if (addr >= kbs) {
 395		/* the register is on the kernel backing store: easy... */
 396		*val = *addr;
 397		if (nat) {
 398			rnat_addr = ia64_rse_rnat_addr(addr);
 399			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
 400				rnat_addr = &sw->ar_rnat;
 401			nat_mask = 1UL << ia64_rse_slot_num(addr);
 402			*nat = (*rnat_addr & nat_mask) != 0;
 403		}
 404		return;
 405	}
 406
 407	if (!user_stack(current, regs)) {
 408		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
 409		goto fail;
 410	}
 411
 412	bspstore = (unsigned long *)regs->ar_bspstore;
 413	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
 414	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
 415	addr    = ia64_rse_skip_regs(bsp, ridx);
 416
 417	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
 418
 419	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
 420
 421	if (nat) {
 422		rnat_addr = ia64_rse_rnat_addr(addr);
 423		nat_mask = 1UL << ia64_rse_slot_num(addr);
 424
  425		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
  426
  427		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
 428		*nat = (rnats & nat_mask) != 0;
 429	}
 430	return;
 431
 432  fail:
 433	*val = 0;
 434	if (nat)
 435		*nat = 0;
 436	return;
 437}
 438
 439
 440static void
 441setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
 442{
 443	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 444	unsigned long addr;
 445	unsigned long bitmask;
 446	unsigned long *unat;
 447
 448	/*
  449	 * First, take care of stacked registers.
 450	 */
 451	if (regnum >= IA64_FIRST_STACKED_GR) {
 452		set_rse_reg(regs, regnum, val, nat);
 453		return;
 454	}
 455
 456	/*
 457	 * Using r0 as a target raises a General Exception fault which has higher priority
 458	 * than the Unaligned Reference fault.
 459	 */
 460
 461	/*
 462	 * Now look at registers in [0-31] range and init correct UNAT
 463	 */
 464	if (GR_IN_SW(regnum)) {
 465		addr = (unsigned long)sw;
 466		unat = &sw->ar_unat;
 467	} else {
 468		addr = (unsigned long)regs;
 469		unat = &sw->caller_unat;
 470	}
 471	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
 472	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
 473	/*
 474	 * add offset from base of struct
 475	 * and do it !
 476	 */
 477	addr += GR_OFFS(regnum);
 478
 479	*(unsigned long *)addr = val;
 480
 481	/*
  482	 * We need to update the corresponding UNAT bit to fully emulate the load.
  483	 * UNAT bit_pos = GR[r3]{8:3}, from EAS-2.4.
  484	 */
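	/*
	 * Example: a spill address whose bits {8:3} are 0x1c selects bit 28
	 * of the UNAT collection updated below.
	 */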
 485	bitmask   = 1UL << (addr >> 3 & 0x3f);
 486	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
 487	if (nat) {
 488		*unat |= bitmask;
 489	} else {
 490		*unat &= ~bitmask;
 491	}
 492	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 493}
 494
 495/*
 496 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
  497 * range 32-127; the result is in the range 0-95).
 498 */
 499static inline unsigned long
 500fph_index (struct pt_regs *regs, long regnum)
 501{
 502	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
 503	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 504}
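/*
 * Example: with rrb.fr == 0 this is simply regnum - 32, so f32 maps to
 * thread.fph[0] and f127 to thread.fph[95]; a non-zero rrb.fr rotates the
 * index within the 96-register rotating region.
 */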
 505
 506static void
 507setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 508{
 509	struct switch_stack *sw = (struct switch_stack *)regs - 1;
 510	unsigned long addr;
 511
 512	/*
 513	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
 514	 * Fault. Thus, when we get here, we know the partition is enabled.
 515	 * To update f32-f127, there are three choices:
 516	 *
 517	 *	(1) save f32-f127 to thread.fph and update the values there
 518	 *	(2) use a gigantic switch statement to directly access the registers
 519	 *	(3) generate code on the fly to update the desired register
 520	 *
 521	 * For now, we are using approach (1).
 522	 */
 523	if (regnum >= IA64_FIRST_ROTATING_FR) {
 524		ia64_sync_fph(current);
 525		current->thread.fph[fph_index(regs, regnum)] = *fpval;
 526	} else {
 527		/*
 528		 * pt_regs or switch_stack ?
 529		 */
 530		if (FR_IN_SW(regnum)) {
 531			addr = (unsigned long)sw;
 532		} else {
 533			addr = (unsigned long)regs;
 534		}
 535
 536		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
 537
 538		addr += FR_OFFS(regnum);
 539		*(struct ia64_fpreg *)addr = *fpval;
 540
 541		/*
 542		 * mark the low partition as being used now
 543		 *
 544		 * It is highly unlikely that this bit is not already set, but
 545		 * let's do it for safety.
 546		 */
 547		regs->cr_ipsr |= IA64_PSR_MFL;
 548	}
 549}
 550
 551/*
  552 * These two inline functions generate the spilled versions of the constant floating-point
  553 * registers, which can be used with stfX.
 554 */
 555static inline void
 556float_spill_f0 (struct ia64_fpreg *final)
 557{
 558	ia64_stf_spill(final, 0);
 559}
 560
 561static inline void
 562float_spill_f1 (struct ia64_fpreg *final)
 563{
 564	ia64_stf_spill(final, 1);
 565}
 566
 567static void
 568getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
 569{
 570	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 571	unsigned long addr;
 572
 573	/*
 574	 * From EAS-2.5: FPDisableFault has higher priority than
 575	 * Unaligned Fault. Thus, when we get here, we know the partition is
 576	 * enabled.
 577	 *
 578	 * When regnum > 31, the register is still live and we need to force a save
 579	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
 580	 * for reasons and other ways of doing this.
 581	 */
 582	if (regnum >= IA64_FIRST_ROTATING_FR) {
 583		ia64_flush_fph(current);
 584		*fpval = current->thread.fph[fph_index(regs, regnum)];
 585	} else {
 586		/*
  587		 * f0 = 0.0, f1 = 1.0. These registers are constant and are thus
  588		 * not saved; we must generate their spilled form on the fly.
 589		 */
 590		switch(regnum) {
 591		case 0:
 592			float_spill_f0(fpval);
 593			break;
 594		case 1:
 595			float_spill_f1(fpval);
 596			break;
 597		default:
 598			/*
 599			 * pt_regs or switch_stack ?
 600			 */
 601			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
 602						 : (unsigned long)regs;
 603
 604			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
 605			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));
 606
 607			addr  += FR_OFFS(regnum);
 608			*fpval = *(struct ia64_fpreg *)addr;
 609		}
 610	}
 611}
 612
 613
 614static void
 615getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
 616{
 617	struct switch_stack *sw = (struct switch_stack *) regs - 1;
 618	unsigned long addr, *unat;
 619
 620	if (regnum >= IA64_FIRST_STACKED_GR) {
 621		get_rse_reg(regs, regnum, val, nat);
 622		return;
 623	}
 624
 625	/*
  626	 * take care of r0 (read-only, always evaluates to 0)
 627	 */
 628	if (regnum == 0) {
 629		*val = 0;
 630		if (nat)
 631			*nat = 0;
 632		return;
 633	}
 634
 635	/*
 636	 * Now look at registers in [0-31] range and init correct UNAT
 637	 */
 638	if (GR_IN_SW(regnum)) {
 639		addr = (unsigned long)sw;
 640		unat = &sw->ar_unat;
 641	} else {
 642		addr = (unsigned long)regs;
 643		unat = &sw->caller_unat;
 644	}
 645
 646	DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
 647
 648	addr += GR_OFFS(regnum);
 649
 650	*val  = *(unsigned long *)addr;
 651
 652	/*
 653	 * do it only when requested
 654	 */
 655	if (nat)
 656		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
 657}
 658
 659static void
 660emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
 661{
 662	/*
 663	 * IMPORTANT:
 664	 * Given the way we handle unaligned speculative loads, we should
 665	 * not get to this point in the code but we keep this sanity check,
 666	 * just in case.
 667	 */
 668	if (ld.x6_op == 1 || ld.x6_op == 3) {
 669		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
 670		die_if_kernel("unaligned reference on speculative load with register update\n",
 671			      regs, 30);
 672	}
 673
 674
 675	/*
 676	 * at this point, we know that the base register to update is valid i.e.,
 677	 * it's not r0
 678	 */
 679	if (type == UPD_IMMEDIATE) {
 680		unsigned long imm;
 681
 682		/*
 683		 * Load +Imm: ldXZ r1=[r3],imm(9)
 684		 *
 685		 *
 686		 * form imm9: [13:19] contain the first 7 bits
 687		 */
 688		imm = ld.x << 7 | ld.imm;
 689
 690		/*
 691		 * sign extend (1+8bits) if m set
 692		 */
 693		if (ld.m) imm |= SIGN_EXT9;
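		/*
		 * Example: with m=1, x=1 and imm=0x78, the code above forms
		 * 0x80|0x78 = 0xf8 and then sign-extends it to
		 * 0xfffffffffffffff8, i.e. an update of -8.
		 */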
 694
 695		/*
 696		 * ifa == r3 and we know that the NaT bit on r3 was clear so
 697		 * we can directly use ifa.
 698		 */
 699		ifa += imm;
 700
 701		setreg(ld.r3, ifa, 0, regs);
 702
 703		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
 704
 705	} else if (ld.m) {
 706		unsigned long r2;
 707		int nat_r2;
 708
 709		/*
 710		 * Load +Reg Opcode: ldXZ r1=[r3],r2
 711		 *
 712		 * Note: that we update r3 even in the case of ldfX.a
 713		 * (where the load does not happen)
 714		 *
 715		 * The way the load algorithm works, we know that r3 does not
 716		 * have its NaT bit set (would have gotten NaT consumption
 717		 * before getting the unaligned fault). So we can use ifa
 718		 * which equals r3 at this point.
 719		 *
 720		 * IMPORTANT:
 721		 * The above statement holds ONLY because we know that we
 722		 * never reach this code when trying to do a ldX.s.
  723		 * If we ever make it to here on an ldfX.s, the assumption above no longer holds.
  724		 */
 725		getreg(ld.imm, &r2, &nat_r2, regs);
 726
 727		ifa += r2;
 728
 729		/*
 730		 * propagate Nat r2 -> r3
 731		 */
 732		setreg(ld.r3, ifa, nat_r2, regs);
 733
 734		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
 735	}
 736}
 737
 738
 739static int
 740emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 741{
 742	unsigned int len = 1 << ld.x6_sz;
 743	unsigned long val = 0;
 744
 745	/*
 746	 * r0, as target, doesn't need to be checked because Illegal Instruction
 747	 * faults have higher priority than unaligned faults.
 748	 *
 749	 * r0 cannot be found as the base as it would never generate an
 750	 * unaligned reference.
 751	 */
 752
 753	/*
  754	 * ldX.a: we will emulate the load and also invalidate the ALAT entry.
  755	 * See the comment below for an explanation of how we handle ldX.a.
 756	 */
 757
 758	if (len != 2 && len != 4 && len != 8) {
 759		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 760		return -1;
 761	}
 762	/* this assumes little-endian byte-order: */
 763	if (copy_from_user(&val, (void __user *) ifa, len))
 764		return -1;
 765	setreg(ld.r1, val, 0, regs);
 766
 767	/*
 768	 * check for updates on any kind of loads
 769	 */
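	/*
	 * ld.op == 0x5 corresponds to the +Imm encodings above (the 0x0aX
	 * opcodes have bits [40:37] == 5), while the +Reg form keeps op == 4
	 * and is recognized by ld.m being set.
	 */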
 770	if (ld.op == 0x5 || ld.m)
 771		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
 772
 773	/*
 774	 * handling of various loads (based on EAS2.4):
 775	 *
 776	 * ldX.acq (ordered load):
 777	 *	- acquire semantics would have been used, so force fence instead.
 778	 *
 779	 * ldX.c.clr (check load and clear):
 780	 *	- if we get to this handler, it's because the entry was not in the ALAT.
 781	 *	  Therefore the operation reverts to a normal load
 782	 *
 783	 * ldX.c.nc (check load no clear):
 784	 *	- same as previous one
 785	 *
 786	 * ldX.c.clr.acq (ordered check load and clear):
 787	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
 788	 *	  we use the fence semantics which is stronger and thus ensures correctness.
 789	 *
 790	 * ldX.a (advanced load):
 791	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 792	 *	  address doesn't match requested size alignment. This means that we would
 793	 *	  possibly need more than one load to get the result.
 794	 *
 795	 *	  The load part can be handled just like a normal load, however the difficult
 796	 *	  part is to get the right thing into the ALAT. The critical piece of information
 797	 *	  in the base address of the load & size. To do that, a ld.a must be executed,
 798	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
 799	 *	  if we use the same target register, we will be okay for the check.a instruction.
 800	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
  801	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
 802	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
 803	 *	  enough, take the following example:
 804	 *		r3=3
 805	 *		ld4.a r1=[r3]
 806	 *
 807	 *	  Could be emulated by doing:
 808	 *		ld1.a r1=[r3],1
 809	 *		store to temporary;
 810	 *		ld1.a r1=[r3],1
 811	 *		store & shift to temporary;
 812	 *		ld1.a r1=[r3],1
 813	 *		store & shift to temporary;
 814	 *		ld1.a r1=[r3]
 815	 *		store & shift to temporary;
 816	 *		r1=temporary
 817	 *
  818	 *	  So in this case, you would get the right value in r1 but the wrong info in
 819	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
 820	 *	  but you would still get the size wrong.  To get the size right, one needs to
  821	 *	  execute exactly the same kind of load. You could do it from an aligned
 822	 *	  temporary location, but you would get the address wrong.
 823	 *
 824	 *	  So no matter what, it is not possible to emulate an advanced load
 825	 *	  correctly. But is that really critical ?
 826	 *
  827	 *	  We will always convert ld.a into a normal load with the ALAT invalidated.  This
  828	 *	  enables the compiler to do optimizations where certain code paths after ld.a
  829	 *	  are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
 830	 *
 831	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
 832	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
 833	 *	  entry found in ALAT), and that's perfectly ok because:
 834	 *
 835	 *		- ld.c.*, if the entry is not present a  normal load is executed
 836	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
 837	 *
 838	 *	  In either case, the load can be potentially retried in another form.
 839	 *
 840	 *	  ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
 841	 *	  up a stale entry later). The register base update MUST also be performed.
 842	 */
 843
 844	/*
 845	 * when the load has the .acq completer then
 846	 * use ordering fence.
 847	 */
 848	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
 849		mb();
 850
 851	/*
 852	 * invalidate ALAT entry in case of advanced load
 853	 */
 854	if (ld.x6_op == 0x2)
 855		invala_gr(ld.r1);
 856
 857	return 0;
 858}
 859
 860static int
 861emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 862{
 863	unsigned long r2;
 864	unsigned int len = 1 << ld.x6_sz;
 865
 866	/*
  867	 * if we get to this handler, the NaT bits on both r3 and r2 have already
  868	 * been checked, so we don't need to do it again.
 869	 *
 870	 * extract the value to be stored
 871	 */
 872	getreg(ld.imm, &r2, NULL, regs);
 873
 874	/*
 875	 * we rely on the macros in unaligned.h for now i.e.,
 876	 * we let the compiler figure out how to read memory gracefully.
 877	 *
  878	 * We need this switch/case because of the way the inline functions
  879	 * work. The code is optimized by the compiler and looks like
  880	 * a single switch/case.
 881	 */
 882	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
 883
 884	if (len != 2 && len != 4 && len != 8) {
 885		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
 886		return -1;
 887	}
 888
 889	/* this assumes little-endian byte-order: */
 890	if (copy_to_user((void __user *) ifa, &r2, len))
 891		return -1;
 892
 893	/*
 894	 * stX [r3]=r2,imm(9)
 895	 *
 896	 * NOTE:
 897	 * ld.r3 can never be r0, because r0 would not generate an
 898	 * unaligned access.
 899	 */
 900	if (ld.op == 0x5) {
 901		unsigned long imm;
 902
 903		/*
  904		 * form imm9: [12:6] contain the first 7 bits
 905		 */
 906		imm = ld.x << 7 | ld.r1;
 907		/*
 908		 * sign extend (8bits) if m set
 909		 */
 910		if (ld.m) imm |= SIGN_EXT9;
 911		/*
 912		 * ifa == r3 (NaT is necessarily cleared)
 913		 */
 914		ifa += imm;
 915
 916		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
 917
 918		setreg(ld.r3, ifa, 0, regs);
 919	}
 920	/*
 921	 * we don't have alat_invalidate_multiple() so we need
 922	 * to do the complete flush :-<<
 923	 */
 924	ia64_invala();
 925
 926	/*
 927	 * stX.rel: use fence instead of release
 928	 */
 929	if (ld.x6_op == 0xd)
 930		mb();
 931
 932	return 0;
 933}
 934
 935/*
 936 * floating point operations sizes in bytes
 937 */
 938static const unsigned char float_fsz[4]={
 939	10, /* extended precision (e) */
 940	8,  /* integer (8)            */
 941	4,  /* single precision (s)   */
 942	8   /* double precision (d)   */
 943};
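/*
 * float_fsz[] is indexed by ld.x6_sz and matches the conversion helpers
 * below: 0 -> extended (ldfe/stfe, 10 bytes), 1 -> integer (ldf8/stf8),
 * 2 -> single (ldfs/stfs), 3 -> double (ldfd/stfd).
 */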
 944
 945static inline void
 946mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 947{
 948	ia64_ldfe(6, init);
 949	ia64_stop();
 950	ia64_stf_spill(final, 6);
 951}
 952
 953static inline void
 954mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 955{
 956	ia64_ldf8(6, init);
 957	ia64_stop();
 958	ia64_stf_spill(final, 6);
 959}
 960
 961static inline void
 962mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 963{
 964	ia64_ldfs(6, init);
 965	ia64_stop();
 966	ia64_stf_spill(final, 6);
 967}
 968
 969static inline void
 970mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
 971{
 972	ia64_ldfd(6, init);
 973	ia64_stop();
 974	ia64_stf_spill(final, 6);
 975}
 976
 977static inline void
 978float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
 979{
 980	ia64_ldf_fill(6, init);
 981	ia64_stop();
 982	ia64_stfe(final, 6);
 983}
 984
 985static inline void
 986float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
 987{
 988	ia64_ldf_fill(6, init);
 989	ia64_stop();
 990	ia64_stf8(final, 6);
 991}
 992
 993static inline void
 994float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
 995{
 996	ia64_ldf_fill(6, init);
 997	ia64_stop();
 998	ia64_stfs(final, 6);
 999}
1000
1001static inline void
1002float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1003{
1004	ia64_ldf_fill(6, init);
1005	ia64_stop();
1006	ia64_stfd(final, 6);
1007}
1008
1009static int
1010emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1011{
1012	struct ia64_fpreg fpr_init[2];
1013	struct ia64_fpreg fpr_final[2];
1014	unsigned long len = float_fsz[ld.x6_sz];
1015
1016	/*
1017	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1018	 * higher priority than unaligned faults.
1019	 *
1020	 * r0 cannot be found as the base as it would never generate an unaligned
1021	 * reference.
1022	 */
1023
1024	/*
1025	 * make sure we get clean buffers
1026	 */
1027	memset(&fpr_init, 0, sizeof(fpr_init));
1028	memset(&fpr_final, 0, sizeof(fpr_final));
1029
1030	/*
1031	 * ldfpX.a: we don't try to emulate anything but we must
1032	 * invalidate the ALAT entry and execute updates, if any.
1033	 */
1034	if (ld.x6_op != 0x2) {
1035		/*
1036		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
1037		 * instruction:
1038		 */
1039		if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1040		    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1041			return -1;
1042
1043		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1044		DDUMP("frp_init =", &fpr_init, 2*len);
1045		/*
1046		 * XXX fixme
1047		 * Could optimize inlines by using ldfpX & 2 spills
1048		 */
1049		switch( ld.x6_sz ) {
1050			case 0:
1051				mem2float_extended(&fpr_init[0], &fpr_final[0]);
1052				mem2float_extended(&fpr_init[1], &fpr_final[1]);
1053				break;
1054			case 1:
1055				mem2float_integer(&fpr_init[0], &fpr_final[0]);
1056				mem2float_integer(&fpr_init[1], &fpr_final[1]);
1057				break;
1058			case 2:
1059				mem2float_single(&fpr_init[0], &fpr_final[0]);
1060				mem2float_single(&fpr_init[1], &fpr_final[1]);
1061				break;
1062			case 3:
1063				mem2float_double(&fpr_init[0], &fpr_final[0]);
1064				mem2float_double(&fpr_init[1], &fpr_final[1]);
1065				break;
1066		}
1067		DDUMP("fpr_final =", &fpr_final, 2*len);
1068		/*
1069		 * XXX fixme
1070		 *
1071		 * A possible optimization would be to drop fpr_final and directly
1072		 * use the storage from the saved context i.e., the actual final
1073		 * destination (pt_regs, switch_stack or thread structure).
1074		 */
1075		setfpreg(ld.r1, &fpr_final[0], regs);
1076		setfpreg(ld.imm, &fpr_final[1], regs);
1077	}
1078
1079	/*
1080	 * Check for updates: only immediate updates are available for this
1081	 * instruction.
1082	 */
1083	if (ld.m) {
1084		/*
1085		 * the immediate is implicit given the ldsz of the operation:
1086		 * single: 8 (2x4) and for  all others it's 16 (2x8)
1087		 */
1088		ifa += len<<1;
1089
1090		/*
1091		 * IMPORTANT:
1092		 * the fact that we force the NaT of r3 to zero is ONLY valid
1093		 * as long as we don't come here with a ldfpX.s.
1094		 * For this reason we keep this sanity check
1095		 */
1096		if (ld.x6_op == 1 || ld.x6_op == 3)
1097			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1098			       __FUNCTION__);
1099
1100		setreg(ld.r3, ifa, 0, regs);
1101	}
1102
1103	/*
1104	 * Invalidate ALAT entries, if any, for both registers.
1105	 */
1106	if (ld.x6_op == 0x2) {
1107		invala_fr(ld.r1);
1108		invala_fr(ld.imm);
1109	}
1110	return 0;
1111}
1112
1113
1114static int
1115emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1116{
1117	struct ia64_fpreg fpr_init;
1118	struct ia64_fpreg fpr_final;
1119	unsigned long len = float_fsz[ld.x6_sz];
1120
1121	/*
1122	 * fr0 & fr1 don't need to be checked because Illegal Instruction
1123	 * faults have higher priority than unaligned faults.
1124	 *
1125	 * r0 cannot be found as the base as it would never generate an
1126	 * unaligned reference.
1127	 */
1128
1129	/*
1130	 * make sure we get clean buffers
1131	 */
1132	memset(&fpr_init,0, sizeof(fpr_init));
1133	memset(&fpr_final,0, sizeof(fpr_final));
1134
1135	/*
1136	 * ldfX.a we don't try to emulate anything but we must
1137	 * invalidate the ALAT entry.
1138	 * See comments in ldX for descriptions on how the various loads are handled.
1139	 */
1140	if (ld.x6_op != 0x2) {
1141		if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1142			return -1;
1143
1144		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1145		DDUMP("fpr_init =", &fpr_init, len);
1146		/*
1147		 * we only do something for x6_op={0,8,9}
1148		 */
1149		switch( ld.x6_sz ) {
1150			case 0:
1151				mem2float_extended(&fpr_init, &fpr_final);
1152				break;
1153			case 1:
1154				mem2float_integer(&fpr_init, &fpr_final);
1155				break;
1156			case 2:
1157				mem2float_single(&fpr_init, &fpr_final);
1158				break;
1159			case 3:
1160				mem2float_double(&fpr_init, &fpr_final);
1161				break;
1162		}
1163		DDUMP("fpr_final =", &fpr_final, len);
1164		/*
1165		 * XXX fixme
1166		 *
1167		 * A possible optimization would be to drop fpr_final and directly
1168		 * use the storage from the saved context i.e., the actual final
1169		 * destination (pt_regs, switch_stack or thread structure).
1170		 */
1171		setfpreg(ld.r1, &fpr_final, regs);
1172	}
1173
1174	/*
1175	 * check for updates on any loads
1176	 */
1177	if (ld.op == 0x7 || ld.m)
1178		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1179
1180	/*
1181	 * invalidate ALAT entry in case of advanced floating point loads
1182	 */
1183	if (ld.x6_op == 0x2)
1184		invala_fr(ld.r1);
1185
1186	return 0;
1187}
1188
1189
1190static int
1191emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1192{
1193	struct ia64_fpreg fpr_init;
1194	struct ia64_fpreg fpr_final;
1195	unsigned long len = float_fsz[ld.x6_sz];
1196
1197	/*
1198	 * make sure we get clean buffers
1199	 */
1200	memset(&fpr_init,0, sizeof(fpr_init));
1201	memset(&fpr_final,0, sizeof(fpr_final));
1202
1203	/*
 1204	 * if we get to this handler, the NaT bits on both r3 and r2 have already
 1205	 * been checked, so we don't need to do it again.
1206	 *
1207	 * extract the value to be stored
1208	 */
1209	getfpreg(ld.imm, &fpr_init, regs);
1210	/*
1211	 * during this step, we extract the spilled registers from the saved
1212	 * context i.e., we refill. Then we store (no spill) to temporary
1213	 * aligned location
1214	 */
1215	switch( ld.x6_sz ) {
1216		case 0:
1217			float2mem_extended(&fpr_init, &fpr_final);
1218			break;
1219		case 1:
1220			float2mem_integer(&fpr_init, &fpr_final);
1221			break;
1222		case 2:
1223			float2mem_single(&fpr_init, &fpr_final);
1224			break;
1225		case 3:
1226			float2mem_double(&fpr_init, &fpr_final);
1227			break;
1228	}
1229	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1230	DDUMP("fpr_init =", &fpr_init, len);
1231	DDUMP("fpr_final =", &fpr_final, len);
1232
1233	if (copy_to_user((void __user *) ifa, &fpr_final, len))
1234		return -1;
1235
1236	/*
1237	 * stfX [r3]=r2,imm(9)
1238	 *
1239	 * NOTE:
1240	 * ld.r3 can never be r0, because r0 would not generate an
1241	 * unaligned access.
1242	 */
1243	if (ld.op == 0x7) {
1244		unsigned long imm;
1245
1246		/*
1247		 * form imm9: [12:6] contain first 7bits
 1248		 * form imm9: [12:6] contain the first 7 bits
1249		imm = ld.x << 7 | ld.r1;
1250		/*
1251		 * sign extend (8bits) if m set
1252		 */
1253		if (ld.m)
1254			imm |= SIGN_EXT9;
1255		/*
1256		 * ifa == r3 (NaT is necessarily cleared)
1257		 */
1258		ifa += imm;
1259
1260		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1261
1262		setreg(ld.r3, ifa, 0, regs);
1263	}
1264	/*
1265	 * we don't have alat_invalidate_multiple() so we need
1266	 * to do the complete flush :-<<
1267	 */
1268	ia64_invala();
1269
1270	return 0;
1271}
1272
1273/*
1274 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1275 * eventually fix the program.  However, we don't want to do that for every access so we
1276 * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
1277 * either...
1278 */
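/*
 * Net effect: a burst of up to four messages gets logged, after which
 * further messages are suppressed until 5 seconds have passed since the
 * last one that was let through.
 */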
1279static int
1280within_logging_rate_limit (void)
1281{
1282	static unsigned long count, last_time;
1283
1284	if (jiffies - last_time > 5*HZ)
1285		count = 0;
1286	if (++count < 5) {
1287		last_time = jiffies;
1288		return 1;
1289	}
1290	return 0;
1291
1292}
1293
1294void
1295ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1296{
1297	struct ia64_psr *ipsr = ia64_psr(regs);
1298	mm_segment_t old_fs = get_fs();
1299	unsigned long bundle[2];
1300	unsigned long opcode;
1301	struct siginfo si;
1302	const struct exception_table_entry *eh = NULL;
1303	union {
1304		unsigned long l;
1305		load_store_t insn;
1306	} u;
1307	int ret = -1;
1308
1309	if (ia64_psr(regs)->be) {
1310		/* we don't support big-endian accesses */
1311		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1312		goto force_sigbus;
1313	}
1314
1315	/*
1316	 * Treat kernel accesses for which there is an exception handler entry the same as
1317	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
 1318	 * handler into reading arbitrary kernel addresses...
1319	 */
1320	if (!user_mode(regs))
1321		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1322	if (user_mode(regs) || eh) {
1323		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1324			goto force_sigbus;
1325
1326		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1327		    && within_logging_rate_limit())
1328		{
1329			char buf[200];	/* comm[] is at most 16 bytes... */
1330			size_t len;
1331
1332			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1333				      "ip=0x%016lx\n\r", current->comm, current->pid,
1334				      ifa, regs->cr_iip + ipsr->ri);
1335			/*
1336			 * Don't call tty_write_message() if we're in the kernel; we might
1337			 * be holding locks...
1338			 */
1339			if (user_mode(regs))
1340				tty_write_message(current->signal->tty, buf);
1341			buf[len-1] = '\0';	/* drop '\r' */
1342			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
1343		}
1344	} else {
1345		if (within_logging_rate_limit())
1346			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1347			       ifa, regs->cr_iip + ipsr->ri);
1348		set_fs(KERNEL_DS);
1349	}
1350
1351	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1352	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1353
1354	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1355		goto failure;
1356
1357	/*
1358	 * extract the instruction from the bundle given the slot number
1359	 */
1360	switch (ipsr->ri) {
1361	      case 0: u.l = (bundle[0] >>  5); break;
1362	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1363	      case 2: u.l = (bundle[1] >> 23); break;
1364	}
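	/*
	 * An IA-64 bundle is 128 bits: a 5-bit template followed by three
	 * 41-bit slots.  Slot 0 sits at bits [45:5], slot 1 straddles the two
	 * 64-bit words at bits [86:46], and slot 2 occupies bits [127:87],
	 * which is what the shifts above extract.
	 */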
1365	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1366
1367	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1368	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1369	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1370
1371	/*
1372	 * IMPORTANT:
 1373	 * Notice that the switch statement DOES NOT cover all possible instructions
 1374	 * that DO generate unaligned references. This is done on purpose because for some
 1375	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
 1376	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
 1377	 * the program will get a signal and die:
1378	 *
1379	 *	load/store:
1380	 *		- ldX.spill
1381	 *		- stX.spill
1382	 *	Reason: RNATs are based on addresses
1383	 *		- ld16
1384	 *		- st16
1385	 *	Reason: ld16 and st16 are supposed to occur in a single
1386	 *		memory op
1387	 *
1388	 *	synchronization:
1389	 *		- cmpxchg
1390	 *		- fetchadd
1391	 *		- xchg
1392	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
1393	 *	        instructions.
1394	 *
1395	 *	speculative loads:
1396	 *		- ldX.sZ
1397	 *	Reason: side effects, code must be ready to deal with failure so simpler
1398	 *		to let the load fail.
1399	 * ---------------------------------------------------------------------------------
1400	 * XXX fixme
1401	 *
1402	 * I would like to get rid of this switch case and do something
1403	 * more elegant.
1404	 */
1405	switch (opcode) {
1406	      case LDS_OP:
1407	      case LDSA_OP:
1408		if (u.insn.x)
1409			/* oops, really a semaphore op (cmpxchg, etc) */
1410			goto failure;
1411		/* no break */
1412	      case LDS_IMM_OP:
1413	      case LDSA_IMM_OP:
1414	      case LDFS_OP:
1415	      case LDFSA_OP:
1416	      case LDFS_IMM_OP:
1417		/*
1418		 * The instruction will be retried with deferred exceptions turned on, and
 1419		 * we should get the NaT bit installed.
1420		 *
1421		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1422		 * are actually executed even though the operation failed. So we don't
1423		 * need to take care of this.
1424		 */
1425		DPRINT("forcing PSR_ED\n");
1426		regs->cr_ipsr |= IA64_PSR_ED;
1427		goto done;
1428
1429	      case LD_OP:
1430	      case LDA_OP:
1431	      case LDBIAS_OP:
1432	      case LDACQ_OP:
1433	      case LDCCLR_OP:
1434	      case LDCNC_OP:
1435	      case LDCCLRACQ_OP:
1436		if (u.insn.x)
1437			/* oops, really a semaphore op (cmpxchg, etc) */
1438			goto failure;
1439		/* no break */
1440	      case LD_IMM_OP:
1441	      case LDA_IMM_OP:
1442	      case LDBIAS_IMM_OP:
1443	      case LDACQ_IMM_OP:
1444	      case LDCCLR_IMM_OP:
1445	      case LDCNC_IMM_OP:
1446	      case LDCCLRACQ_IMM_OP:
1447		ret = emulate_load_int(ifa, u.insn, regs);
1448		break;
1449
1450	      case ST_OP:
1451	      case STREL_OP:
1452		if (u.insn.x)
1453			/* oops, really a semaphore op (cmpxchg, etc) */
1454			goto failure;
1455		/* no break */
1456	      case ST_IMM_OP:
1457	      case STREL_IMM_OP:
1458		ret = emulate_store_int(ifa, u.insn, regs);
1459		break;
1460
1461	      case LDF_OP:
1462	      case LDFA_OP:
1463	      case LDFCCLR_OP:
1464	      case LDFCNC_OP:
1465	      case LDF_IMM_OP:
1466	      case LDFA_IMM_OP:
1467	      case LDFCCLR_IMM_OP:
1468	      case LDFCNC_IMM_OP:
1469		if (u.insn.x)
1470			ret = emulate_load_floatpair(ifa, u.insn, regs);
1471		else
1472			ret = emulate_load_float(ifa, u.insn, regs);
1473		break;
1474
1475	      case STF_OP:
1476	      case STF_IMM_OP:
1477		ret = emulate_store_float(ifa, u.insn, regs);
1478		break;
1479
1480	      default:
1481		goto failure;
1482	}
1483	DPRINT("ret=%d\n", ret);
1484	if (ret)
1485		goto failure;
1486
1487	if (ipsr->ri == 2)
1488		/*
1489		 * given today's architecture this case is not likely to happen because a
1490		 * memory access instruction (M) can never be in the last slot of a
1491		 * bundle. But let's keep it for now.
1492		 */
1493		regs->cr_iip += 16;
1494	ipsr->ri = (ipsr->ri + 1) & 0x3;
1495
1496	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1497  done:
1498	set_fs(old_fs);		/* restore original address limit */
1499	return;
1500
1501  failure:
1502	/* something went wrong... */
1503	if (!user_mode(regs)) {
1504		if (eh) {
1505			ia64_handle_exception(regs, eh);
1506			goto done;
1507		}
1508		die_if_kernel("error during unaligned kernel access\n", regs, ret);
1509		/* NOT_REACHED */
1510	}
1511  force_sigbus:
1512	si.si_signo = SIGBUS;
1513	si.si_errno = 0;
1514	si.si_code = BUS_ADRALN;
1515	si.si_addr = (void __user *) ifa;
1516	si.si_flags = 0;
1517	si.si_isr = 0;
1518	si.si_imm = 0;
1519	force_sig_info(SIGBUS, &si, current);
1520	goto done;
1521}