
/arch/x86/kernel/entry_32.S

https://bitbucket.org/thekraven/iscream_thunderc-2.6.35
   1/*
   2 *
   3 *  Copyright (C) 1991, 1992  Linus Torvalds
   4 */
   5
   6/*
   7 * entry.S contains the system-call and fault low-level handling routines.
   8 * This also contains the timer-interrupt handler, as well as all interrupts
   9 * and faults that can result in a task-switch.
  10 *
  11 * NOTE: This code handles signal-recognition, which happens every time
  12 * after a timer-interrupt and after each system call.
  13 *
  14 * I changed all the .align's to 4 (16 byte alignment), as that's faster
  15 * on a 486.
  16 *
  17 * Stack layout in 'syscall_exit':
  18 * 	ptrace needs to have all regs on the stack.
  19 *	if the order here is changed, it needs to be
  20 *	updated in fork.c:copy_process, signal.c:do_signal,
  21 *	ptrace.c and ptrace.h
  22 *
  23 *	 0(%esp) - %ebx
  24 *	 4(%esp) - %ecx
  25 *	 8(%esp) - %edx
  26 *	 C(%esp) - %esi
  27 *	10(%esp) - %edi
  28 *	14(%esp) - %ebp
  29 *	18(%esp) - %eax
  30 *	1C(%esp) - %ds
  31 *	20(%esp) - %es
  32 *	24(%esp) - %fs
  33 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
  34 *	2C(%esp) - orig_eax
  35 *	30(%esp) - %eip
  36 *	34(%esp) - %cs
  37 *	38(%esp) - %eflags
  38 *	3C(%esp) - %oldesp
  39 *	40(%esp) - %oldss
  40 *
  41 * "current" is in register %ebx during any slow entries.
  42 */
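/*
 * For reference, the layout above is simply struct pt_regs as seen from
 * assembly, and the PT_* offsets used throughout this file are generated
 * from it by asm-offsets.  A sketch of the 32-bit structure (see
 * arch/x86/include/asm/ptrace.h for the authoritative definition):
 *
 *	struct pt_regs {
 *		unsigned long bx, cx, dx, si, di, bp, ax;
 *		unsigned long ds, es, fs, gs;
 *		unsigned long orig_ax;
 *		unsigned long ip, cs, flags, sp, ss;
 *	};
 */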
  43
  44#include <linux/linkage.h>
  45#include <asm/thread_info.h>
  46#include <asm/irqflags.h>
  47#include <asm/errno.h>
  48#include <asm/segment.h>
  49#include <asm/smp.h>
  50#include <asm/page_types.h>
  51#include <asm/percpu.h>
  52#include <asm/dwarf2.h>
  53#include <asm/processor-flags.h>
  54#include <asm/ftrace.h>
  55#include <asm/irq_vectors.h>
  56#include <asm/cpufeature.h>
  57
  58/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  59#include <linux/elf-em.h>
  60#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
  61#define __AUDIT_ARCH_LE	   0x40000000
  62
  63#ifndef CONFIG_AUDITSYSCALL
  64#define sysenter_audit	syscall_trace_entry
  65#define sysexit_audit	syscall_exit_work
  66#endif
  67
  68/*
  69 * We use macros for low-level operations which need to be overridden
  70 * for paravirtualization.  The following will never clobber any registers:
  71 *   INTERRUPT_RETURN (aka. "iret")
  72 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  73 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  74 *
  75 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  76 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  77 * Allowing a register to be clobbered can shrink the paravirt replacement
  78 * enough to patch inline, increasing performance.
  79 */
  80
  81#define nr_syscalls ((syscall_table_size)/4)
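/*
 * syscall_table_size is defined at the bottom of this file as the size in
 * bytes of the .long table pulled in from syscall_table_32.S, so
 * nr_syscalls is simply the number of entries in sys_call_table.
 */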
  82
  83#ifdef CONFIG_PREEMPT
  84#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
  85#else
  86#define preempt_stop(clobbers)
  87#define resume_kernel		restore_all
  88#endif
  89
  90.macro TRACE_IRQS_IRET
  91#ifdef CONFIG_TRACE_IRQFLAGS
  92	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)     # interrupts off?
  93	jz 1f
  94	TRACE_IRQS_ON
  951:
  96#endif
  97.endm
  98
  99#ifdef CONFIG_VM86
 100#define resume_userspace_sig	check_userspace
 101#else
 102#define resume_userspace_sig	resume_userspace
 103#endif
 104
 105/*
 106 * User gs save/restore
 107 *
 108 * %gs is used for userland TLS and kernel only uses it for stack
 109 * canary which is required to be at %gs:20 by gcc.  Read the comment
 110 * at the top of stackprotector.h for more info.
 111 *
 112 * Local labels 98 and 99 are used.
 113 */
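/*
 * For reference: with -fstack-protector, gcc emits canary loads of the
 * form "movl %gs:20, %reg" in function prologues, which is why the canary
 * must live at offset 20 from whatever base %gs carries inside the kernel
 * (see <asm/stackprotector.h>).
 */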
 114#ifdef CONFIG_X86_32_LAZY_GS
 115
 116 /* unfortunately push/pop can't be a no-op */
 117.macro PUSH_GS
 118	pushl $0
 119	CFI_ADJUST_CFA_OFFSET 4
 120.endm
 121.macro POP_GS pop=0
 122	addl $(4 + \pop), %esp
 123	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
 124.endm
 125.macro POP_GS_EX
 126.endm
 127
 128 /* all the rest are no-op */
 129.macro PTGS_TO_GS
 130.endm
 131.macro PTGS_TO_GS_EX
 132.endm
 133.macro GS_TO_REG reg
 134.endm
 135.macro REG_TO_PTGS reg
 136.endm
 137.macro SET_KERNEL_GS reg
 138.endm
 139
 140#else	/* CONFIG_X86_32_LAZY_GS */
 141
 142.macro PUSH_GS
 143	pushl %gs
 144	CFI_ADJUST_CFA_OFFSET 4
 145	/*CFI_REL_OFFSET gs, 0*/
 146.endm
 147
 148.macro POP_GS pop=0
 14998:	popl %gs
 150	CFI_ADJUST_CFA_OFFSET -4
 151	/*CFI_RESTORE gs*/
 152  .if \pop <> 0
 153	add $\pop, %esp
 154	CFI_ADJUST_CFA_OFFSET -\pop
 155  .endif
 156.endm
 157.macro POP_GS_EX
 158.pushsection .fixup, "ax"
 15999:	movl $0, (%esp)
 160	jmp 98b
 161.section __ex_table, "a"
 162	.align 4
 163	.long 98b, 99b
 164.popsection
 165.endm
 166
 167.macro PTGS_TO_GS
 16898:	mov PT_GS(%esp), %gs
 169.endm
 170.macro PTGS_TO_GS_EX
 171.pushsection .fixup, "ax"
 17299:	movl $0, PT_GS(%esp)
 173	jmp 98b
 174.section __ex_table, "a"
 175	.align 4
 176	.long 98b, 99b
 177.popsection
 178.endm
 179
 180.macro GS_TO_REG reg
 181	movl %gs, \reg
 182	/*CFI_REGISTER gs, \reg*/
 183.endm
 184.macro REG_TO_PTGS reg
 185	movl \reg, PT_GS(%esp)
 186	/*CFI_REL_OFFSET gs, PT_GS*/
 187.endm
 188.macro SET_KERNEL_GS reg
 189	movl $(__KERNEL_STACK_CANARY), \reg
 190	movl \reg, %gs
 191.endm
 192
 193#endif	/* CONFIG_X86_32_LAZY_GS */
 194
 195.macro SAVE_ALL
 196	cld
 197	PUSH_GS
 198	pushl %fs
 199	CFI_ADJUST_CFA_OFFSET 4
 200	/*CFI_REL_OFFSET fs, 0;*/
 201	pushl %es
 202	CFI_ADJUST_CFA_OFFSET 4
 203	/*CFI_REL_OFFSET es, 0;*/
 204	pushl %ds
 205	CFI_ADJUST_CFA_OFFSET 4
 206	/*CFI_REL_OFFSET ds, 0;*/
 207	pushl %eax
 208	CFI_ADJUST_CFA_OFFSET 4
 209	CFI_REL_OFFSET eax, 0
 210	pushl %ebp
 211	CFI_ADJUST_CFA_OFFSET 4
 212	CFI_REL_OFFSET ebp, 0
 213	pushl %edi
 214	CFI_ADJUST_CFA_OFFSET 4
 215	CFI_REL_OFFSET edi, 0
 216	pushl %esi
 217	CFI_ADJUST_CFA_OFFSET 4
 218	CFI_REL_OFFSET esi, 0
 219	pushl %edx
 220	CFI_ADJUST_CFA_OFFSET 4
 221	CFI_REL_OFFSET edx, 0
 222	pushl %ecx
 223	CFI_ADJUST_CFA_OFFSET 4
 224	CFI_REL_OFFSET ecx, 0
 225	pushl %ebx
 226	CFI_ADJUST_CFA_OFFSET 4
 227	CFI_REL_OFFSET ebx, 0
 228	movl $(__USER_DS), %edx
 229	movl %edx, %ds
 230	movl %edx, %es
 231	movl $(__KERNEL_PERCPU), %edx
 232	movl %edx, %fs
 233	SET_KERNEL_GS %edx
 234.endm
 235
 236.macro RESTORE_INT_REGS
 237	popl %ebx
 238	CFI_ADJUST_CFA_OFFSET -4
 239	CFI_RESTORE ebx
 240	popl %ecx
 241	CFI_ADJUST_CFA_OFFSET -4
 242	CFI_RESTORE ecx
 243	popl %edx
 244	CFI_ADJUST_CFA_OFFSET -4
 245	CFI_RESTORE edx
 246	popl %esi
 247	CFI_ADJUST_CFA_OFFSET -4
 248	CFI_RESTORE esi
 249	popl %edi
 250	CFI_ADJUST_CFA_OFFSET -4
 251	CFI_RESTORE edi
 252	popl %ebp
 253	CFI_ADJUST_CFA_OFFSET -4
 254	CFI_RESTORE ebp
 255	popl %eax
 256	CFI_ADJUST_CFA_OFFSET -4
 257	CFI_RESTORE eax
 258.endm
 259
 260.macro RESTORE_REGS pop=0
 261	RESTORE_INT_REGS
 2621:	popl %ds
 263	CFI_ADJUST_CFA_OFFSET -4
 264	/*CFI_RESTORE ds;*/
 2652:	popl %es
 266	CFI_ADJUST_CFA_OFFSET -4
 267	/*CFI_RESTORE es;*/
 2683:	popl %fs
 269	CFI_ADJUST_CFA_OFFSET -4
 270	/*CFI_RESTORE fs;*/
 271	POP_GS \pop
 272.pushsection .fixup, "ax"
 2734:	movl $0, (%esp)
 274	jmp 1b
 2755:	movl $0, (%esp)
 276	jmp 2b
 2776:	movl $0, (%esp)
 278	jmp 3b
 279.section __ex_table, "a"
 280	.align 4
 281	.long 1b, 4b
 282	.long 2b, 5b
 283	.long 3b, 6b
 284.popsection
 285	POP_GS_EX
 286.endm
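/*
 * The .fixup/__ex_table pairs above use the kernel's usual exception-table
 * mechanism: each __ex_table entry is a (faulting address, fixup address)
 * pair, roughly
 *
 *	struct exception_table_entry { unsigned long insn, fixup; };
 *
 * so if popping a stale user segment faults, the fixup writes a zero over
 * the saved selector and retries the pop, which then loads the (always
 * valid) null selector instead of oopsing.
 */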
 287
 288.macro RING0_INT_FRAME
 289	CFI_STARTPROC simple
 290	CFI_SIGNAL_FRAME
 291	CFI_DEF_CFA esp, 3*4
 292	/*CFI_OFFSET cs, -2*4;*/
 293	CFI_OFFSET eip, -3*4
 294.endm
 295
 296.macro RING0_EC_FRAME
 297	CFI_STARTPROC simple
 298	CFI_SIGNAL_FRAME
 299	CFI_DEF_CFA esp, 4*4
 300	/*CFI_OFFSET cs, -2*4;*/
 301	CFI_OFFSET eip, -3*4
 302.endm
 303
 304.macro RING0_PTREGS_FRAME
 305	CFI_STARTPROC simple
 306	CFI_SIGNAL_FRAME
 307	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
 308	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
 309	CFI_OFFSET eip, PT_EIP-PT_OLDESP
 310	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
 311	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
 312	CFI_OFFSET eax, PT_EAX-PT_OLDESP
 313	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
 314	CFI_OFFSET edi, PT_EDI-PT_OLDESP
 315	CFI_OFFSET esi, PT_ESI-PT_OLDESP
 316	CFI_OFFSET edx, PT_EDX-PT_OLDESP
 317	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
 318	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 319.endm
 320
 321ENTRY(ret_from_fork)
 322	CFI_STARTPROC
 323	pushl %eax
 324	CFI_ADJUST_CFA_OFFSET 4
 325	call schedule_tail
 326	GET_THREAD_INFO(%ebp)
 327	popl %eax
 328	CFI_ADJUST_CFA_OFFSET -4
 329	pushl $0x0202			# Reset kernel eflags
 330	CFI_ADJUST_CFA_OFFSET 4
 331	popfl
 332	CFI_ADJUST_CFA_OFFSET -4
 333	jmp syscall_exit
 334	CFI_ENDPROC
 335END(ret_from_fork)
 336
 337/*
 338 * Interrupt exit functions should be protected against kprobes
 339 */
 340	.pushsection .kprobes.text, "ax"
 341/*
 342 * Returning to user mode is not as complex as all this looks,
 343 * but we want the default path for a system call return to
 344 * go as quickly as possible, which is why some of this is
 345 * less clear than it otherwise should be.
 346 */
 347
 348	# userspace resumption stub bypassing syscall exit tracing
 349	ALIGN
 350	RING0_PTREGS_FRAME
 351ret_from_exception:
 352	preempt_stop(CLBR_ANY)
 353ret_from_intr:
 354	GET_THREAD_INFO(%ebp)
 355check_userspace:
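	/*
	 * Combine EFLAGS and CS in %eax: the low byte of EFLAGS is replaced
	 * by the saved CS, then everything except the VM flag and the CS RPL
	 * is masked off.  A result below USER_RPL (3) with VM clear means we
	 * are returning to the kernel rather than to user or vm86 mode.
	 */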
 356	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
 357	movb PT_CS(%esp), %al
 358	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 359	cmpl $USER_RPL, %eax
 360	jb resume_kernel		# not returning to v8086 or userspace
 361
 362ENTRY(resume_userspace)
 363	LOCKDEP_SYS_EXIT
 364 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 365					# setting need_resched or sigpending
 366					# between sampling and the iret
 367	TRACE_IRQS_OFF
 368	movl TI_flags(%ebp), %ecx
 369	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
 370					# int/exception return?
 371	jne work_pending
 372	jmp restore_all
 373END(ret_from_exception)
 374
 375#ifdef CONFIG_PREEMPT
 376ENTRY(resume_kernel)
 377	DISABLE_INTERRUPTS(CLBR_ANY)
 378	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 379	jnz restore_all
 380need_resched:
 381	movl TI_flags(%ebp), %ecx	# need_resched set ?
 382	testb $_TIF_NEED_RESCHED, %cl
 383	jz restore_all
 384	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 385	jz restore_all
 386	call preempt_schedule_irq
 387	jmp need_resched
 388END(resume_kernel)
 389#endif
 390	CFI_ENDPROC
 391/*
 392 * End of kprobes section
 393 */
 394	.popsection
 395
 396/* SYSENTER_RETURN points to after the "sysenter" instruction in
 397   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 398
 399	# sysenter call handler stub
 400ENTRY(ia32_sysenter_target)
 401	CFI_STARTPROC simple
 402	CFI_SIGNAL_FRAME
 403	CFI_DEF_CFA esp, 0
 404	CFI_REGISTER esp, ebp
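	/*
	 * On sysenter the CPU loaded %esp from MSR_IA32_SYSENTER_ESP, which
	 * points into this CPU's TSS (programmed in enable_sep_cpu()), so the
	 * TSS_sysenter_sp0 offset below resolves to tss.sp0 - the top of the
	 * current task's real kernel stack.
	 */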
 405	movl TSS_sysenter_sp0(%esp),%esp
 406sysenter_past_esp:
 407	/*
 408	 * Interrupts are disabled here, but we can't call TRACE_IRQS_OFF
 409	 * until enough kernel state has been set up to do so - and we
 410	 * immediately enable interrupts at that point anyway.
 411	 */
 412	pushl $(__USER_DS)
 413	CFI_ADJUST_CFA_OFFSET 4
 414	/*CFI_REL_OFFSET ss, 0*/
 415	pushl %ebp
 416	CFI_ADJUST_CFA_OFFSET 4
 417	CFI_REL_OFFSET esp, 0
 418	pushfl
 419	orl $X86_EFLAGS_IF, (%esp)
 420	CFI_ADJUST_CFA_OFFSET 4
 421	pushl $(__USER_CS)
 422	CFI_ADJUST_CFA_OFFSET 4
 423	/*CFI_REL_OFFSET cs, 0*/
 424	/*
 425	 * Push current_thread_info()->sysenter_return to the stack.
 426	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 427	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 428	 */
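	/*
	 * Spelled out: %esp is currently 4*4 bytes below the value it was
	 * loaded with from tss.sp0, and copy_thread() sets sp0 8 bytes below
	 * the true top of the stack, so %esp + 4*4 + 8 is the top of the
	 * stack; subtracting THREAD_SIZE lands on the thread_info at its
	 * bottom, and TI_sysenter_return indexes the field we want.
	 */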
 429	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
 430	CFI_ADJUST_CFA_OFFSET 4
 431	CFI_REL_OFFSET eip, 0
 432
 433	pushl %eax
 434	CFI_ADJUST_CFA_OFFSET 4
 435	SAVE_ALL
 436	ENABLE_INTERRUPTS(CLBR_NONE)
 437
 438/*
 439 * Load the potential sixth argument from the user stack.
 440 * Be careful about security: %ebp holds a user-supplied pointer here.
 441 */
 442	cmpl $__PAGE_OFFSET-3,%ebp
 443	jae syscall_fault
 4441:	movl (%ebp),%ebp
 445	movl %ebp,PT_EBP(%esp)
 446.section __ex_table,"a"
 447	.align 4
 448	.long 1b,syscall_fault
 449.previous
 450
 451	GET_THREAD_INFO(%ebp)
 452
 453	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 454	jnz sysenter_audit
 455sysenter_do_call:
 456	cmpl $(nr_syscalls), %eax
 457	jae syscall_badsys
 458	call *sys_call_table(,%eax,4)
 459	movl %eax,PT_EAX(%esp)
 460	LOCKDEP_SYS_EXIT
 461	DISABLE_INTERRUPTS(CLBR_ANY)
 462	TRACE_IRQS_OFF
 463	movl TI_flags(%ebp), %ecx
 464	testl $_TIF_ALLWORK_MASK, %ecx
 465	jne sysexit_audit
 466sysenter_exit:
 467/* if something modifies registers it must also disable sysexit */
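/*
 * sysexit returns to ring 3 with %eip taken from %edx and %esp taken from
 * %ecx (CS and SS are derived from MSR_IA32_SYSENTER_CS), which is why the
 * saved user EIP and ESP are staged in exactly those registers below.
 */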
 468	movl PT_EIP(%esp), %edx
 469	movl PT_OLDESP(%esp), %ecx
 470	xorl %ebp,%ebp
 471	TRACE_IRQS_ON
 4721:	mov  PT_FS(%esp), %fs
 473	PTGS_TO_GS
 474	ENABLE_INTERRUPTS_SYSEXIT
 475
 476#ifdef CONFIG_AUDITSYSCALL
 477sysenter_audit:
 478	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 479	jnz syscall_trace_entry
 480	addl $4,%esp
 481	CFI_ADJUST_CFA_OFFSET -4
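	/*
	 * The kernel is built with -mregparm=3, so the first three C
	 * arguments to audit_syscall_entry() travel in %eax, %edx and %ecx
	 * and the last three are taken from the stack - where, after the
	 * addl above, the saved %ecx, %edx and %esi already sit in the
	 * right slots:
	 */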
 482	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
 483	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
 484	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
 485	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
 486	movl %eax,%edx			/* 2nd arg: syscall number */
 487	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
 488	call audit_syscall_entry
 489	pushl %ebx
 490	CFI_ADJUST_CFA_OFFSET 4
 491	movl PT_EAX(%esp),%eax		/* reload syscall number */
 492	jmp sysenter_do_call
 493
 494sysexit_audit:
 495	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 496	jne syscall_exit_work
 497	TRACE_IRQS_ON
 498	ENABLE_INTERRUPTS(CLBR_ANY)
 499	movl %eax,%edx		/* second arg, syscall return value */
 500	cmpl $0,%eax		/* is it < 0? */
 501	setl %al		/* 1 if so, 0 if not */
 502	movzbl %al,%eax		/* zero-extend that */
 503	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 504	call audit_syscall_exit
 505	DISABLE_INTERRUPTS(CLBR_ANY)
 506	TRACE_IRQS_OFF
 507	movl TI_flags(%ebp), %ecx
 508	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
 509	jne syscall_exit_work
 510	movl PT_EAX(%esp),%eax	/* reload syscall return value */
 511	jmp sysenter_exit
 512#endif
 513
 514	CFI_ENDPROC
 515.pushsection .fixup,"ax"
 5162:	movl $0,PT_FS(%esp)
 517	jmp 1b
 518.section __ex_table,"a"
 519	.align 4
 520	.long 1b,2b
 521.popsection
 522	PTGS_TO_GS_EX
 523ENDPROC(ia32_sysenter_target)
 524
 525/*
 526 * syscall stub including irq exit should be protected against kprobes
 527 */
 528	.pushsection .kprobes.text, "ax"
 529	# system call handler stub
 530ENTRY(system_call)
 531	RING0_INT_FRAME			# can't unwind into user space anyway
 532	pushl %eax			# save orig_eax
 533	CFI_ADJUST_CFA_OFFSET 4
 534	SAVE_ALL
 535	GET_THREAD_INFO(%ebp)
 536					# system call tracing in operation / emulation
 537	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 538	jnz syscall_trace_entry
 539	cmpl $(nr_syscalls), %eax
 540	jae syscall_badsys
 541syscall_call:
 542	call *sys_call_table(,%eax,4)
 543	movl %eax,PT_EAX(%esp)		# store the return value
 544syscall_exit:
 545	LOCKDEP_SYS_EXIT
 546	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 547					# setting need_resched or sigpending
 548					# between sampling and the iret
 549	TRACE_IRQS_OFF
 550	movl TI_flags(%ebp), %ecx
 551	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
 552	jne syscall_exit_work
 553
 554restore_all:
 555	TRACE_IRQS_IRET
 556restore_all_notrace:
 557	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
 558	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 559	# are returning to the kernel.
 560	# See comments in process.c:copy_thread() for details.
 561	movb PT_OLDSS(%esp), %ah
 562	movb PT_CS(%esp), %al
 563	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 564	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
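	# equality means: VM flag clear, SS refers to the LDT (TI bit set in
	# %ah) and CS RPL == USER_RPL, i.e. an iret to ring 3 on a possibly
	# 16-bit LDT stack segment - the only case that needs the espfix path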
 565	CFI_REMEMBER_STATE
 566	je ldt_ss			# returning to user-space with LDT SS
 567restore_nocheck:
 568	RESTORE_REGS 4			# skip orig_eax/error_code
 569	CFI_ADJUST_CFA_OFFSET -4
 570irq_return:
 571	INTERRUPT_RETURN
 572.section .fixup,"ax"
 573ENTRY(iret_exc)
 574	pushl $0			# no error code
 575	pushl $do_iret_error
 576	jmp error_code
 577.previous
 578.section __ex_table,"a"
 579	.align 4
 580	.long irq_return,iret_exc
 581.previous
 582
 583	CFI_RESTORE_STATE
 584ldt_ss:
 585	larl PT_OLDSS(%esp), %eax
 586	jnz restore_nocheck
 587	testl $0x00400000, %eax		# returning to 32-bit stack?
 588	jnz restore_nocheck		# all right, normal return
 589
 590#ifdef CONFIG_PARAVIRT
 591	/*
 592	 * The kernel can't run on a non-flat stack if paravirt mode
 593	 * is active.  Rather than try to fix up the high bits of
 594	 * ESP, bypass this code entirely.  This may break DOSemu
 595	 * and/or Wine support in a paravirt VM, although the option
 596	 * is still available to implement the setting of the high
 597	 * 16 bits in the INTERRUPT_RETURN paravirt-op.
 598	 */
 599	cmpl $0, pv_info+PARAVIRT_enabled
 600	jne restore_nocheck
 601#endif
 602
 603/*
 604 * Setup and switch to ESPFIX stack
 605 *
 606 * We're returning to userspace with a 16 bit stack. The CPU will not
 607 * restore the high word of ESP for us on executing iret... This is an
 608 * "official" bug of all the x86-compatible CPUs, which we can work
 609 * around to make dosemu and wine happy. We do this by preloading the
 610 * high word of ESP with the high word of the userspace ESP while
 611 * compensating for the offset by changing to the ESPFIX segment with
 612 * a base address that accounts for the difference.
 613 */
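/*
 * Worked example (illustrative numbers only): with a kernel %esp of
 * 0xc15ff0d8 and a saved user %esp of 0x0012fead, %eax below becomes
 * 0x0012f0d8 and %edx becomes 0xc15ff0d8 - 0x0012f0d8 = 0xc14d0000.  The
 * ESPFIX descriptor base is patched to 0xc14d0000, so base + new %esp =
 * 0xc15ff0d8 again: the stack is unchanged linearly, but the high word of
 * %esp no longer leaks kernel bits and the 16-bit iret only has to
 * restore the low word.
 */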
 614	mov %esp, %edx			/* load kernel esp */
 615	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
 616	mov %dx, %ax			/* eax: new kernel esp */
 617	sub %eax, %edx			/* offset (low word is 0) */
 618	PER_CPU(gdt_page, %ebx)
 619	shr $16, %edx
 620	mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
 621	mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
 622	pushl $__ESPFIX_SS
 623	CFI_ADJUST_CFA_OFFSET 4
 624	push %eax			/* new kernel esp */
 625	CFI_ADJUST_CFA_OFFSET 4
 626	/* Disable interrupts, but do not irqtrace this section: we
 627	 * will soon execute iret and the tracer was already set to
 628	 * the irqstate after the iret */
 629	DISABLE_INTERRUPTS(CLBR_EAX)
 630	lss (%esp), %esp		/* switch to espfix segment */
 631	CFI_ADJUST_CFA_OFFSET -8
 632	jmp restore_nocheck
 633	CFI_ENDPROC
 634ENDPROC(system_call)
 635
 636	# perform work that needs to be done immediately before resumption
 637	ALIGN
 638	RING0_PTREGS_FRAME		# can't unwind into user space anyway
 639work_pending:
 640	testb $_TIF_NEED_RESCHED, %cl
 641	jz work_notifysig
 642work_resched:
 643	call schedule
 644	LOCKDEP_SYS_EXIT
 645	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 646					# setting need_resched or sigpending
 647					# between sampling and the iret
 648	TRACE_IRQS_OFF
 649	movl TI_flags(%ebp), %ecx
 650	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
 651					# than syscall tracing?
 652	jz restore_all
 653	testb $_TIF_NEED_RESCHED, %cl
 654	jnz work_resched
 655
 656work_notifysig:				# deal with pending signals and
 657					# notify-resume requests
 658#ifdef CONFIG_VM86
 659	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
 660	movl %esp, %eax
 661	jne work_notifysig_v86		# returning to kernel-space or
 662					# vm86-space
 663	xorl %edx, %edx
 664	call do_notify_resume
 665	jmp resume_userspace_sig
 666
 667	ALIGN
 668work_notifysig_v86:
 669	pushl %ecx			# save ti_flags for do_notify_resume
 670	CFI_ADJUST_CFA_OFFSET 4
 671	call save_v86_state		# %eax contains pt_regs pointer
 672	popl %ecx
 673	CFI_ADJUST_CFA_OFFSET -4
 674	movl %eax, %esp
 675#else
 676	movl %esp, %eax
 677#endif
 678	xorl %edx, %edx
 679	call do_notify_resume
 680	jmp resume_userspace_sig
 681END(work_pending)
 682
 683	# perform syscall exit tracing
 684	ALIGN
 685syscall_trace_entry:
 686	movl $-ENOSYS,PT_EAX(%esp)
 687	movl %esp, %eax
 688	call syscall_trace_enter
 689	/* What it returned is what we'll actually use.  */
 690	cmpl $(nr_syscalls), %eax
 691	jnae syscall_call
 692	jmp syscall_exit
 693END(syscall_trace_entry)
 694
 695	# perform syscall exit tracing
 696	ALIGN
 697syscall_exit_work:
 698	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
 699	jz work_pending
 700	TRACE_IRQS_ON
 701	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
 702					# schedule() instead
 703	movl %esp, %eax
 704	call syscall_trace_leave
 705	jmp resume_userspace
 706END(syscall_exit_work)
 707	CFI_ENDPROC
 708
 709	RING0_INT_FRAME			# can't unwind into user space anyway
 710syscall_fault:
 711	GET_THREAD_INFO(%ebp)
 712	movl $-EFAULT,PT_EAX(%esp)
 713	jmp resume_userspace
 714END(syscall_fault)
 715
 716syscall_badsys:
 717	movl $-ENOSYS,PT_EAX(%esp)
 718	jmp resume_userspace
 719END(syscall_badsys)
 720	CFI_ENDPROC
 721/*
 722 * End of kprobes section
 723 */
 724	.popsection
 725
 726/*
 727 * System calls that need a pt_regs pointer.
 728 */
 729#define PTREGSCALL0(name) \
 730	ALIGN; \
 731ptregs_##name: \
 732	leal 4(%esp),%eax; \
 733	jmp sys_##name;
 734
 735#define PTREGSCALL1(name) \
 736	ALIGN; \
 737ptregs_##name: \
 738	leal 4(%esp),%edx; \
 739	movl (PT_EBX+4)(%esp),%eax; \
 740	jmp sys_##name;
 741
 742#define PTREGSCALL2(name) \
 743	ALIGN; \
 744ptregs_##name: \
 745	leal 4(%esp),%ecx; \
 746	movl (PT_ECX+4)(%esp),%edx; \
 747	movl (PT_EBX+4)(%esp),%eax; \
 748	jmp sys_##name;
 749
 750#define PTREGSCALL3(name) \
 751	ALIGN; \
 752ptregs_##name: \
 753	leal 4(%esp),%eax; \
 754	pushl %eax; \
 755	movl PT_EDX(%eax),%ecx; \
 756	movl PT_ECX(%eax),%edx; \
 757	movl PT_EBX(%eax),%eax; \
 758	call sys_##name; \
 759	addl $4,%esp; \
 760	ret
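/*
 * On the C side the corresponding handlers take the pt_regs pointer as an
 * extra trailing argument; roughly (see arch/x86/include/asm/syscalls.h
 * for the real declarations):
 *
 *	int  sys_fork(struct pt_regs *regs);
 *	long sys_iopl(unsigned int level, struct pt_regs *regs);
 *
 * The stubs below just move the saved user registers into the regparm
 * argument registers and append a pointer to the saved pt_regs frame.
 */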
 761
 762PTREGSCALL1(iopl)
 763PTREGSCALL0(fork)
 764PTREGSCALL0(vfork)
 765PTREGSCALL3(execve)
 766PTREGSCALL2(sigaltstack)
 767PTREGSCALL0(sigreturn)
 768PTREGSCALL0(rt_sigreturn)
 769PTREGSCALL2(vm86)
 770PTREGSCALL1(vm86old)
 771
 772/* Clone is an oddball.  The 4th arg is in %edi */
 773	ALIGN;
 774ptregs_clone:
 775	leal 4(%esp),%eax
 776	pushl %eax
 777	pushl PT_EDI(%eax)
 778	movl PT_EDX(%eax),%ecx
 779	movl PT_ECX(%eax),%edx
 780	movl PT_EBX(%eax),%eax
 781	call sys_clone
 782	addl $8,%esp
 783	ret
 784
 785.macro FIXUP_ESPFIX_STACK
 786/*
 787 * Switch back from the ESPFIX stack to the normal zero-based stack
 788 *
 789 * We can't call C functions using the ESPFIX stack. This code reads
 790 * the high word of the segment base from the GDT and switches to the
 791 * normal stack and adjusts ESP by the matching offset.
 792 */
 793	/* fixup the stack */
 794	PER_CPU(gdt_page, %ebx)
 795	mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
 796	mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
 797	shl $16, %eax
 798	addl %esp, %eax			/* the adjusted stack pointer */
 799	pushl $__KERNEL_DS
 800	CFI_ADJUST_CFA_OFFSET 4
 801	pushl %eax
 802	CFI_ADJUST_CFA_OFFSET 4
 803	lss (%esp), %esp		/* switch to the normal stack segment */
 804	CFI_ADJUST_CFA_OFFSET -8
 805.endm
 806.macro UNWIND_ESPFIX_STACK
 807	movl %ss, %eax
 808	/* see if on espfix stack */
 809	cmpw $__ESPFIX_SS, %ax
 810	jne 27f
 811	movl $__KERNEL_DS, %eax
 812	movl %eax, %ds
 813	movl %eax, %es
 814	/* switch to normal stack */
 815	FIXUP_ESPFIX_STACK
 81627:
 817.endm
 818
 819/*
 820 * Build the entry stubs and pointer table with some assembler magic.
 821 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 822 * single cache line on all modern x86 implementations.
 823 */
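/*
 * Size check (approximate): each stub is a two-byte "push imm8" (the
 * ~vector+0x80 trick keeps the operand in signed-byte range) plus a
 * two-byte short jump; six such stubs, the seventh stub without its jump,
 * and the shared five-byte "jmp common_interrupt" come to 6*4 + 2 + 5 = 31
 * bytes, which is why seven stubs fit in each 32-byte chunk.
 */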
 824.section .init.rodata,"a"
 825ENTRY(interrupt)
 826.text
 827	.p2align 5
 828	.p2align CONFIG_X86_L1_CACHE_SHIFT
 829ENTRY(irq_entries_start)
 830	RING0_INT_FRAME
 831vector=FIRST_EXTERNAL_VECTOR
 832.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
 833	.balign 32
 834  .rept	7
 835    .if vector < NR_VECTORS
 836      .if vector <> FIRST_EXTERNAL_VECTOR
 837	CFI_ADJUST_CFA_OFFSET -4
 838      .endif
 8391:	pushl $(~vector+0x80)	/* Note: always in signed byte range */
 840	CFI_ADJUST_CFA_OFFSET 4
 841      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 842	jmp 2f
 843      .endif
 844      .previous
 845	.long 1b
 846      .text
 847vector=vector+1
 848    .endif
 849  .endr
 8502:	jmp common_interrupt
 851.endr
 852END(irq_entries_start)
 853
 854.previous
 855END(interrupt)
 856.previous
 857
 858/*
 859 * the CPU automatically disables interrupts when executing an IRQ vector,
 860 * so IRQ-flags tracing has to follow that:
 861 */
 862	.p2align CONFIG_X86_L1_CACHE_SHIFT
 863common_interrupt:
 864	addl $-0x80,(%esp)	/* Adjust vector into the [-256,-1] range */
 865	SAVE_ALL
 866	TRACE_IRQS_OFF
 867	movl %esp,%eax
 868	call do_IRQ
 869	jmp ret_from_intr
 870ENDPROC(common_interrupt)
 871	CFI_ENDPROC
 872
 873/*
 874 *  Irq entries should be protected against kprobes
 875 */
 876	.pushsection .kprobes.text, "ax"
 877#define BUILD_INTERRUPT3(name, nr, fn)	\
 878ENTRY(name)				\
 879	RING0_INT_FRAME;		\
 880	pushl $~(nr);			\
 881	CFI_ADJUST_CFA_OFFSET 4;	\
 882	SAVE_ALL;			\
 883	TRACE_IRQS_OFF			\
 884	movl %esp,%eax;			\
 885	call fn;			\
 886	jmp ret_from_intr;		\
 887	CFI_ENDPROC;			\
 888ENDPROC(name)
 889
 890#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
 891
 892/* The include is where all of the SMP etc. interrupts come from */
 893#include <asm/entry_arch.h>
 894
 895ENTRY(coprocessor_error)
 896	RING0_INT_FRAME
 897	pushl $0
 898	CFI_ADJUST_CFA_OFFSET 4
 899	pushl $do_coprocessor_error
 900	CFI_ADJUST_CFA_OFFSET 4
 901	jmp error_code
 902	CFI_ENDPROC
 903END(coprocessor_error)
 904
 905ENTRY(simd_coprocessor_error)
 906	RING0_INT_FRAME
 907	pushl $0
 908	CFI_ADJUST_CFA_OFFSET 4
 909#ifdef CONFIG_X86_INVD_BUG
 910	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
 911661:	pushl $do_general_protection
 912662:
 913.section .altinstructions,"a"
 914	.balign 4
 915	.long 661b
 916	.long 663f
 917	.byte X86_FEATURE_XMM
 918	.byte 662b-661b
 919	.byte 664f-663f
 920.previous
 921.section .altinstr_replacement,"ax"
 922663:	pushl $do_simd_coprocessor_error
 923664:
 924.previous
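	/*
	 * The .altinstructions record above lets the boot-time patcher
	 * replace the push of do_general_protection with a push of
	 * do_simd_coprocessor_error on CPUs that have X86_FEATURE_XMM; only
	 * CPUs affected by the invd erratum keep the #GP handler here.
	 */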
 925#else
 926	pushl $do_simd_coprocessor_error
 927#endif
 928	CFI_ADJUST_CFA_OFFSET 4
 929	jmp error_code
 930	CFI_ENDPROC
 931END(simd_coprocessor_error)
 932
 933ENTRY(device_not_available)
 934	RING0_INT_FRAME
 935	pushl $-1			# mark this as an int
 936	CFI_ADJUST_CFA_OFFSET 4
 937	pushl $do_device_not_available
 938	CFI_ADJUST_CFA_OFFSET 4
 939	jmp error_code
 940	CFI_ENDPROC
 941END(device_not_available)
 942
 943#ifdef CONFIG_PARAVIRT
 944ENTRY(native_iret)
 945	iret
 946.section __ex_table,"a"
 947	.align 4
 948	.long native_iret, iret_exc
 949.previous
 950END(native_iret)
 951
 952ENTRY(native_irq_enable_sysexit)
 953	sti
 954	sysexit
 955END(native_irq_enable_sysexit)
 956#endif
 957
 958ENTRY(overflow)
 959	RING0_INT_FRAME
 960	pushl $0
 961	CFI_ADJUST_CFA_OFFSET 4
 962	pushl $do_overflow
 963	CFI_ADJUST_CFA_OFFSET 4
 964	jmp error_code
 965	CFI_ENDPROC
 966END(overflow)
 967
 968ENTRY(bounds)
 969	RING0_INT_FRAME
 970	pushl $0
 971	CFI_ADJUST_CFA_OFFSET 4
 972	pushl $do_bounds
 973	CFI_ADJUST_CFA_OFFSET 4
 974	jmp error_code
 975	CFI_ENDPROC
 976END(bounds)
 977
 978ENTRY(invalid_op)
 979	RING0_INT_FRAME
 980	pushl $0
 981	CFI_ADJUST_CFA_OFFSET 4
 982	pushl $do_invalid_op
 983	CFI_ADJUST_CFA_OFFSET 4
 984	jmp error_code
 985	CFI_ENDPROC
 986END(invalid_op)
 987
 988ENTRY(coprocessor_segment_overrun)
 989	RING0_INT_FRAME
 990	pushl $0
 991	CFI_ADJUST_CFA_OFFSET 4
 992	pushl $do_coprocessor_segment_overrun
 993	CFI_ADJUST_CFA_OFFSET 4
 994	jmp error_code
 995	CFI_ENDPROC
 996END(coprocessor_segment_overrun)
 997
 998ENTRY(invalid_TSS)
 999	RING0_EC_FRAME
1000	pushl $do_invalid_TSS
1001	CFI_ADJUST_CFA_OFFSET 4
1002	jmp error_code
1003	CFI_ENDPROC
1004END(invalid_TSS)
1005
1006ENTRY(segment_not_present)
1007	RING0_EC_FRAME
1008	pushl $do_segment_not_present
1009	CFI_ADJUST_CFA_OFFSET 4
1010	jmp error_code
1011	CFI_ENDPROC
1012END(segment_not_present)
1013
1014ENTRY(stack_segment)
1015	RING0_EC_FRAME
1016	pushl $do_stack_segment
1017	CFI_ADJUST_CFA_OFFSET 4
1018	jmp error_code
1019	CFI_ENDPROC
1020END(stack_segment)
1021
1022ENTRY(alignment_check)
1023	RING0_EC_FRAME
1024	pushl $do_alignment_check
1025	CFI_ADJUST_CFA_OFFSET 4
1026	jmp error_code
1027	CFI_ENDPROC
1028END(alignment_check)
1029
1030ENTRY(divide_error)
1031	RING0_INT_FRAME
1032	pushl $0			# no error code
1033	CFI_ADJUST_CFA_OFFSET 4
1034	pushl $do_divide_error
1035	CFI_ADJUST_CFA_OFFSET 4
1036	jmp error_code
1037	CFI_ENDPROC
1038END(divide_error)
1039
1040#ifdef CONFIG_X86_MCE
1041ENTRY(machine_check)
1042	RING0_INT_FRAME
1043	pushl $0
1044	CFI_ADJUST_CFA_OFFSET 4
1045	pushl machine_check_vector
1046	CFI_ADJUST_CFA_OFFSET 4
1047	jmp error_code
1048	CFI_ENDPROC
1049END(machine_check)
1050#endif
1051
1052ENTRY(spurious_interrupt_bug)
1053	RING0_INT_FRAME
1054	pushl $0
1055	CFI_ADJUST_CFA_OFFSET 4
1056	pushl $do_spurious_interrupt_bug
1057	CFI_ADJUST_CFA_OFFSET 4
1058	jmp error_code
1059	CFI_ENDPROC
1060END(spurious_interrupt_bug)
1061/*
1062 * End of kprobes section
1063 */
1064	.popsection
1065
1066ENTRY(kernel_thread_helper)
1067	pushl $0		# fake return address for unwinder
1068	CFI_STARTPROC
1069	movl %edi,%eax
1070	call *%esi
1071	call do_exit
1072	ud2			# padding for call trace
1073	CFI_ENDPROC
1074ENDPROC(kernel_thread_helper)
1075
1076#ifdef CONFIG_XEN
1077/* Xen doesn't set %esp to be precisely what the normal sysenter
1078   entrypoint expects, so fix it up before using the normal path. */
1079ENTRY(xen_sysenter_target)
1080	RING0_INT_FRAME
1081	addl $5*4, %esp		/* remove xen-provided frame */
1082	CFI_ADJUST_CFA_OFFSET -5*4
1083	jmp sysenter_past_esp
1084	CFI_ENDPROC
1085
1086ENTRY(xen_hypervisor_callback)
1087	CFI_STARTPROC
1088	pushl $0
1089	CFI_ADJUST_CFA_OFFSET 4
1090	SAVE_ALL
1091	TRACE_IRQS_OFF
1092
1093	/* Check to see if we got the event in the critical
1094	   region in xen_iret_direct, after we've re-enabled
1095	   events and checked for pending events.  This simulates
1096	   the iret instruction's behaviour where it delivers a
1097	   pending interrupt when enabling interrupts. */
1098	movl PT_EIP(%esp),%eax
1099	cmpl $xen_iret_start_crit,%eax
1100	jb   1f
1101	cmpl $xen_iret_end_crit,%eax
1102	jae  1f
1103
1104	jmp  xen_iret_crit_fixup
1105
1106ENTRY(xen_do_upcall)
11071:	mov %esp, %eax
1108	call xen_evtchn_do_upcall
1109	jmp  ret_from_intr
1110	CFI_ENDPROC
1111ENDPROC(xen_hypervisor_callback)
1112
1113# Hypervisor uses this for application faults while it executes.
1114# We get here for two reasons:
1115#  1. Fault while reloading DS, ES, FS or GS
1116#  2. Fault while executing IRET
1117# Category 1 we fix up by reattempting the load, and zeroing the segment
1118# register if the load fails.
1119# Category 2 we fix up by jumping to do_iret_error. We cannot use the
1120# normal Linux return path in this case because if we use the IRET hypercall
1121# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1122# We distinguish between categories by maintaining a status value in EAX.
1123ENTRY(xen_failsafe_callback)
1124	CFI_STARTPROC
1125	pushl %eax
1126	CFI_ADJUST_CFA_OFFSET 4
1127	movl $1,%eax
11281:	mov 4(%esp),%ds
11292:	mov 8(%esp),%es
11303:	mov 12(%esp),%fs
11314:	mov 16(%esp),%gs
1132	testl %eax,%eax
1133	popl %eax
1134	CFI_ADJUST_CFA_OFFSET -4
1135	lea 16(%esp),%esp
1136	CFI_ADJUST_CFA_OFFSET -16
1137	jz 5f
1138	addl $16,%esp
1139	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
11405:	pushl $0		# EAX == 0 => Category 1 (Bad segment)
1141	CFI_ADJUST_CFA_OFFSET 4
1142	SAVE_ALL
1143	jmp ret_from_exception
1144	CFI_ENDPROC
1145
1146.section .fixup,"ax"
11476:	xorl %eax,%eax
1148	movl %eax,4(%esp)
1149	jmp 1b
11507:	xorl %eax,%eax
1151	movl %eax,8(%esp)
1152	jmp 2b
11538:	xorl %eax,%eax
1154	movl %eax,12(%esp)
1155	jmp 3b
11569:	xorl %eax,%eax
1157	movl %eax,16(%esp)
1158	jmp 4b
1159.previous
1160.section __ex_table,"a"
1161	.align 4
1162	.long 1b,6b
1163	.long 2b,7b
1164	.long 3b,8b
1165	.long 4b,9b
1166.previous
1167ENDPROC(xen_failsafe_callback)
1168
1169#endif	/* CONFIG_XEN */
1170
1171#ifdef CONFIG_FUNCTION_TRACER
1172#ifdef CONFIG_DYNAMIC_FTRACE
1173
1174ENTRY(mcount)
1175	ret
1176END(mcount)
1177
1178ENTRY(ftrace_caller)
1179	cmpl $0, function_trace_stop
1180	jne  ftrace_stub
1181
1182	pushl %eax
1183	pushl %ecx
1184	pushl %edx
1185	movl 0xc(%esp), %eax
1186	movl 0x4(%ebp), %edx
1187	subl $MCOUNT_INSN_SIZE, %eax
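	/*
	 * regparm calling convention: %eax = address of the mcount call site
	 * (the return address into the traced function minus the call
	 * length) and %edx = the traced function's own return address, read
	 * from its frame; the patched call below hands both to the tracer.
	 */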
1188
1189.globl ftrace_call
1190ftrace_call:
1191	call ftrace_stub
1192
1193	popl %edx
1194	popl %ecx
1195	popl %eax
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197.globl ftrace_graph_call
1198ftrace_graph_call:
1199	jmp ftrace_stub
1200#endif
1201
1202.globl ftrace_stub
1203ftrace_stub:
1204	ret
1205END(ftrace_caller)
1206
1207#else /* ! CONFIG_DYNAMIC_FTRACE */
1208
1209ENTRY(mcount)
1210	cmpl $0, function_trace_stop
1211	jne  ftrace_stub
1212
1213	cmpl $ftrace_stub, ftrace_trace_function
1214	jnz trace
1215#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1216	cmpl $ftrace_stub, ftrace_graph_return
1217	jnz ftrace_graph_caller
1218
1219	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1220	jnz ftrace_graph_caller
1221#endif
1222.globl ftrace_stub
1223ftrace_stub:
1224	ret
1225
1226	/* taken from glibc */
1227trace:
1228	pushl %eax
1229	pushl %ecx
1230	pushl %edx
1231	movl 0xc(%esp), %eax
1232	movl 0x4(%ebp), %edx
1233	subl $MCOUNT_INSN_SIZE, %eax
1234
1235	call *ftrace_trace_function
1236
1237	popl %edx
1238	popl %ecx
1239	popl %eax
1240	jmp ftrace_stub
1241END(mcount)
1242#endif /* CONFIG_DYNAMIC_FTRACE */
1243#endif /* CONFIG_FUNCTION_TRACER */
1244
1245#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1246ENTRY(ftrace_graph_caller)
1247	cmpl $0, function_trace_stop
1248	jne ftrace_stub
1249
1250	pushl %eax
1251	pushl %ecx
1252	pushl %edx
1253	movl 0xc(%esp), %edx
1254	lea 0x4(%ebp), %eax
1255	movl (%ebp), %ecx
1256	subl $MCOUNT_INSN_SIZE, %edx
1257	call prepare_ftrace_return
1258	popl %edx
1259	popl %ecx
1260	popl %eax
1261	ret
1262END(ftrace_graph_caller)
1263
1264.globl return_to_handler
1265return_to_handler:
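	/*
	 * prepare_ftrace_return() replaced the traced function's return
	 * address with this stub.  ftrace_return_to_handler() hands back the
	 * original return address, which we then jump to; %eax and %edx are
	 * preserved around the call because they may hold the function's
	 * return value.
	 */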
1266	pushl %eax
1267	pushl %edx
1268	movl %ebp, %eax
1269	call ftrace_return_to_handler
1270	movl %eax, %ecx
1271	popl %edx
1272	popl %eax
1273	jmp *%ecx
1274#endif
1275
1276.section .rodata,"a"
1277#include "syscall_table_32.S"
1278
1279syscall_table_size=(.-sys_call_table)
1280
1281/*
1282 * Some functions should be protected against kprobes
1283 */
1284	.pushsection .kprobes.text, "ax"
1285
1286ENTRY(page_fault)
1287	RING0_EC_FRAME
1288	pushl $do_page_fault
1289	CFI_ADJUST_CFA_OFFSET 4
1290	ALIGN
1291error_code:
1292	/* the function address is in %gs's slot on the stack */
1293	pushl %fs
1294	CFI_ADJUST_CFA_OFFSET 4
1295	/*CFI_REL_OFFSET fs, 0*/
1296	pushl %es
1297	CFI_ADJUST_CFA_OFFSET 4
1298	/*CFI_REL_OFFSET es, 0*/
1299	pushl %ds
1300	CFI_ADJUST_CFA_OFFSET 4
1301	/*CFI_REL_OFFSET ds, 0*/
1302	pushl %eax
1303	CFI_ADJUST_CFA_OFFSET 4
1304	CFI_REL_OFFSET eax, 0
1305	pushl %ebp
1306	CFI_ADJUST_CFA_OFFSET 4
1307	CFI_REL_OFFSET ebp, 0
1308	pushl %edi
1309	CFI_ADJUST_CFA_OFFSET 4
1310	CFI_REL_OFFSET edi, 0
1311	pushl %esi
1312	CFI_ADJUST_CFA_OFFSET 4
1313	CFI_REL_OFFSET esi, 0
1314	pushl %edx
1315	CFI_ADJUST_CFA_OFFSET 4
1316	CFI_REL_OFFSET edx, 0
1317	pushl %ecx
1318	CFI_ADJUST_CFA_OFFSET 4
1319	CFI_REL_OFFSET ecx, 0
1320	pushl %ebx
1321	CFI_ADJUST_CFA_OFFSET 4
1322	CFI_REL_OFFSET ebx, 0
1323	cld
1324	movl $(__KERNEL_PERCPU), %ecx
1325	movl %ecx, %fs
1326	UNWIND_ESPFIX_STACK
1327	GS_TO_REG %ecx
1328	movl PT_GS(%esp), %edi		# get the function address
1329	movl PT_ORIG_EAX(%esp), %edx	# get the error code
1330	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
1331	REG_TO_PTGS %ecx
1332	SET_KERNEL_GS %ecx
1333	movl $(__USER_DS), %ecx
1334	movl %ecx, %ds
1335	movl %ecx, %es
1336	TRACE_IRQS_OFF
1337	movl %esp,%eax			# pt_regs pointer
1338	call *%edi
1339	jmp ret_from_exception
1340	CFI_ENDPROC
1341END(page_fault)
1342
1343/*
1344 * Debug traps and NMI can happen at the one SYSENTER instruction
1345 * that sets up the real kernel stack. Check here, since we can't
1346 * allow the wrong stack to be used.
1347 *
1348 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1349 * already pushed 3 words if it hits on the sysenter instruction:
1350 * eflags, cs and eip.
1351 *
1352 * We just load the right stack, and push the three (known) values
1353 * by hand onto the new stack - while updating the return eip past
1354 * the instruction that would have done it for sysenter.
1355 */
1356.macro FIX_STACK offset ok label
1357	cmpw $__KERNEL_CS, 4(%esp)
1358	jne \ok
1359\label:
1360	movl TSS_sysenter_sp0 + \offset(%esp), %esp
1361	CFI_DEF_CFA esp, 0
1362	CFI_UNDEFINED eip
1363	pushfl
1364	CFI_ADJUST_CFA_OFFSET 4
1365	pushl $__KERNEL_CS
1366	CFI_ADJUST_CFA_OFFSET 4
1367	pushl $sysenter_past_esp
1368	CFI_ADJUST_CFA_OFFSET 4
1369	CFI_REL_OFFSET eip, 0
1370.endm
1371
1372ENTRY(debug)
1373	RING0_INT_FRAME
1374	cmpl $ia32_sysenter_target,(%esp)
1375	jne debug_stack_correct
1376	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1377debug_stack_correct:
1378	pushl $-1			# mark this as an int
1379	CFI_ADJUST_CFA_OFFSET 4
1380	SAVE_ALL
1381	TRACE_IRQS_OFF
1382	xorl %edx,%edx			# error code 0
1383	movl %esp,%eax			# pt_regs pointer
1384	call do_debug
1385	jmp ret_from_exception
1386	CFI_ENDPROC
1387END(debug)
1388
1389/*
1390 * NMI is doubly nasty. It can happen _while_ we're handling
1391 * a debug fault, and the debug fault hasn't yet been able to
1392 * clear up the stack. So we first check whether we got an
1393 * NMI on the sysenter entry path, but after that we need to
1394 * check whether we got an NMI on the debug path where the debug
1395 * fault happened on the sysenter path.
1396 */
1397ENTRY(nmi)
1398	RING0_INT_FRAME
1399	pushl %eax
1400	CFI_ADJUST_CFA_OFFSET 4
1401	movl %ss, %eax
1402	cmpw $__ESPFIX_SS, %ax
1403	popl %eax
1404	CFI_ADJUST_CFA_OFFSET -4
1405	je nmi_espfix_stack
1406	cmpl $ia32_sysenter_target,(%esp)
1407	je nmi_stack_fixup
1408	pushl %eax
1409	CFI_ADJUST_CFA_OFFSET 4
1410	movl %esp,%eax
1411	/* Do not access memory above the end of our stack page,
1412	 * it might not exist.
1413	 */
1414	andl $(THREAD_SIZE-1),%eax
1415	cmpl $(THREAD_SIZE-20),%eax
1416	popl %eax
1417	CFI_ADJUST_CFA_OFFSET -4
1418	jae nmi_stack_correct
1419	cmpl $ia32_sysenter_target,12(%esp)
1420	je nmi_debug_stack_check
1421nmi_stack_correct:
1422	/* We have a RING0_INT_FRAME here */
1423	pushl %eax
1424	CFI_ADJUST_CFA_OFFSET 4
1425	SAVE_ALL
1426	xorl %edx,%edx		# zero error code
1427	movl %esp,%eax		# pt_regs pointer
1428	call do_nmi
1429	jmp restore_all_notrace
1430	CFI_ENDPROC
1431
1432nmi_stack_fixup:
1433	RING0_INT_FRAME
1434	FIX_STACK 12, nmi_stack_correct, 1
1435	jmp nmi_stack_correct
1436
1437nmi_debug_stack_check:
1438	/* We have a RING0_INT_FRAME here */
1439	cmpw $__KERNEL_CS,16(%esp)
1440	jne nmi_stack_correct
1441	cmpl $debug,(%esp)
1442	jb nmi_stack_correct
1443	cmpl $debug_esp_fix_insn,(%esp)
1444	ja nmi_stack_correct
1445	FIX_STACK 24, nmi_stack_correct, 1
1446	jmp nmi_stack_correct
1447
1448nmi_espfix_stack:
1449	/* We have a RING0_INT_FRAME here.
1450	 *
1451	 * create the %ss:%esp pointer used to lss back to the espfix stack
1452	 */
1453	pushl %ss
1454	CFI_ADJUST_CFA_OFFSET 4
1455	pushl %esp
1456	CFI_ADJUST_CFA_OFFSET 4
1457	addl $4, (%esp)
1458	/* copy the iret frame of 12 bytes */
1459	.rept 3
1460	pushl 16(%esp)
1461	CFI_ADJUST_CFA_OFFSET 4
1462	.endr
1463	pushl %eax
1464	CFI_ADJUST_CFA_OFFSET 4
1465	SAVE_ALL
1466	FIXUP_ESPFIX_STACK		# %eax == %esp
1467	xorl %edx,%edx			# zero error code
1468	call do_nmi
1469	RESTORE_REGS
1470	lss 12+4(%esp), %esp		# back to espfix stack
1471	CFI_ADJUST_CFA_OFFSET -24
1472	jmp irq_return
1473	CFI_ENDPROC
1474END(nmi)
1475
1476ENTRY(int3)
1477	RING0_INT_FRAME
1478	pushl $-1			# mark this as an int
1479	CFI_ADJUST_CFA_OFFSET 4
1480	SAVE_ALL
1481	TRACE_IRQS_OFF
1482	xorl %edx,%edx		# zero error code
1483	movl %esp,%eax		# pt_regs pointer
1484	call do_int3
1485	jmp ret_from_exception
1486	CFI_ENDPROC
1487END(int3)
1488
1489ENTRY(general_protection)
1490	RING0_EC_FRAME
1491	pushl $do_general_protection
1492	CFI_ADJUST_CFA_OFFSET 4
1493	jmp error_code
1494	CFI_ENDPROC
1495END(general_protection)
1496
1497/*
1498 * End of kprobes section
1499 */
1500	.popsection