
/arch/x86/kernel/process_64.c

https://gitlab.com/Team-OSE-old/SimpleKernel
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>

asmlinkage extern void ret_from_fork(void);

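/*
 * Per-CPU copy of the current task's user stack pointer.  The 64-bit
 * SYSCALL entry path stashes the user %rsp here (SYSCALL does not
 * switch stacks itself), and __switch_to() keeps it in sync with
 * thread.usersp across context switches.
 */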
DEFINE_PER_CPU(unsigned long, old_rsp);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	show_regs_common();
	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

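/*
 * By the time a task is released, its mm should have dropped any LDT
 * it owned; finding one still attached here indicates a leak, hence
 * the BUG().
 */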
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

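/*
 * Install a 32-bit TLS descriptor (4 GB limit, page granularity) for
 * @addr in one of the task's GDT TLS slots.  do_arch_prctl() uses
 * this for FS/GS bases below 4 GB, since reloading a segment selector
 * on context switch is cheaper than a wrmsr of the base MSR.
 */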
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

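/*
 * Set up the child's kernel stack for fork/clone: a pt_regs frame is
 * placed at the top of the stack and copied from the parent, with
 * ->ax forced to 0 so the clone appears to return 0 in the child.
 * Segment state, the optional I/O bitmap and a new TLS entry (for
 * CLONE_SETTLS) are duplicated or initialized here as well.
 */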
int copy_thread(unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	if (user_mode(regs))
		childregs->sp = sp;
	else
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->fpu_counter = 0;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
	savesegment(fs, p->thread.fsindex);
	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	err = -ENOMEM;
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

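/*
 * Reset register and segment state at exec time: flat user segments,
 * cleared FS/GS bases, a fresh user IP/SP, and interrupts enabled.
 * Any extended FPU state inherited from the old image is freed.
 */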
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	current->thread.usersp	= new_sp;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	percpu_write(old_rsp, new_sp);
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

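/*
 * Compat exec entry point.  x32 tasks run in long mode and therefore
 * use the 64-bit code segment (__USER_CS); plain ia32 tasks get the
 * 32-bit __USER32_CS.  Both use __USER_DS for stack and data.
 */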
#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
	fpu_switch_t fpu;

	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch FS and GS.
	 *
	 * A nonzero segment register always requires a reload.  Also
	 * reload when the selector has changed, and whenever the
	 * previous process used a 64-bit base, to avoid leaking it
	 * to the next process.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * If the user loaded a selector != 0, clear the saved
		 * 64-bit base: an MSR-set base is only in effect while
		 * the null selector is loaded.
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when the next process has a 64-bit base, use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	switch_fpu_finish(next_p, fpu);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (current->mm)
		current->mm->context.ia32_compat = 1;

	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		current->personality &= ~READ_IMPLIES_EXEC;
		/* is_compat_task() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

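/*
 * Find out where a sleeping task is blocked.  Walk the saved frame
 * pointers up from the task's switched-out stack and return the first
 * return address that is not inside the scheduler itself.  The walk
 * is bounded to 16 frames and bails out if a frame pointer leaves the
 * task's stack, since the stack can change underneath us.
 */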
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

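/*
 * Get/set the FS or GS base for an arbitrary task.  Bases below 4 GB
 * are installed as a GDT TLS descriptor (cheaper to switch), while
 * larger bases go through MSR_FS_BASE / MSR_KERNEL_GS_BASE with the
 * selector cleared.  When @task is the current task the new state is
 * loaded immediately; otherwise it takes effect at the next switch.
 */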
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

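/*
 * Report a task's user stack pointer.  For compat (ia32) tasks the
 * value lives in the saved pt_regs; for 64-bit tasks the SYSCALL path
 * keeps it in thread.usersp rather than in the pt_regs frame.
 */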
unsigned long KSTK_ESP(struct task_struct *task)
{
	return (test_tsk_thread_flag(task, TIF_IA32)) ?
			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
}