
/arch/i386/kernel/ptrace.c

https://bitbucket.org/evzijst/gittest
/* ptrace.c */
/* By Ross Biro 1/23/92 */
/*
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/seccomp.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>

/*
 * This does not yet catch signals sent when the child dies
 * in exit.c or in signal.c.
 */

/* Determines which eflags bits the user has access to. */
/* 1 = access, 0 = no access */
#define FLAG_MASK 0x00044dd5
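/*
 * Note: 0x00044dd5 covers the arithmetic/status and a few other bits,
 * i.e. CF, PF, AF, ZF, SF, TF, DF, OF, NT and AC; privileged bits such
 * as IF, IOPL, VM and RF are deliberately excluded.
 */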

/* Sets the trap flag (TF, bit 8 of eflags). */
#define TRAP_FLAG 0x100

/*
 * Offset of eflags on the child's kernel stack.
 */
#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
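/*
 * EFL is the index of eflags in the user-visible register layout, which
 * lists fs and gs even though they are not saved in pt_regs; the "-2"
 * skips those two slots.  The result is negative because thread.esp0
 * points just past the saved pt_regs on the kernel stack.
 */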

static inline struct pt_regs *get_child_regs(struct task_struct *task)
{
	void *stack_top = (void *)task->thread.esp0;
	return stack_top - sizeof(struct pt_regs);
}

/*
 * This routine gets a word off of the process's privileged (kernel) stack.
 * The offset is how far from the base address stored in the TSS.
 * It assumes that all the privileged stacks are in our data space.
 */
static inline int get_stack_long(struct task_struct *task, int offset)
{
	unsigned char *stack;

	stack = (unsigned char *)task->thread.esp0;
	stack += offset;
	return (*((int *)stack));
}

/*
 * This routine puts a word on the process's privileged (kernel) stack.
 * The offset is how far from the base address stored in the TSS.
 * It assumes that all the privileged stacks are in our data space.
 */
static inline int put_stack_long(struct task_struct *task, int offset,
	unsigned long data)
{
	unsigned char * stack;

	stack = (unsigned char *) task->thread.esp0;
	stack += offset;
	*(unsigned long *) stack = data;
	return 0;
}

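/*
 * putreg()/getreg() take offsets into the user-visible register layout
 * (struct user_regs_struct), which includes slots for fs and gs.  Those
 * two registers live in the thread struct rather than in pt_regs, so for
 * anything past GS the offset is shrunk by 2*4 bytes before it is applied
 * to the saved pt_regs on the kernel stack.
 */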
static int putreg(struct task_struct *child,
	unsigned long regno, unsigned long value)
{
	switch (regno >> 2) {
		case FS:
			if (value && (value & 3) != 3)
				return -EIO;
			child->thread.fs = value;
			return 0;
		case GS:
			if (value && (value & 3) != 3)
				return -EIO;
			child->thread.gs = value;
			return 0;
		case DS:
		case ES:
			if (value && (value & 3) != 3)
				return -EIO;
			value &= 0xffff;
			break;
		case SS:
		case CS:
			if ((value & 3) != 3)
				return -EIO;
			value &= 0xffff;
			break;
		case EFL:
			value &= FLAG_MASK;
			value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
			break;
	}
	if (regno > GS*4)
		regno -= 2*4;
	put_stack_long(child, regno - sizeof(struct pt_regs), value);
	return 0;
}

static unsigned long getreg(struct task_struct *child,
	unsigned long regno)
{
	unsigned long retval = ~0UL;

	switch (regno >> 2) {
		case FS:
			retval = child->thread.fs;
			break;
		case GS:
			retval = child->thread.gs;
			break;
		case DS:
		case ES:
		case SS:
		case CS:
			retval = 0xffff;
			/* fall through */
		default:
			if (regno > GS*4)
				regno -= 2*4;
			regno = regno - sizeof(struct pt_regs);
			retval &= get_stack_long(child, regno);
	}
	return retval;
}

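/*
 * Bit 2 of a segment selector is the TI (table indicator) bit; when it is
 * set the selector refers to the LDT rather than the GDT.
 */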
#define LDT_SEGMENT 4

static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
	unsigned long addr, seg;

	addr = regs->eip;
	seg = regs->xcs & 0xffff;
	if (regs->eflags & VM_MASK) {
		/* vm86 mode: real-mode style "segment*16 + 16-bit offset" */
		addr = (addr & 0xffff) + (seg << 4);
		return addr;
	}

	/*
	 * We'll assume that the code segments in the GDT
	 * are all zero-based. That is largely true: the
	 * TLS segments are used for data, and the PNPBIOS
	 * and APM bios ones we just ignore here.
	 */
	if (seg & LDT_SEGMENT) {
		u32 *desc;
		unsigned long base;

		down(&child->mm->context.sem);
		/* seg & ~7 strips the RPL and TI bits, leaving the byte
		   offset of the descriptor within the LDT */
		desc = child->mm->context.ldt + (seg & ~7);
		/* assemble the 32-bit segment base from the descriptor */
		base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);

		/* 16-bit code segment? */
		if (!((desc[1] >> 22) & 1))
			addr &= 0xffff;
		addr += base;
		up(&child->mm->context.sem);
	}
	return addr;
}

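/*
 * Check whether the instruction at the child's current eip is a popf
 * (possibly behind instruction prefixes).  popf can rewrite TF itself,
 * in which case set_singlestep() below must not claim ownership of the
 * trap flag via PT_DTRACE.
 */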
static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
{
	int i, copied;
	unsigned char opcode[16];
	unsigned long addr = convert_eip_to_linear(child, regs);

	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
	for (i = 0; i < copied; i++) {
		switch (opcode[i]) {
		/* popf */
		case 0x9d:
			return 1;
		/* opcode and address size prefixes */
		case 0x66: case 0x67:
			continue;
		/* irrelevant prefixes (segment overrides and repeats) */
		case 0x26: case 0x2e:
		case 0x36: case 0x3e:
		case 0x64: case 0x65:
		case 0xf0: case 0xf2: case 0xf3:
			continue;

		/*
		 * pushf: NOTE! We should probably not let
		 * the user see the TF bit being set. But
		 * it's more pain than it's worth to avoid
		 * it, and a debugger could emulate this
		 * all in user space if it _really_ cares.
		 */
		case 0x9c:
		default:
			return 0;
		}
	}
	return 0;
}

static void set_singlestep(struct task_struct *child)
{
	struct pt_regs *regs = get_child_regs(child);

	/*
	 * Always set TIF_SINGLESTEP - this guarantees that
	 * we single-step system calls etc..  This will also
	 * cause us to set TF when returning to user mode.
	 */
	set_tsk_thread_flag(child, TIF_SINGLESTEP);

	/*
	 * If TF was already set, don't do anything else
	 */
	if (regs->eflags & TRAP_FLAG)
		return;

	/* Set TF on the kernel stack.. */
	regs->eflags |= TRAP_FLAG;

	/*
	 * ..but if TF is changed by the instruction we will trace,
	 * don't mark it as being "us" that set it, so that we
	 * won't clear it by hand later.
	 */
	if (is_at_popf(child, regs))
		return;

	child->ptrace |= PT_DTRACE;
}

static void clear_singlestep(struct task_struct *child)
{
	/* Always clear TIF_SINGLESTEP... */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);

	/* But touch TF only if it was set by us.. */
	if (child->ptrace & PT_DTRACE) {
		struct pt_regs *regs = get_child_regs(child);
		regs->eflags &= ~TRAP_FLAG;
		child->ptrace &= ~PT_DTRACE;
	}
}

/*
 * Called by kernel/ptrace.c when detaching..
 *
 * Make sure the single step bit is not set.
 */
void ptrace_disable(struct task_struct *child)
{
	clear_singlestep(child);
}

/*
 * Perform get_thread_area on behalf of the traced child.
 */
static int
ptrace_get_thread_area(struct task_struct *child,
		       int idx, struct user_desc __user *user_desc)
{
	struct user_desc info;
	struct desc_struct *desc;

/*
 * Get the current Thread-Local Storage area:
 */

#define GET_BASE(desc) ( \
	(((desc)->a >> 16) & 0x0000ffff) | \
	(((desc)->b << 16) & 0x00ff0000) | \
	( (desc)->b        & 0xff000000)   )

#define GET_LIMIT(desc) ( \
	((desc)->a & 0x0ffff) | \
	 ((desc)->b & 0xf0000) )

#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
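/*
 * The GET_* macros above decode the two 32-bit halves of a GDT descriptor:
 * "a" is the low word (limit 15..0, base 15..0) and "b" is the high word
 * (base 31..24, flags, limit 19..16, access byte, base 23..16).
 */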

	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
		return -EINVAL;

	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;

	info.entry_number = idx;
	info.base_addr = GET_BASE(desc);
	info.limit = GET_LIMIT(desc);
	info.seg_32bit = GET_32BIT(desc);
	info.contents = GET_CONTENTS(desc);
	info.read_exec_only = !GET_WRITABLE(desc);
	info.limit_in_pages = GET_LIMIT_PAGES(desc);
	info.seg_not_present = !GET_PRESENT(desc);
	info.useable = GET_USEABLE(desc);

	if (copy_to_user(user_desc, &info, sizeof(info)))
		return -EFAULT;

	return 0;
}

/*
 * Perform set_thread_area on behalf of the traced child.
 */
static int
ptrace_set_thread_area(struct task_struct *child,
		       int idx, struct user_desc __user *user_desc)
{
	struct user_desc info;
	struct desc_struct *desc;

	if (copy_from_user(&info, user_desc, sizeof(info)))
		return -EFAULT;

	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
		return -EINVAL;

	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
	if (LDT_empty(&info)) {
		desc->a = 0;
		desc->b = 0;
	} else {
		desc->a = LDT_entry_a(&info);
		desc->b = LDT_entry_b(&info);
	}

	return 0;
}
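/*
 * Note: this only updates the child's saved tls_array; the new descriptor
 * takes effect when the (currently stopped) child is next scheduled and
 * its TLS entries are reloaded on context switch.
 */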

asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	struct user * dummy = NULL;
	int i, ret;
	unsigned long __user *datap = (unsigned long __user *)data;
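
	/*
	 * 'dummy' above is never dereferenced: &dummy->u_debugreg[i] with a
	 * NULL base simply yields the offset of that field within struct
	 * user (an offsetof() idiom), used by the PEEKUSR/POKEUSR cases.
	 */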

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		ret = security_ptrace(current->parent, current);
		if (ret)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	switch (request) {
	/* when I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		unsigned long tmp;
		int copied;

		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		ret = -EIO;
		if (copied != sizeof(tmp))
			break;
		ret = put_user(tmp, datap);
		break;
	}

	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long tmp;

		ret = -EIO;
		if ((addr & 3) || addr < 0 ||
		    addr > sizeof(struct user) - 3)
			break;

		tmp = 0;  /* Default return condition */
		if(addr < FRAME_SIZE*sizeof(long))
			tmp = getreg(child, addr);
		if(addr >= (long) &dummy->u_debugreg[0] &&
		   addr <= (long) &dummy->u_debugreg[7]){
			addr -= (long) &dummy->u_debugreg[0];
			addr = addr >> 2;
			tmp = child->thread.debugreg[addr];
		}
		ret = put_user(tmp, datap);
		break;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
			break;
		ret = -EIO;
		break;

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		ret = -EIO;
		if ((addr & 3) || addr < 0 ||
		    addr > sizeof(struct user) - 3)
			break;

		if (addr < FRAME_SIZE*sizeof(long)) {
			ret = putreg(child, addr, data);
			break;
		}
		/* We need to be very careful here.  We implicitly
		   want to modify a portion of the task_struct, and we
		   have to be selective about what portions we allow someone
		   to modify. */

		  ret = -EIO;
		  if(addr >= (long) &dummy->u_debugreg[0] &&
		     addr <= (long) &dummy->u_debugreg[7]){

			  if(addr == (long) &dummy->u_debugreg[4]) break;
			  if(addr == (long) &dummy->u_debugreg[5]) break;
			  if(addr < (long) &dummy->u_debugreg[4] &&
			     ((unsigned long) data) >= TASK_SIZE-3) break;

			  /* Sanity-check data. Take one half-byte at once with
			   * check = (val >> (16 + 4*i)) & 0xf. It contains the
			   * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
			   * 2 and 3 are LENi. Given a list of invalid values,
			   * we do mask |= 1 << invalid_value, so that
			   * (mask >> check) & 1 is a correct test for invalid
			   * values.
			   *
			   * R/Wi contains the type of the breakpoint /
			   * watchpoint, LENi contains the length of the watched
			   * data in the watchpoint case.
			   *
			   * The invalid values are:
			   * - LENi == 0b10 (undefined), so mask |= 0x0f00.
			   * - R/Wi == 0b10 (break on I/O reads or writes), so
			   *   mask |= 0x4444.
			   * - R/Wi == 0b00 && LENi != 0b00, so we have mask |=
			   *   0x1110.
			   *
			   * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
			   *
			   * See the Intel Manual "System Programming Guide",
			   * 15.2.4
			   *
			   * Note that LENi == 0b10 is defined on x86_64 in long
			   * mode (i.e. even for 32-bit userspace software, but
			   * 64-bit kernel), so the x86_64 mask value is 0x5454.
			   * See the AMD manual no. 24593 (AMD64 System
			   * Programming)*/

			  if(addr == (long) &dummy->u_debugreg[7]) {
				  data &= ~DR_CONTROL_RESERVED;
				  for(i=0; i<4; i++)
					  if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
						  goto out_tsk;
			  }

			  addr -= (long) &dummy->u_debugreg;
			  addr = addr >> 2;
			  child->thread.debugreg[addr] = data;
			  ret = 0;
		  }
		  break;

	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
	case PTRACE_CONT:	/* restart after signal. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		if (request == PTRACE_SYSCALL) {
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		}
		else {
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		}
		child->exit_code = data;
		/* make sure the single step bit is not set. */
		clear_singlestep(child);
		wake_up_process(child);
		ret = 0;
		break;

/*
 * Make the child exit.  The best we can do is send it a SIGKILL;
 * perhaps it should be put in the status that it wants to exit.
 */
	case PTRACE_KILL:
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		/* make sure the single step bit is not set. */
		clear_singlestep(child);
		wake_up_process(child);
		break;

	case PTRACE_SINGLESTEP:	/* set the trap flag. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		set_singlestep(child);
		child->exit_code = data;
		/* give it a chance to run. */
		wake_up_process(child);
		ret = 0;
		break;

	case PTRACE_DETACH:
		/* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
		if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
			ret = -EIO;
			break;
		}
		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
			__put_user(getreg(child, i), datap);
			datap++;
		}
		ret = 0;
		break;
	}

	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
		unsigned long tmp;
		if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
			ret = -EIO;
			break;
		}
		for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
			__get_user(tmp, datap);
			putreg(child, i, tmp);
			datap++;
		}
		ret = 0;
		break;
	}

	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
		if (!access_ok(VERIFY_WRITE, datap,
			       sizeof(struct user_i387_struct))) {
			ret = -EIO;
			break;
		}
		ret = 0;
		/* give a never-used FPU a clean initial state so there is
		   something consistent to copy out */
		if (!tsk_used_math(child))
			init_fpu(child);
		get_fpregs((struct user_i387_struct __user *)data, child);
		break;
	}

	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
		if (!access_ok(VERIFY_READ, datap,
			       sizeof(struct user_i387_struct))) {
			ret = -EIO;
			break;
		}
		set_stopped_child_used_math(child);
		set_fpregs(child, (struct user_i387_struct __user *)data);
		ret = 0;
		break;
	}

	case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
		if (!access_ok(VERIFY_WRITE, datap,
			       sizeof(struct user_fxsr_struct))) {
			ret = -EIO;
			break;
		}
		/* as above: initialize a never-used FPU before copying out */
		if (!tsk_used_math(child))
			init_fpu(child);
		ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
		break;
	}

	case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
		if (!access_ok(VERIFY_READ, datap,
			       sizeof(struct user_fxsr_struct))) {
			ret = -EIO;
			break;
		}
		set_stopped_child_used_math(child);
		ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
		break;
	}

	case PTRACE_GET_THREAD_AREA:
		ret = ptrace_get_thread_area(child, addr,
					(struct user_desc __user *) data);
		break;

	case PTRACE_SET_THREAD_AREA:
		ret = ptrace_set_thread_area(child, addr,
					(struct user_desc __user *) data);
		break;

	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}
out_tsk:
	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}

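/*
 * Example (userspace sketch, not part of the kernel): a hypothetical tracer
 * that is attached to a stopped child 'pid' could use the PEEKUSR interface
 * implemented above to read registers and debug registers by their offsets
 * within struct user (error checking omitted):
 *
 *	#include <stddef.h>
 *	#include <sys/ptrace.h>
 *	#include <sys/user.h>
 *
 *	long eip = ptrace(PTRACE_PEEKUSER, pid,
 *			  (void *)offsetof(struct user, regs.eip), NULL);
 *	long dr7 = ptrace(PTRACE_PEEKUSER, pid,
 *			  (void *)offsetof(struct user, u_debugreg[7]), NULL);
 */
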
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
{
	struct siginfo info;

	tsk->thread.trap_no = 1;
	tsk->thread.error_code = error_code;

	memset(&info, 0, sizeof(info));
	info.si_signo = SIGTRAP;
	info.si_code = TRAP_BRKPT;

	/* User-mode eip? */
	info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL;

	/* Send us the fakey SIGTRAP */
	force_sig_info(SIGTRAP, &info, tsk);
}

/* notification of system call entry/exit
 * - triggered by current->work.syscall_trace
 */
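/*
 * Called from the assembly syscall entry/exit paths with its arguments in
 * registers, hence the regparm(3) attribute below; entryexit is 0 on
 * syscall entry and non-zero on syscall exit.
 */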
__attribute__((regparm(3)))
void do_syscall_trace(struct pt_regs *regs, int entryexit)
{
	/* do the secure computing check first */
	secure_computing(regs->orig_eax);

	if (unlikely(current->audit_context)) {
		if (!entryexit)
			audit_syscall_entry(current, regs->orig_eax,
					    regs->ebx, regs->ecx,
					    regs->edx, regs->esi);
		else
			audit_syscall_exit(current, regs->eax);
	}

	if (!(current->ptrace & PT_PTRACED))
		return;

	/* Fake a debug trap */
	if (test_thread_flag(TIF_SINGLESTEP))
		send_sigtrap(current, regs, 0);

	if (!test_thread_flag(TIF_SYSCALL_TRACE))
		return;

	/* the 0x80 provides a way for the tracing parent to distinguish
	   between a syscall stop and SIGTRAP delivery */
	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));

	/*
	 * this isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (current->exit_code) {
		send_sig(current->exit_code, current, 1);
		current->exit_code = 0;
	}
}