PageRenderTime 71ms CodeModel.GetById 36ms app.highlight 29ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/third_party/libdisasm/ia32_insn.c

http://github.com/tomahawk-player/tomahawk
C | 625 lines | 398 code | 94 blank | 133 comment | 118 complexity | b554786001c9da548a331033e403d758 MD5 | raw file
  1#include <stdio.h>
  2#include <stdlib.h>
  3#include <string.h>
  4#include "qword.h"
  5
  6#include "ia32_insn.h"
  7#include "ia32_opcode_tables.h"
  8
  9#include "ia32_reg.h"
 10#include "ia32_operand.h"
 11#include "ia32_implicit.h"
 12#include "ia32_settings.h"
 13
 14#include "libdis.h"
 15
 16extern ia32_table_desc_t ia32_tables[];
 17extern ia32_settings_t ia32_settings;
 18
 19#define IS_SP( op )  (op->type == op_register && 	\
 20		(op->data.reg.id == REG_ESP_INDEX || 	\
 21		 op->data.reg.alias == REG_ESP_INDEX) )
 22#define IS_IMM( op ) (op->type == op_immediate )
 23
 24#ifdef WIN32
 25#  define INLINE 
 26#else
 27#  define INLINE inline
 28#endif
 29
 30/* for calculating stack modification based on an operand */
 31static INLINE int32_t long_from_operand( x86_op_t *op ) {
 32
 33	if (! IS_IMM(op) ) {
 34		return 0L;
 35	}
 36
 37	switch ( op->datatype ) {
 38		case op_byte:
 39			return (int32_t) op->data.sbyte;
 40		case op_word:
 41			return (int32_t) op->data.sword;
 42		case op_qword:
 43			return (int32_t) op->data.sqword;
 44		case op_dword:
 45			return op->data.sdword;
 46		default:
 47			/* these are not used in stack insn */
 48			break;
 49	}
 50
 51	return 0L;
 52}
 53		
 54
 55/* determine what this insn does to the stack */
 56static void ia32_stack_mod(x86_insn_t *insn) {
 57	x86_op_t *dest, *src = NULL;
 58
 59	if (! insn || ! insn->operands ) {
 60		return;
 61	}
 62       
 63	dest = &insn->operands->op;
 64	if ( dest ) {
 65		src = &insn->operands->next->op;
 66	}
 67
 68	insn->stack_mod = 0; 
 69	insn->stack_mod_val = 0;
 70
 71	switch ( insn->type ) {
 72		case insn_call:
 73		case insn_callcc:
 74			insn->stack_mod = 1;
 75			insn->stack_mod_val = insn->addr_size * -1;
 76			break;
 77		case insn_push:
 78			insn->stack_mod = 1;
 79			insn->stack_mod_val = insn->addr_size * -1;
 80			break;
 81		case insn_return:
 82			insn->stack_mod = 1;
 83			insn->stack_mod_val = insn->addr_size;
 84		case insn_int: case insn_intcc:
 85		case insn_iret:
 86			break;
 87		case insn_pop:
 88			insn->stack_mod = 1;
 89			if (! IS_SP( dest ) ) {
 90				insn->stack_mod_val = insn->op_size;
 91			} /* else we don't know the stack change in a pop esp */
 92			break;
 93		case insn_enter:
 94			insn->stack_mod = 1;
 95			insn->stack_mod_val = 0; /* TODO : FIX */
 96			break;
 97		case insn_leave:
 98			insn->stack_mod = 1;
 99			insn->stack_mod_val = 0; /* TODO : FIX */
100			break;
101		case insn_pushregs:
102			insn->stack_mod = 1;
103			insn->stack_mod_val = 0; /* TODO : FIX */
104			break;
105		case insn_popregs:
106			insn->stack_mod = 1;
107			insn->stack_mod_val = 0; /* TODO : FIX */
108			break;
109		case insn_pushflags:
110			insn->stack_mod = 1;
111			insn->stack_mod_val = 0; /* TODO : FIX */
112			break;
113		case insn_popflags:
114			insn->stack_mod = 1;
115			insn->stack_mod_val = 0; /* TODO : FIX */
116			break;
117		case insn_add:
118			if ( IS_SP( dest ) ) {
119				insn->stack_mod = 1;
120				insn->stack_mod_val = long_from_operand( src ); 
121			}
122			break;
123		case insn_sub:
124			if ( IS_SP( dest ) ) {
125				insn->stack_mod = 1;
126				insn->stack_mod_val = long_from_operand( src ); 
127				insn->stack_mod_val *= -1;
128			}
129			break;
130		case insn_inc:
131			if ( IS_SP( dest ) ) {
132				insn->stack_mod = 1;
133				insn->stack_mod_val = 1;
134			}
135			break;
136		case insn_dec:
137			if ( IS_SP( dest ) ) {
138				insn->stack_mod = 1;
139				insn->stack_mod_val = 1;
140			}
141			break;
142		case insn_mov: case insn_movcc:
143		case insn_xchg: case insn_xchgcc:
144		case insn_mul: case insn_div:
145		case insn_shl: case insn_shr:
146		case insn_rol: case insn_ror:
147		case insn_and: case insn_or:
148		case insn_not: case insn_neg:
149		case insn_xor:
150			if ( IS_SP( dest ) ) {
151				insn->stack_mod = 1;
152			}
153			break;
154		default:
155			break;
156	}
157	if (! strcmp("enter", insn->mnemonic) ) {
158		insn->stack_mod = 1;
159	} else if (! strcmp("leave", insn->mnemonic) ) {
160		insn->stack_mod = 1;
161	}
162
163	/* for mov, etc we return 0 -- unknown stack mod */
164
165	return;
166}
167
168/* get the cpu details for this insn from cpu flags int */
169static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) {
170	insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
171	insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu)) >> 16;
172	return;
173}
174
175/* handle mnemonic type and group */
176static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) {
177	unsigned int type = mnemtype & ~INS_FLAG_MASK;
178        insn->group = (enum x86_insn_group) (INS_GROUP(type)) >> 12;
179        insn->type = (enum x86_insn_type) INS_TYPE(type);
180
181	return;
182}
183
184static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) {
185	insn->note = (enum x86_insn_note) notes;
186	return;
187}
188
189static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) {
190        unsigned int flags;
191
192        /* handle flags effected */
193        flags = INS_FLAGS_TEST(eflags);
194        /* handle weird OR cases */
195        /* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */
196        if (flags & INS_TEST_OR) {
197                flags &= ~INS_TEST_OR;
198                if ( flags & INS_TEST_ZERO ) {
199                        flags &= ~INS_TEST_ZERO;
200                        if ( flags & INS_TEST_CARRY ) {
201                                flags &= ~INS_TEST_CARRY ;
202                                flags |= (int)insn_carry_or_zero_set;
203                        } else if ( flags & INS_TEST_SFNEOF ) {
204                                flags &= ~INS_TEST_SFNEOF;
205                                flags |= (int)insn_zero_set_or_sign_ne_oflow;
206                        }
207                }
208        }
209        insn->flags_tested = (enum x86_flag_status) flags;
210
211        insn->flags_set = (enum x86_flag_status) INS_FLAGS_SET(eflags) >> 16;
212
213	return;
214}
215
216static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) {
217
218        insn->prefix = (enum x86_insn_prefix) prefixes & PREFIX_MASK; // >> 20;
219        if (! (insn->prefix & PREFIX_PRINT_MASK) ) {
220		/* no printable prefixes */
221                insn->prefix = insn_no_prefix;
222        }
223
224        /* concat all prefix strings */
225        if ( (unsigned int)insn->prefix & PREFIX_LOCK ) {
226                strncat(insn->prefix_string, "lock ", 32 - 
227				strlen(insn->prefix_string));
228        }
229
230        if ( (unsigned int)insn->prefix & PREFIX_REPNZ ) {
231                strncat(insn->prefix_string, "repnz ", 32  - 
232				strlen(insn->prefix_string));
233        } else if ( (unsigned int)insn->prefix & PREFIX_REPZ ) {
234                strncat(insn->prefix_string, "repz ", 32 - 
235				strlen(insn->prefix_string));
236        }
237
238        return;
239}
240
241
242static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {
243
244	/* if this is a 32-bit register and it is a general register ... */
245	if ( op->type == op_register && op->data.reg.size == 4 && 
246	     (op->data.reg.type & reg_gen) ) {
247		/* WORD registers are 8 indices off from DWORD registers */
248		ia32_handle_register( &(op->data.reg), 
249				op->data.reg.id + 8 );
250	}
251}
252
/* Fill in the x86_insn_t fields derived from the opcode-table entry:
 * type/group, notes, eflags usage, cpu/isa, and stack effect.
 * NOTE: ia32_stack_mod() reads insn->type, so it must run after
 * ia32_handle_mnemtype() -- do not reorder these calls. */
static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) {
	ia32_handle_mnemtype( insn, raw_insn->mnem_flag );
	ia32_handle_notes( insn, raw_insn->notes );
	ia32_handle_eflags( insn, raw_insn->flags_effected );
	ia32_handle_cpu( insn, raw_insn->cpu );
	ia32_stack_mod( insn );
}
260
261static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len, 
262			   ia32_insn_t *raw_insn, x86_insn_t *insn,
263			   unsigned int prefixes ) {
264	size_t size, op_size;
265	unsigned char modrm;
266
267	/* this should never happen, but just in case... */
268	if ( raw_insn->mnem_flag == INS_INVALID ) {
269		return 0;
270	}
271
272	if (ia32_settings.options & opt_16_bit) {
273		insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
274		insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
275	} else {
276		insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
277		insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
278	}
279
280
281	/*  ++++   1. Copy mnemonic and mnemonic-flags to CODE struct */
282	if ((ia32_settings.options & opt_att_mnemonics) && raw_insn->mnemonic_att[0]) {
283		strncpy( insn->mnemonic, raw_insn->mnemonic_att, 16 );
284	}
285	else {
286		strncpy( insn->mnemonic, raw_insn->mnemonic, 16 );
287	}
288	ia32_handle_prefix( insn, prefixes );
289
290	handle_insn_metadata( insn, raw_insn );
291
292	/* prefetch the next byte in case it is a modr/m byte -- saves
293	 * worrying about whether the 'mod/rm' operand or the 'reg' operand
294	 * occurs first */
295	modrm = GET_BYTE( buf, buf_len );
296
297	/*  ++++   2. Decode Explicit Operands */
298	/* Intel uses up to 3 explicit operands in its instructions;
299	 * the first is 'dest', the second is 'src', and the third
300	 * is an additional source value (usually an immediate value,
301	 * e.g. in the MUL instructions). These three explicit operands
302	 * are encoded in the opcode tables, even if they are not used 
303	 * by the instruction. Additional implicit operands are stored
304	 * in a supplemental table and are handled later. */
305
306	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest, 
307					raw_insn->dest_flag, prefixes, modrm );
308	/* advance buffer, increase size if necessary */
309	buf += op_size;
310	buf_len -= op_size;
311	size = op_size;
312
313	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src, 
314					raw_insn->src_flag, prefixes, modrm );
315	buf += op_size;
316	buf_len -= op_size;
317	size += op_size;
318
319	op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux, 
320					raw_insn->aux_flag, prefixes, modrm );
321	size += op_size;
322
323
324	/*  ++++   3. Decode Implicit Operands */
325	/* apply implicit operands */
326	ia32_insn_implicit_ops( insn, raw_insn->implicit_ops );
327	/* we have one small inelegant hack here, to deal with 
328	 * the two prefixes that have implicit operands. If Intel
329	 * adds more, we'll change the algorithm to suit :) */
330	if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
331		ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP );
332	}
333
334
335	/* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */
336	if ( insn->op_size == 2 ) {
337		x86_operand_foreach( insn, reg_32_to_16, NULL, op_any );
338	}
339
340	return size;
341}
342
343
344/* convenience routine */
345#define USES_MOD_RM(flag) \
346	(flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
347	 flag == ADDRMETH_W || flag == ADDRMETH_R)
348
349static int uses_modrm_flag( unsigned int flag ) {
350	unsigned int meth;
351	if ( flag == ARG_NONE ) {
352		return 0;
353	}
354	meth = (flag & ADDRMETH_MASK);
355	if ( USES_MOD_RM(meth) ) {
356		return 1;
357	}
358
359	return 0;
360}
361
362/* This routine performs the actual byte-by-byte opcode table lookup.
363 * Originally it was pretty simple: get a byte, adjust it to a proper
364 * index into the table, then check the table row at that index to
365 * determine what to do next. But is anything that simple with Intel?
366 * This is now a huge, convoluted mess, mostly of bitter comments. */
367/* buf: pointer to next byte to read from stream 
368 * buf_len: length of buf
369 * table: index of table to use for lookups
370 * raw_insn: output pointer that receives opcode definition
371 * prefixes: output integer that is encoded with prefixes in insn 
372 * returns : number of bytes consumed from stream during lookup */ 
size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
				 unsigned int table, ia32_insn_t **raw_insn,
				 unsigned int *prefixes ) {
	unsigned char *next, op = buf[0];	/* byte value -- 'opcode' */
	size_t size = 1, sub_size = 0, next_len;
	ia32_table_desc_t *table_desc;
	unsigned int subtable, prefix = 0, recurse_table = 0;

	table_desc = &ia32_tables[table];

	op = GET_BYTE( buf, buf_len );

	if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
		/* one of the FPU tables out of the 00-BF range */
		/* OK, this is a bit of a hack -- the proper way would
		 * have been to use subtables in the 00-BF FPU opcode tables,
		 * but that is rather wasteful of space...
		 * The out-of-range half of each FPU table is stored in
		 * the very next table entry, hence 'table + 1'. */
		table_desc = &ia32_tables[table +1];
	}

	/* PERFORM TABLE LOOKUP */

	/* ModR/M trick: shift extension bits into lowest bits of byte */
	/* Note: non-ModR/M tables have a shift value of 0 */
	op >>= table_desc->shift;

	/* ModR/M trick: mask out high bits to turn extension into an index */
	/* Note: non-ModR/M tables have a mask value of 0xFF */
	op &= table_desc->mask;


	/* Sparse table trick: check that byte is <= max value */
	/* Note: full (256-entry) tables have a maxlim of 255 */
	if ( op > table_desc->maxlim ) {
		/* this is a partial table, truncated at the tail,
		   and op is out of range! */
		return INVALID_INSN;
	}

	/* Sparse table trick: check that byte is >= min value */
	/* Note: full (256-entry) tables have a minlim of 0 */
	if ( table_desc->minlim > op ) {
		/* this is a partial table, truncated at the head,
		   and op is out of range! */
		return INVALID_INSN;
	}
	/* adjust op to be an offset from table index 0 */
	op -= table_desc->minlim;

	/* Yay! 'op' is now fully adjusted to be an index into 'table' */
	*raw_insn = &(table_desc->table[op]);
	//printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op ); 

	/* if the entry is itself a prefix, remember its prefix bits so we
	 * can apply them after the recursive lookup below */
	if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
		prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
	}


	/* handle escape to a multibyte/coproc/extension/etc table */
	/* NOTE: if insn is a prefix and has a subtable, then we
	 *       only recurse if this is the first prefix byte --
	 *       that is, if *prefixes is 0. 
	 * NOTE also that suffix tables are handled later */
	subtable = (*raw_insn)->table;

	if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
	     (! prefix || ! *prefixes) ) {

	     	if ( ia32_tables[subtable].type == tbl_ext_ext ||
	     	     ia32_tables[subtable].type == tbl_fpu_ext ) {
			/* opcode extension: reuse current byte in buffer */
			next = buf;
			next_len = buf_len;
		} else {
			/* "normal" opcode: advance to next byte in buffer */
			if ( buf_len > 1 ) {
				next = &buf[1];
				next_len = buf_len - 1;
			}
			else {
				// buffer is truncated 
				return INVALID_INSN;
			}
		}
		/* we encountered a multibyte opcode: recurse using the
		 * table specified in the opcode definition */
		sub_size = ia32_table_lookup( next, next_len, subtable, 
				raw_insn, prefixes );

		/* SSE/prefix hack: if the original opcode def was a 
		 * prefix that specified a subtable, and the subtable
		 * lookup returned a valid insn, then we have encountered
		 * an SSE opcode definition; otherwise, we pretend we
		 * never did the subtable lookup, and deal with the 
		 * prefix normally later */
		if ( prefix && ( sub_size == INVALID_INSN  ||
		       INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
			/* this is a prefix, not an SSE insn :
			 * lookup next byte in main table,
			 * subsize will be reset during the
			 * main table lookup */
			recurse_table = 1;
		} else {
			/* this is either a subtable (two-byte) insn
			 * or an invalid insn: either way, set prefix
			 * to NULL and end the opcode lookup */
			prefix = 0;
			// short-circuit lookup on invalid insn
			if (sub_size == INVALID_INSN) return INVALID_INSN;
		}
	} else if ( prefix ) {
		recurse_table = 1;
	}

	/* by default, we assume that we have the opcode definition,
	 * and there is no need to recurse on the same table, but
	 * if we do then a prefix was encountered... */
	if ( recurse_table ) {
		/* this must have been a prefix: use the same table for
		 * lookup of the next byte
		 * NOTE(review): if buf_len == 1 here, buf_len - 1 is 0
		 * and &buf[1] is one past the end -- the recursion relies
		 * on GET_BYTE handling a zero-length buffer; confirm */
		sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table, 
				raw_insn, prefixes );

		// short-circuit lookup on invalid insn
		if (sub_size == INVALID_INSN) return INVALID_INSN;

		/* a bit of a hack for branch hints */
		if ( prefix & BRANCH_HINT_MASK ) {
			if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
				/* segment override prefixes are invalid for
			 	* all branch instructions, so delete them */
				prefix &= ~PREFIX_REG_MASK;
			} else {
				prefix &= ~BRANCH_HINT_MASK;
			}
		}

		/* apply prefix to instruction */

		/* TODO: implement something enforcing prefix groups */
		(*prefixes) |= prefix;
	}

	/* if this lookup was in a ModR/M table, then an opcode byte is 
	 * NOT consumed: subtract accordingly. NOTE that if none of the
	 * operands used the ModR/M, then we need to consume the byte
	 * here, but ONLY in the 'top-level' opcode extension table */

	if ( table_desc->type == tbl_ext_ext ) {
		/* extensions-to-extensions never consume a byte */
		--size;
	} else if ( (table_desc->type == tbl_extension || 
	       	     table_desc->type == tbl_fpu ||
		     table_desc->type == tbl_fpu_ext ) && 
		/* extensions that have an operand encoded in ModR/M
		 * never consume a byte */
	      	    (uses_modrm_flag((*raw_insn)->dest_flag) || 
	             uses_modrm_flag((*raw_insn)->src_flag) )  	) {
		--size;
	}

	size += sub_size;

	return size;
}
538
539static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len,
540			   ia32_insn_t *raw_insn, x86_insn_t * insn ) {
541	ia32_table_desc_t *table_desc;
542	ia32_insn_t *sfx_insn;
543	size_t size;
544	unsigned int prefixes = 0;
545
546	table_desc = &ia32_tables[raw_insn->table]; 
547	size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
548				 &prefixes );
549	if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
550		return 0;
551	}
552
553	strncpy( insn->mnemonic, sfx_insn->mnemonic, 16 );
554	handle_insn_metadata( insn, sfx_insn );
555
556	return 1;
557}
558
559/* invalid instructions are handled by returning 0 [error] from the
560 * function, setting the size of the insn to 1 byte, and copying
561 * the byte at the start of the invalid insn into the x86_insn_t.
562 * if the caller is saving the x86_insn_t for invalid instructions,
563 * instead of discarding them, this will maintain a consistent
564 * address space in the x86_insn_ts */
565
566/* this function is called by the controlling disassembler, so its name and
567 * calling convention cannot be changed */
568/*    buf   points to the loc of the current opcode (start of the 
569 *          instruction) in the instruction stream. The instruction 
570 *          stream is assumed to be a buffer of bytes read directly 
571 *          from the file for the purpose of disassembly; a mem-mapped 
572 *          file is ideal for *        this.
573 *    insn points to a code structure to be filled by instr_decode
574 *    returns the size of the decoded instruction in bytes */
/* Top-level decode driver: perform the opcode table lookup for the
 * byte stream at 'buf', then decode operands and any 3DNow! suffix.
 * On any failure the insn is marked invalid (1 byte consumed, first
 * byte copied) and 0 is returned; on success returns the full
 * instruction size in bytes. */
size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len, 
		x86_insn_t *insn ) {
	ia32_insn_t *raw_insn = NULL;
	unsigned int prefixes = 0;
	size_t size, sfx_size;
	
	if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
	    !buf[0] && !buf[1] && !buf[2] && !buf[3]) {
		/* IF IGNORE_NULLS is set AND
		 * first 4 bytes in the instruction stream are NULL
		 * THEN return 0 (END_OF_DISASSEMBLY) */
		/* TODO: set errno */
		MAKE_INVALID( insn, buf );
		return 0;	/* 4 00 bytes in a row? This isn't code! */
	}

	/* Perform recursive table lookup starting with main table (0) */
	size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes);
	if ( size == INVALID_INSN || size > buf_len || raw_insn->mnem_flag == INS_INVALID ) {
		MAKE_INVALID( insn, buf );
		/* TODO: set errno */
		return 0;
	}

	/* We now have the opcode itself figured out: we can decode
	 * the rest of the instruction. */
	size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn, 
				  prefixes );
	if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
		/* AMD 3DNow! suffix -- get proper operand type here */
		sfx_size = handle_insn_suffix( &buf[size], buf_len - size,
				raw_insn, insn );
		if (! sfx_size ) {
			/* TODO: set errno */
			MAKE_INVALID( insn, buf );
			return 0;
		}

		size += sfx_size;
	}

	if (! size ) {
		/* invalid insn */
		MAKE_INVALID( insn, buf );
		return 0;
	}


	insn->size = size;
	return size;		/* return size of instruction in bytes */
}