PageRenderTime 70ms CodeModel.GetById 43ms RepoModel.GetById 1ms app.codeStats 0ms

/insn_len.h

https://github.com/oblique/insn_len
C Header | 306 lines | 144 code | 36 blank | 126 comment | 212 complexity | cef8521fb782f7aa81d4f655606d8587 MD5 | raw file
  1. /* Copyright 2011 oblique. All rights reserved.
  2. *
  3. * Redistribution and use in source and binary forms, with or without modification, are
  4. * permitted provided that the following conditions are met:
  5. *
  6. * 1. Redistributions of source code must retain the above copyright notice, this list of
  7. * conditions and the following disclaimer.
  8. *
  9. * 2. Redistributions in binary form must reproduce the above copyright notice, this list
  10. * of conditions and the following disclaimer in the documentation and/or other materials
  11. * provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY OBLIQUE ``AS IS'' AND ANY EXPRESS OR IMPLIED
  14. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  15. * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OBLIQUE OR
  16. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  17. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  18. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  19. * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  20. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  21. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. *
  23. * The views and conclusions contained in the software and documentation are those of the
  24. * authors and should not be interpreted as representing official policies, either expressed
  25. * or implied, of oblique.
  26. */
  27. #ifndef INSN_LEN_H
  28. #define INSN_LEN_H
  29. /* References:
  30. * Intel 64 and IA-32 Architectures Software Developer's Manuals - Volume 2A Ch. 2
  31. * http://ref.x86asm.net
  32. * http://sandpile.org
  33. */
  34. #ifdef __cplusplus
  35. extern "C" {
  36. #endif
  37. #define Mod_M 0xc0
  38. #define RM_M 0x7
  39. #define Base_M 0x7
  40. #define REX_W 0x8
  41. #define MAX_INSN_LEN_x86 15
  42. #define MAX_INSN_LEN_x86_32 MAX_INSN_LEN_x86
  43. #define MAX_INSN_LEN_x86_64 MAX_INSN_LEN_x86
  44. enum __bits { __b16, __b32, __b64 };
  45. #ifdef __i386__
  46. #define insn_len(insn) insn_len_x86_32(insn)
  47. #define MAX_INSN_LEN MAX_INSN_LEN_x86_32
  48. #elif defined(__x86_64__)
  49. #define insn_len(insn) insn_len_x86_64(insn)
  50. #define MAX_INSN_LEN MAX_INSN_LEN_x86_64
  51. #endif
  52. /* This function returns the length of an x86 instruction.
  53. * I assume that instruction is valid.
  54. */
  55. static inline int __insn_len_x86(void *insn, enum __bits bits) {
  56. int len = 0, twobytes = 0, has_modrm = 0;
  57. enum __bits operand_bits = __b32, addr_bits = bits;
  58. unsigned char *c = (unsigned char*)insn, modrm, opcode;
  59. /* prefixes
  60. *
  61. * 0xf0, 0xf2, 0xf3, 0x2e, 0x36
  62. * 0x3e, 0x26, 0x64, 0x65, 0x66, 0x67
  63. */
  64. // skip prefixes
  65. while (*c == 0xf0 || *c == 0xf2 || *c == 0xf3 ||
  66. *c == 0x2e || *c == 0x36 || *c == 0x3e || *c == 0x26 ||
  67. (*c & 0xfc) == 0x64) {
  68. if (*c == 0x66) // 16bits operands
  69. operand_bits = __b16;
  70. if (*c == 0x67) // 16bits addressing (x86-32), 32bits addressing (x86-64)
  71. addr_bits = bits == __b32 ? __b16 : __b32;
  72. c++;
  73. len++;
  74. }
  75. if (bits == __b64 && (*c & 0xf0) == 0x40) { // x86-64 && REX byte
  76. if (*c & REX_W)
  77. operand_bits = __b64;
  78. c++;
  79. len++;
  80. }
  81. /* 0x9b prefix is used only by the following 1byte opcodes
  82. *
  83. * 0xd9 Mod != 11 Reg/Op = 110 or 111
  84. * 0xdb ModR/M = 0xe2 or 0xe3
  85. * 0xdd Reg/Op = 110 or 111
  86. * 0xdf ModR/M = 0xe0
  87. */
  88. // check for 2bytes opcodes (0x0f prefix)
  89. if (*c == 0x0f) {
  90. twobytes = 1;
  91. c++;
  92. len++;
  93. } else if (*c == 0x9b && // check 0x9b prefix
  94. ( (c[1] == 0xd9 && (c[2] & Mod_M) != Mod_M && (c[2] & 0x30) == 0x30) ||
  95. (c[1] == 0xdb && (c[2] == 0xe2 || c[2] == 0xe3)) ||
  96. (c[1] == 0xdd && (c[2] & 0x30) == 0x30) ||
  97. (c[1] == 0xdf && c[2] == 0xe0)
  98. )) {
  99. c++;
  100. len++;
  101. }
  102. opcode = *c++;
  103. len++;
  104. /* 1byte opcodes that use ModR/M byte:
  105. *
  106. * 0x00 - 0x03, 0x08 - 0x0b,
  107. * 0x10 - 0x13, 0x18 - 0x1b,
  108. * 0x20 - 0x23, 0x28 - 0x2b,
  109. * 0x30 - 0x33, 0x38 - 0x3b,
  110. * 0x62, 0x63, 0x69, 0x6b,
  111. * 0x80 - 0x8f, 0xc0, 0xc1,
  112. * 0xc4 - 0xc7,
  113. * 0xd0 - 0xd3, 0xd8 - 0xdf
  114. * 0xf6, 0xf7, 0xfe, 0xff
  115. */
  116. if (!twobytes &&
  117. ((opcode & 0xf4) == 0 || (opcode & 0xf4) == 0x10 ||
  118. (opcode & 0xf4) == 0x20 || (opcode & 0xf4) == 0x30 ||
  119. opcode == 0x62 || opcode == 0x63 || opcode == 0x69 || opcode == 0x6b ||
  120. (opcode & 0xf0) == 0x80 || opcode == 0xc0 || opcode == 0xc1 ||
  121. (opcode & 0xfc) == 0xc4 || (opcode & 0xfc) == 0xd0 ||
  122. (opcode & 0xf8) == 0xd8 || opcode == 0xf6 || opcode == 0xf7 ||
  123. opcode == 0xfe || opcode == 0xff))
  124. has_modrm = 1;
  125. /* 2bytes opcodes that they *don't* use ModR/M byte:
  126. *
  127. * 0x05 - 0x09, 0x0b, 0x0e,
  128. * 0x30 - 0x37, 0x77, 0x80 - 0x8f,
  129. * 0xa0 - 0xa2, 0xa8 - 0xaa, 0xb9
  130. * 0xc8 - 0xcf
  131. */
  132. if (twobytes) {
  133. if (!((opcode >= 0x05 && opcode <= 0x09) || opcode == 0x0b ||
  134. opcode == 0x0e || (opcode & 0xf8) == 0x30 || opcode == 0x77 ||
  135. (opcode & 0xf0) == 0x80 || (opcode >= 0xa0 && opcode <= 0xa2) ||
  136. (opcode >= 0xa8 && opcode <= 0xaa) || (opcode & 0xf8) == 0xc8 ||
  137. opcode == 0xb9))
  138. has_modrm = 1;
  139. // 3bytes opcodes
  140. if (opcode == 0x38 || opcode == 0x3a) {
  141. c++;
  142. len++;
  143. }
  144. // 3DNow! opcode
  145. if (opcode == 0x0f)
  146. len++;
  147. }
  148. if (has_modrm) {
  149. len++;
  150. modrm = *c++;
  151. if (addr_bits != __b16 && (modrm & (Mod_M | RM_M)) == 5) // Mod = 00 R/M = 101
  152. len += 4;
  153. if (addr_bits == __b16 && (modrm & (Mod_M | RM_M)) == 6) // Mod = 00 R/M = 110 and 16bits addressing
  154. len += 2;
  155. if ((modrm & Mod_M) == 0x40) // Mod = 01
  156. len += 1;
  157. if ((modrm & Mod_M) == 0x80) // Mod = 10
  158. len += addr_bits == __b16 ? 2 : 4;
  159. // check SIB byte
  160. if (addr_bits != __b16 && (modrm & Mod_M) != Mod_M && (modrm & RM_M) == 4) { // if it has SIB
  161. len++;
  162. if ((modrm & Mod_M) == 0 && (*c & Base_M) == 5) // Mod = 00 SIB Base = 101
  163. len += 4;
  164. c++;
  165. }
  166. }
  167. /* Immediate operands
  168. *
  169. * 1byte opcode list:
  170. *
  171. * imm8 (1 byte)
  172. *
  173. * 0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3c, 0x6a, 0x6b, 0x70 - 0x7f,
  174. * 0x80, 0x82, 0x83, 0xa8, 0xb0 - 0xb7, 0xc0, 0xc1, 0xc6, 0xcd, 0xd4,
  175. * 0xd5, 0xe0 - 0xe7, 0xeb, 0xf6 (Reg/Op = 000 or Reg/Op = 001)
  176. *
  177. * imm16 (2 bytes)
  178. *
  179. * 0xc2, 0xca
  180. *
  181. * imm16/32 (2 bytes if operand_bits == __b16 else 4 bytes)
  182. *
  183. * 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d, 0x68, 0x69, 0x81, 0xa9
  184. * 0xc7, 0xe8, 0xe9
  185. *
  186. * imm16/32/64 (2 bytes if operand_bits == __b16, 4 bytes if __b32, 8 bytes if __b64)
  187. *
  188. * 0xb8 - 0xbf, 0xf7 (Reg/Op = 000 or Reg/Op = 001)
  189. *
  190. * moffs (2 bytes if addr_bits == __b16, 4 bytes if __b32, 8 bytes if __b64)
  191. *
  192. * 0xa0, 0xa1, 0xa2, 0xa3
  193. *
  194. * others
  195. *
  196. * 0xea, 0x9a: imm16 + imm16/32
  197. * 0xc8: imm16 + imm8
  198. *
  199. *
  200. * 2bytes opcode list:
  201. *
  202. * imm8 (1 byte)
  203. *
  204. * 0x70 - 0x73, 0xa4, 0xac, 0xba, 0xc2, 0xc4 - 0xc6
  205. *
  206. * imm16/32 (2 bytes if operand_bits == __b16 else 4 bytes)
  207. *
  208. * 0x80 - 0x8f
  209. *
  210. *
  211. * all 3bytes opcodes with 0x3a prefix have imm8
  212. */
  213. if (!twobytes) { // 1byte opcodes
  214. // imm8
  215. if (((opcode & 7) == 4 && (opcode & 0xf0) <= 0x30) ||
  216. opcode == 0x6a || opcode == 0x6b || (opcode & 0xf0) == 0x70 ||
  217. opcode == 0x80 || opcode == 0x82 || opcode == 0x83 ||
  218. opcode == 0xa8 || (opcode & 0xf8) == 0xb0 || opcode == 0xc0 ||
  219. opcode == 0xc1 || opcode == 0xc6 || opcode == 0xcd ||
  220. opcode == 0xd4 || opcode == 0xd5 || (opcode & 0xf8) == 0xe0 ||
  221. opcode == 0xeb || (opcode == 0xf6 && (modrm & 0x30) == 0))
  222. len += 1;
  223. // imm16
  224. if (opcode == 0xc2 || opcode == 0xca)
  225. len += 2;
  226. // imm16/32
  227. if (((opcode & 7) == 5 && (opcode & 0xf0) <= 0x30) ||
  228. opcode == 0x68 || opcode == 0x69 || opcode == 0x81 ||
  229. opcode == 0xa9 || opcode == 0xc7 || opcode == 0xe8 ||
  230. opcode == 0xe9)
  231. len += operand_bits == __b16 ? 2 : 4;
  232. // imm16/32/64
  233. if ((opcode & 0xf8) == 0xb8 || (opcode == 0xf7 && (modrm & 0x30) == 0))
  234. len += operand_bits == __b16 ? 2 : operand_bits == __b32 ? 4 : 8;
  235. // moffs
  236. if ((opcode & 0xfc) == 0xa0)
  237. len += addr_bits == __b16 ? 2 : addr_bits == __b32 ? 4 : 8;
  238. // others
  239. if (opcode == 0xea || opcode == 0x9a)
  240. len += 2 + (operand_bits == __b16 ? 2 : 4);
  241. if (opcode == 0xc8)
  242. len += 3;
  243. } else { // 2bytes opcodes
  244. // imm8
  245. if ((opcode & 0xfc) == 0x70 || opcode == 0xa4 ||
  246. opcode == 0xac || opcode == 0xba || opcode == 0xc2 ||
  247. (opcode >= 0xc4 && opcode <= 0xc6))
  248. len += 1;
  249. // imm16/32
  250. if ((opcode & 0xf0) == 0x80)
  251. len += operand_bits == __b16 ? 2 : 4;
  252. // 3bytes opcodes with 0x3a prefix
  253. if (opcode == 0x3a)
  254. len += 1;
  255. }
  256. // wrong length
  257. if (len > MAX_INSN_LEN_x86)
  258. len = 1;
  259. return len;
  260. }
  261. static int insn_len_x86_32(void *insn) {
  262. return __insn_len_x86(insn, __b32);
  263. }
  264. static int insn_len_x86_64(void *insn) {
  265. return __insn_len_x86(insn, __b64);
  266. }
  267. #ifdef __cplusplus
  268. }
  269. #endif
  270. #endif