
/rpython/jit/backend/x86/regloc.py

https://bitbucket.org/pypy/pypy/
from rpython.jit.metainterp.history import ConstInt
from rpython.jit.backend.x86 import rx86
from rpython.rlib.unroll import unrolling_iterable
from rpython.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
from rpython.tool.sourcetools import func_with_new_name
from rpython.rlib.objectmodel import specialize, instantiate
from rpython.rlib.rarithmetic import intmask
from rpython.jit.metainterp.history import FLOAT, INT
from rpython.jit.codewriter import longlong
from rpython.rtyper.lltypesystem import rffi, lltype
#
# This module adds support for "locations", which can be either a Const,
# a RegLoc or a FrameLoc.  It also adds operations like mc.ADD(), which
# take two locations as arguments, decode them, and call the right
# mc.ADD_rr()/ADD_rb()/ADD_ri().
#
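# Illustrative sketch (assuming the usual LocationCodeBuilder mixin usage,
# with "mc" being such a code builder): with the locations defined below,
#     mc.ADD(eax, ebx)        # two RegLocs -> dispatches to mc.ADD_rr(...)
#     mc.ADD(eax, imm(4))     # RegLoc + ImmedLoc -> dispatches to mc.ADD_ri(...)
# The dispatch is done by the INSN functions built near the bottom of this
# file, which pick the concrete rx86 method from the two location codes.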
class AssemblerLocation(object):
    _attrs_ = ('value', '_location_code')
    _immutable_ = True

    def _getregkey(self):
        return self.value

    def is_memory_reference(self):
        return self.location_code() in ('b', 's', 'j', 'a', 'm')

    def location_code(self):
        return self._location_code

    def get_width(self):
        raise NotImplementedError

    def value_r(self): return self.value
    def value_b(self): return self.value
    def value_s(self): return self.value
    def value_j(self): return self.value
    def value_i(self): return self.value
    def value_x(self): return self.value
    def value_a(self): raise AssertionError("value_a undefined")
    def value_m(self): raise AssertionError("value_m undefined")

    def find_unused_reg(self): return eax

    def is_stack(self):
        return False

    def is_core_reg(self):
        return False

    def get_position(self):
        raise NotImplementedError   # only for stack
class RawEbpLoc(AssemblerLocation):
    """The same as a stack location, but does not know its position.
    Mostly usable for raw frame access.
    """
    _immutable_ = True
    _location_code = 'b'

    def __init__(self, value, type=INT):
        self.value = value
        self.type = type

    def get_width(self):
        if self.type == FLOAT:
            return 8
        return WORD

    def __repr__(self):
        return '%d(%%ebp)' % (self.value,)

    def assembler(self):
        return repr(self)

    def is_float(self):
        return self.type == FLOAT

    def add_offset(self, ofs):
        return RawEbpLoc(self.value + ofs)

    def is_stack(self):
        return True
class RawEspLoc(AssemblerLocation):
    """Esp-based location."""
    _immutable_ = True
    _location_code = 's'

    def __init__(self, value, type):
        assert value >= 0    # accessing values < 0 is forbidden on x86-32.
        self.value = value   # (on x86-64 we could allow values down to -128)
        self.type = type

    def _getregkey(self):
        return ~self.value

    def get_width(self):
        if self.type == FLOAT:
            return 8
        return WORD

    def __repr__(self):
        return '%d(%%esp)' % (self.value,)

    def assembler(self):
        return repr(self)

    def is_float(self):
        return self.type == FLOAT
class FrameLoc(RawEbpLoc):
    _immutable_ = True

    def __init__(self, position, ebp_offset, type):
        # _getregkey() returns self.value; the value returned must not
        # conflict with RegLoc._getregkey().  It doesn't, somewhat by chance,
        # so let the following assert fail if that ever stops being true.
        assert ebp_offset >= 8 + 8 * IS_X86_64
        self.position = position
        #if position != 9999:
        #    assert (position + JITFRAME_FIXED_SIZE) * WORD == ebp_offset
        self.value = ebp_offset
        # One of INT, REF, FLOAT
        self.type = type

    def get_position(self):
        return self.position
class RegLoc(AssemblerLocation):
    _immutable_ = True

    def __init__(self, regnum, is_xmm):
        assert regnum >= 0
        self.value = regnum
        self.is_xmm = is_xmm
        if self.is_xmm:
            self._location_code = 'x'
        else:
            self._location_code = 'r'

    def __repr__(self):
        if self.is_xmm:
            return rx86.R.xmmnames[self.value]
        else:
            return rx86.R.names[self.value]

    def get_width(self):
        if self.is_xmm:
            return 8
        return WORD

    def lowest8bits(self):
        assert not self.is_xmm
        if WORD == 4:
            assert 0 <= self.value < 4
        return RegLoc(rx86.low_byte(self.value), False)

    def higher8bits(self):
        assert not self.is_xmm
        return RegLoc(rx86.high_byte(self.value), False)

    def assembler(self):
        return '%' + repr(self)

    def find_unused_reg(self):
        if self.value == eax.value:
            return edx
        else:
            return eax

    def is_float(self):
        return self.is_xmm

    def is_core_reg(self):
        return True
class ImmediateAssemblerLocation(AssemblerLocation):
    _immutable_ = True

class ImmedLoc(ImmediateAssemblerLocation):
    _immutable_ = True
    _location_code = 'i'

    def __init__(self, value, is_float=False):
        # force as a real int
        self.value = rffi.cast(lltype.Signed, value)
        self._is_float = is_float

    def getint(self):
        return self.value

    def get_width(self):
        return WORD

    def __repr__(self):
        return "ImmedLoc(%d)" % (self.value)

    def lowest8bits(self):
        val = self.value & 0xFF
        if val > 0x7F:
            val -= 0x100
        return ImmedLoc(val)

    def is_float(self):
        return self._is_float
class AddressLoc(AssemblerLocation):
    _immutable_ = True
    # The address is base_loc + (scaled_loc << scale) + static_offset

    def __init__(self, base_loc, scaled_loc, scale=0, static_offset=0):
        assert 0 <= scale < 4
        assert isinstance(base_loc, ImmedLoc) or isinstance(base_loc, RegLoc)
        assert isinstance(scaled_loc, ImmedLoc) or isinstance(scaled_loc, RegLoc)
        if isinstance(base_loc, ImmedLoc):
            if isinstance(scaled_loc, ImmedLoc):
                self._location_code = 'j'
                self.value = base_loc.value + (scaled_loc.value << scale) + static_offset
            else:
                self._location_code = 'a'
                self.loc_a = (rx86.NO_BASE_REGISTER, scaled_loc.value, scale,
                              base_loc.value + static_offset)
        else:
            if isinstance(scaled_loc, ImmedLoc):
                # FIXME: What if base_loc is ebp or esp?
                self._location_code = 'm'
                self.loc_m = (base_loc.value,
                              (scaled_loc.value << scale) + static_offset)
            else:
                self._location_code = 'a'
                self.loc_a = (base_loc.value, scaled_loc.value, scale, static_offset)

    def __repr__(self):
        dict = {'j': 'value', 'a': 'loc_a', 'm': 'loc_m'}
        attr = dict.get(self._location_code, '?')
        info = getattr(self, attr, '?')
        return '<AddressLoc %r: %s>' % (self._location_code, info)

    def get_width(self):
        return WORD

    def is_float(self):
        return False   # not 100% true, but we don't really use AddressLoc
                       # for such locations, so it's ok

    def value_a(self):
        return self.loc_a

    def value_m(self):
        return self.loc_m

    def find_unused_reg(self):
        if self._location_code == 'm':
            if self.loc_m[0] == eax.value:
                return edx
        elif self._location_code == 'a':
            if self.loc_a[0] == eax.value:
                if self.loc_a[1] == edx.value:
                    return ecx
                return edx
            if self.loc_a[1] == eax.value:
                if self.loc_a[0] == edx.value:
                    return ecx
                return edx
        return eax

    def add_offset(self, ofs):
        result = instantiate(AddressLoc)
        result._location_code = self._location_code
        if self._location_code == 'm':
            result.loc_m = (self.loc_m[0], self.loc_m[1] + ofs)
        elif self._location_code == 'a':
            result.loc_a = self.loc_a[:3] + (self.loc_a[3] + ofs,)
        elif self._location_code == 'j':
            result.value = self.value + ofs
        else:
            raise AssertionError(self._location_code)
        return result
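# A minimal usage sketch (assuming a code builder "mc" that mixes in
# LocationCodeBuilder, defined further below): an AddressLoc built from two
# registers gets the 'a' location code, so
#     addr = AddressLoc(edx, ecx, 3, 16)   # roughly [edx + ecx*8 + 16]
#     mc.MOV(eax, addr)                    # should dispatch to MOV_ra(...)
# while AddressLoc(edx, imm0, 0, 16) gets the 'm' code and should dispatch
# to MOV_rm(...) instead.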
class ConstFloatLoc(ImmediateAssemblerLocation):
    _immutable_ = True
    _location_code = 'j'

    def __init__(self, address):
        self.value = address

    def get_width(self):
        return 8

    def __repr__(self):
        return '<ConstFloatLoc @%s>' % (self.value,)

    def is_float(self):
        return True
if IS_X86_32:
    class FloatImmedLoc(ImmediateAssemblerLocation):
        # This stands for an immediate float.  It cannot be directly used in
        # any assembler instruction.  Instead, it is meant to be decomposed
        # into two 32-bit halves.  On 64-bit, FloatImmedLoc() is a function
        # instead; see below.
        _immutable_ = True
        _location_code = '#'     # don't use me

        def __init__(self, floatstorage):
            self.aslonglong = floatstorage

        def get_width(self):
            return 8

        def low_part(self):
            return intmask(self.aslonglong)

        def high_part(self):
            return intmask(self.aslonglong >> 32)

        def low_part_loc(self):
            return ImmedLoc(self.low_part())

        def high_part_loc(self):
            return ImmedLoc(self.high_part())

        def __repr__(self):
            floatvalue = longlong.getrealfloat(self.aslonglong)
            return '<FloatImmedLoc(%s)>' % (floatvalue,)

        def is_float(self):
            return True

if IS_X86_64:
    def FloatImmedLoc(floatstorage):
        from rpython.rlib.longlong2float import float2longlong
        value = intmask(float2longlong(floatstorage))
        return ImmedLoc(value, True)
REGLOCS = [RegLoc(i, is_xmm=False) for i in range(16)]
XMMREGLOCS = [RegLoc(i, is_xmm=True) for i in range(16)]
eax, ecx, edx, ebx, esp, ebp, esi, edi, r8, r9, r10, r11, r12, r13, r14, r15 = REGLOCS
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 = XMMREGLOCS

# We use a scratch register to simulate having 64-bit immediates.  When we
# want to do something like:
#     mov rax, [0xDEADBEEFDEADBEEF]
# we actually do:
#     mov r11, 0xDEADBEEFDEADBEEF
#     mov rax, [r11]
#
# NB: You can use the scratch register as a temporary register in
# assembler.py, but care must be taken when doing so.  A call to a method in
# LocationCodeBuilder could clobber the scratch register when certain
# location types are passed in.
X86_64_SCRATCH_REG = r11

# XXX: a GPR scratch register is definitely needed, but we could probably do
# without an xmm scratch reg.
X86_64_XMM_SCRATCH_REG = xmm15
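# Summary of the one-letter location codes used below (derived from the
# classes defined above):
#   'r' general-purpose register    'x' xmm register
#   'b' ebp-relative stack slot     's' esp-relative slot
#   'i' immediate value             'j' constant absolute address
#   'm' (base_register, offset)     'a' (base, index, scale, offset)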
unrolling_location_codes = unrolling_iterable(list("rbsmajix"))

@specialize.arg(1)
def _rx86_getattr(obj, methname):
    if hasattr(rx86.AbstractX86CodeBuilder, methname):
        return getattr(obj, methname)
    else:
        raise AssertionError(methname + " undefined")

def _missing_binary_insn(name, code1, code2):
    raise AssertionError(name + "_" + code1 + code2 + " missing")
_missing_binary_insn._dont_inline_ = True
class LocationCodeBuilder(object):
    _mixin_ = True

    _reuse_scratch_register = False   # for now, this is always False
    _scratch_register_known = False   # for now, this is always False
    _scratch_register_value = 0

    def _binaryop(name):

        def insn_with_64_bit_immediate(self, loc1, loc2):
            # These are the worst cases:
            val2 = loc2.value_i()
            if name == 'MOV' and isinstance(loc1, RegLoc):
                self.MOV_ri(loc1.value, val2)
                return
            code1 = loc1.location_code()
            if code1 == 'j':
                checkvalue = loc1.value_j()
            elif code1 == 'm':
                checkvalue = loc1.value_m()[1]
            elif code1 == 'a':
                checkvalue = loc1.value_a()[3]
            else:
                checkvalue = 0
            if not rx86.fits_in_32bits(checkvalue):
                # INSN_ji, and both operands are 64-bit; or INSN_mi or INSN_ai
                # and the constant offset in the address is 64-bit.
                # Hopefully this doesn't happen too often
                freereg = loc1.find_unused_reg()
                self.PUSH_r(freereg.value)
                self.MOV_ri(freereg.value, val2)
                INSN(self, loc1, freereg)
                self.POP_r(freereg.value)
            else:
                # For this case, we should not need the scratch register
                # more than here.
                self._load_scratch(val2)
                INSN(self, loc1, X86_64_SCRATCH_REG)

        def invoke(self, codes, val1, val2):
            methname = name + "_" + codes
            _rx86_getattr(self, methname)(val1, val2)
        invoke._annspecialcase_ = 'specialize:arg(1)'

        def has_implementation_for(loc1, loc2):
            # A memo function that returns True if there is any NAME_xy that
            # could match.  If it returns False we know the whole subcase can
            # be omitted from translated code.  Without this hack, the size
            # of most _binaryop INSN functions ends up quite large in C code.
            if loc1 == '?':
                return any([has_implementation_for(loc1, loc2)
                            for loc1 in unrolling_location_codes])
            methname = name + "_" + loc1 + loc2
            if not hasattr(rx86.AbstractX86CodeBuilder, methname):
                return False
            # any NAME_j should have a NAME_m as a fallback, too.  Check it
            if loc1 == 'j': assert has_implementation_for('m', loc2), methname
            if loc2 == 'j': assert has_implementation_for(loc1, 'm'), methname
            return True
        has_implementation_for._annspecialcase_ = 'specialize:memo'

        def INSN(self, loc1, loc2):
            code1 = loc1.location_code()
            code2 = loc2.location_code()

            # You can pass in the scratch register as a location, but you
            # must be careful not to combine it with location types that
            # might need to use the scratch register themselves.
            if loc2 is X86_64_SCRATCH_REG:
                if code1 == 'j':
                    assert (name.startswith("MOV") and
                            rx86.fits_in_32bits(loc1.value_j()))
            if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                assert code2 not in ('j', 'i')

            for possible_code2 in unrolling_location_codes:
                if not has_implementation_for('?', possible_code2):
                    continue
                if code2 == possible_code2:
                    val2 = getattr(loc2, "value_" + possible_code2)()
                    #
                    # Fake out certain operations for x86_64
                    if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(val2):
                        insn_with_64_bit_immediate(self, loc1, loc2)
                        return
                    #
                    # Regular case
                    for possible_code1 in unrolling_location_codes:
                        if not has_implementation_for(possible_code1,
                                                      possible_code2):
                            continue
                        if code1 == possible_code1:
                            val1 = getattr(loc1, "value_" + possible_code1)()
                            # More faking out of certain operations for x86_64
                            fits32 = rx86.fits_in_32bits
                            if possible_code1 == 'j' and not fits32(val1):
                                val1 = self._addr_as_reg_offset(val1)
                                invoke(self, "m" + possible_code2, val1, val2)
                                return
                            if possible_code2 == 'j' and not fits32(val2):
                                val2 = self._addr_as_reg_offset(val2)
                                invoke(self, possible_code1 + "m", val1, val2)
                                return
                            if possible_code1 == 'm' and not fits32(val1[1]):
                                val1 = self._fix_static_offset_64_m(val1)
                            if possible_code2 == 'm' and not fits32(val2[1]):
                                val2 = self._fix_static_offset_64_m(val2)
                            if possible_code1 == 'a' and not fits32(val1[3]):
                                val1 = self._fix_static_offset_64_a(val1)
                            if possible_code2 == 'a' and not fits32(val2[3]):
                                val2 = self._fix_static_offset_64_a(val2)
                            invoke(self, possible_code1 + possible_code2, val1, val2)
                            return
            _missing_binary_insn(name, code1, code2)

        return func_with_new_name(INSN, "INSN_" + name)
    def _unaryop(name):

        def INSN(self, loc):
            code = loc.location_code()
            for possible_code in unrolling_location_codes:
                if code == possible_code:
                    val = getattr(loc, "value_" + possible_code)()
                    # Faking out of certain operations for x86_64
                    fits32 = rx86.fits_in_32bits
                    if possible_code == 'i' and not fits32(val):
                        self._load_scratch(val)    # for 'PUSH(imm)'
                        _rx86_getattr(self, name + "_r")(X86_64_SCRATCH_REG.value)
                        return
                    if possible_code == 'j' and not fits32(val):
                        val = self._addr_as_reg_offset(val)
                        _rx86_getattr(self, name + "_m")(val)
                        return
                    if possible_code == 'm' and not fits32(val[1]):
                        val = self._fix_static_offset_64_m(val)
                    if possible_code == 'a' and not fits32(val[3]):
                        val = self._fix_static_offset_64_a(val)
                    methname = name + "_" + possible_code
                    _rx86_getattr(self, methname)(val)

        return func_with_new_name(INSN, "INSN_" + name)
    def _relative_unaryop(name):

        def INSN(self, loc):
            code = loc.location_code()
            for possible_code in unrolling_location_codes:
                if code == possible_code:
                    val = getattr(loc, "value_" + possible_code)()
                    if possible_code == 'i':
                        # This is for CALL or JMP only.
                        if self.WORD == 4:
                            _rx86_getattr(self, name + "_l")(val)
                            self.add_pending_relocation()
                        else:
                            # xxx can we avoid "MOV r11, $val; JMP/CALL *r11"
                            # in case it would fit a 32-bit displacement?
                            # Hard, because we don't know yet where this insn
                            # will end up...
                            assert self.WORD == 8
                            self._load_scratch(val)
                            _rx86_getattr(self, name + "_r")(X86_64_SCRATCH_REG.value)
                    else:
                        methname = name + "_" + possible_code
                        _rx86_getattr(self, methname)(val)

        return func_with_new_name(INSN, "INSN_" + name)
    def _addr_as_reg_offset(self, addr):
        # Encodes a (64-bit) address as an offset from the scratch register.
        # If we are within a "reuse_scratch_register" block, we remember the
        # last value we loaded into the scratch register and encode the
        # address as an offset from that one, if we can.
        if self._scratch_register_known:
            offset = addr - self._scratch_register_value
            if rx86.fits_in_32bits(offset):
                return (X86_64_SCRATCH_REG.value, offset)
            # else: fall through

        if self._reuse_scratch_register:
            self._scratch_register_known = True
            self._scratch_register_value = addr

        self.MOV_ri(X86_64_SCRATCH_REG.value, addr)
        return (X86_64_SCRATCH_REG.value, 0)

    def _fix_static_offset_64_m(self, (basereg, static_offset)):
        # For cases where an AddressLoc has the location_code 'm', but
        # where the static offset does not fit in 32-bits.  We have to fall
        # back to the X86_64_SCRATCH_REG.  Returns a new location encoded
        # as mode 'm' too.  These are all possibly rare cases; don't try
        # to reuse a past value of the scratch register at all.
        self._scratch_register_known = False
        self.MOV_ri(X86_64_SCRATCH_REG.value, static_offset)
        self.LEA_ra(X86_64_SCRATCH_REG.value,
                    (basereg, X86_64_SCRATCH_REG.value, 0, 0))
        return (X86_64_SCRATCH_REG.value, 0)

    def _fix_static_offset_64_a(self, (basereg, scalereg,
                                       scale, static_offset)):
        # For cases where an AddressLoc has the location_code 'a', but
        # where the static offset does not fit in 32-bits.  We have to fall
        # back to the X86_64_SCRATCH_REG.  In one case it is even more
        # annoying.  These are all possibly rare cases; don't try to reuse a
        # past value of the scratch register at all.
        self._scratch_register_known = False
        self.MOV_ri(X86_64_SCRATCH_REG.value, static_offset)
        #
        if basereg != rx86.NO_BASE_REGISTER:
            self.LEA_ra(X86_64_SCRATCH_REG.value,
                        (basereg, X86_64_SCRATCH_REG.value, 0, 0))
        return (X86_64_SCRATCH_REG.value, scalereg, scale, 0)

    def _load_scratch(self, value):
        if (self._scratch_register_known
                and value == self._scratch_register_value):
            return
        if self._reuse_scratch_register:
            self._scratch_register_known = True
            self._scratch_register_value = value
        self.MOV_ri(X86_64_SCRATCH_REG.value, value)

    def begin_reuse_scratch_register(self):
        # --NEVER CALLED (only from a specific test)--
        # Flag the beginning of a block where it is okay to reuse the value
        # of the scratch register.  In theory we shouldn't have to do this if
        # we were careful to mark all possible targets of a jump or call, and
        # "forget" the value of the scratch register at those positions, but
        # for now this seems safer.
        self._reuse_scratch_register = True

    def end_reuse_scratch_register(self):
        self._reuse_scratch_register = False
        self._scratch_register_known = False
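    # Minimal usage sketch (an assumption about how the two methods above are
    # meant to be paired; per the comment above, the real assembler only
    # exercises this from a test):
    #     mc.begin_reuse_scratch_register()
    #     ... emit several instructions that touch far (64-bit) addresses ...
    #     mc.end_reuse_scratch_register()
    # Inside such a block, _addr_as_reg_offset() can reuse the last value
    # loaded into r11 instead of emitting a fresh MOV_ri each time.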
    def _vector_size_choose(name):
        def invoke(self, suffix, val1, val2):
            methname = name + suffix
            _rx86_getattr(self, methname)(val1, val2)
        invoke._annspecialcase_ = 'specialize:arg(1)'

        possible_instr_unrolled = unrolling_iterable([(1, 'B_xx'), (2, 'W_xx'),
                                                      (4, 'D_xx'), (8, 'Q_xx')])

        def INSN(self, loc1, loc2, size):
            code1 = loc1.location_code()
            code2 = loc2.location_code()
            assert code1 == code2 == 'x'
            val1 = loc1.value_x()
            val2 = loc2.value_x()
            for s, suffix in possible_instr_unrolled:
                if s == size:
                    invoke(self, suffix, val1, val2)
                    break

        return INSN
    AND = _binaryop('AND')
    OR = _binaryop('OR')
    OR8 = _binaryop('OR8')
    XOR = _binaryop('XOR')
    NOT = _unaryop('NOT')
    SHL = _binaryop('SHL')
    SHR = _binaryop('SHR')
    SAR = _binaryop('SAR')
    TEST = _binaryop('TEST')
    PTEST = _binaryop('PTEST')
    TEST8 = _binaryop('TEST8')
    BTS = _binaryop('BTS')

    INC = _unaryop('INC')
    ADD = _binaryop('ADD')
    SUB = _binaryop('SUB')
    IMUL = _binaryop('IMUL')
    NEG = _unaryop('NEG')
    MUL = _unaryop('MUL')

    CMP = _binaryop('CMP')
    CMP16 = _binaryop('CMP16')
    PCMPEQQ = _binaryop('PCMPEQQ')
    PCMPEQD = _binaryop('PCMPEQD')
    PCMPEQW = _binaryop('PCMPEQW')
    PCMPEQB = _binaryop('PCMPEQB')
    PCMPEQ = _vector_size_choose('PCMPEQ')

    MOV = _binaryop('MOV')
    MOV8 = _binaryop('MOV8')
    MOV16 = _binaryop('MOV16')
    MOVZX8 = _binaryop('MOVZX8')
    MOVSX8 = _binaryop('MOVSX8')
    MOVZX16 = _binaryop('MOVZX16')
    MOVSX16 = _binaryop('MOVSX16')
    MOV32 = _binaryop('MOV32')
    MOVSX32 = _binaryop('MOVSX32')
    # Avoid XCHG because it always implies atomic semantics, which is
    # slower and does not pair well for dispatch.
    #XCHG = _binaryop('XCHG')
    CMOVNS = _binaryop('CMOVNS')

    PUSH = _unaryop('PUSH')
    POP = _unaryop('POP')

    LEA = _binaryop('LEA')

    MOVSD = _binaryop('MOVSD')
    MOVSS = _binaryop('MOVSS')
    MOVAPD = _binaryop('MOVAPD')
    MOVAPS = _binaryop('MOVAPS')
    MOVDQA = _binaryop('MOVDQA')
    MOVDQU = _binaryop('MOVDQU')
    MOVUPD = _binaryop('MOVUPD')
    MOVUPS = _binaryop('MOVUPS')
    ADDSD = _binaryop('ADDSD')
    SUBSD = _binaryop('SUBSD')
    MULSD = _binaryop('MULSD')
    DIVSD = _binaryop('DIVSD')
    # packed
    ADDPD = _binaryop('ADDPD')
    ADDPS = _binaryop('ADDPS')
    SUBPD = _binaryop('SUBPD')
    SUBPS = _binaryop('SUBPS')
    MULPD = _binaryop('MULPD')
    MULPS = _binaryop('MULPS')
    DIVPD = _binaryop('DIVPD')
    DIVPS = _binaryop('DIVPS')
    UCOMISD = _binaryop('UCOMISD')
    CVTSI2SD = _binaryop('CVTSI2SD')
    CVTTSD2SI = _binaryop('CVTTSD2SI')
    CVTSD2SS = _binaryop('CVTSD2SS')
    CVTSS2SD = _binaryop('CVTSS2SD')
    CVTPD2PS = _binaryop('CVTPD2PS')
    CVTPS2PD = _binaryop('CVTPS2PD')
    CVTPD2DQ = _binaryop('CVTPD2DQ')
    CVTDQ2PD = _binaryop('CVTDQ2PD')

    SQRTSD = _binaryop('SQRTSD')

    ANDPD = _binaryop('ANDPD')
    ANDPS = _binaryop('ANDPS')
    XORPD = _binaryop('XORPD')
    XORPS = _binaryop('XORPS')

    PADDQ = _binaryop('PADDQ')
    PADDD = _binaryop('PADDD')
    PHADDD = _binaryop('PHADDD')
    PADDW = _binaryop('PADDW')
    PADDB = _binaryop('PADDB')
    PSUBQ = _binaryop('PSUBQ')
    PSUBD = _binaryop('PSUBD')
    PSUBW = _binaryop('PSUBW')
    PSUBB = _binaryop('PSUBB')
    PMULDQ = _binaryop('PMULDQ')
    PMULLD = _binaryop('PMULLD')
    PMULLW = _binaryop('PMULLW')
    PAND = _binaryop('PAND')
    POR = _binaryop('POR')
    PXOR = _binaryop('PXOR')
    PSRLDQ = _binaryop('PSRLDQ')
    MOVDQ = _binaryop('MOVDQ')
    MOVD32 = _binaryop('MOVD32')
    MOVUPS = _binaryop('MOVUPS')
    MOVDDUP = _binaryop('MOVDDUP')

    UNPCKHPD = _binaryop('UNPCKHPD')
    UNPCKLPD = _binaryop('UNPCKLPD')
    UNPCKHPS = _binaryop('UNPCKHPS')
    UNPCKLPS = _binaryop('UNPCKLPS')
    PUNPCKLQDQ = _binaryop('PUNPCKLQDQ')
    PUNPCKHQDQ = _binaryop('PUNPCKHQDQ')
    PUNPCKLDQ = _binaryop('PUNPCKLDQ')
    PUNPCKHDQ = _binaryop('PUNPCKHDQ')
    PSHUFB = _binaryop('PSHUFB')
    HADDPD = _binaryop('HADDPD')
    HADDPS = _binaryop('HADDPS')

    CALL = _relative_unaryop('CALL')
    JMP = _relative_unaryop('JMP')
def imm(x):
    # XXX: ri386 migration shim
    if isinstance(x, ConstInt):
        return ImmedLoc(x.getint())
    else:
        return ImmedLoc(x)

imm0 = imm(0)
imm1 = imm(1)

all_extra_instructions = [name for name in LocationCodeBuilder.__dict__
                          if name[0].isupper()]
all_extra_instructions.sort()