
/pypy/jit/backend/x86/assembler.py

https://bitbucket.org/glavoie/pypy
import sys, os
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from pypy.jit.metainterp.history import JitCellToken
from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.jit import AsmInfo
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
    gpr_reg_mgr_cls, _valid_addressing_size)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                       IS_X86_32, IS_X86_64)
from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
                                         esp, ebp, esi, edi,
                                         xmm0, xmm1, xmm2, xmm3,
                                         xmm4, xmm5, xmm6, xmm7,
                                         r8, r9, r10, r11,
                                         r12, r13, r14, r15,
                                         X86_64_SCRATCH_REG,
                                         X86_64_XMM_SCRATCH_REG,
                                         RegLoc, StackLoc, ConstFloatLoc,
                                         ImmedLoc, AddressLoc, imm,
                                         imm0, imm1, FloatImmedLoc)
from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.jit.backend.x86 import rx86, regloc, codebuf
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.x86.support import values_array
from pypy.jit.backend.x86 import support
from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
                             have_debug_prints)
from pypy.rlib import rgc
from pypy.rlib.clibffi import FFI_DEFAULT_ABI
from pypy.jit.backend.x86.jump import remap_frame_layout
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.codewriter import longlong
from pypy.rlib.rarithmetic import intmask
from pypy.rlib.objectmodel import compute_unique_id

# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
CALL_ALIGN = 16 // WORD

def align_stack_words(words):
    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
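# For example, on x86-64 (WORD == 8, CALL_ALIGN == 2) align_stack_words(5)
# returns 6; on x86-32 (WORD == 4, CALL_ALIGN == 4) it returns 8.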

class GuardToken(object):
    def __init__(self, faildescr, failargs, fail_locs, exc,
                 is_guard_not_invalidated):
        self.faildescr = faildescr
        self.failargs = failargs
        self.fail_locs = fail_locs
        self.exc = exc
        self.is_guard_not_invalidated = is_guard_not_invalidated

DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char), # 'b'ridge, 'l'abel or
                                                     # 'e'ntry point
                              ('number', lltype.Signed))

class Assembler386(object):
    _regalloc = None
    _output_loop_log = None

    def __init__(self, cpu, translate_support_code=False,
                 failargs_limit=1000):
        self.cpu = cpu
        self.verbose = False
        self.rtyper = cpu.rtyper
        self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
        self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
        self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
                                             failargs_limit)
        self.fail_ebp = 0
        self.loop_run_counters = []
        self.float_const_neg_addr = 0
        self.float_const_abs_addr = 0
        self.malloc_slowpath1 = 0
        self.malloc_slowpath2 = 0
        self.memcpy_addr = 0
        self.setup_failure_recovery()
        self._debug = False
        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
        self.fail_boxes_count = 0
        self._current_depths_cache = (0, 0)
        self.datablockwrapper = None
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        self.gcrootmap_retaddr_forced = 0
        self.teardown()

    def leave_jitted_hook(self):
        ptrs = self.fail_boxes_ptr.ar
        llop.gc_assume_young_pointers(lltype.Void,
                                      llmemory.cast_ptr_to_adr(ptrs))

    def set_debug(self, v):
        self._debug = v

    def setup_once(self):
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
        self._build_failure_recovery(False)
        self._build_failure_recovery(True)
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            support.ensure_sse2_floats()
            self._build_float_constants()
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            self._build_malloc_slowpath()
        self._build_stack_check_slowpath()
        if gc_ll_descr.gcrootmap:
            self._build_release_gil(gc_ll_descr.gcrootmap)
        debug_start('jit-backend-counts')
        self.set_debug(have_debug_prints())
        debug_stop('jit-backend-counts')

    def setup(self, looptoken):
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = []
            self.error_trampoline_64 = 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}

    def teardown(self):
        self.pending_guard_tokens = None
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = None
        self.mc = None
        self.current_clt = None

    def finish_once(self):
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                elif struct.type == 'b':
                    prefix = 'bridge ' + str(struct.number)
                else:
                    prefix = 'entry ' + str(struct.number)
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    def _build_float_constants(self):
        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
        float_constants = datablockwrapper.malloc_aligned(32, alignment=16)
        datablockwrapper.done()
        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
        qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
        # 0x8000000000000000
        neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
        # 0x7FFFFFFFFFFFFFFF
        abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
        data = neg_const + qword_padding + abs_const + qword_padding
        for i in range(len(data)):
            addr[i] = data[i]
        self.float_const_neg_addr = float_constants
        self.float_const_abs_addr = float_constants + 16
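        # These two 16-byte-aligned constants are consumed further down by
        # genop_float_neg (XORPD flips the sign bit) and genop_float_abs
        # (ANDPD masks it off).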

    def _build_malloc_slowpath(self):
        # With asmgcc, we need two helpers, so that we can write two CALL
        # instructions in assembler, with a mark_gc_roots in between.
        # With shadowstack, this is not needed, so we produce a single helper.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
        #
        # ---------- first helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        if self.cpu.supports_floats:          # save the XMM registers in
            for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
                mc.MOVSD_sx((WORD*2)+8*i, i)
        mc.SUB_rr(edx.value, eax.value)       # compute the size we want
        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        #
        # The registers to save in the copy area: with shadowstack, most
        # registers need to be saved.  With asmgcc, the callee-saved registers
        # don't need to.
        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
        if not shadow_stack:
            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_br(ofs, reg.value)
        #
        if shadow_stack:
            # ---- shadowstack ----
            mc.SUB_ri(esp.value, 16 - WORD)    # stack alignment of 16 bytes
            if IS_X86_32:
                mc.MOV_sr(0, edx.value)        # push argument
            elif IS_X86_64:
                mc.MOV_rr(edi.value, edx.value)
            mc.CALL(imm(addr))
            mc.ADD_ri(esp.value, 16 - WORD)
        else:
            # ---- asmgcc ----
            if IS_X86_32:
                mc.MOV_sr(WORD, edx.value)     # save it as the new argument
            elif IS_X86_64:
                # rdi can be clobbered: its content was saved in the
                # copy area of the stack
                mc.MOV_rr(edi.value, edx.value)
            mc.JMP(imm(addr))                  # tail call to the real malloc
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath1 = rawstart
        # ---------- second helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_rb(reg.value, ofs)
            assert reg is not eax and reg is not edx
        #
        if self.cpu.supports_floats:          # restore the XMM registers
            for i in range(self.cpu.NUM_REGS):# from where they were saved
                mc.MOVSD_xs(i, (WORD*2)+8*i)
        #
        # Note: we check this after the code above, just because the code
        # above is more than 127 bytes on 64-bits...
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
        jz_location = mc.get_relative_pos()
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
        mc.RET()
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.  Also note that we didn't RET from this helper;
        # but the code we jump to will actually restore the stack
        # position based on EBP, which will get us out of here for free.
        offset = mc.get_relative_pos() - jz_location
        assert 0 < offset <= 127
        mc.overwrite(jz_location-1, chr(offset))
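        # (the byte at jz_location-1 is the 8-bit displacement of the
        # J_il8 emitted above with a placeholder of 0)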
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath2 = rawstart

    def _build_propagate_exception_path(self):
        if self.cpu.propagate_exception_v < 0:
            return      # not supported (for tests, or non-translated)
        #
        self.mc = codebuf.MachineCodeBlockWrapper()
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
                                                default_to_memoryerror=True)
        self.mc.CALL(imm(addr))
        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
        self.propagate_exception_path = rawstart
        self.mc = None

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |          ...          |    <-- aligned to a multiple of 16
        #    |   retaddr of caller   |
        #    |    my own retaddr     |    <-- esp
        #    +-----------------------+
        #
        mc = codebuf.MachineCodeBlockWrapper()
        #
        stack_size = WORD
        if IS_X86_64:
            # on the x86_64, we have to save all the registers that may
            # have been used to pass arguments
            stack_size += 6*WORD + 8*8
            for reg in [edi, esi, edx, ecx, r8, r9]:
                mc.PUSH_r(reg.value)
            mc.SUB_ri(esp.value, 8*8)
            for i in range(8):
                mc.MOVSD_sx(8*i, i)     # xmm0 to xmm7
        #
        if IS_X86_32:
            stack_size += 2*WORD
            mc.PUSH_r(eax.value)        # alignment
            mc.PUSH_r(esp.value)
        elif IS_X86_64:
            mc.MOV_rr(edi.value, esp.value)
        #
        # esp is now aligned to a multiple of 16 again
        mc.CALL(imm(slowpathaddr))
        #
        mc.MOV(eax, heap(self.cpu.pos_exception()))
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['NZ'], 0)
        jnz_location = mc.get_relative_pos()
        #
        if IS_X86_32:
            mc.ADD_ri(esp.value, 2*WORD)    # cancel the two PUSHes above
        elif IS_X86_64:
            # restore the registers
            for i in range(7, -1, -1):
                mc.MOVSD_xs(i, 8*i)
            mc.ADD_ri(esp.value, 8*8)
            for reg in [r9, r8, ecx, edx, esi, edi]:
                mc.POP_r(reg.value)
        #
        mc.RET()
        #
        # patch the JNZ above
        offset = mc.get_relative_pos() - jnz_location
        assert 0 < offset <= 127
        mc.overwrite(jnz_location-1, chr(offset))
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
        mc.CALL(imm(addr))
        #
        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        #
        # footer -- note the ADD, which skips the return address of this
        # function, and will instead return to the caller's caller.  Note
        # also that we completely ignore the saved arguments, because we
        # are interrupting the function.
        mc.ADD_ri(esp.value, stack_size)
        mc.RET()
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.stack_check_slowpath = rawstart

    @staticmethod
    @rgc.no_collect
    def _release_gil_asmgcc(css):
        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
        from pypy.rpython.memory.gctransform import asmgcroot
        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        next = asmgcroot.gcrootanchor.next
        new.next = next
        new.prev = asmgcroot.gcrootanchor
        asmgcroot.gcrootanchor.next = new
        next.prev = new
        # and now release the GIL
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css):
        # first reacquire the GIL
        after = rffi.aroundstate.after
        if after:
            after()
        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
        from pypy.rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _release_gil_shadowstack():
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        after = rffi.aroundstate.after
        if after:
            after()

    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
                                                  lltype.Void))

    def _build_release_gil(self, gcrootmap):
        if gcrootmap.is_shadow_stack:
            releasegil_func = llhelper(self._NOARG_FUNC,
                                       self._release_gil_shadowstack)
            reacqgil_func = llhelper(self._NOARG_FUNC,
                                     self._reacquire_gil_shadowstack)
        else:
            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
                                       self._release_gil_asmgcc)
            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
                                     self._reacquire_gil_asmgcc)
        self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func)
        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        '''adds the following attributes to looptoken:
               _x86_function_addr   (address of the generated func, as an int)
               _x86_loop_code       (debug: addr of the start of the ResOps)
               _x86_fullsize        (debug: full size including failure)
               _x86_debug_checksum
        '''
        # XXX this function is too longish and contains some code
        # duplication with assemble_bridge().  Also, we should think
        # about not storing on 'self' attributes that will live only
        # for the duration of compiling one loop or one bridge.
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        #
        self._call_header_with_stack_check()
        stackadjustpos = self._patchable_stackadjust()
        clt._debug_nbargs = len(inputargs)
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, clt.allgcrefs)
        looppos = self.mc.get_relative_pos()
        looptoken._x86_loop_code = looppos
        clt.frame_depth = -1     # temporarily
        clt.param_depth = -1     # temporarily
        frame_depth, param_depth = self._assemble(regalloc, operations)
        clt.frame_depth = frame_depth
        clt.param_depth = param_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(looptoken)
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + looppos,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos,
                                frame_depth + param_depth)
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._x86_rawstart = rawstart
            looptoken._x86_fullsize = full_size
            looptoken._x86_ops_offset = ops_offset
        looptoken._x86_function_addr = rawstart

        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Loop # %s: %s" % (looptoken.number, loopname)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos)

    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        descr_number = self.cpu.get_fail_descr_number(faildescr)
        failure_recovery = self._find_failure_recovery_bytecode(faildescr)

        self.setup(original_loop_token)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
        if not we_are_translated():
            assert ([loc.assembler() for loc in arglocs] ==
                    [loc.assembler() for loc in faildescr._x86_debug_faillocs])
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        fail_depths = faildescr._x86_current_depths
        startpos = self.mc.get_relative_pos()
        operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
                                             operations,
                                             self.current_clt.allgcrefs)

        stackadjustpos = self._patchable_stackadjust()
        frame_depth, param_depth = self._assemble(regalloc, operations)
        codeendpos = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(original_loop_token)
        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos,
                                frame_depth + param_depth)
        self.patch_pending_failure_recoveries(rawstart)
        if not we_are_translated():
            # for the benefit of tests
            faildescr._x86_bridge_frame_depth = frame_depth
            faildescr._x86_bridge_param_depth = param_depth
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart)
        ops_offset = self.mc.ops_offset
        self.fixup_target_tokens(rawstart)
        self.current_clt.frame_depth = max(self.current_clt.frame_depth, frame_depth)
        self.current_clt.param_depth = max(self.current_clt.param_depth, param_depth)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Bridge # %s" % (descr_number,)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, fullsize)
        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def write_pending_failure_recoveries(self):
        # for each pending guard, generate the code of the recovery stub
        # at the end of self.mc.
        for tok in self.pending_guard_tokens:
            tok.pos_recovery_stub = self.generate_quick_failure(tok)
        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
            self.error_trampoline_64 = self.generate_propagate_error_64()

    def patch_pending_failure_recoveries(self, rawstart):
        # after we wrote the assembler to raw memory, set up
        # tok.faildescr._x86_adr_jump_offset to contain the raw address of
        # the 4-byte target field in the JMP/Jcond instruction, and patch
        # the field in question to point (initially) to the recovery stub
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            addr = rawstart + tok.pos_jump_offset
            tok.faildescr._x86_adr_jump_offset = addr
            relative_target = tok.pos_recovery_stub - (tok.pos_jump_offset + 4)
            assert rx86.fits_in_32bits(relative_target)
            #
            if not tok.is_guard_not_invalidated:
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(relative_target)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                #     (addr-in-the-code-of-the-not-yet-written-jump-target,
                #      relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
                # General idea: Although no code was generated by this
                # guard, the code might be patched with a "JMP rel32" to
                # the guard recovery code.  This recovery code is
                # already generated, and looks like the recovery code
                # for any guard, even if at first it has no jump to it.
                # So we may later write 5 bytes overriding the existing
                # instructions; this works because a CALL instruction
                # would also take at least 5 bytes.  If it could take
                # less, we would run into the issue that overwriting the
                # 5 bytes here might get a few nonsense bytes at the
                # return address of the following CALL.
        if WORD == 8:
            for pos_after_jz in self.pending_memoryerror_trampoline_from:
                assert self.error_trampoline_64 != 0     # only if non-empty
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)

    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)

    def _register_counter(self, tp, number, token):
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def _find_failure_recovery_bytecode(self, faildescr):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        if adr_jump_offset == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # follow the JMP/Jcond
        p = rffi.cast(rffi.INTP, adr_jump_offset)
        adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
        # skip the CALL
        if WORD == 4:
            adr_target += 5     # CALL imm
        else:
            adr_target += 13    # MOV r11, imm-as-8-bytes; CALL *r11
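            # (10 bytes for the REX.W MOV r11, imm64 plus 3 bytes for the
            # indirect CALL *r11)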
        return adr_target

    def patch_jump_for_descr(self, faildescr, adr_new_target):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        assert adr_jump_offset != 0
        offset = adr_new_target - (adr_jump_offset + 4)
        # If the new target fits within a rel32 of the jump, just patch
        # that.  Otherwise, leave the original rel32 to the recovery stub in
        # place, but clobber the recovery stub with a jump to the real
        # target.
        mc = codebuf.MachineCodeBlockWrapper()
        if rx86.fits_in_32bits(offset):
            mc.writeimm32(offset)
            mc.copy_to_raw_memory(adr_jump_offset)
        else:
            # "mov r11, addr; jmp r11" is up to 13 bytes, which fits in there
            # because we always write "mov r11, imm-as-8-bytes; call *r11" in
            # the first place.
            mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
            mc.JMP_r(X86_64_SCRATCH_REG.value)
            p = rffi.cast(rffi.INTP, adr_jump_offset)
            adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
            mc.copy_to_raw_memory(adr_target)
        faildescr._x86_adr_jump_offset = 0    # means "patched"

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._x86_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        box = BoxInt()
        box2 = BoxInt()
        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
                            box, descr=self.debug_counter_descr),
               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                            None, descr=self.debug_counter_descr)]
        operations.extend(ops)

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug:
            # before doing anything, let's increase a counter
            s = 0
            for op in operations:
                s += op.getopnum()
            looptoken._x86_debug_checksum = s

            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _assemble(self, regalloc, operations):
        self._regalloc = regalloc
        regalloc.compute_hint_frame_locations(operations)
        regalloc.walk_operations(operations)
        if we_are_translated() or self.cpu.dont_keepalive_stuff:
            self._regalloc = None   # else keep it around for debugging
        frame_depth = regalloc.fm.get_frame_depth()
        param_depth = regalloc.param_depth
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            target_frame_depth = jump_target_descr._x86_clt.frame_depth
            target_param_depth = jump_target_descr._x86_clt.param_depth
            frame_depth = max(frame_depth, target_frame_depth)
            param_depth = max(param_depth, target_param_depth)
        return frame_depth, param_depth

    def _patchable_stackadjust(self):
        # stack adjustment LEA
        self.mc.LEA32_rb(esp.value, 0)
        return self.mc.get_relative_pos() - 4

    def _patch_stackadjust(self, adr_lea, allocated_depth):
        # patch stack adjustment LEA
        mc = codebuf.MachineCodeBlockWrapper()
        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words]
        mc.writeimm32(self._get_offset_of_ebp_from_esp(allocated_depth))
        mc.copy_to_raw_memory(adr_lea)

    def _get_offset_of_ebp_from_esp(self, allocated_depth):
        # Given that [EBP] is where we saved EBP, i.e. in the last word
        # of our fixed frame, then the 'words' value is:
        words = (FRAME_FIXED_SIZE - 1) + allocated_depth
        # align, e.g. for Mac OS X
        aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
        return -WORD * aligned_words
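        # Worked example with illustrative numbers: if FRAME_FIXED_SIZE
        # were 5 and allocated_depth were 10 on x86-64 (WORD == 8,
        # CALL_ALIGN == 2), then words == 14, aligned_words ==
        # align_stack_words(16) - 2 == 14, and the offset is -112.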

    def _call_header(self):
        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
        self.mc.PUSH_r(ebp.value)
        self.mc.MOV_rr(ebp.value, esp.value)
        for loc in self.cpu.CALLEE_SAVE_REGISTERS:
            self.mc.PUSH_r(loc.value)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)

    def _call_header_with_stack_check(self):
        if self.stack_check_slowpath == 0:
            pass                # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
            self.mc.SUB(eax, esp)                       # SUB eax, current
            self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
            self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
            jb_location = self.mc.get_relative_pos()
            self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
            # patch the JB above                        # .skip:
            offset = self.mc.get_relative_pos() - jb_location
            assert 0 < offset <= 127
            self.mc.overwrite(jb_location-1, chr(offset))
            #
        self._call_header()

    def _call_footer(self):
        self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)

        for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
            self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
        self.mc.POP_r(ebp.value)
        self.mc.RET()

    def _call_header_shadowstack(self, gcrootmap):
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (ebp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
        else:
            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
        self.mc.MOV_mi((eax.value, WORD), MARKER)       # MOV [eax+WORD], MARKER
        self.mc.MOV_mr((eax.value, 0), ebp.value)       # MOV [eax], ebp
        #
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
        else:
            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx

    def _call_footer_shadowstack(self, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.SUB_ji8(rst, 2*WORD)              # SUB [rootstacktop], 2*WORD
        else:
            self.mc.MOV_ri(ebx.value, rst)            # MOV ebx, rootstacktop
            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)   # SUB [ebx], 2*WORD

    def redirect_call_assembler(self, oldlooptoken, newlooptoken):
        # some minimal sanity checking
        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
        assert old_nbargs == new_nbargs
        # we overwrite the instructions at the old _x86_direct_bootstrap_code
        # to start with a JMP to the new _x86_direct_bootstrap_code.
        # Ideally we should rather patch all existing CALLs, but well.
        oldadr = oldlooptoken._x86_function_addr
        target = newlooptoken._x86_function_addr
        mc = codebuf.MachineCodeBlockWrapper()
        mc.JMP(imm(target))
        if WORD == 4:         # keep in sync with prepare_loop()
            assert mc.get_relative_pos() == 5
        else:
            assert mc.get_relative_pos() <= 13
        mc.copy_to_raw_memory(oldadr)

    def dump(self, text):
        if not self.verbose:
            return
        _prev = Box._extended_display
        try:
            Box._extended_display = False
            pos = self.mc.get_relative_pos()
            print >> sys.stderr, ' 0x%x %s' % (pos, text)
        finally:
            Box._extended_display = _prev

    # ------------------------------------------------------------

    def mov(self, from_loc, to_loc):
        if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
            self.mc.MOVSD(to_loc, from_loc)
        else:
            assert to_loc is not ebp
            self.mc.MOV(to_loc, from_loc)

    regalloc_mov = mov # legacy interface

    def regalloc_push(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
            self.mc.MOVSD_sx(0, loc.value)
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.PUSH_b(get_ebp_ofs(loc.position))
            self.mc.PUSH_b(get_ebp_ofs(loc.position + 1))
        else:
            self.mc.PUSH(loc)

    def regalloc_pop(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.MOVSD_xs(loc.value, 0)
            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.POP_b(get_ebp_ofs(loc.position + 1))
            self.mc.POP_b(get_ebp_ofs(loc.position))
        else:
            self.mc.POP(loc)

    def regalloc_immedmem2mem(self, from_loc, to_loc):
        # move a ConstFloatLoc directly to a StackLoc, as two MOVs
        # (even on x86-64, because the immediates are encoded as 32 bits)
        assert isinstance(from_loc, ConstFloatLoc)
        assert isinstance(to_loc, StackLoc)
        low_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[0]
        high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
        low_part = intmask(low_part)
        high_part = intmask(high_part)
        self.mc.MOV32_bi(to_loc.value, low_part)
        self.mc.MOV32_bi(to_loc.value + 4, high_part)

    def regalloc_perform(self, op, arglocs, resloc):
        genop_list[op.getopnum()](self, op, arglocs, resloc)

    def regalloc_perform_discard(self, op, arglocs):
        genop_discard_list[op.getopnum()](self, op, arglocs)

    def regalloc_perform_llong(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_llong_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_math(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_math_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_with_guard(self, op, guard_op, faillocs,
                                    arglocs, resloc, current_depths):
        faildescr = guard_op.getdescr()
        assert isinstance(faildescr, AbstractFailDescr)
        faildescr._x86_current_depths = current_depths
        failargs = guard_op.getfailargs()
        guard_opnum = guard_op.getopnum()
        guard_token = self.implement_guard_recovery(guard_opnum,
                                                    faildescr, failargs,
                                                    faillocs)
        if op is None:
            dispatch_opnum = guard_opnum
        else:
            dispatch_opnum = op.getopnum()
        genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
                                         arglocs, resloc)
        if not we_are_translated():
            # must be added by the genop_guard_list[]()
            assert guard_token is self.pending_guard_tokens[-1]

    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc,
                               current_depths):
        self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
                                         resloc, current_depths)

    def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
        self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))

    def _unaryop(asmop):
        def genop_unary(self, op, arglocs, resloc):
            getattr(self.mc, asmop)(arglocs[0])
        return genop_unary

    def _binaryop(asmop, can_swap=False):
        def genop_binary(self, op, arglocs, result_loc):
            getattr(self.mc, asmop)(arglocs[0], arglocs[1])
        return genop_binary

    def _cmpop(cond, rev_cond):
        def genop_cmp(self, op, arglocs, result_loc):
            rl = result_loc.lowest8bits()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
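                # the CMP operands are swapped, so test the reversed
                # condition below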
                self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                self.mc.SET_ir(rx86.Conditions[cond], rl.value)
            self.mc.MOVZX8_rr(result_loc.value, rl.value)
        return genop_cmp

    def _cmpop_float(cond, rev_cond, is_ne=False):
        def genop_cmp(self, op, arglocs, result_loc):
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond

            tmp1 = result_loc.lowest8bits()
            if IS_X86_32:
                tmp2 = result_loc.higher8bits()
            elif IS_X86_64:
                tmp2 = X86_64_SCRATCH_REG.lowest8bits()

            self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value)
            if is_ne:
                self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
                self.mc.OR8_rr(tmp1.value, tmp2.value)
            else:
                self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
                self.mc.AND8_rr(tmp1.value, tmp2.value)
            self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
        return genop_cmp

    def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
        def genop_cmp_guard(self, op, guard_op, guard_token, arglocs, result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, rev_cond)
                else:
                    self.implement_guard(guard_token, false_rev_cond)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, cond)
                else:
                    self.implement_guard(guard_token, false_cond)
        return genop_cmp_guard

    def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond):
        need_direct_jp = 'A' not in cond
        need_rev_jp = 'A' not in rev_cond
        def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
                                  result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
                checkfalsecond = false_cond
                need_jp = need_direct_jp
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond
                checkfalsecond = false_rev_cond
                need_jp = need_rev_jp
            if guard_opnum == rop.GUARD_FALSE:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 6)
                self.implement_guard(guard_token, checkcond)
            else:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 2)
                    self.mc.J_il8(rx86.Conditions[checkcond], 5)
                    self.implement_guard(guard_token)
                else:
                    self.implement_guard(guard_token, checkfalsecond)
        return genop_cmp_guard_float

    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
                   argtypes=None, callconv=FFI_DEFAULT_ABI):
        if IS_X86_64:
            return self._emit_call_64(force_index, x, arglocs, start, argtypes)

        p = 0
        n = len(arglocs)
        for i in range(start, n):
            loc = arglocs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    self.mc.MOVSD_sx(p, loc.value)
                else:
                    self.mc.MOV_sr(p, loc.value)
            p += loc.get_width()
        p = 0
        for i in range(start, n):
            loc = arglocs[i]
            if not isinstance(loc, RegLoc):
                if loc.get_width() == 8:
                    self.mc.MOVSD(xmm0, loc)
                    self.mc.MOVSD_sx(p, xmm0.value)
                else:
                    self.mc.MOV(tmp, loc)
                    self.mc.MOV_sr(p, tmp.value)
            p += loc.get_width()
        self._regalloc.reserve_param(p//WORD)
        # x is a location
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        #
        if callconv != FFI_DEFAULT_ABI:
            self._fix_stdcall(callconv, p)

    def _fix_stdcall(self, callconv, p):
        from pypy.rlib.clibffi import FFI_STDCALL
        assert callconv == FFI_STDCALL
        # it's a bit stupid, but we're just going to cancel the fact that
        # the called function just added 'p' to ESP, by subtracting it again.
        self.mc.SUB_ri(esp.value, p)

    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
        src_locs = []
        dst_locs = []
        xmm_src_locs = []
        xmm_dst_locs = []
        pass_on_stack = []
        singlefloats = None

        # In reverse order for use with pop()
        unused_gpr = [r9, r8, ecx, edx, esi, edi]
        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]

        for i in range(start, len(arglocs)):
            loc = arglocs[i]
            # XXX: Should be much simpler to tell whether a location is a
            # float! It's so ugly because we have to "guard" the access to
            # .type with isinstance, since not all AssemblerLocation classes
            # are "typed"
            if ((isinstance(loc, RegLoc) and loc.is_xmm) or
                (isinstance(loc, StackLoc) and loc.type == FLOAT) or
                (isinstance(loc, ConstFloatLoc))):
                if len(unused_xmm) > 0:
                    xmm_src_locs.append(loc)
                    xmm_dst_locs.append(unused_xmm.pop())
                else:
                    pass_on_stack.append(loc)
            elif (argtypes is not None and argtypes[i-start] == 'S' and
                  len(unused_xmm) > 0):
                # Singlefloat argument
                if singlefloats is None: singlefloats = []
                singlefloats.append((loc, unused_xmm.pop()))
            else:
                if len(unused_gpr) > 0:
                    src_locs.append(loc)
                    dst_locs.append(unused_gpr.pop())
                else:
                    pass_on_stack.append(loc)

        # Emit instructions to pass the stack arguments
        # XXX: Would be nice to let remap_frame_layout take care of this, but
        # we'd need to create something like StackLoc, but relative to esp,
        # and I don't know if it's worth it.
        for i in range(len(pass_on_stack)):
            loc = pass_on_stack[i]
            if not isinstance(loc, RegLoc):
                if isinstance(loc, StackLoc) and loc.type == FLOAT:
                    self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
                    self.mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
                else:
                    self.mc.MOV(X86_64_SCRATCH_REG, loc)
                    self.mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
            else:
                # It's a register
                if loc.is_xmm:
                    self.mc.MOVSD_sx(i*WORD, loc.value)
                else:
                    self.mc.MOV_sr(i*WORD, loc.value)

        # Handle register arguments: first remap the xmm arguments
        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
                           X86_64_XMM_SCRATCH_REG)

        # Load the singlefloat arguments from main regs or stack to xmm regs
        if singlefloats is not None:
            for src, dst in singlefloats:
                self.mc.MOVD(dst, src)

        # Finally remap the arguments in the main regs
        # If x is a register and is in dst_locs, then oops, it needs to
        # be moved away:
        if x in dst_locs:
            src_locs.append(x)
            dst_locs.append(r10)
            x = r10
        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)

        self._regalloc.reserve_param(len(pass_on_stack))
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)

    def call(self, addr, args, res):
        force_index = self.write_new_force_index()
        self._emit_call(force_index, imm(addr), args)
        assert res is eax

    def write_new_force_index(self):
        # for shadowstack only: get a new, unused force_index number and
        # write it to FORCE_INDEX_OFS.  Used to record the call shape
        # (i.e. where the GC pointers are in the stack) around a CALL
        # instruction that doesn't already have a force_index.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            clt = self.current_clt
            force_index = clt.reserve_and_record_some_faildescr_index()
            self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
            return force_index
        else:
            # the return value is ignored, apart from the fact that it
            # is not negative.
            return 0

    genop_int_neg = _unaryop("NEG")
    genop_int_invert = _unaryop("NOT")

    genop_int_add = _binaryop("ADD", True)
    genop_int_sub = _binaryop("SUB")
    genop_int_mul = _binaryop("IMUL", True)
    genop_int_and = _binaryop("AND", True)
    genop_int_or = _binaryop("OR", True)
    genop_int_xor = _binaryop("XOR", True)
    genop_int_lshift = _binaryop("SHL")
    genop_int_rshift = _binaryop("SAR")
    genop_uint_rshift = _binaryop("SHR")
    genop_float_add = _binaryop("ADDSD", True)
    genop_float_sub = _binaryop('SUBSD')
    genop_float_mul = _binaryop('MULSD', True)
    genop_float_truediv = _binaryop('DIVSD')

    genop_int_lt = _cmpop("L", "G")
    genop_int_le = _cmpop("LE", "GE")
    genop_int_eq = _cmpop("E", "E")
    genop_int_ne = _cmpop("NE", "NE")
    genop_int_gt = _cmpop("G", "L")
    genop_int_ge = _cmpop("GE", "LE")
    genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
    genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne

    genop_float_lt = _cmpop_float('B', 'A')
    genop_float_le = _cmpop_float('BE', 'AE')
    genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True)
    genop_float_eq = _cmpop_float('E', 'E')
    genop_float_gt = _cmpop_float('A', 'B')
    genop_float_ge = _cmpop_float('AE', 'BE')

    genop_uint_gt = _cmpop("A", "B")
    genop_uint_lt = _cmpop("B", "A")
    genop_uint_le = _cmpop("BE", "AE")
    genop_uint_ge = _cmpop("AE", "BE")

    genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE")
    genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L")
    genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE")
    genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
    genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
    genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
    genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
    genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne

    genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
    genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
    genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
    genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")

    genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE", "BE")
    genop_guard_float_le = _cmpop_guard_float("BE", "AE", "A", "B")
    genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE", "NE")
    genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE", "AE")
    genop_guard_float_ge = _cmpop_guard_float("AE", "BE", "B", "A")

    def genop_math_sqrt(self, op, arglocs, resloc):
        self.mc.SQRTSD(arglocs[0], resloc)

    def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
        guard_opnum = guard_op.getopnum()
        if isinstance(arglocs[0], RegLoc):
            self.mc.UCOMISD(arglocs[0], arglocs[1])
        else:
            self.mc.UCOMISD(arglocs[1], arglocs[0])
        if guard_opnum == rop.GUARD_TRUE:
            self.mc.J_il8(rx86.Conditions['P'], 6)
            self.implement_guard(guard_token, 'E')
        else:
            self.mc.J_il8(rx86.Conditions['P'], 2)
            self.mc.J_il8(rx86.Conditions['E'], 5)
            self.implement_guard(guard_token)

    def genop_float_neg(self, op, arglocs, resloc):
        # Following what gcc does: res = x ^ 0x8000000000000000
        self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))

    def genop_float_abs(self, op, arglocs, resloc):
        # Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
        self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))

    def genop_cast_float_to_int(self, op, arglocs, resloc):
        self.mc.CVTTSD2SI(resloc, arglocs[0])

    def genop_cast_int_to_float(self, op, arglocs, resloc):
        self.mc.CVTSI2SD(resloc, arglocs[0])

    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
        loc0, loctmp = arglocs
        self.mc.CVTSD2SS(loctmp, loc0)
        assert isinstance(resloc, RegLoc)
        assert isinstance(loctmp, RegLoc)
        self.mc.MOVD_rx(resloc.value, loctmp.value)

    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc0, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc0.value)
        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)

    def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'Z')
        else:
            self.implement_guard(guard_token, 'NZ')

    def genop_int_is_true(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'NZ')
        else:
            self.implement_guard(guard_token, 'Z')

    def genop_int_is_zero(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['E'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_same_as(self, op, arglocs, resloc):
        self.mov(arglocs[0], resloc)
    genop_cast_ptr_to_int = genop_same_as
    genop_cast_int_to_ptr = genop_same_as

    def genop_int_mod(self, op, arglocs, resloc):
        if IS_X86_32:
            self.mc.CDQ()
        elif IS_X86_64:
            self.mc.CQO()
        self.mc.IDIV_r(ecx.value)

    genop_int_floordiv = genop_int_mod

    def genop_uint_floordiv(self, op, arglocs, resloc):
        self.mc.XOR_rr(edx.value, edx.value)
        self.mc.DIV_r(ecx.value)

    genop_llong_add = _binaryop("PADDQ", True)
    genop_llong_sub = _binaryop("PSUBQ")
    genop_llong_and = _binaryop("PAND", True)
    genop_llong_or = _binaryop("POR", True)
    genop_llong_xor = _binaryop("PXOR", True)

    def genop_llong_to_int(self, op, arglocs, resloc):
        loc = arglocs[0]
        assert isinstance(resloc, RegLoc)
        if isinstance(loc, RegLoc):
            self.mc.MOVD_rx(resloc.value, loc.value)
        elif isinstance(loc, StackLoc):
            self.mc.MOV_rb(resloc.value, loc.value)
        else:
            not_implemented("llong_to_int: %s" % (loc,))

    def genop_llong_from_int(self, op, arglocs, resloc):
        loc1, loc2 = arglocs
        if isinstance(loc1, ConstFloatLoc):
            assert loc2 is None
            self.mc.MOVSD(resloc, loc1)
        else:
            assert isinstance(loc1, RegLoc)
            assert isinstance(loc2, RegLoc)
            assert isinstance(resloc, RegLoc)
            self.mc.MOVD_xr(loc2.value, loc1.value)
            self.mc.PSRAD_xi(loc2.value, 31)    # -> 0 or -1
            self.mc.MOVD_xr(resloc.value, loc1.value)
            self.mc.PUNPCKLDQ_xx(resloc.value, loc2.value)

    def genop_llong_from_uint(self, op, arglocs, resloc):
        loc1, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc1, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc1.value)

    def genop_llong_eq(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 1, and 0x00, 0x0F and 0xF0 to 0.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.ADD_ri(resloc.value, 1)
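        # (CMP8 against -1 sets the carry flag unless the low byte is
        # exactly 0xFF; SBB of resloc with itself then yields 0 or -1
        # according to the carry, and ADD 1 maps that to 1 for "equal"
        # and 0 for "not equal".)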

    def genop_llong_ne(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 0, and 0x00, 0x0F and 0xF0 to 1.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.NEG_r(resloc.value)

    def genop_llong_lt(self, op, arglocs, resloc):
        # XXX just a special case for now: "x < 0"
        loc1, = arglocs
        self.mc.PMOVMSKB_rx(resloc.value, loc1.value)
        self.mc.SHR_ri(resloc.value, 7)
        self.mc.AND_ri(resloc.value, 1)
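        # PMOVMSKB collects the top bit of each byte of the xmm register,
        # so bit 7 of the result is the sign bit of the low 64-bit half;
        # the SHR/AND pair extracts exactly that bit.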

    # ----------

    def genop_call_malloc_gc(self, op, arglocs, result_loc):
        self.genop_call(op, arglocs, result_loc)
        self.propagate_memoryerror_if_eax_is_null()

    def propagate_memoryerror_if_eax_is_null(self):
        # if self.propagate_exception_path == 0 (tests), this may jump to 0
        # and segfaults.  too bad.  the alternative is to continue anyway
        # with eax==0, but that will segfault too.
        self.mc.TEST_rr(eax.value, eax.value)
        if WORD == 4:
            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
            self.mc.add_pending_relocation()
        elif WORD == 8:
            self.mc.J_il(rx86.Conditions['Z'], 0)
            pos = self.mc.get_relative_pos()
            self.pending_memoryerror_trampoline_from.append(pos)

    # ----------

    def load_from_mem(self, resloc, source_addr, size_loc, sign_loc):
        assert isinstance(resloc, RegLoc)
        size = size_loc.value
        sign = sign_loc.value
        if resloc.is_xmm:
            self.mc.MOVSD(resloc, source_addr)
        elif size == WORD:
            self.mc.MOV(resloc, source_addr)
        elif size == 1:
            if sign:
                self.mc.MOVSX8(resloc, source_addr)
            else:
                self.mc.MOVZX8(resloc, source_addr)
        elif size == 2:
            if sign:
                self.mc.MOVSX16(resloc, source_addr)
            else:
                self.mc.MOVZX16(resloc, source_addr)
        elif IS_X86_64 and size == 4:
            if sign:
                self.mc.MOVSX32(resloc, source_addr)
            else:
                self.mc.MOV32(resloc, source_addr)    # zero-extending
        else:
            not_implemented("load_from_mem size = %d" % size)

    def save_into_mem(self, dest_addr, value_loc, size_loc):
        size = size_loc.value
        if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
            self.mc.MOVSD(dest_addr, value_loc)
        elif size == 1:
            self.mc.MOV8(dest_addr, value_loc.lowest8bits())
        elif size == 2:
            self.mc.MOV16(dest_addr, value_loc)
        elif size == 4:
            self.mc.MOV32(dest_addr, value_loc)
        elif size == 8:
            if IS_X86_64:
                self.mc.MOV(dest_addr, value_loc)
            else:
                assert isinstance(value_loc, FloatImmedLoc)
                self.mc.MOV(dest_addr, value_loc.low_part_loc())
                self.mc.MOV(dest_addr.add_offset(4), value_loc.high_part_loc())
        else:
            not_implemented("save_into_mem size = %d" % size)
  1308. def genop_getfield_gc(self, op, arglocs, resloc):
  1309. base_loc, ofs_loc, size_loc, sign_loc = arglocs
  1310. assert isinstance(size_loc, ImmedLoc)
  1311. source_addr = AddressLoc(base_loc, ofs_loc)
  1312. self.load_from_mem(resloc, source_addr, size_loc, sign_loc)
  1313. genop_getfield_raw = genop_getfield_gc
  1314. genop_getfield_raw_pure = genop_getfield_gc
  1315. genop_getfield_gc_pure = genop_getfield_gc
  1316. def genop_getarrayitem_gc(self, op, arglocs, resloc):
  1317. base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
  1318. assert isinstance(ofs, ImmedLoc)
  1319. assert isinstance(size_loc, ImmedLoc)
  1320. scale = _get_scale(size_loc.value)
  1321. src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
  1322. self.load_from_mem(resloc, src_addr, size_loc, sign_loc)
  1323. genop_getarrayitem_gc_pure = genop_getarrayitem_gc
  1324. genop_getarrayitem_raw = genop_getarrayitem_gc
  1325. def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
  1326. base_loc, ofs_loc):
  1327. assert isinstance(itemsize_loc, ImmedLoc)
  1328. if isinstance(index_loc, ImmedLoc):
  1329. temp_loc = imm(index_loc.value * itemsize_loc.value)
  1330. elif _valid_addressing_size(itemsize_loc.value):
  1331. return AddressLoc(base_loc, index_loc, _get_scale(itemsize_loc.value), ofs_loc.value)
  1332. else:
1333. # XXX should avoid IMUL in more cases; a clever LEA would often do
  1334. assert isinstance(temp_loc, RegLoc)
  1335. assert isinstance(index_loc, RegLoc)
  1336. assert not temp_loc.is_xmm
  1337. self.mc.IMUL_rri(temp_loc.value, index_loc.value,
  1338. itemsize_loc.value)
  1339. assert isinstance(ofs_loc, ImmedLoc)
  1340. return AddressLoc(base_loc, temp_loc, 0, ofs_loc.value)
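# Three addressing strategies above: a constant index is folded into
# an immediate "index" part, an itemsize of 1, 2, 4 or 8 can use the
# hardware scaled addressing mode directly, and any other itemsize
# needs an explicit IMUL into the scratch register first.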
  1341. def genop_getinteriorfield_gc(self, op, arglocs, resloc):
  1342. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1343. index_loc, temp_loc, sign_loc) = arglocs
  1344. src_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1345. itemsize_loc, base_loc,
  1346. ofs_loc)
  1347. self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)
  1348. genop_getinteriorfield_raw = genop_getinteriorfield_gc
  1349. def genop_discard_setfield_gc(self, op, arglocs):
  1350. base_loc, ofs_loc, size_loc, value_loc = arglocs
  1351. assert isinstance(size_loc, ImmedLoc)
  1352. dest_addr = AddressLoc(base_loc, ofs_loc)
  1353. self.save_into_mem(dest_addr, value_loc, size_loc)
  1354. def genop_discard_setinteriorfield_gc(self, op, arglocs):
  1355. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1356. index_loc, temp_loc, value_loc) = arglocs
  1357. dest_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1358. itemsize_loc, base_loc,
  1359. ofs_loc)
  1360. self.save_into_mem(dest_addr, value_loc, fieldsize_loc)
  1361. genop_discard_setinteriorfield_raw = genop_discard_setinteriorfield_gc
  1362. def genop_discard_setarrayitem_gc(self, op, arglocs):
  1363. base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
  1364. assert isinstance(baseofs, ImmedLoc)
  1365. assert isinstance(size_loc, ImmedLoc)
  1366. scale = _get_scale(size_loc.value)
  1367. dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
  1368. self.save_into_mem(dest_addr, value_loc, size_loc)
  1369. def genop_discard_strsetitem(self, op, arglocs):
  1370. base_loc, ofs_loc, val_loc = arglocs
  1371. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1372. self.cpu.translate_support_code)
  1373. assert itemsize == 1
  1374. dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
  1375. self.mc.MOV8(dest_addr, val_loc.lowest8bits())
  1376. def genop_discard_unicodesetitem(self, op, arglocs):
  1377. base_loc, ofs_loc, val_loc = arglocs
  1378. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1379. self.cpu.translate_support_code)
  1380. if itemsize == 4:
  1381. self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
  1382. elif itemsize == 2:
  1383. self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
  1384. else:
  1385. assert 0, itemsize
  1386. genop_discard_setfield_raw = genop_discard_setfield_gc
  1387. genop_discard_setarrayitem_raw = genop_discard_setarrayitem_gc
  1388. def genop_strlen(self, op, arglocs, resloc):
  1389. base_loc = arglocs[0]
  1390. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1391. self.cpu.translate_support_code)
  1392. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1393. def genop_unicodelen(self, op, arglocs, resloc):
  1394. base_loc = arglocs[0]
  1395. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1396. self.cpu.translate_support_code)
  1397. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1398. def genop_arraylen_gc(self, op, arglocs, resloc):
  1399. base_loc, ofs_loc = arglocs
  1400. assert isinstance(ofs_loc, ImmedLoc)
  1401. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_loc.value))
  1402. def genop_strgetitem(self, op, arglocs, resloc):
  1403. base_loc, ofs_loc = arglocs
  1404. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1405. self.cpu.translate_support_code)
  1406. assert itemsize == 1
  1407. self.mc.MOVZX8(resloc, AddressLoc(base_loc, ofs_loc, 0, basesize))
  1408. def genop_unicodegetitem(self, op, arglocs, resloc):
  1409. base_loc, ofs_loc = arglocs
  1410. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1411. self.cpu.translate_support_code)
  1412. if itemsize == 4:
  1413. self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
  1414. elif itemsize == 2:
  1415. self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
  1416. else:
  1417. assert 0, itemsize
  1418. def genop_read_timestamp(self, op, arglocs, resloc):
  1419. self.mc.RDTSC()
  1420. if longlong.is_64_bit:
  1421. self.mc.SHL_ri(edx.value, 32)
  1422. self.mc.OR_rr(edx.value, eax.value)
  1423. else:
  1424. loc1, = arglocs
  1425. self.mc.MOVD_xr(loc1.value, edx.value)
  1426. self.mc.MOVD_xr(resloc.value, eax.value)
  1427. self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
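# RDTSC returns the 64-bit timestamp counter in EDX:EAX.  On 64-bit
# the two halves are merged into one register with SHL/OR; on 32-bit
# the result must live in an XMM register, so the halves are moved
# over with MOVD and glued together with PUNPCKLDQ, exactly as in
# genop_llong_from_int above.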
  1428. def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
  1429. loc = locs[0]
  1430. self.mc.TEST(loc, loc)
  1431. self.implement_guard(guard_token, 'Z')
  1432. genop_guard_guard_nonnull = genop_guard_guard_true
  1433. def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
  1434. locs, ign_2):
  1435. self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
  1436. self.implement_guard(guard_token, 'NZ')
  1437. def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
  1438. locs, ign_2):
  1439. pos = self.mc.get_relative_pos() + 1 # after potential jmp
  1440. guard_token.pos_jump_offset = pos
  1441. self.pending_guard_tokens.append(guard_token)
  1442. def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
  1443. locs, resloc):
  1444. loc = locs[0]
  1445. loc1 = locs[1]
  1446. self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
  1447. self.mc.CMP(loc1, loc)
  1448. self.implement_guard(guard_token, 'NE')
  1449. if resloc is not None:
  1450. self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
  1451. self.mc.MOV(heap(self.cpu.pos_exception()), imm0)
  1452. self.mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
  1453. def _gen_guard_overflow(self, guard_op, guard_token):
  1454. guard_opnum = guard_op.getopnum()
  1455. if guard_opnum == rop.GUARD_NO_OVERFLOW:
  1456. self.implement_guard(guard_token, 'O')
  1457. elif guard_opnum == rop.GUARD_OVERFLOW:
  1458. self.implement_guard(guard_token, 'NO')
  1459. else:
  1460. not_implemented("int_xxx_ovf followed by %s" %
  1461. guard_op.getopname())
  1462. def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1463. self.genop_int_add(op, arglocs, result_loc)
  1464. return self._gen_guard_overflow(guard_op, guard_token)
  1465. def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1466. self.genop_int_sub(op, arglocs, result_loc)
  1467. return self._gen_guard_overflow(guard_op, guard_token)
  1468. def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1469. self.genop_int_mul(op, arglocs, result_loc)
  1470. return self._gen_guard_overflow(guard_op, guard_token)
  1471. def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
  1472. loc = locs[0]
  1473. self.mc.TEST(loc, loc)
  1474. self.implement_guard(guard_token, 'NZ')
  1475. genop_guard_guard_isnull = genop_guard_guard_false
  1476. def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
  1477. if guard_op.getarg(0).type == FLOAT:
  1478. assert guard_op.getarg(1).type == FLOAT
  1479. self.mc.UCOMISD(locs[0], locs[1])
  1480. else:
  1481. self.mc.CMP(locs[0], locs[1])
  1482. self.implement_guard(guard_token, 'NE')
  1483. def _cmp_guard_class(self, locs):
  1484. offset = self.cpu.vtable_offset
  1485. if offset is not None:
  1486. self.mc.CMP(mem(locs[0], offset), locs[1])
  1487. else:
  1488. # XXX hard-coded assumption: to go from an object to its class
  1489. # we use the following algorithm:
  1490. # - read the typeid from mem(locs[0]), i.e. at offset 0
  1491. # - keep the lower 16 bits read there
  1492. # - multiply by 4 and use it as an offset in type_info_group
  1493. # - add 16 bytes, to go past the TYPE_INFO structure
  1494. loc = locs[1]
  1495. assert isinstance(loc, ImmedLoc)
  1496. classptr = loc.value
  1497. # here, we have to go back from 'classptr' to the value expected
  1498. # from reading the 16 bits in the object header
  1499. from pypy.rpython.memory.gctypelayout import GCData
  1500. sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
  1501. type_info_group = llop.gc_get_type_info_group(llmemory.Address)
  1502. type_info_group = rffi.cast(lltype.Signed, type_info_group)
  1503. expected_typeid = classptr - sizeof_ti - type_info_group
  1504. if IS_X86_32:
  1505. expected_typeid >>= 2
  1506. self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
  1507. elif IS_X86_64:
  1508. self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
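# This mirrors the algorithm described in the comment above: on
# 32-bit the object header stores the typeid as a 16-bit quantity
# equal to the type_info_group offset divided by 4 (hence the >>= 2
# and CMP16), while on 64-bit it is stored as a 32-bit byte offset
# and compared directly.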
  1509. def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
  1510. self._cmp_guard_class(locs)
  1511. self.implement_guard(guard_token, 'NE')
  1512. def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
  1513. guard_token, locs, ign_2):
  1514. self.mc.CMP(locs[0], imm1)
  1515. # Patched below
  1516. self.mc.J_il8(rx86.Conditions['B'], 0)
  1517. jb_location = self.mc.get_relative_pos()
  1518. self._cmp_guard_class(locs)
  1519. # patch the JB above
  1520. offset = self.mc.get_relative_pos() - jb_location
  1521. assert 0 < offset <= 127
  1522. self.mc.overwrite(jb_location-1, chr(offset))
  1523. #
  1524. self.implement_guard(guard_token, 'NE')
  1525. def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
  1526. fail_locs):
  1527. exc = (guard_opnum == rop.GUARD_EXCEPTION or
  1528. guard_opnum == rop.GUARD_NO_EXCEPTION or
  1529. guard_opnum == rop.GUARD_NOT_FORCED)
  1530. is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
  1531. return GuardToken(faildescr, failargs, fail_locs, exc,
  1532. is_guard_not_invalidated)
  1533. def generate_propagate_error_64(self):
  1534. assert WORD == 8
  1535. startpos = self.mc.get_relative_pos()
  1536. self.mc.JMP(imm(self.propagate_exception_path))
  1537. return startpos
  1538. def generate_quick_failure(self, guardtok):
  1539. """Generate the initial code for handling a failure. We try to
  1540. keep it as compact as possible.
  1541. """
  1542. fail_index = self.cpu.get_fail_descr_number(guardtok.faildescr)
  1543. mc = self.mc
  1544. startpos = mc.get_relative_pos()
  1545. withfloats = False
  1546. for box in guardtok.failargs:
  1547. if box is not None and box.type == FLOAT:
  1548. withfloats = True
  1549. break
  1550. exc = guardtok.exc
  1551. target = self.failure_recovery_code[exc + 2 * withfloats]
  1552. if WORD == 4:
  1553. mc.CALL(imm(target))
  1554. else:
  1555. # Generate exactly 13 bytes:
  1556. # MOV r11, target-as-8-bytes
  1557. # CALL *r11
  1558. # Keep the number 13 in sync with _find_failure_recovery_bytecode.
  1559. start = mc.get_relative_pos()
  1560. mc.MOV_ri64(X86_64_SCRATCH_REG.value, target)
  1561. mc.CALL_r(X86_64_SCRATCH_REG.value)
  1562. assert mc.get_relative_pos() == start + 13
  1563. # write tight data that describes the failure recovery
  1564. self.write_failure_recovery_description(mc, guardtok.failargs,
  1565. guardtok.fail_locs)
  1566. # write the fail_index too
  1567. mc.writeimm32(fail_index)
  1568. # for testing the decoding, write a final byte 0xCC
  1569. if not we_are_translated():
  1570. mc.writechar('\xCC')
  1571. faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
  1572. guardtok.faildescr._x86_debug_faillocs = faillocs
  1573. return startpos
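# Layout trick: the CALL emitted above pushes a return address that
# points directly at the description bytes written just after it;
# the failure recovery code never returns there but instead picks
# that address up from the stack and uses it as the bytecode to
# decode (see setup_failure_recovery).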
  1574. DESCR_REF = 0x00
  1575. DESCR_INT = 0x01
  1576. DESCR_FLOAT = 0x02
  1577. DESCR_SPECIAL = 0x03
  1578. CODE_FROMSTACK = 4 * (8 + 8*IS_X86_64)
  1579. CODE_STOP = 0 | DESCR_SPECIAL
  1580. CODE_HOLE = 4 | DESCR_SPECIAL
  1581. CODE_INPUTARG = 8 | DESCR_SPECIAL
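# Encoding used by write_failure_recovery_description() below: each
# fail argument becomes the integer kind + 4*n, written as a
# little-endian base-128 varint (7 payload bits per byte, high bit
# set on all but the last byte).  Registers use n = register number;
# stack slots use n = CODE_FROMSTACK//4 + position, so any decoded
# code >= CODE_FROMSTACK denotes a stack slot.  For instance, on
# 32-bit an INT in stack position 3 is DESCR_INT + 4*(8+3) = 45,
# a single byte.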
  1582. def write_failure_recovery_description(self, mc, failargs, locs):
  1583. for i in range(len(failargs)):
  1584. arg = failargs[i]
  1585. if arg is not None:
  1586. if arg.type == REF:
  1587. kind = self.DESCR_REF
  1588. elif arg.type == INT:
  1589. kind = self.DESCR_INT
  1590. elif arg.type == FLOAT:
  1591. kind = self.DESCR_FLOAT
  1592. else:
  1593. raise AssertionError("bogus kind")
  1594. loc = locs[i]
  1595. if isinstance(loc, StackLoc):
  1596. pos = loc.position
  1597. if pos < 0:
  1598. mc.writechar(chr(self.CODE_INPUTARG))
  1599. pos = ~pos
  1600. n = self.CODE_FROMSTACK//4 + pos
  1601. else:
  1602. assert isinstance(loc, RegLoc)
  1603. n = loc.value
  1604. n = kind + 4*n
  1605. while n > 0x7F:
  1606. mc.writechar(chr((n & 0x7F) | 0x80))
  1607. n >>= 7
  1608. else:
  1609. n = self.CODE_HOLE
  1610. mc.writechar(chr(n))
  1611. mc.writechar(chr(self.CODE_STOP))
  1612. # assert that the fail_boxes lists are big enough
  1613. assert len(failargs) <= self.fail_boxes_int.SIZE
  1614. def rebuild_faillocs_from_descr(self, bytecode):
  1615. from pypy.jit.backend.x86.regalloc import X86FrameManager
  1616. descr_to_box_type = [REF, INT, FLOAT]
  1617. bytecode = rffi.cast(rffi.UCHARP, bytecode)
  1618. arglocs = []
  1619. code_inputarg = False
  1620. while 1:
  1621. # decode the next instruction from the bytecode
  1622. code = rffi.cast(lltype.Signed, bytecode[0])
  1623. bytecode = rffi.ptradd(bytecode, 1)
  1624. if code >= self.CODE_FROMSTACK:
  1625. # 'code' identifies a stack location
  1626. if code > 0x7F:
  1627. shift = 7
  1628. code &= 0x7F
  1629. while True:
  1630. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1631. bytecode = rffi.ptradd(bytecode, 1)
  1632. code |= (nextcode & 0x7F) << shift
  1633. shift += 7
  1634. if nextcode <= 0x7F:
  1635. break
  1636. kind = code & 3
  1637. code = (code - self.CODE_FROMSTACK) >> 2
  1638. if code_inputarg:
  1639. code = ~code
  1640. code_inputarg = False
  1641. loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
  1642. elif code == self.CODE_STOP:
  1643. break
  1644. elif code == self.CODE_HOLE:
  1645. continue
  1646. elif code == self.CODE_INPUTARG:
  1647. code_inputarg = True
  1648. continue
  1649. else:
  1650. # 'code' identifies a register
  1651. kind = code & 3
  1652. code >>= 2
  1653. if kind == self.DESCR_FLOAT:
  1654. loc = regloc.XMMREGLOCS[code]
  1655. else:
  1656. loc = regloc.REGLOCS[code]
  1657. arglocs.append(loc)
  1658. return arglocs[:]
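# The decoder above is the exact inverse of
# write_failure_recovery_description(): it re-reads the varints and
# splits each code back into (kind, n), rebuilding either a register
# location or a frame position.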
  1659. @rgc.no_collect
  1660. def grab_frame_values(self, bytecode, frame_addr, allregisters):
  1661. # no malloc allowed here!!
  1662. self.fail_ebp = allregisters[16 + ebp.value]
  1663. code_inputarg = False
  1664. num = 0
  1665. value_hi = 0
  1666. while 1:
  1667. # decode the next instruction from the bytecode
  1668. code = rffi.cast(lltype.Signed, bytecode[0])
  1669. bytecode = rffi.ptradd(bytecode, 1)
  1670. if code >= self.CODE_FROMSTACK:
  1671. if code > 0x7F:
  1672. shift = 7
  1673. code &= 0x7F
  1674. while True:
  1675. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1676. bytecode = rffi.ptradd(bytecode, 1)
  1677. code |= (nextcode & 0x7F) << shift
  1678. shift += 7
  1679. if nextcode <= 0x7F:
  1680. break
  1681. # load the value from the stack
  1682. kind = code & 3
  1683. code = (code - self.CODE_FROMSTACK) >> 2
  1684. if code_inputarg:
  1685. code = ~code
  1686. code_inputarg = False
  1687. stackloc = frame_addr + get_ebp_ofs(code)
  1688. value = rffi.cast(rffi.LONGP, stackloc)[0]
  1689. if kind == self.DESCR_FLOAT and WORD == 4:
  1690. value_hi = value
  1691. value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
  1692. else:
  1693. # 'code' identifies a register: load its value
  1694. kind = code & 3
  1695. if kind == self.DESCR_SPECIAL:
  1696. if code == self.CODE_HOLE:
  1697. num += 1
  1698. continue
  1699. if code == self.CODE_INPUTARG:
  1700. code_inputarg = True
  1701. continue
  1702. assert code == self.CODE_STOP
  1703. break
  1704. code >>= 2
  1705. if kind == self.DESCR_FLOAT:
  1706. if WORD == 4:
  1707. value = allregisters[2*code]
  1708. value_hi = allregisters[2*code + 1]
  1709. else:
  1710. value = allregisters[code]
  1711. else:
  1712. value = allregisters[16 + code]
  1713. # store the loaded value into fail_boxes_<type>
  1714. if kind == self.DESCR_INT:
  1715. tgt = self.fail_boxes_int.get_addr_for_num(num)
  1716. elif kind == self.DESCR_REF:
  1717. tgt = self.fail_boxes_ptr.get_addr_for_num(num)
  1718. elif kind == self.DESCR_FLOAT:
  1719. tgt = self.fail_boxes_float.get_addr_for_num(num)
  1720. if WORD == 4:
  1721. rffi.cast(rffi.LONGP, tgt)[1] = value_hi
  1722. else:
  1723. assert 0, "bogus kind"
  1724. rffi.cast(rffi.LONGP, tgt)[0] = value
  1725. num += 1
  1726. #
  1727. if not we_are_translated():
  1728. assert bytecode[4] == 0xCC
  1729. self.fail_boxes_count = num
  1730. fail_index = rffi.cast(rffi.INTP, bytecode)[0]
  1731. fail_index = rffi.cast(lltype.Signed, fail_index)
  1732. return fail_index
  1733. def setup_failure_recovery(self):
  1734. @rgc.no_collect
  1735. def failure_recovery_func(registers):
  1736. # 'registers' is a pointer to a structure containing the
  1737. # original value of the registers, optionally the original
  1738. # value of XMM registers, and finally a reference to the
  1739. # recovery bytecode. See _build_failure_recovery() for details.
  1740. stack_at_ebp = registers[ebp.value]
  1741. bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
  1742. allregisters = rffi.ptradd(registers, -16)
  1743. return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)
  1744. self.failure_recovery_func = failure_recovery_func
  1745. self.failure_recovery_code = [0, 0, 0, 0]
  1746. _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
  1747. lltype.Signed))
  1748. def _build_failure_recovery(self, exc, withfloats=False):
  1749. failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
  1750. self.failure_recovery_func)
  1751. failure_recovery_func = rffi.cast(lltype.Signed,
  1752. failure_recovery_func)
  1753. mc = codebuf.MachineCodeBlockWrapper()
  1754. self.mc = mc
  1755. # Push all general purpose registers
  1756. for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
  1757. mc.PUSH_r(gpr)
  1758. # ebx/rbx is callee-save in both i386 and x86-64
  1759. mc.MOV_rr(ebx.value, esp.value)
  1760. if withfloats:
  1761. # Push all float registers
  1762. mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
  1763. for i in range(self.cpu.NUM_REGS):
  1764. mc.MOVSD_sx(8*i, i)
  1765. # we call a provided function that will
  1766. # - call our on_leave_jitted_hook which will mark
  1767. # the fail_boxes_ptr array as pointing to young objects to
  1768. # avoid unwarranted freeing
  1769. # - optionally save exception depending on the flag
  1770. addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
  1771. mc.CALL(imm(addr))
  1772. # the following call saves all values from the stack and from
  1773. # registers to the right 'fail_boxes_<type>' location.
  1774. # Note that the registers are saved so far in esi[0] to esi[7],
  1775. # as pushed above, plus optionally in esi[-16] to esi[-1] for
  1776. # the XMM registers. Moreover, esi[8] is a pointer to the recovery
  1777. # bytecode, pushed just before by the CALL instruction written by
1778. # generate_quick_failure(). XXX misaligned stack in the call, but
1779. # that is OK because failure_recovery_func does not call anything
1780. # else afterwards.
  1781. if IS_X86_32:
  1782. mc.PUSH_r(ebx.value)
  1783. elif IS_X86_64:
  1784. mc.MOV_rr(edi.value, ebx.value)
1785. # XXX: is it correct to align the stack only on 64-bit?
  1786. mc.AND_ri(esp.value, -16)
  1787. else:
  1788. raise AssertionError("Shouldn't happen")
  1789. mc.CALL(imm(failure_recovery_func))
  1790. # returns in eax the fail_index
  1791. # now we return from the complete frame, which starts from
  1792. # _call_header_with_stack_check(). The LEA in _call_footer below
  1793. # throws away most of the frame, including all the PUSHes that we
  1794. # did just above.
  1795. self._call_footer()
  1796. rawstart = mc.materialize(self.cpu.asmmemmgr, [])
  1797. self.failure_recovery_code[exc + 2 * withfloats] = rawstart
  1798. self.mc = None
  1799. def generate_failure(self, fail_index, locs, exc, locs_are_ref):
  1800. self.mc.begin_reuse_scratch_register()
  1801. for i in range(len(locs)):
  1802. loc = locs[i]
  1803. if isinstance(loc, RegLoc):
  1804. if loc.is_xmm:
  1805. adr = self.fail_boxes_float.get_addr_for_num(i)
  1806. self.mc.MOVSD(heap(adr), loc)
  1807. else:
  1808. if locs_are_ref[i]:
  1809. adr = self.fail_boxes_ptr.get_addr_for_num(i)
  1810. else:
  1811. adr = self.fail_boxes_int.get_addr_for_num(i)
  1812. self.mc.MOV(heap(adr), loc)
  1813. for i in range(len(locs)):
  1814. loc = locs[i]
  1815. if not isinstance(loc, RegLoc):
  1816. if ((isinstance(loc, StackLoc) and loc.type == FLOAT) or
  1817. isinstance(loc, ConstFloatLoc)):
  1818. self.mc.MOVSD(xmm0, loc)
  1819. adr = self.fail_boxes_float.get_addr_for_num(i)
  1820. self.mc.MOVSD(heap(adr), xmm0)
  1821. else:
  1822. if locs_are_ref[i]:
  1823. adr = self.fail_boxes_ptr.get_addr_for_num(i)
  1824. else:
  1825. adr = self.fail_boxes_int.get_addr_for_num(i)
  1826. self.mc.MOV(eax, loc)
  1827. self.mc.MOV(heap(adr), eax)
  1828. self.mc.end_reuse_scratch_register()
  1829. # we call a provided function that will
  1830. # - call our on_leave_jitted_hook which will mark
  1831. # the fail_boxes_ptr array as pointing to young objects to
  1832. # avoid unwarranted freeing
  1833. # - optionally save exception depending on the flag
  1834. addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
  1835. self.mc.CALL(imm(addr))
  1836. self.mc.MOV_ri(eax.value, fail_index)
  1837. # exit function
  1838. self._call_footer()
  1839. def implement_guard(self, guard_token, condition=None):
  1840. # These jumps are patched later.
  1841. if condition:
  1842. self.mc.J_il(rx86.Conditions[condition], 0)
  1843. else:
  1844. self.mc.JMP_l(0)
  1845. guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
  1846. self.pending_guard_tokens.append(guard_token)
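# Note that only a 32-bit zero placeholder is emitted here; the real
# jump target (the quick-failure stub produced by
# generate_quick_failure) is patched in later, which is why
# pos_jump_offset records the position of the 4-byte immediate.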
  1847. def genop_call(self, op, arglocs, resloc):
  1848. force_index = self.write_new_force_index()
  1849. self._genop_call(op, arglocs, resloc, force_index)
  1850. def _genop_call(self, op, arglocs, resloc, force_index):
  1851. from pypy.jit.backend.llsupport.descr import CallDescr
  1852. sizeloc = arglocs[0]
  1853. assert isinstance(sizeloc, ImmedLoc)
  1854. size = sizeloc.value
  1855. signloc = arglocs[1]
  1856. x = arglocs[2] # the function address
  1857. if x is eax:
  1858. tmp = ecx
  1859. else:
  1860. tmp = eax
  1861. descr = op.getdescr()
  1862. assert isinstance(descr, CallDescr)
  1863. self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
  1864. argtypes=descr.get_arg_types(),
  1865. callconv=descr.get_call_conv())
  1866. if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
  1867. # a float or a long long return
  1868. if descr.get_result_type() == 'L':
  1869. self.mc.MOV_br(resloc.value, eax.value) # long long
  1870. self.mc.MOV_br(resloc.value + 4, edx.value)
  1871. # XXX should ideally not move the result on the stack,
  1872. # but it's a mess to load eax/edx into a xmm register
  1873. # and this way is simpler also because the result loc
  1874. # can just be always a stack location
  1875. else:
  1876. self.mc.FSTPL_b(resloc.value) # float return
  1877. elif descr.get_result_type() == 'S':
  1878. # singlefloat return
  1879. assert resloc is eax
  1880. if IS_X86_32:
  1881. # must convert ST(0) to a 32-bit singlefloat and load it into EAX
  1882. # mess mess mess
  1883. self.mc.SUB_ri(esp.value, 4)
  1884. self.mc.FSTPS_s(0)
  1885. self.mc.POP_r(eax.value)
  1886. elif IS_X86_64:
  1887. # must copy from the lower 32 bits of XMM0 into eax
  1888. self.mc.MOVD_rx(eax.value, xmm0.value)
  1889. elif size == WORD:
  1890. assert resloc is eax or resloc is xmm0 # a full word
  1891. elif size == 0:
  1892. pass # void return
  1893. else:
  1894. # use the code in load_from_mem to do the zero- or sign-extension
  1895. assert resloc is eax
  1896. if size == 1:
  1897. srcloc = eax.lowest8bits()
  1898. else:
  1899. srcloc = eax
  1900. self.load_from_mem(eax, srcloc, sizeloc, signloc)
  1901. def genop_guard_call_may_force(self, op, guard_op, guard_token,
  1902. arglocs, result_loc):
  1903. faildescr = guard_op.getdescr()
  1904. fail_index = self.cpu.get_fail_descr_number(faildescr)
  1905. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  1906. self._genop_call(op, arglocs, result_loc, fail_index)
  1907. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  1908. self.implement_guard(guard_token, 'L')
  1909. def genop_guard_call_release_gil(self, op, guard_op, guard_token,
  1910. arglocs, result_loc):
  1911. # first, close the stack in the sense of the asmgcc GC root tracker
  1912. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  1913. if gcrootmap:
  1914. self.call_release_gil(gcrootmap, arglocs)
  1915. # do the call
  1916. faildescr = guard_op.getdescr()
  1917. fail_index = self.cpu.get_fail_descr_number(faildescr)
  1918. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  1919. self._genop_call(op, arglocs, result_loc, fail_index)
  1920. # then reopen the stack
  1921. if gcrootmap:
  1922. self.call_reacquire_gil(gcrootmap, result_loc)
  1923. # finally, the guard_not_forced
  1924. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  1925. self.implement_guard(guard_token, 'L')
  1926. def call_release_gil(self, gcrootmap, save_registers):
  1927. # First, we need to save away the registers listed in
  1928. # 'save_registers' that are not callee-save. XXX We assume that
  1929. # the XMM registers won't be modified. We store them in
  1930. # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
  1931. # single argument to closestack_addr below.
  1932. p = WORD
  1933. for reg in self._regalloc.rm.save_around_call_regs:
  1934. if reg in save_registers:
  1935. self.mc.MOV_sr(p, reg.value)
  1936. p += WORD
  1937. self._regalloc.reserve_param(p//WORD)
  1938. #
  1939. if gcrootmap.is_shadow_stack:
  1940. args = []
  1941. else:
  1942. # note that regalloc.py used save_all_regs=True to save all
  1943. # registers, so we don't have to care about saving them (other
  1944. # than ebp) in the close_stack_struct. But if they are registers
  1945. # like %eax that would be destroyed by this call, *and* they are
  1946. # used by arglocs for the *next* call, then trouble; for now we
  1947. # will just push/pop them.
  1948. from pypy.rpython.memory.gctransform import asmgcroot
  1949. css = self._regalloc.close_stack_struct
  1950. if css == 0:
  1951. use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
  1952. asmgcroot.FRAME_PTR) + 1)
  1953. pos = self._regalloc.fm.reserve_location_in_frame(use_words)
  1954. css = get_ebp_ofs(pos + use_words - 1)
  1955. self._regalloc.close_stack_struct = css
  1956. # The location where the future CALL will put its return address
  1957. # will be [ESP-WORD]. But we can't use that as the next frame's
  1958. # top address! As the code after releasegil() runs without the
  1959. # GIL, it might not be set yet by the time we need it (very
  1960. # unlikely), or it might be overwritten by the following call
1961. # to reacquiregil() (much more likely). So we hack even more
  1962. # and use a dummy location containing a dummy value (a pointer
  1963. # to itself) which we pretend is the return address :-/ :-/ :-/
1964. # It prevents us from storing any %esp-based stack locations, but
1965. # so far we don't.
  1966. adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
  1967. rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
  1968. self.gcrootmap_retaddr_forced = adr
  1969. frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
  1970. if rx86.fits_in_32bits(adr):
  1971. self.mc.MOV_bi(frame_ptr, adr) # MOV [css.frame], adr
  1972. else:
  1973. self.mc.MOV_ri(eax.value, adr) # MOV EAX, adr
  1974. self.mc.MOV_br(frame_ptr, eax.value) # MOV [css.frame], EAX
  1975. # Save ebp
  1976. index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
  1977. self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
  1978. # Call the closestack() function (also releasing the GIL)
  1979. if IS_X86_32:
  1980. reg = eax
  1981. elif IS_X86_64:
  1982. reg = edi
  1983. self.mc.LEA_rb(reg.value, css)
  1984. args = [reg]
  1985. #
  1986. self._emit_call(-1, imm(self.releasegil_addr), args)
  1987. # Finally, restore the registers saved above.
  1988. p = WORD
  1989. for reg in self._regalloc.rm.save_around_call_regs:
  1990. if reg in save_registers:
  1991. self.mc.MOV_rs(reg.value, p)
  1992. p += WORD
  1993. def call_reacquire_gil(self, gcrootmap, save_loc):
  1994. # save the previous result (eax/xmm0) into the stack temporarily.
  1995. # XXX like with call_release_gil(), we assume that we don't need
  1996. # to save xmm0 in this case.
  1997. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  1998. self.mc.MOV_sr(WORD, save_loc.value)
  1999. self._regalloc.reserve_param(2)
  2000. # call the reopenstack() function (also reacquiring the GIL)
  2001. if gcrootmap.is_shadow_stack:
  2002. args = []
  2003. else:
  2004. assert self.gcrootmap_retaddr_forced == -1, (
  2005. "missing mark_gc_roots() in CALL_RELEASE_GIL")
  2006. self.gcrootmap_retaddr_forced = 0
  2007. css = self._regalloc.close_stack_struct
  2008. assert css != 0
  2009. if IS_X86_32:
  2010. reg = eax
  2011. elif IS_X86_64:
  2012. reg = edi
  2013. self.mc.LEA_rb(reg.value, css)
  2014. args = [reg]
  2015. self._emit_call(-1, imm(self.reacqgil_addr), args)
  2016. # restore the result from the stack
  2017. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  2018. self.mc.MOV_rs(save_loc.value, WORD)
  2019. def genop_guard_call_assembler(self, op, guard_op, guard_token,
  2020. arglocs, result_loc):
  2021. faildescr = guard_op.getdescr()
  2022. fail_index = self.cpu.get_fail_descr_number(faildescr)
  2023. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  2024. descr = op.getdescr()
  2025. assert isinstance(descr, JitCellToken)
  2026. assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
  2027. #
  2028. # Write a call to the target assembler
  2029. self._emit_call(fail_index, imm(descr._x86_function_addr),
  2030. arglocs, 2, tmp=eax)
  2031. if op.result is None:
  2032. assert result_loc is None
  2033. value = self.cpu.done_with_this_frame_void_v
  2034. else:
  2035. kind = op.result.type
  2036. if kind == INT:
  2037. assert result_loc is eax
  2038. value = self.cpu.done_with_this_frame_int_v
  2039. elif kind == REF:
  2040. assert result_loc is eax
  2041. value = self.cpu.done_with_this_frame_ref_v
  2042. elif kind == FLOAT:
  2043. value = self.cpu.done_with_this_frame_float_v
  2044. else:
  2045. raise AssertionError(kind)
  2046. self.mc.CMP_ri(eax.value, value)
  2047. # patched later
  2048. self.mc.J_il8(rx86.Conditions['E'], 0) # goto B if we get 'done_with_this_frame'
  2049. je_location = self.mc.get_relative_pos()
  2050. #
  2051. # Path A: use assembler_helper_adr
  2052. jd = descr.outermost_jitdriver_sd
  2053. assert jd is not None
  2054. asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
  2055. self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
  2056. tmp=ecx)
  2057. if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
  2058. self.mc.FSTPL_b(result_loc.value)
  2059. #else: result_loc is already either eax or None, checked below
  2060. self.mc.JMP_l8(0) # jump to done, patched later
  2061. jmp_location = self.mc.get_relative_pos()
  2062. #
  2063. # Path B: fast path. Must load the return value, and reset the token
  2064. offset = jmp_location - je_location
  2065. assert 0 < offset <= 127
  2066. self.mc.overwrite(je_location - 1, chr(offset))
  2067. #
  2068. # Reset the vable token --- XXX really too much special logic here:-(
  2069. if jd.index_of_virtualizable >= 0:
  2070. from pypy.jit.backend.llsupport.descr import FieldDescr
  2071. fielddescr = jd.vable_token_descr
  2072. assert isinstance(fielddescr, FieldDescr)
  2073. ofs = fielddescr.offset
  2074. self.mc.MOV(eax, arglocs[1])
  2075. self.mc.MOV_mi((eax.value, ofs), 0)
  2076. # in the line above, TOKEN_NONE = 0
  2077. #
  2078. if op.result is not None:
  2079. # load the return value from fail_boxes_xxx[0]
  2080. kind = op.result.type
  2081. if kind == FLOAT:
  2082. xmmtmp = xmm0
  2083. adr = self.fail_boxes_float.get_addr_for_num(0)
  2084. self.mc.MOVSD(xmmtmp, heap(adr))
  2085. self.mc.MOVSD(result_loc, xmmtmp)
  2086. else:
  2087. assert result_loc is eax
  2088. if kind == INT:
  2089. adr = self.fail_boxes_int.get_addr_for_num(0)
  2090. self.mc.MOV(eax, heap(adr))
  2091. elif kind == REF:
  2092. adr = self.fail_boxes_ptr.get_addr_for_num(0)
  2093. self.mc.MOV(eax, heap(adr))
  2094. self.mc.MOV(heap(adr), imm0)
  2095. else:
  2096. raise AssertionError(kind)
  2097. #
  2098. # Here we join Path A and Path B again
  2099. offset = self.mc.get_relative_pos() - jmp_location
  2100. assert 0 <= offset <= 127
  2101. self.mc.overwrite(jmp_location - 1, chr(offset))
  2102. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  2103. self.implement_guard(guard_token, 'L')
  2104. def genop_discard_cond_call_gc_wb(self, op, arglocs):
  2105. # Write code equivalent to write_barrier() in the GC: it checks
  2106. # a flag in the object at arglocs[0], and if set, it calls the
  2107. # function remember_young_pointer() from the GC. The arguments
  2108. # to the call are in arglocs[:N]. The rest, arglocs[N:], contains
  2109. # registers that need to be saved and restored across the call.
  2110. # N is either 2 (regular write barrier) or 3 (array write barrier).
  2111. descr = op.getdescr()
  2112. if we_are_translated():
  2113. cls = self.cpu.gc_ll_descr.has_write_barrier_class()
  2114. assert cls is not None and isinstance(descr, cls)
  2115. #
  2116. opnum = op.getopnum()
  2117. if opnum == rop.COND_CALL_GC_WB:
  2118. N = 2
  2119. func = descr.get_write_barrier_fn(self.cpu)
  2120. card_marking = False
  2121. elif opnum == rop.COND_CALL_GC_WB_ARRAY:
  2122. N = 3
  2123. func = descr.get_write_barrier_from_array_fn(self.cpu)
  2124. assert func != 0
  2125. card_marking = descr.jit_wb_cards_set != 0
  2126. else:
  2127. raise AssertionError(opnum)
  2128. #
  2129. loc_base = arglocs[0]
  2130. self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
  2131. imm(descr.jit_wb_if_flag_singlebyte))
  2132. self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
  2133. jz_location = self.mc.get_relative_pos()
  2134. # for cond_call_gc_wb_array, also add another fast path:
  2135. # if GCFLAG_CARDS_SET, then we can just set one bit and be done
  2136. if card_marking:
  2137. self.mc.TEST8(addr_add_const(loc_base,
  2138. descr.jit_wb_cards_set_byteofs),
  2139. imm(descr.jit_wb_cards_set_singlebyte))
  2140. self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
  2141. jnz_location = self.mc.get_relative_pos()
  2142. else:
  2143. jnz_location = 0
  2144. # the following is supposed to be the slow path, so whenever possible
  2145. # we choose the most compact encoding over the most efficient one.
  2146. if IS_X86_32:
  2147. limit = -1 # push all arglocs on the stack
  2148. elif IS_X86_64:
  2149. limit = N - 1 # push only arglocs[N:] on the stack
  2150. for i in range(len(arglocs)-1, limit, -1):
  2151. loc = arglocs[i]
  2152. if isinstance(loc, RegLoc):
  2153. self.mc.PUSH_r(loc.value)
  2154. else:
  2155. assert not IS_X86_64 # there should only be regs in arglocs[N:]
  2156. self.mc.PUSH_i32(loc.getint())
  2157. if IS_X86_64:
  2158. # We clobber these registers to pass the arguments, but that's
  2159. # okay, because consider_cond_call_gc_wb makes sure that any
  2160. # caller-save registers with values in them are present in
  2161. # arglocs[N:] too, so they are saved on the stack above and
  2162. # restored below.
  2163. if N == 2:
  2164. callargs = [edi, esi]
  2165. else:
  2166. callargs = [edi, esi, edx]
  2167. remap_frame_layout(self, arglocs[:N], callargs,
  2168. X86_64_SCRATCH_REG)
  2169. #
  2170. # misaligned stack in the call, but it's ok because the write barrier
2171. # is not going to call anything else. Also, this assumes that the
  2172. # write barrier does not touch the xmm registers. (Slightly delicate
  2173. # assumption, given that the write barrier can end up calling the
  2174. # platform's malloc() from AddressStack.append(). XXX may need to
  2175. # be done properly)
  2176. self.mc.CALL(imm(func))
  2177. if IS_X86_32:
  2178. self.mc.ADD_ri(esp.value, N*WORD)
  2179. for i in range(N, len(arglocs)):
  2180. loc = arglocs[i]
  2181. assert isinstance(loc, RegLoc)
  2182. self.mc.POP_r(loc.value)
  2183. # if GCFLAG_CARDS_SET, then we can do the whole thing that would
  2184. # be done in the CALL above with just four instructions, so here
  2185. # is an inline copy of them
  2186. if card_marking:
  2187. self.mc.JMP_l8(0) # jump to the exit, patched later
  2188. jmp_location = self.mc.get_relative_pos()
  2189. # patch the JNZ above
  2190. offset = self.mc.get_relative_pos() - jnz_location
  2191. assert 0 < offset <= 127
  2192. self.mc.overwrite(jnz_location-1, chr(offset))
  2193. #
  2194. loc_index = arglocs[1]
  2195. if isinstance(loc_index, RegLoc):
  2196. # choose a scratch register
  2197. tmp1 = loc_index
  2198. self.mc.PUSH_r(tmp1.value)
  2199. # SHR tmp, card_page_shift
  2200. self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
  2201. # XOR tmp, -8
  2202. self.mc.XOR_ri(tmp1.value, -8)
  2203. # BTS [loc_base], tmp
  2204. self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
  2205. # done
  2206. self.mc.POP_r(tmp1.value)
  2207. elif isinstance(loc_index, ImmedLoc):
  2208. byte_index = loc_index.value >> descr.jit_wb_card_page_shift
  2209. byte_ofs = ~(byte_index >> 3)
  2210. byte_val = 1 << (byte_index & 7)
  2211. self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
  2212. else:
  2213. raise AssertionError("index is neither RegLoc nor ImmedLoc")
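# In both branches above, the card for array index i is bit
# (i >> card_page_shift) counted backwards from the start of the
# object: the XOR with -8 rewrites the shifted index as the
# equivalent *negative* bit offset for BTS (same byte as ~(i>>3),
# same bit as i&7), and the ImmedLoc branch computes that byte and
# bit directly at compile time.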
  2214. # patch the JMP above
  2215. offset = self.mc.get_relative_pos() - jmp_location
  2216. assert 0 < offset <= 127
  2217. self.mc.overwrite(jmp_location-1, chr(offset))
  2218. #
  2219. # patch the JZ above
  2220. offset = self.mc.get_relative_pos() - jz_location
  2221. assert 0 < offset <= 127
  2222. self.mc.overwrite(jz_location-1, chr(offset))
  2223. genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
  2224. def not_implemented_op_discard(self, op, arglocs):
  2225. not_implemented("not implemented operation: %s" % op.getopname())
  2226. def not_implemented_op(self, op, arglocs, resloc):
  2227. not_implemented("not implemented operation with res: %s" %
  2228. op.getopname())
  2229. def not_implemented_op_guard(self, op, guard_op,
  2230. failaddr, arglocs, resloc):
  2231. not_implemented("not implemented operation (guard): %s" %
  2232. op.getopname())
  2233. def mark_gc_roots(self, force_index, use_copy_area=False):
  2234. if force_index < 0:
  2235. return # not needed
  2236. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2237. if gcrootmap:
  2238. mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
  2239. if gcrootmap.is_shadow_stack:
  2240. gcrootmap.write_callshape(mark, force_index)
  2241. else:
  2242. if self.gcrootmap_retaddr_forced == 0:
  2243. self.mc.insert_gcroot_marker(mark) # common case
  2244. else:
  2245. assert self.gcrootmap_retaddr_forced != -1, (
  2246. "two mark_gc_roots() in a CALL_RELEASE_GIL")
  2247. gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
  2248. self.gcrootmap_retaddr_forced = -1
  2249. def closing_jump(self, target_token):
  2250. # The backend's logic assumes that the target code is in a piece of
  2251. # assembler that was also called with the same number of arguments,
  2252. # so that the locations [ebp+8..] of the input arguments are valid
  2253. # stack locations both before and after the jump.
  2254. my_nbargs = self.current_clt._debug_nbargs
  2255. target_nbargs = target_token._x86_clt._debug_nbargs
  2256. assert my_nbargs == target_nbargs
  2257. #
  2258. target = target_token._x86_loop_code
  2259. if target_token in self.target_tokens_currently_compiling:
  2260. curpos = self.mc.get_relative_pos() + 5
  2261. self.mc.JMP_l(target - curpos)
  2262. else:
  2263. self.mc.JMP(imm(target))
  2264. def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
  2265. assert size & (WORD-1) == 0 # must be correctly aligned
  2266. self.mc.MOV(eax, heap(nursery_free_adr))
  2267. self.mc.LEA_rm(edx.value, (eax.value, size))
  2268. self.mc.CMP(edx, heap(nursery_top_adr))
  2269. self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
  2270. jmp_adr = self.mc.get_relative_pos()
  2271. # See comments in _build_malloc_slowpath for the
  2272. # details of the two helper functions that we are calling below.
  2273. # First, we need to call two of them and not just one because we
  2274. # need to have a mark_gc_roots() in between. Then the calling
  2275. # convention of slowpath_addr{1,2} are tweaked a lot to allow
  2276. # the code here to be just two CALLs: slowpath_addr1 gets the
  2277. # size of the object to allocate from (EDX-EAX) and returns the
  2278. # result in EAX; slowpath_addr2 additionally returns in EDX a
  2279. # copy of heap(nursery_free_adr), so that the final MOV below is
  2280. # a no-op.
  2281. # reserve room for the argument to the real malloc and the
  2282. # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
  2283. # word)
  2284. self._regalloc.reserve_param(1+16)
  2285. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2286. shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
  2287. if not shadow_stack:
  2288. # there are two helpers to call only with asmgcc
  2289. slowpath_addr1 = self.malloc_slowpath1
  2290. self.mc.CALL(imm(slowpath_addr1))
  2291. self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
  2292. slowpath_addr2 = self.malloc_slowpath2
  2293. self.mc.CALL(imm(slowpath_addr2))
  2294. offset = self.mc.get_relative_pos() - jmp_adr
  2295. assert 0 < offset <= 127
  2296. self.mc.overwrite(jmp_adr-1, chr(offset))
  2297. self.mc.MOV(heap(nursery_free_adr), edx)
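# Fast path of the nursery bump allocator: EAX holds the current
# free pointer, LEA computes EAX+size into EDX, and if EDX does not
# exceed the nursery top the jump skips the slow-path CALLs and the
# final MOV publishes the new free pointer.  Thanks to the tweaked
# calling convention described above, the same final MOV is also
# correct after the slow path.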
  2298. genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
  2299. genop_list = [Assembler386.not_implemented_op] * rop._LAST
  2300. genop_llong_list = {}
  2301. genop_math_list = {}
  2302. genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST
  2303. for name, value in Assembler386.__dict__.iteritems():
  2304. if name.startswith('genop_discard_'):
  2305. opname = name[len('genop_discard_'):]
  2306. num = getattr(rop, opname.upper())
  2307. genop_discard_list[num] = value
  2308. elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
  2309. opname = name[len('genop_guard_'):]
  2310. num = getattr(rop, opname.upper())
  2311. genop_guard_list[num] = value
  2312. elif name.startswith('genop_llong_'):
  2313. opname = name[len('genop_llong_'):]
  2314. num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
  2315. genop_llong_list[num] = value
  2316. elif name.startswith('genop_math_'):
  2317. opname = name[len('genop_math_'):]
  2318. num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
  2319. genop_math_list[num] = value
  2320. elif name.startswith('genop_'):
  2321. opname = name[len('genop_'):]
  2322. num = getattr(rop, opname.upper())
  2323. genop_list[num] = value
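# These tables map operation numbers (rop.* / EffectInfo.OS_*)
# straight to unbound methods of Assembler386, so dispatching an
# operation is a single list or dict lookup; the 'genop_*' naming
# convention above is what ties each method to its operation.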
  2324. # XXX: ri386 migration shims:
  2325. def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
  2326. return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
  2327. def addr_add_const(reg_or_imm1, offset):
  2328. return AddressLoc(reg_or_imm1, ImmedLoc(0), 0, offset)
  2329. def mem(loc, offset):
  2330. return AddressLoc(loc, ImmedLoc(0), 0, offset)
  2331. def heap(addr):
  2332. return AddressLoc(ImmedLoc(addr), ImmedLoc(0), 0, 0)
  2333. def not_implemented(msg):
  2334. os.write(2, '[x86/asm] %s\n' % msg)
  2335. raise NotImplementedError(msg)
  2336. class BridgeAlreadyCompiled(Exception):
  2337. pass