
/pypy/jit/backend/x86/assembler.py

http://github.com/pypy/pypy
import sys, os
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from pypy.jit.metainterp.history import JitCellToken
from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.jit import AsmInfo
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
    gpr_reg_mgr_cls, _valid_addressing_size)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                       IS_X86_32, IS_X86_64)
from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
                                         esp, ebp, esi, edi,
                                         xmm0, xmm1, xmm2, xmm3,
                                         xmm4, xmm5, xmm6, xmm7,
                                         r8, r9, r10, r11,
                                         r12, r13, r14, r15,
                                         X86_64_SCRATCH_REG,
                                         X86_64_XMM_SCRATCH_REG,
                                         RegLoc, StackLoc, ConstFloatLoc,
                                         ImmedLoc, AddressLoc, imm,
                                         imm0, imm1, FloatImmedLoc)
from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.jit.backend.x86 import rx86, regloc, codebuf
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.x86.support import values_array
from pypy.jit.backend.x86 import support
from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
                             have_debug_prints)
from pypy.rlib import rgc
from pypy.rlib.clibffi import FFI_DEFAULT_ABI
from pypy.jit.backend.x86.jump import remap_frame_layout
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.codewriter import longlong
from pypy.rlib.rarithmetic import intmask
from pypy.rlib.objectmodel import compute_unique_id

# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
CALL_ALIGN = 16 // WORD

def align_stack_words(words):
    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
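
# For example, on x86-64 (WORD == 8) CALL_ALIGN is 2, so
# align_stack_words(5) == 6: an even number of 8-byte words keeps esp a
# multiple of 16 at each CALL.  On x86-32 (WORD == 4) CALL_ALIGN is 4
# and align_stack_words(5) == 8.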

class GuardToken(object):
    def __init__(self, faildescr, failargs, fail_locs, exc,
                 is_guard_not_invalidated):
        self.faildescr = faildescr
        self.failargs = failargs
        self.fail_locs = fail_locs
        self.exc = exc
        self.is_guard_not_invalidated = is_guard_not_invalidated

DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char), # 'b'ridge, 'l'abel or
                                                     # 'e'ntry point
                              ('number', lltype.Signed))

class Assembler386(object):
    _regalloc = None
    _output_loop_log = None

    def __init__(self, cpu, translate_support_code=False,
                 failargs_limit=1000):
        self.cpu = cpu
        self.verbose = False
        self.rtyper = cpu.rtyper
        self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
        self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
        self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
                                             failargs_limit)
        self.fail_ebp = 0
        self.loop_run_counters = []
        self.float_const_neg_addr = 0
        self.float_const_abs_addr = 0
        self.malloc_slowpath1 = 0
        self.malloc_slowpath2 = 0
        self.memcpy_addr = 0
        self.setup_failure_recovery()
        self._debug = False
        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
        self.fail_boxes_count = 0
        self.datablockwrapper = None
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        self.gcrootmap_retaddr_forced = 0
        self.teardown()

    def leave_jitted_hook(self):
        ptrs = self.fail_boxes_ptr.ar
        llop.gc_assume_young_pointers(lltype.Void,
                                      llmemory.cast_ptr_to_adr(ptrs))

    def set_debug(self, v):
        self._debug = v

    def setup_once(self):
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
        self._build_failure_recovery(False)
        self._build_failure_recovery(True)
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            support.ensure_sse2_floats()
            self._build_float_constants()
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            self._build_malloc_slowpath()
        self._build_stack_check_slowpath()
        if gc_ll_descr.gcrootmap:
            self._build_release_gil(gc_ll_descr.gcrootmap)
        debug_start('jit-backend-counts')
        self.set_debug(have_debug_prints())
        debug_stop('jit-backend-counts')

    def setup(self, looptoken):
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = []
            self.error_trampoline_64 = 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}

    def teardown(self):
        self.pending_guard_tokens = None
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = None
        self.mc = None
        self.current_clt = None

    def finish_once(self):
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                elif struct.type == 'b':
                    prefix = 'bridge ' + str(struct.number)
                else:
                    prefix = 'entry ' + str(struct.number)
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    def _build_float_constants(self):
        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
        float_constants = datablockwrapper.malloc_aligned(32, alignment=16)
        datablockwrapper.done()
        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
        qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
        # 0x8000000000000000
        neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
        # 0x7FFFFFFFFFFFFFFF
        abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
        data = neg_const + qword_padding + abs_const + qword_padding
        for i in range(len(data)):
            addr[i] = data[i]
        self.float_const_neg_addr = float_constants
        self.float_const_abs_addr = float_constants + 16
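
    # The two constants stored above are the standard SSE2 sign masks:
    # XORPD with 0x8000000000000000 flips the sign bit of a double and
    # ANDPD with 0x7FFFFFFFFFFFFFFF clears it; they are used by
    # genop_float_neg and genop_float_abs below.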

    def _build_malloc_slowpath(self):
        # With asmgcc, we need two helpers, so that we can write two CALL
        # instructions in assembler, with a mark_gc_roots in between.
        # With shadowstack, this is not needed, so we produce a single helper.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
        #
        # ---------- first helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        if self.cpu.supports_floats:          # save the XMM registers in
            for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
                mc.MOVSD_sx((WORD*2)+8*i, i)
        mc.SUB_rr(edx.value, eax.value)       # compute the size we want
        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        #
        # The registers to save in the copy area: with shadowstack, most
        # registers need to be saved.  With asmgcc, the callee-saved registers
        # don't need to.
        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
        if not shadow_stack:
            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_br(ofs, reg.value)
        #
        if shadow_stack:
            # ---- shadowstack ----
            mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
            if IS_X86_32:
                mc.MOV_sr(0, edx.value)          # push argument
            elif IS_X86_64:
                mc.MOV_rr(edi.value, edx.value)
            mc.CALL(imm(addr))
            mc.ADD_ri(esp.value, 16 - WORD)
        else:
            # ---- asmgcc ----
            if IS_X86_32:
                mc.MOV_sr(WORD, edx.value)       # save it as the new argument
            elif IS_X86_64:
                # rdi can be clobbered: its content was saved in the
                # copy area of the stack
                mc.MOV_rr(edi.value, edx.value)
            mc.JMP(imm(addr))                    # tail call to the real malloc
            rawstart = mc.materialize(self.cpu.asmmemmgr, [])
            self.malloc_slowpath1 = rawstart
            # ---------- second helper for the slow path of malloc ----------
            mc = codebuf.MachineCodeBlockWrapper()
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_rb(reg.value, ofs)
            assert reg is not eax and reg is not edx
        #
        if self.cpu.supports_floats:          # restore the XMM registers
            for i in range(self.cpu.NUM_REGS):# from where they were saved
                mc.MOVSD_xs(i, (WORD*2)+8*i)
        #
        # Note: we check this after the code above, just because the code
        # above is more than 127 bytes on 64-bits...
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
        jz_location = mc.get_relative_pos()
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
        mc.RET()
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.  Also note that we didn't RET from this helper;
        # but the code we jump to will actually restore the stack
        # position based on EBP, which will get us out of here for free.
        offset = mc.get_relative_pos() - jz_location
        assert 0 < offset <= 127
        mc.overwrite(jz_location-1, chr(offset))
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath2 = rawstart
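
    # Note: which helper(s) the generated code actually CALLs depends on
    # the GC strategy, as the comment at the top of this method explains:
    # with shadowstack the whole sequence above is materialized as the
    # single helper malloc_slowpath2, while with asmgcc malloc_slowpath1
    # tail-calls the real malloc and malloc_slowpath2 restores the
    # registers afterwards.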

    def _build_propagate_exception_path(self):
        if self.cpu.propagate_exception_v < 0:
            return      # not supported (for tests, or non-translated)
        #
        self.mc = codebuf.MachineCodeBlockWrapper()
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
                                                default_to_memoryerror=True)
        self.mc.CALL(imm(addr))
        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
        self.propagate_exception_path = rawstart
        self.mc = None

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  ...                |    <-- aligned to a multiple of 16
        #    |  retaddr of caller  |
        #    |  my own retaddr     |    <-- esp
        #    +---------------------+
        #
        mc = codebuf.MachineCodeBlockWrapper()
        #
        stack_size = WORD
        if IS_X86_64:
            # on the x86_64, we have to save all the registers that may
            # have been used to pass arguments
            stack_size += 6*WORD + 8*8
            for reg in [edi, esi, edx, ecx, r8, r9]:
                mc.PUSH_r(reg.value)
            mc.SUB_ri(esp.value, 8*8)
            for i in range(8):
                mc.MOVSD_sx(8*i, i)     # xmm0 to xmm7
        #
        if IS_X86_32:
            stack_size += 2*WORD
            mc.PUSH_r(eax.value)        # alignment
            mc.PUSH_r(esp.value)
        elif IS_X86_64:
            mc.MOV_rr(edi.value, esp.value)
        #
        # esp is now aligned to a multiple of 16 again
        mc.CALL(imm(slowpathaddr))
        #
        mc.MOV(eax, heap(self.cpu.pos_exception()))
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['NZ'], 0)
        jnz_location = mc.get_relative_pos()
        #
        if IS_X86_32:
            mc.ADD_ri(esp.value, 2*WORD)    # cancel the two PUSHes above
        elif IS_X86_64:
            # restore the registers
            for i in range(7, -1, -1):
                mc.MOVSD_xs(i, 8*i)
            mc.ADD_ri(esp.value, 8*8)
            for reg in [r9, r8, ecx, edx, esi, edi]:
                mc.POP_r(reg.value)
        #
        mc.RET()
        #
        # patch the JNZ above
        offset = mc.get_relative_pos() - jnz_location
        assert 0 < offset <= 127
        mc.overwrite(jnz_location-1, chr(offset))
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
        mc.CALL(imm(addr))
        #
        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        #
        # footer -- note the ADD, which skips the return address of this
        # function, and will instead return to the caller's caller.  Note
        # also that we completely ignore the saved arguments, because we
        # are interrupting the function.
        mc.ADD_ri(esp.value, stack_size)
        mc.RET()
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.stack_check_slowpath = rawstart
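
    # The "J_il8(cond, 0) ... overwrite(location-1, chr(offset))" pattern
    # used here and below is the backend's idiom for short forward jumps:
    # emit the 2-byte Jcond rel8 with a placeholder displacement, remember
    # get_relative_pos(), and patch the displacement byte once the target
    # is known.  The assert 0 < offset <= 127 checks that the skipped code
    # really fits in a signed 8-bit displacement.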

    @staticmethod
    @rgc.no_collect
    def _release_gil_asmgcc(css):
        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
        from pypy.rpython.memory.gctransform import asmgcroot
        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        next = asmgcroot.gcrootanchor.next
        new.next = next
        new.prev = asmgcroot.gcrootanchor
        asmgcroot.gcrootanchor.next = new
        next.prev = new
        # and now release the GIL
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css):
        # first reacquire the GIL
        after = rffi.aroundstate.after
        if after:
            after()
        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
        from pypy.rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _release_gil_shadowstack():
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        after = rffi.aroundstate.after
        if after:
            after()

    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
                                                  lltype.Void))

    def _build_release_gil(self, gcrootmap):
        if gcrootmap.is_shadow_stack:
            releasegil_func = llhelper(self._NOARG_FUNC,
                                       self._release_gil_shadowstack)
            reacqgil_func = llhelper(self._NOARG_FUNC,
                                     self._reacquire_gil_shadowstack)
        else:
            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
                                       self._release_gil_asmgcc)
            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
                                     self._reacquire_gil_asmgcc)
        self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func)
        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        '''adds the following attributes to looptoken:
               _x86_function_addr   (address of the generated func, as an int)
               _x86_loop_code       (debug: addr of the start of the ResOps)
               _x86_fullsize        (debug: full size including failure)
               _x86_debug_checksum
        '''
        # XXX this function is too longish and contains some code
        # duplication with assemble_bridge().  Also, we should think
        # about not storing on 'self' attributes that will live only
        # for the duration of compiling one loop or a bridge.
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        #
        self._call_header_with_stack_check()
        stackadjustpos = self._patchable_stackadjust()
        clt._debug_nbargs = len(inputargs)
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, clt.allgcrefs)
        looppos = self.mc.get_relative_pos()
        looptoken._x86_loop_code = looppos
        clt.frame_depth = -1     # temporarily
        frame_depth = self._assemble(regalloc, operations)
        clt.frame_depth = frame_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(looptoken)
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + looppos,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._x86_rawstart = rawstart
            looptoken._x86_fullsize = full_size
            looptoken._x86_ops_offset = ops_offset
        looptoken._x86_function_addr = rawstart

        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Loop # %s: %s" % (looptoken.number, loopname)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos)

    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        descr_number = self.cpu.get_fail_descr_number(faildescr)
        failure_recovery = self._find_failure_recovery_bytecode(faildescr)

        self.setup(original_loop_token)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
        if not we_are_translated():
            assert ([loc.assembler() for loc in arglocs] ==
                    [loc.assembler() for loc in faildescr._x86_debug_faillocs])
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        startpos = self.mc.get_relative_pos()
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             self.current_clt.allgcrefs)

        stackadjustpos = self._patchable_stackadjust()
        frame_depth = self._assemble(regalloc, operations)
        codeendpos = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(original_loop_token)

        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        if not we_are_translated():
            # for the benefit of tests
            faildescr._x86_bridge_frame_depth = frame_depth
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart)
        ops_offset = self.mc.ops_offset
        self.fixup_target_tokens(rawstart)
        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
                                           frame_depth)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Bridge # %s" % (descr_number,)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, fullsize)
        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def write_pending_failure_recoveries(self):
        # for each pending guard, generate the code of the recovery stub
        # at the end of self.mc.
        for tok in self.pending_guard_tokens:
            tok.pos_recovery_stub = self.generate_quick_failure(tok)
        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
            self.error_trampoline_64 = self.generate_propagate_error_64()

    def patch_pending_failure_recoveries(self, rawstart):
        # after we wrote the assembler to raw memory, set up
        # tok.faildescr._x86_adr_jump_offset to contain the raw address of
        # the 4-byte target field in the JMP/Jcond instruction, and patch
        # the field in question to point (initially) to the recovery stub
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            addr = rawstart + tok.pos_jump_offset
            tok.faildescr._x86_adr_jump_offset = addr
            relative_target = tok.pos_recovery_stub - (tok.pos_jump_offset + 4)
            assert rx86.fits_in_32bits(relative_target)
            #
            if not tok.is_guard_not_invalidated:
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(relative_target)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                #     (addr-in-the-code-of-the-not-yet-written-jump-target,
                #      relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
                # General idea: Although no code was generated by this
                # guard, the code might be patched with a "JMP rel32" to
                # the guard recovery code.  This recovery code is
                # already generated, and looks like the recovery code
                # for any guard, even if at first it has no jump to it.
                # So we may later write 5 bytes overriding the existing
                # instructions; this works because a CALL instruction
                # would also take at least 5 bytes.  If it could take
                # less, we would run into the issue that overwriting the
                # 5 bytes here might get a few nonsense bytes at the
                # return address of the following CALL.
        if WORD == 8:
            for pos_after_jz in self.pending_memoryerror_trampoline_from:
                assert self.error_trampoline_64 != 0     # only if non-empty
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)

    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)

    def _register_counter(self, tp, number, token):
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def _find_failure_recovery_bytecode(self, faildescr):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        if adr_jump_offset == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # follow the JMP/Jcond
        p = rffi.cast(rffi.INTP, adr_jump_offset)
        adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
        # skip the CALL
        if WORD == 4:
            adr_target += 5     # CALL imm
        else:
            adr_target += 13    # MOV r11, imm-as-8-bytes; CALL *r11
        return adr_target
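
    # (On 64-bit, the 13 bytes skipped are: MOV r11, imm64 = 10 bytes,
    # followed by CALL *r11 = 3 bytes.)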

    def patch_jump_for_descr(self, faildescr, adr_new_target):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        assert adr_jump_offset != 0
        offset = adr_new_target - (adr_jump_offset + 4)
        # If the new target fits within a rel32 of the jump, just patch
        # that.  Otherwise, leave the original rel32 to the recovery stub in
        # place, but clobber the recovery stub with a jump to the real
        # target.
        mc = codebuf.MachineCodeBlockWrapper()
        if rx86.fits_in_32bits(offset):
            mc.writeimm32(offset)
            mc.copy_to_raw_memory(adr_jump_offset)
        else:
            # "mov r11, addr; jmp r11" is up to 13 bytes, which fits in there
            # because we always write "mov r11, imm-as-8-bytes; call *r11" in
            # the first place.
            mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
            mc.JMP_r(X86_64_SCRATCH_REG.value)
            p = rffi.cast(rffi.INTP, adr_jump_offset)
            adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
            mc.copy_to_raw_memory(adr_target)
        faildescr._x86_adr_jump_offset = 0    # means "patched"

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._x86_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        box = BoxInt()
        box2 = BoxInt()
        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
                            box, descr=self.debug_counter_descr),
               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                            None, descr=self.debug_counter_descr)]
        operations.extend(ops)

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug:
            # before doing anything, let's increase a counter
            s = 0
            for op in operations:
                s += op.getopnum()
            looptoken._x86_debug_checksum = s

            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _assemble(self, regalloc, operations):
        self._regalloc = regalloc
        regalloc.compute_hint_frame_locations(operations)
        regalloc.walk_operations(operations)
        if we_are_translated() or self.cpu.dont_keepalive_stuff:
            self._regalloc = None   # else keep it around for debugging
        frame_depth = regalloc.get_final_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            target_frame_depth = jump_target_descr._x86_clt.frame_depth
            frame_depth = max(frame_depth, target_frame_depth)
        return frame_depth

    def _patchable_stackadjust(self):
        # stack adjustment LEA
        self.mc.LEA32_rb(esp.value, 0)
        return self.mc.get_relative_pos() - 4

    def _patch_stackadjust(self, adr_lea, allocated_depth):
        # patch stack adjustment LEA
        mc = codebuf.MachineCodeBlockWrapper()
        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words]
        mc.writeimm32(self._get_offset_of_ebp_from_esp(allocated_depth))
        mc.copy_to_raw_memory(adr_lea)

    def _get_offset_of_ebp_from_esp(self, allocated_depth):
        # Given that [EBP] is where we saved EBP, i.e. in the last word
        # of our fixed frame, then the 'words' value is:
        words = (FRAME_FIXED_SIZE - 1) + allocated_depth
        # align, e.g. for Mac OS X
        aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
        return -WORD * aligned_words
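
    # Worked example, assuming WORD == 4 (so CALL_ALIGN == 4) and, purely
    # for illustration, FRAME_FIXED_SIZE == 5: allocated_depth == 10 gives
    # words == 14, align_stack_words(16) == 16, aligned_words == 14, and
    # the LEA is patched to LEA ESP, [EBP-56].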

    def _call_header(self):
        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
        self.mc.PUSH_r(ebp.value)
        self.mc.MOV_rr(ebp.value, esp.value)
        for loc in self.cpu.CALLEE_SAVE_REGISTERS:
            self.mc.PUSH_r(loc.value)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)

    def _call_header_with_stack_check(self):
        if self.stack_check_slowpath == 0:
            pass                # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
            self.mc.SUB(eax, esp)                       # SUB eax, current
            self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
            self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
            jb_location = self.mc.get_relative_pos()
            self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
            # patch the JB above                        # .skip:
            offset = self.mc.get_relative_pos() - jb_location
            assert 0 < offset <= 127
            self.mc.overwrite(jb_location-1, chr(offset))
            #
        self._call_header()

    def _call_footer(self):
        self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)

        for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
            self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
        self.mc.POP_r(ebp.value)
        self.mc.RET()

    def _call_header_shadowstack(self, gcrootmap):
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (ebp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
        else:
            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
        self.mc.MOV_mi((eax.value, WORD), MARKER)       # MOV [eax+WORD], MARKER
        self.mc.MOV_mr((eax.value, 0), ebp.value)       # MOV [eax], ebp
        #
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
        else:
            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx

    def _call_footer_shadowstack(self, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.SUB_ji8(rst, 2*WORD)             # SUB [rootstacktop], 2*WORD
        else:
            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
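
    # After _call_header_shadowstack, the new top entry of the shadowstack
    # holds two words: [old top] = ebp of this frame and
    # [old top + WORD] = MARKER_FRAME, with rootstacktop bumped by 2*WORD;
    # _call_footer_shadowstack undoes the bump on the way out.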

    def redirect_call_assembler(self, oldlooptoken, newlooptoken):
        # some minimal sanity checking
        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
        assert old_nbargs == new_nbargs
        # we overwrite the instructions at the old _x86_direct_bootstrap_code
        # to start with a JMP to the new _x86_direct_bootstrap_code.
        # Ideally we should rather patch all existing CALLs, but well.
        oldadr = oldlooptoken._x86_function_addr
        target = newlooptoken._x86_function_addr
        mc = codebuf.MachineCodeBlockWrapper()
        mc.JMP(imm(target))
        if WORD == 4:         # keep in sync with prepare_loop()
            assert mc.get_relative_pos() == 5
        else:
            assert mc.get_relative_pos() <= 13
        mc.copy_to_raw_memory(oldadr)

    def dump(self, text):
        if not self.verbose:
            return
        _prev = Box._extended_display
        try:
            Box._extended_display = False
            pos = self.mc.get_relative_pos()
            print >> sys.stderr, ' 0x%x  %s' % (pos, text)
        finally:
            Box._extended_display = _prev

    # ------------------------------------------------------------

    def mov(self, from_loc, to_loc):
        if (isinstance(from_loc, RegLoc) and from_loc.is_xmm) or (isinstance(to_loc, RegLoc) and to_loc.is_xmm):
            self.mc.MOVSD(to_loc, from_loc)
        else:
            assert to_loc is not ebp
            self.mc.MOV(to_loc, from_loc)

    regalloc_mov = mov # legacy interface

    def regalloc_push(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
            self.mc.MOVSD_sx(0, loc.value)
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.PUSH_b(loc.value + 4)
            self.mc.PUSH_b(loc.value)
        else:
            self.mc.PUSH(loc)

    def regalloc_pop(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.MOVSD_xs(loc.value, 0)
            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.POP_b(loc.value)
            self.mc.POP_b(loc.value + 4)
        else:
            self.mc.POP(loc)

    def regalloc_immedmem2mem(self, from_loc, to_loc):
        # move a ConstFloatLoc directly to a StackLoc, as two MOVs
        # (even on x86-64, because the immediates are encoded as 32 bits)
        assert isinstance(from_loc, ConstFloatLoc)
        assert isinstance(to_loc, StackLoc)
        low_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[0]
        high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
        low_part = intmask(low_part)
        high_part = intmask(high_part)
        self.mc.MOV32_bi(to_loc.value, low_part)
        self.mc.MOV32_bi(to_loc.value + 4, high_part)

    def regalloc_perform(self, op, arglocs, resloc):
        genop_list[op.getopnum()](self, op, arglocs, resloc)

    def regalloc_perform_discard(self, op, arglocs):
        genop_discard_list[op.getopnum()](self, op, arglocs)

    def regalloc_perform_llong(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_llong_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_math(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_math_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_with_guard(self, op, guard_op, faillocs,
                                    arglocs, resloc):
        faildescr = guard_op.getdescr()
        assert isinstance(faildescr, AbstractFailDescr)
        failargs = guard_op.getfailargs()
        guard_opnum = guard_op.getopnum()
        guard_token = self.implement_guard_recovery(guard_opnum,
                                                    faildescr, failargs,
                                                    faillocs)
        if op is None:
            dispatch_opnum = guard_opnum
        else:
            dispatch_opnum = op.getopnum()
        genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
                                         arglocs, resloc)
        if not we_are_translated():
            # must be added by the genop_guard_list[]()
            assert guard_token is self.pending_guard_tokens[-1]

    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
        self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
                                         resloc)
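
    # genop_list, genop_discard_list, genop_guard_list, genop_llong_list
    # and genop_math_list used above are dispatch tables indexed by opnum
    # (or oopspecindex); they are built near the end of this file, after
    # all the genop_* methods below have been defined.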

    def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
        self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))

    def _unaryop(asmop):
        def genop_unary(self, op, arglocs, resloc):
            getattr(self.mc, asmop)(arglocs[0])
        return genop_unary

    def _binaryop(asmop, can_swap=False):
        def genop_binary(self, op, arglocs, result_loc):
            getattr(self.mc, asmop)(arglocs[0], arglocs[1])
        return genop_binary

    def _cmpop(cond, rev_cond):
        def genop_cmp(self, op, arglocs, result_loc):
            rl = result_loc.lowest8bits()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                self.mc.SET_ir(rx86.Conditions[cond], rl.value)
            self.mc.MOVZX8_rr(result_loc.value, rl.value)
        return genop_cmp
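
    # Example: genop_int_lt is _cmpop("L", "G"), so int_lt(x, y) becomes
    # CMP x, y; SETL on the low 8 bits of the result register; MOVZX8 to
    # widen to the full register.  When the first argument is a Const the
    # operands are swapped and the reversed condition "G" is used,
    # because CMP cannot take an immediate as its first operand.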

    def _cmpop_float(cond, rev_cond, is_ne=False):
        def genop_cmp(self, op, arglocs, result_loc):
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond

            tmp1 = result_loc.lowest8bits()
            if IS_X86_32:
                tmp2 = result_loc.higher8bits()
            elif IS_X86_64:
                tmp2 = X86_64_SCRATCH_REG.lowest8bits()

            self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value)
            if is_ne:
                self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
                self.mc.OR8_rr(tmp1.value, tmp2.value)
            else:
                self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
                self.mc.AND8_rr(tmp1.value, tmp2.value)
            self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
        return genop_cmp
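
    # UCOMISD sets ZF, PF and CF all to 1 on an unordered comparison,
    # i.e. when one operand is NaN.  The parity flag is what tells NaN
    # apart: float_eq ANDs the condition with SETNP so that NaN == x is
    # false, while float_ne ORs it with SETP so that NaN != x is true.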

    def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
        def genop_cmp_guard(self, op, guard_op, guard_token, arglocs, result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, rev_cond)
                else:
                    self.implement_guard(guard_token, false_rev_cond)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, cond)
                else:
                    self.implement_guard(guard_token, false_cond)
        return genop_cmp_guard

    def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond):
        need_direct_jp = 'A' not in cond
        need_rev_jp = 'A' not in rev_cond
        def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
                                  result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
                checkfalsecond = false_cond
                need_jp = need_direct_jp
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond
                checkfalsecond = false_rev_cond
                need_jp = need_rev_jp
            if guard_opnum == rop.GUARD_FALSE:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 6)
                self.implement_guard(guard_token, checkcond)
            else:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 2)
                    self.mc.J_il8(rx86.Conditions[checkcond], 5)
                    self.implement_guard(guard_token)
                else:
                    self.implement_guard(guard_token, checkfalsecond)
        return genop_cmp_guard_float
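
    # The hard-coded rel8 offsets above depend on the sizes of the
    # instructions emitted by implement_guard: a conditional "Jcond rel32"
    # is 6 bytes and an unconditional "JMP rel32" is 5 bytes, so "JP +6"
    # skips the guard jump entirely, while "JP +2" skips only the 2-byte
    # "Jcond rel8" that itself skips the 5-byte JMP.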

    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
                   argtypes=None, callconv=FFI_DEFAULT_ABI):
        if IS_X86_64:
            return self._emit_call_64(force_index, x, arglocs, start, argtypes)

        p = 0
        n = len(arglocs)
        for i in range(start, n):
            loc = arglocs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    self.mc.MOVSD_sx(p, loc.value)
                else:
                    self.mc.MOV_sr(p, loc.value)
            p += loc.get_width()
        p = 0
        for i in range(start, n):
            loc = arglocs[i]
            if not isinstance(loc, RegLoc):
                if loc.get_width() == 8:
                    self.mc.MOVSD(xmm0, loc)
                    self.mc.MOVSD_sx(p, xmm0.value)
                else:
                    self.mc.MOV(tmp, loc)
                    self.mc.MOV_sr(p, tmp.value)
            p += loc.get_width()
        # x is a location
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        #
        if callconv != FFI_DEFAULT_ABI:
            self._fix_stdcall(callconv, p)
        #
        self._regalloc.needed_extra_stack_locations(p//WORD)

    def _fix_stdcall(self, callconv, p):
        from pypy.rlib.clibffi import FFI_STDCALL
        assert callconv == FFI_STDCALL
        # it's a bit stupid, but we're just going to cancel the fact that
        # the called function just added 'p' to ESP, by subtracting it again.
        self.mc.SUB_ri(esp.value, p)

    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
        src_locs = []
        dst_locs = []
        xmm_src_locs = []
        xmm_dst_locs = []
        pass_on_stack = []
        singlefloats = None

        # In reverse order for use with pop()
        unused_gpr = [r9, r8, ecx, edx, esi, edi]
        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]

        for i in range(start, len(arglocs)):
            loc = arglocs[i]
            # XXX: Should be much simpler to tell whether a location is a
            # float! It's so ugly because we have to "guard" the access to
            # .type with isinstance, since not all AssemblerLocation classes
            # are "typed"
            if ((isinstance(loc, RegLoc) and loc.is_xmm) or
                (isinstance(loc, StackLoc) and loc.type == FLOAT) or
                (isinstance(loc, ConstFloatLoc))):
                if len(unused_xmm) > 0:
                    xmm_src_locs.append(loc)
                    xmm_dst_locs.append(unused_xmm.pop())
                else:
                    pass_on_stack.append(loc)
            elif (argtypes is not None and argtypes[i-start] == 'S' and
                  len(unused_xmm) > 0):
                # Singlefloat argument
                if singlefloats is None: singlefloats = []
                singlefloats.append((loc, unused_xmm.pop()))
            else:
                if len(unused_gpr) > 0:
                    src_locs.append(loc)
                    dst_locs.append(unused_gpr.pop())
                else:
                    pass_on_stack.append(loc)

        # Emit instructions to pass the stack arguments
        # XXX: Would be nice to let remap_frame_layout take care of this, but
        # we'd need to create something like StackLoc, but relative to esp,
        # and I don't know if it's worth it.
        for i in range(len(pass_on_stack)):
            loc = pass_on_stack[i]
            if not isinstance(loc, RegLoc):
                if isinstance(loc, StackLoc) and loc.type == FLOAT:
                    self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
                    self.mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
                else:
                    self.mc.MOV(X86_64_SCRATCH_REG, loc)
                    self.mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
            else:
                # It's a register
                if loc.is_xmm:
                    self.mc.MOVSD_sx(i*WORD, loc.value)
                else:
                    self.mc.MOV_sr(i*WORD, loc.value)

        # Handle register arguments: first remap the xmm arguments
        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
                           X86_64_XMM_SCRATCH_REG)
        # Load the singlefloat arguments from main regs or stack to xmm regs
        if singlefloats is not None:
            for src, dst in singlefloats:
                self.mc.MOVD(dst, src)
        # Finally remap the arguments in the main regs
        # If x is a register and is in dst_locs, then oups, it needs to
        # be moved away:
        if x in dst_locs:
            src_locs.append(x)
            dst_locs.append(r10)
            x = r10
        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)

        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
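
    # This follows the System V AMD64 calling convention: the first six
    # integer/pointer arguments go in rdi, rsi, rdx, rcx, r8 and r9
    # (popped here from unused_gpr, which is stored in reverse), the
    # first eight float arguments go in xmm0-xmm7, and everything else
    # is passed on the stack.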

    def call(self, addr, args, res):
        force_index = self.write_new_force_index()
        self._emit_call(force_index, imm(addr), args)
        assert res is eax

    def write_new_force_index(self):
        # for shadowstack only: get a new, unused force_index number and
        # write it to FORCE_INDEX_OFS.  Used to record the call shape
        # (i.e. where the GC pointers are in the stack) around a CALL
        # instruction that doesn't already have a force_index.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            clt = self.current_clt
            force_index = clt.reserve_and_record_some_faildescr_index()
            self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
            return force_index
        else:
            # the return value is ignored, apart from the fact that it
            # is not negative.
            return 0

    genop_int_neg = _unaryop("NEG")
    genop_int_invert = _unaryop("NOT")

    genop_int_add = _binaryop("ADD", True)
    genop_int_sub = _binaryop("SUB")
    genop_int_mul = _binaryop("IMUL", True)
    genop_int_and = _binaryop("AND", True)
    genop_int_or = _binaryop("OR", True)
    genop_int_xor = _binaryop("XOR", True)
    genop_int_lshift = _binaryop("SHL")
    genop_int_rshift = _binaryop("SAR")
    genop_uint_rshift = _binaryop("SHR")
    genop_float_add = _binaryop("ADDSD", True)
    genop_float_sub = _binaryop('SUBSD')
    genop_float_mul = _binaryop('MULSD', True)
    genop_float_truediv = _binaryop('DIVSD')

    genop_int_lt = _cmpop("L", "G")
    genop_int_le = _cmpop("LE", "GE")
    genop_int_eq = _cmpop("E", "E")
    genop_int_ne = _cmpop("NE", "NE")
    genop_int_gt = _cmpop("G", "L")
    genop_int_ge = _cmpop("GE", "LE")
    genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
    genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne

    genop_float_lt = _cmpop_float('B', 'A')
    genop_float_le = _cmpop_float('BE', 'AE')
    genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True)
    genop_float_eq = _cmpop_float('E', 'E')
    genop_float_gt = _cmpop_float('A', 'B')
    genop_float_ge = _cmpop_float('AE', 'BE')

    genop_uint_gt = _cmpop("A", "B")
    genop_uint_lt = _cmpop("B", "A")
    genop_uint_le = _cmpop("BE", "AE")
    genop_uint_ge = _cmpop("AE", "BE")

    genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE")
    genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L")
    genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE")
    genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
    genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
    genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
    genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
    genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne

    genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
    genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
    genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
    genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")

    genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE", "BE")
    genop_guard_float_le = _cmpop_guard_float("BE", "AE", "A", "B")
    genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE", "NE")
    genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE", "AE")
    genop_guard_float_ge = _cmpop_guard_float("AE", "BE", "B", "A")

    def genop_math_sqrt(self, op, arglocs, resloc):
        self.mc.SQRTSD(resloc, arglocs[0])

    def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc):
        guard_opnum = guard_op.getopnum()
        if isinstance(arglocs[0], RegLoc):
            self.mc.UCOMISD(arglocs[0], arglocs[1])
        else:
            self.mc.UCOMISD(arglocs[1], arglocs[0])
        if guard_opnum == rop.GUARD_TRUE:
            self.mc.J_il8(rx86.Conditions['P'], 6)
            self.implement_guard(guard_token, 'E')
        else:
            self.mc.J_il8(rx86.Conditions['P'], 2)
            self.mc.J_il8(rx86.Conditions['E'], 5)
            self.implement_guard(guard_token)

    def genop_float_neg(self, op, arglocs, resloc):
        # Following what gcc does: res = x ^ 0x8000000000000000
        self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))

    def genop_float_abs(self, op, arglocs, resloc):
        # Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
        self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))

    def genop_cast_float_to_int(self, op, arglocs, resloc):
        self.mc.CVTTSD2SI(resloc, arglocs[0])

    def genop_cast_int_to_float(self, op, arglocs, resloc):
        self.mc.CVTSI2SD(resloc, arglocs[0])

    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
        loc0, loctmp = arglocs
        self.mc.CVTSD2SS(loctmp, loc0)
        assert isinstance(resloc, RegLoc)
        assert isinstance(loctmp, RegLoc)
        self.mc.MOVD_rx(resloc.value, loctmp.value)

    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc0, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc0.value)
        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)

    def genop_convert_float_bytes_to_longlong(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)

    def genop_convert_longlong_bytes_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)

    def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'Z')
        else:
            self.implement_guard(guard_token, 'NZ')

    def genop_int_is_true(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'NZ')
        else:
            self.implement_guard(guard_token, 'Z')

    def genop_int_is_zero(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['E'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_same_as(self, op, arglocs, resloc):
        self.mov(arglocs[0], resloc)
    genop_cast_ptr_to_int = genop_same_as
    genop_cast_int_to_ptr = genop_same_as

    def genop_int_mod(self, op, arglocs, resloc):
        if IS_X86_32:
            self.mc.CDQ()
        elif IS_X86_64:
            self.mc.CQO()
        self.mc.IDIV_r(ecx.value)

    genop_int_floordiv = genop_int_mod

    def genop_uint_floordiv(self, op, arglocs, resloc):
        self.mc.XOR_rr(edx.value, edx.value)
        self.mc.DIV_r(ecx.value)
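
    # IDIV and DIV divide the double-width value in edx:eax by the
    # operand: the register allocator puts the dividend in eax and the
    # divisor in ecx, CDQ/CQO sign-extends (and XOR edx, edx
    # zero-extends) eax into edx, and afterwards eax holds the quotient
    # and edx the remainder.  int_mod and int_floordiv can therefore
    # share the same code; they differ only in which register the
    # regalloc picks as the result.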

    genop_llong_add = _binaryop("PADDQ", True)
    genop_llong_sub = _binaryop("PSUBQ")
    genop_llong_and = _binaryop("PAND", True)
    genop_llong_or = _binaryop("POR", True)
    genop_llong_xor = _binaryop("PXOR", True)

    def genop_llong_to_int(self, op, arglocs, resloc):
        loc = arglocs[0]
        assert isinstance(resloc, RegLoc)
        if isinstance(loc, RegLoc):
            self.mc.MOVD_rx(resloc.value, loc.value)
        elif isinstance(loc, StackLoc):
            self.mc.MOV_rb(resloc.value, loc.value)
        else:
            not_implemented("llong_to_int: %s" % (loc,))

    def genop_llong_from_int(self, op, arglocs, resloc):
        loc1, loc2 = arglocs
        if isinstance(loc1, ConstFloatLoc):
            assert loc2 is None
            self.mc.MOVSD(resloc, loc1)
        else:
            assert isinstance(loc1, RegLoc)
            assert isinstance(loc2, RegLoc)
            assert isinstance(resloc, RegLoc)
            self.mc.MOVD_xr(loc2.value, loc1.value)
            self.mc.PSRAD_xi(loc2.value, 31)    # -> 0 or -1
            self.mc.MOVD_xr(resloc.value, loc1.value)
            self.mc.PUNPCKLDQ_xx(resloc.value, loc2.value)

    def genop_llong_from_uint(self, op, arglocs, resloc):
        loc1, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc1, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc1.value)

    def genop_llong_eq(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 1, and 0x00, 0x0F and 0xF0 to 0.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.ADD_ri(resloc.value, 1)

    def genop_llong_ne(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 0, and 0x00, 0x0F and 0xF0 to 1.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.NEG_r(resloc.value)
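
    # The CMP8/SBB trick above: CMP sets the carry flag exactly when the
    # low byte is unsigned-less-than 0xFF, i.e. when it is not 0xFF.
    # SBB of a register with itself then gives 0 (both double-words were
    # equal) or -1 (they were not), so ADD 1 produces the 0/1 result for
    # 'eq' and NEG produces it for 'ne'.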

    def genop_llong_lt(self, op, arglocs, resloc):
        # XXX just a special case for now: "x < 0"
        loc1, = arglocs
        self.mc.PMOVMSKB_rx(resloc.value, loc1.value)
        self.mc.SHR_ri(resloc.value, 7)
        self.mc.AND_ri(resloc.value, 1)

    # ----------

    def genop_call_malloc_gc(self, op, arglocs, result_loc):
        self.genop_call(op, arglocs, result_loc)
        self.propagate_memoryerror_if_eax_is_null()

    def propagate_memoryerror_if_eax_is_null(self):
        # if self.propagate_exception_path == 0 (tests), this may jump to 0
        # and segfaults.  too bad.  the alternative is to continue anyway
        # with eax==0, but that will segfault too.
        self.mc.TEST_rr(eax.value, eax.value)
        if WORD == 4:
            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
            self.mc.add_pending_relocation()
        elif WORD == 8:
            self.mc.J_il(rx86.Conditions['Z'], 0)
            pos = self.mc.get_relative_pos()
            self.pending_memoryerror_trampoline_from.append(pos)

    # ----------

    def load_from_mem(self, resloc, source_addr, size_loc, sign_loc):
        assert isinstance(resloc, RegLoc)
        size = size_loc.value
        sign = sign_loc.value
        if resloc.is_xmm:
            self.mc.MOVSD(resloc, source_addr)
        elif size == WORD:
            self.mc.MOV(resloc, source_addr)
        elif size == 1:
            if sign:
                self.mc.MOVSX8(resloc, source_addr)
            else:
                self.mc.MOVZX8(resloc, source_addr)
        elif size == 2:
            if sign:
                self.mc.MOVSX16(resloc, source_addr)
            else:
                self.mc.MOVZX16(resloc, source_addr)
        elif IS_X86_64 and size == 4:
            if sign:
                self.mc.MOVSX32(resloc, source_addr)
            else:
                self.mc.MOV32(resloc, source_addr)    # zero-extending
        else:
            not_implemented("load_from_mem size = %d" % size)

    def save_into_mem(self, dest_addr, value_loc, size_loc):
        size = size_loc.value
        if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
            self.mc.MOVSD(dest_addr, value_loc)
        elif size == 1:
            self.mc.MOV8(dest_addr, value_loc.lowest8bits())
        elif size == 2:
            self.mc.MOV16(dest_addr, value_loc)
        elif size == 4:
            self.mc.MOV32(dest_addr, value_loc)
        elif size == 8:
            if IS_X86_64:
                self.mc.MOV(dest_addr, value_loc)
            else:
                assert isinstance(value_loc, FloatImmedLoc)
                self.mc.MOV(dest_addr, value_loc.low_part_loc())
                self.mc.MOV(dest_addr.add_offset(4), value_loc.high_part_loc())
        else:
            not_implemented("save_into_mem size = %d" % size)
  1312. def genop_getfield_gc(self, op, arglocs, resloc):
  1313. base_loc, ofs_loc, size_loc, sign_loc = arglocs
  1314. assert isinstance(size_loc, ImmedLoc)
  1315. source_addr = AddressLoc(base_loc, ofs_loc)
  1316. self.load_from_mem(resloc, source_addr, size_loc, sign_loc)
  1317. genop_getfield_raw = genop_getfield_gc
  1318. genop_getfield_raw_pure = genop_getfield_gc
  1319. genop_getfield_gc_pure = genop_getfield_gc
  1320. def genop_getarrayitem_gc(self, op, arglocs, resloc):
  1321. base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
  1322. assert isinstance(ofs, ImmedLoc)
  1323. assert isinstance(size_loc, ImmedLoc)
  1324. scale = _get_scale(size_loc.value)
  1325. src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
  1326. self.load_from_mem(resloc, src_addr, size_loc, sign_loc)
  1327. genop_getarrayitem_gc_pure = genop_getarrayitem_gc
  1328. genop_getarrayitem_raw = genop_getarrayitem_gc
  1329. def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
  1330. base_loc, ofs_loc):
  1331. assert isinstance(itemsize_loc, ImmedLoc)
  1332. if isinstance(index_loc, ImmedLoc):
  1333. temp_loc = imm(index_loc.value * itemsize_loc.value)
  1334. elif _valid_addressing_size(itemsize_loc.value):
  1335. return AddressLoc(base_loc, index_loc, _get_scale(itemsize_loc.value), ofs_loc.value)
  1336. else:
  1337. # XXX we should avoid IMUL in more cases; a clever LEA could be used instead
  1338. assert isinstance(temp_loc, RegLoc)
  1339. assert isinstance(index_loc, RegLoc)
  1340. assert not temp_loc.is_xmm
  1341. self.mc.IMUL_rri(temp_loc.value, index_loc.value,
  1342. itemsize_loc.value)
  1343. assert isinstance(ofs_loc, ImmedLoc)
  1344. return AddressLoc(base_loc, temp_loc, 0, ofs_loc.value)
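# Added note: all three branches above compute the same effective
# address, base + index * itemsize + ofs, choosing between constant
# folding, x86 scaled addressing and an explicit multiply.  A rough
# Python model (illustrative only):
#
#     def interiorfield_addr(base, index, itemsize, ofs):
#         return base + index * itemsize + ofs
#
# With an immediate index the product is folded at assembly time; with
# an itemsize of 1, 2, 4 or 8 it becomes the scale field of the
# addressing mode; otherwise an IMUL into temp_loc is emitted first.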
  1345. def genop_getinteriorfield_gc(self, op, arglocs, resloc):
  1346. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1347. index_loc, temp_loc, sign_loc) = arglocs
  1348. src_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1349. itemsize_loc, base_loc,
  1350. ofs_loc)
  1351. self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)
  1352. genop_getinteriorfield_raw = genop_getinteriorfield_gc
  1353. def genop_discard_setfield_gc(self, op, arglocs):
  1354. base_loc, ofs_loc, size_loc, value_loc = arglocs
  1355. assert isinstance(size_loc, ImmedLoc)
  1356. dest_addr = AddressLoc(base_loc, ofs_loc)
  1357. self.save_into_mem(dest_addr, value_loc, size_loc)
  1358. def genop_discard_setinteriorfield_gc(self, op, arglocs):
  1359. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1360. index_loc, temp_loc, value_loc) = arglocs
  1361. dest_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1362. itemsize_loc, base_loc,
  1363. ofs_loc)
  1364. self.save_into_mem(dest_addr, value_loc, fieldsize_loc)
  1365. genop_discard_setinteriorfield_raw = genop_discard_setinteriorfield_gc
  1366. def genop_discard_setarrayitem_gc(self, op, arglocs):
  1367. base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
  1368. assert isinstance(baseofs, ImmedLoc)
  1369. assert isinstance(size_loc, ImmedLoc)
  1370. scale = _get_scale(size_loc.value)
  1371. dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
  1372. self.save_into_mem(dest_addr, value_loc, size_loc)
  1373. def genop_discard_strsetitem(self, op, arglocs):
  1374. base_loc, ofs_loc, val_loc = arglocs
  1375. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1376. self.cpu.translate_support_code)
  1377. assert itemsize == 1
  1378. dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
  1379. self.mc.MOV8(dest_addr, val_loc.lowest8bits())
  1380. def genop_discard_unicodesetitem(self, op, arglocs):
  1381. base_loc, ofs_loc, val_loc = arglocs
  1382. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1383. self.cpu.translate_support_code)
  1384. if itemsize == 4:
  1385. self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
  1386. elif itemsize == 2:
  1387. self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
  1388. else:
  1389. assert 0, itemsize
  1390. genop_discard_setfield_raw = genop_discard_setfield_gc
  1391. genop_discard_setarrayitem_raw = genop_discard_setarrayitem_gc
  1392. def genop_strlen(self, op, arglocs, resloc):
  1393. base_loc = arglocs[0]
  1394. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1395. self.cpu.translate_support_code)
  1396. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1397. def genop_unicodelen(self, op, arglocs, resloc):
  1398. base_loc = arglocs[0]
  1399. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1400. self.cpu.translate_support_code)
  1401. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1402. def genop_arraylen_gc(self, op, arglocs, resloc):
  1403. base_loc, ofs_loc = arglocs
  1404. assert isinstance(ofs_loc, ImmedLoc)
  1405. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_loc.value))
  1406. def genop_strgetitem(self, op, arglocs, resloc):
  1407. base_loc, ofs_loc = arglocs
  1408. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1409. self.cpu.translate_support_code)
  1410. assert itemsize == 1
  1411. self.mc.MOVZX8(resloc, AddressLoc(base_loc, ofs_loc, 0, basesize))
  1412. def genop_unicodegetitem(self, op, arglocs, resloc):
  1413. base_loc, ofs_loc = arglocs
  1414. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1415. self.cpu.translate_support_code)
  1416. if itemsize == 4:
  1417. self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
  1418. elif itemsize == 2:
  1419. self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
  1420. else:
  1421. assert 0, itemsize
  1422. def genop_read_timestamp(self, op, arglocs, resloc):
  1423. self.mc.RDTSC()
  1424. if longlong.is_64_bit:
  1425. self.mc.SHL_ri(edx.value, 32)
  1426. self.mc.OR_rr(edx.value, eax.value)
  1427. else:
  1428. loc1, = arglocs
  1429. self.mc.MOVD_xr(loc1.value, edx.value)
  1430. self.mc.MOVD_xr(resloc.value, eax.value)
  1431. self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
  1432. def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
  1433. loc = locs[0]
  1434. self.mc.TEST(loc, loc)
  1435. self.implement_guard(guard_token, 'Z')
  1436. genop_guard_guard_nonnull = genop_guard_guard_true
  1437. def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
  1438. locs, ign_2):
  1439. self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
  1440. self.implement_guard(guard_token, 'NZ')
  1441. def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
  1442. locs, ign_2):
  1443. pos = self.mc.get_relative_pos() + 1 # after potential jmp
  1444. guard_token.pos_jump_offset = pos
  1445. self.pending_guard_tokens.append(guard_token)
  1446. def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
  1447. locs, resloc):
  1448. loc = locs[0]
  1449. loc1 = locs[1]
  1450. self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
  1451. self.mc.CMP(loc1, loc)
  1452. self.implement_guard(guard_token, 'NE')
  1453. if resloc is not None:
  1454. self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
  1455. self.mc.MOV(heap(self.cpu.pos_exception()), imm0)
  1456. self.mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
  1457. def _gen_guard_overflow(self, guard_op, guard_token):
  1458. guard_opnum = guard_op.getopnum()
  1459. if guard_opnum == rop.GUARD_NO_OVERFLOW:
  1460. self.implement_guard(guard_token, 'O')
  1461. elif guard_opnum == rop.GUARD_OVERFLOW:
  1462. self.implement_guard(guard_token, 'NO')
  1463. else:
  1464. not_implemented("int_xxx_ovf followed by %s" %
  1465. guard_op.getopname())
  1466. def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1467. self.genop_int_add(op, arglocs, result_loc)
  1468. return self._gen_guard_overflow(guard_op, guard_token)
  1469. def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1470. self.genop_int_sub(op, arglocs, result_loc)
  1471. return self._gen_guard_overflow(guard_op, guard_token)
  1472. def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1473. self.genop_int_mul(op, arglocs, result_loc)
  1474. return self._gen_guard_overflow(guard_op, guard_token)
  1475. def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
  1476. loc = locs[0]
  1477. self.mc.TEST(loc, loc)
  1478. self.implement_guard(guard_token, 'NZ')
  1479. genop_guard_guard_isnull = genop_guard_guard_false
  1480. def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
  1481. if guard_op.getarg(0).type == FLOAT:
  1482. assert guard_op.getarg(1).type == FLOAT
  1483. self.mc.UCOMISD(locs[0], locs[1])
  1484. else:
  1485. self.mc.CMP(locs[0], locs[1])
  1486. self.implement_guard(guard_token, 'NE')
  1487. def _cmp_guard_class(self, locs):
  1488. offset = self.cpu.vtable_offset
  1489. if offset is not None:
  1490. self.mc.CMP(mem(locs[0], offset), locs[1])
  1491. else:
  1492. # XXX hard-coded assumption: to go from an object to its class
  1493. # we use the following algorithm:
  1494. # - read the typeid from mem(locs[0]), i.e. at offset 0;
  1495. # this is a complete word (N=4 bytes on 32-bit, N=8 on
  1496. # 64-bit)
  1497. # - keep the lower half of what is read there (i.e.
  1498. # truncate to an unsigned 'N / 2'-byte value)
  1499. # - multiply by 4 (on 32-bit only) and use it as an
  1500. # offset in type_info_group
  1501. # - add 16/32 bytes, to go past the TYPE_INFO structure
  1502. loc = locs[1]
  1503. assert isinstance(loc, ImmedLoc)
  1504. classptr = loc.value
  1505. # here, we have to go back from 'classptr' to the value expected
  1506. # from reading the half-word in the object header. Note that
  1507. # this half-word is at offset 0 on a little-endian machine;
  1508. # it would be at offset 2 or 4 on a big-endian machine.
  1509. from pypy.rpython.memory.gctypelayout import GCData
  1510. sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
  1511. type_info_group = llop.gc_get_type_info_group(llmemory.Address)
  1512. type_info_group = rffi.cast(lltype.Signed, type_info_group)
  1513. expected_typeid = classptr - sizeof_ti - type_info_group
  1514. if IS_X86_32:
  1515. expected_typeid >>= 2
  1516. self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
  1517. elif IS_X86_64:
  1518. self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
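# Added note: the arithmetic above inverts the lookup described in the
# comment: on 32-bit the half-word in the object header satisfies
# classptr == type_info_group + sizeof_ti + 4 * halfword, hence the
# '>> 2' before the 16-bit compare; on 64-bit the offset is stored
# unscaled, hence the plain 32-bit compare.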
  1519. def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
  1520. self._cmp_guard_class(locs)
  1521. self.implement_guard(guard_token, 'NE')
  1522. def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
  1523. guard_token, locs, ign_2):
  1524. self.mc.CMP(locs[0], imm1)
  1525. # Patched below
  1526. self.mc.J_il8(rx86.Conditions['B'], 0)
  1527. jb_location = self.mc.get_relative_pos()
  1528. self._cmp_guard_class(locs)
  1529. # patch the JB above
  1530. offset = self.mc.get_relative_pos() - jb_location
  1531. assert 0 < offset <= 127
  1532. self.mc.overwrite(jb_location-1, chr(offset))
  1533. #
  1534. self.implement_guard(guard_token, 'NE')
  1535. def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
  1536. fail_locs):
  1537. exc = (guard_opnum == rop.GUARD_EXCEPTION or
  1538. guard_opnum == rop.GUARD_NO_EXCEPTION or
  1539. guard_opnum == rop.GUARD_NOT_FORCED)
  1540. is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
  1541. return GuardToken(faildescr, failargs, fail_locs, exc,
  1542. is_guard_not_invalidated)
  1543. def generate_propagate_error_64(self):
  1544. assert WORD == 8
  1545. startpos = self.mc.get_relative_pos()
  1546. self.mc.JMP(imm(self.propagate_exception_path))
  1547. return startpos
  1548. def generate_quick_failure(self, guardtok):
  1549. """Generate the initial code for handling a failure. We try to
  1550. keep it as compact as possible.
  1551. """
  1552. fail_index = self.cpu.get_fail_descr_number(guardtok.faildescr)
  1553. mc = self.mc
  1554. startpos = mc.get_relative_pos()
  1555. withfloats = False
  1556. for box in guardtok.failargs:
  1557. if box is not None and box.type == FLOAT:
  1558. withfloats = True
  1559. break
  1560. exc = guardtok.exc
  1561. target = self.failure_recovery_code[exc + 2 * withfloats]
  1562. if WORD == 4:
  1563. mc.CALL(imm(target))
  1564. else:
  1565. # Generate exactly 13 bytes:
  1566. # MOV r11, target-as-8-bytes
  1567. # CALL *r11
  1568. # Keep the number 13 in sync with _find_failure_recovery_bytecode.
  1569. start = mc.get_relative_pos()
  1570. mc.MOV_ri64(X86_64_SCRATCH_REG.value, target)
  1571. mc.CALL_r(X86_64_SCRATCH_REG.value)
  1572. assert mc.get_relative_pos() == start + 13
  1573. # write tight data that describes the failure recovery
  1574. self.write_failure_recovery_description(mc, guardtok.failargs,
  1575. guardtok.fail_locs)
  1576. # write the fail_index too
  1577. mc.writeimm32(fail_index)
  1578. # for testing the decoding, write a final byte 0xCC
  1579. if not we_are_translated():
  1580. mc.writechar('\xCC')
  1581. faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
  1582. guardtok.faildescr._x86_debug_faillocs = faillocs
  1583. return startpos
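# Added note: the CALL emitted above pushes a return address that
# points straight at the description bytes written next.
# failure_recovery_func() (see setup_failure_recovery) reads it back
# as registers[NUM_REGS], so the "return address" doubles as a pointer
# to the recovery bytecode and is never actually returned to.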
  1584. DESCR_REF = 0x00
  1585. DESCR_INT = 0x01
  1586. DESCR_FLOAT = 0x02
  1587. DESCR_SPECIAL = 0x03
  1588. CODE_FROMSTACK = 4 * (8 + 8*IS_X86_64)
  1589. CODE_STOP = 0 | DESCR_SPECIAL
  1590. CODE_HOLE = 4 | DESCR_SPECIAL
  1591. CODE_INPUTARG = 8 | DESCR_SPECIAL
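# Added summary of the encoding written below: each failarg becomes the
# integer kind + 4*n, where 'n' is a register number or
# CODE_FROMSTACK//4 plus a stack position, emitted as a little-endian
# base-128 varint (7 payload bits per byte, high bit set on every byte
# except the last).  Illustrative 32-bit examples, where
# CODE_FROMSTACK == 32:
#
#     INT in register 0 (eax):    n = 0  -> single byte 0x01
#     INT at stack position 2:    n = 10 -> single byte 0x29
#     missing box:                single byte CODE_HOLE
#
# The stream ends with CODE_STOP, followed by the 32-bit fail_index
# written by generate_quick_failure().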
  1592. def write_failure_recovery_description(self, mc, failargs, locs):
  1593. for i in range(len(failargs)):
  1594. arg = failargs[i]
  1595. if arg is not None:
  1596. if arg.type == REF:
  1597. kind = self.DESCR_REF
  1598. elif arg.type == INT:
  1599. kind = self.DESCR_INT
  1600. elif arg.type == FLOAT:
  1601. kind = self.DESCR_FLOAT
  1602. else:
  1603. raise AssertionError("bogus kind")
  1604. loc = locs[i]
  1605. if isinstance(loc, StackLoc):
  1606. pos = loc.position
  1607. if pos < 0:
  1608. mc.writechar(chr(self.CODE_INPUTARG))
  1609. pos = ~pos
  1610. n = self.CODE_FROMSTACK//4 + pos
  1611. else:
  1612. assert isinstance(loc, RegLoc)
  1613. n = loc.value
  1614. n = kind + 4*n
  1615. while n > 0x7F:
  1616. mc.writechar(chr((n & 0x7F) | 0x80))
  1617. n >>= 7
  1618. else:
  1619. n = self.CODE_HOLE
  1620. mc.writechar(chr(n))
  1621. mc.writechar(chr(self.CODE_STOP))
  1622. # assert that the fail_boxes lists are big enough
  1623. assert len(failargs) <= self.fail_boxes_int.SIZE
  1624. def rebuild_faillocs_from_descr(self, bytecode):
  1625. from pypy.jit.backend.x86.regalloc import X86FrameManager
  1626. descr_to_box_type = [REF, INT, FLOAT]
  1627. bytecode = rffi.cast(rffi.UCHARP, bytecode)
  1628. arglocs = []
  1629. code_inputarg = False
  1630. while 1:
  1631. # decode the next instruction from the bytecode
  1632. code = rffi.cast(lltype.Signed, bytecode[0])
  1633. bytecode = rffi.ptradd(bytecode, 1)
  1634. if code >= self.CODE_FROMSTACK:
  1635. # 'code' identifies a stack location
  1636. if code > 0x7F:
  1637. shift = 7
  1638. code &= 0x7F
  1639. while True:
  1640. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1641. bytecode = rffi.ptradd(bytecode, 1)
  1642. code |= (nextcode & 0x7F) << shift
  1643. shift += 7
  1644. if nextcode <= 0x7F:
  1645. break
  1646. kind = code & 3
  1647. code = (code - self.CODE_FROMSTACK) >> 2
  1648. if code_inputarg:
  1649. code = ~code
  1650. code_inputarg = False
  1651. loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
  1652. elif code == self.CODE_STOP:
  1653. break
  1654. elif code == self.CODE_HOLE:
  1655. continue
  1656. elif code == self.CODE_INPUTARG:
  1657. code_inputarg = True
  1658. continue
  1659. else:
  1660. # 'code' identifies a register
  1661. kind = code & 3
  1662. code >>= 2
  1663. if kind == self.DESCR_FLOAT:
  1664. loc = regloc.XMMREGLOCS[code]
  1665. else:
  1666. loc = regloc.REGLOCS[code]
  1667. arglocs.append(loc)
  1668. return arglocs[:]
  1669. @rgc.no_collect
  1670. def grab_frame_values(self, bytecode, frame_addr, allregisters):
  1671. # no malloc allowed here!!
  1672. self.fail_ebp = allregisters[16 + ebp.value]
  1673. code_inputarg = False
  1674. num = 0
  1675. value_hi = 0
  1676. while 1:
  1677. # decode the next instruction from the bytecode
  1678. code = rffi.cast(lltype.Signed, bytecode[0])
  1679. bytecode = rffi.ptradd(bytecode, 1)
  1680. if code >= self.CODE_FROMSTACK:
  1681. if code > 0x7F:
  1682. shift = 7
  1683. code &= 0x7F
  1684. while True:
  1685. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1686. bytecode = rffi.ptradd(bytecode, 1)
  1687. code |= (nextcode & 0x7F) << shift
  1688. shift += 7
  1689. if nextcode <= 0x7F:
  1690. break
  1691. # load the value from the stack
  1692. kind = code & 3
  1693. code = (code - self.CODE_FROMSTACK) >> 2
  1694. if code_inputarg:
  1695. code = ~code
  1696. code_inputarg = False
  1697. stackloc = frame_addr + get_ebp_ofs(code)
  1698. value = rffi.cast(rffi.LONGP, stackloc)[0]
  1699. if kind == self.DESCR_FLOAT and WORD == 4:
  1700. value_hi = value
  1701. value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
  1702. else:
  1703. # 'code' identifies a register: load its value
  1704. kind = code & 3
  1705. if kind == self.DESCR_SPECIAL:
  1706. if code == self.CODE_HOLE:
  1707. num += 1
  1708. continue
  1709. if code == self.CODE_INPUTARG:
  1710. code_inputarg = True
  1711. continue
  1712. assert code == self.CODE_STOP
  1713. break
  1714. code >>= 2
  1715. if kind == self.DESCR_FLOAT:
  1716. if WORD == 4:
  1717. value = allregisters[2*code]
  1718. value_hi = allregisters[2*code + 1]
  1719. else:
  1720. value = allregisters[code]
  1721. else:
  1722. value = allregisters[16 + code]
  1723. # store the loaded value into fail_boxes_<type>
  1724. if kind == self.DESCR_INT:
  1725. tgt = self.fail_boxes_int.get_addr_for_num(num)
  1726. elif kind == self.DESCR_REF:
  1727. tgt = self.fail_boxes_ptr.get_addr_for_num(num)
  1728. elif kind == self.DESCR_FLOAT:
  1729. tgt = self.fail_boxes_float.get_addr_for_num(num)
  1730. if WORD == 4:
  1731. rffi.cast(rffi.LONGP, tgt)[1] = value_hi
  1732. else:
  1733. assert 0, "bogus kind"
  1734. rffi.cast(rffi.LONGP, tgt)[0] = value
  1735. num += 1
  1736. #
  1737. if not we_are_translated():
  1738. assert bytecode[4] == 0xCC
  1739. self.fail_boxes_count = num
  1740. fail_index = rffi.cast(rffi.INTP, bytecode)[0]
  1741. fail_index = rffi.cast(lltype.Signed, fail_index)
  1742. return fail_index
  1743. def setup_failure_recovery(self):
  1744. @rgc.no_collect
  1745. def failure_recovery_func(registers):
  1746. # 'registers' is a pointer to a structure containing the
  1747. # original value of the registers, optionally the original
  1748. # value of XMM registers, and finally a reference to the
  1749. # recovery bytecode. See _build_failure_recovery() for details.
  1750. stack_at_ebp = registers[ebp.value]
  1751. bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
  1752. allregisters = rffi.ptradd(registers, -16)
  1753. return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)
  1754. self.failure_recovery_func = failure_recovery_func
  1755. self.failure_recovery_code = [0, 0, 0, 0]
  1756. _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
  1757. lltype.Signed))
  1758. def _build_failure_recovery(self, exc, withfloats=False):
  1759. failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
  1760. self.failure_recovery_func)
  1761. failure_recovery_func = rffi.cast(lltype.Signed,
  1762. failure_recovery_func)
  1763. mc = codebuf.MachineCodeBlockWrapper()
  1764. self.mc = mc
  1765. # Push all general purpose registers
  1766. for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
  1767. mc.PUSH_r(gpr)
  1768. # ebx/rbx is callee-save in both i386 and x86-64
  1769. mc.MOV_rr(ebx.value, esp.value)
  1770. if withfloats:
  1771. # Push all float registers
  1772. mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
  1773. for i in range(self.cpu.NUM_REGS):
  1774. mc.MOVSD_sx(8*i, i)
  1775. # we call a provided function that will
  1776. # - call our on_leave_jitted_hook which will mark
  1777. # the fail_boxes_ptr array as pointing to young objects to
  1778. # avoid unwarranted freeing
  1779. # - optionally save exception depending on the flag
  1780. addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
  1781. mc.CALL(imm(addr))
  1782. # the following call saves all values from the stack and from
  1783. # registers to the right 'fail_boxes_<type>' location.
  1784. # Note that the registers are saved so far in ebx[0] to ebx[7]
  1785. # (ebx was set to esp above), plus optionally in ebx[-16] to
  1786. # ebx[-1] for the XMM registers. Moreover, ebx[8] is a pointer to the recovery
  1787. # bytecode, pushed just before by the CALL instruction written by
  1788. # generate_quick_failure(). XXX misaligned stack in the call, but
  1789. # it's ok because failure_recovery_func is not calling anything more
  1790. # XXX
  1791. if IS_X86_32:
  1792. mc.PUSH_r(ebx.value)
  1793. elif IS_X86_64:
  1794. mc.MOV_rr(edi.value, ebx.value)
  1795. else:
  1796. raise AssertionError("Shouldn't happen")
  1797. mc.CALL(imm(failure_recovery_func))
  1798. # returns in eax the fail_index
  1799. # now we return from the complete frame, which starts from
  1800. # _call_header_with_stack_check(). The LEA in _call_footer below
  1801. # throws away most of the frame, including all the PUSHes that we
  1802. # did just above.
  1803. self._call_footer()
  1804. rawstart = mc.materialize(self.cpu.asmmemmgr, [])
  1805. self.failure_recovery_code[exc + 2 * withfloats] = rawstart
  1806. self.mc = None
  1807. def generate_failure(self, fail_index, locs, exc, locs_are_ref):
  1808. self.mc.begin_reuse_scratch_register()
  1809. for i in range(len(locs)):
  1810. loc = locs[i]
  1811. if isinstance(loc, RegLoc):
  1812. if loc.is_xmm:
  1813. adr = self.fail_boxes_float.get_addr_for_num(i)
  1814. self.mc.MOVSD(heap(adr), loc)
  1815. else:
  1816. if locs_are_ref[i]:
  1817. adr = self.fail_boxes_ptr.get_addr_for_num(i)
  1818. else:
  1819. adr = self.fail_boxes_int.get_addr_for_num(i)
  1820. self.mc.MOV(heap(adr), loc)
  1821. for i in range(len(locs)):
  1822. loc = locs[i]
  1823. if not isinstance(loc, RegLoc):
  1824. if ((isinstance(loc, StackLoc) and loc.type == FLOAT) or
  1825. isinstance(loc, ConstFloatLoc)):
  1826. self.mc.MOVSD(xmm0, loc)
  1827. adr = self.fail_boxes_float.get_addr_for_num(i)
  1828. self.mc.MOVSD(heap(adr), xmm0)
  1829. else:
  1830. if locs_are_ref[i]:
  1831. adr = self.fail_boxes_ptr.get_addr_for_num(i)
  1832. else:
  1833. adr = self.fail_boxes_int.get_addr_for_num(i)
  1834. self.mc.MOV(eax, loc)
  1835. self.mc.MOV(heap(adr), eax)
  1836. self.mc.end_reuse_scratch_register()
  1837. # we call a provided function that will
  1838. # - call our on_leave_jitted_hook which will mark
  1839. # the fail_boxes_ptr array as pointing to young objects to
  1840. # avoid unwarranted freeing
  1841. # - optionally save exception depending on the flag
  1842. addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
  1843. self.mc.CALL(imm(addr))
  1844. self.mc.MOV_ri(eax.value, fail_index)
  1845. # exit function
  1846. self._call_footer()
  1847. def implement_guard(self, guard_token, condition=None):
  1848. # These jumps are patched later.
  1849. if condition:
  1850. self.mc.J_il(rx86.Conditions[condition], 0)
  1851. else:
  1852. self.mc.JMP_l(0)
  1853. guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
  1854. self.pending_guard_tokens.append(guard_token)
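# Added note: J_il/JMP_l above emit a jump whose 32-bit displacement is
# a zero placeholder; pos_jump_offset records where those four bytes
# live so that the displacement can be overwritten later, once the
# corresponding quick-failure stub has been generated.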
  1855. def genop_call(self, op, arglocs, resloc):
  1856. force_index = self.write_new_force_index()
  1857. self._genop_call(op, arglocs, resloc, force_index)
  1858. def _genop_call(self, op, arglocs, resloc, force_index):
  1859. from pypy.jit.backend.llsupport.descr import CallDescr
  1860. sizeloc = arglocs[0]
  1861. assert isinstance(sizeloc, ImmedLoc)
  1862. size = sizeloc.value
  1863. signloc = arglocs[1]
  1864. x = arglocs[2] # the function address
  1865. if x is eax:
  1866. tmp = ecx
  1867. else:
  1868. tmp = eax
  1869. descr = op.getdescr()
  1870. assert isinstance(descr, CallDescr)
  1871. self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
  1872. argtypes=descr.get_arg_types(),
  1873. callconv=descr.get_call_conv())
  1874. if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
  1875. # a float or a long long return
  1876. if descr.get_result_type() == 'L':
  1877. self.mc.MOV_br(resloc.value, eax.value) # long long
  1878. self.mc.MOV_br(resloc.value + 4, edx.value)
  1879. # XXX should ideally not move the result on the stack,
  1880. # but it's a mess to load eax/edx into a xmm register
  1881. # and this way is simpler also because the result loc
  1882. # can just be always a stack location
  1883. else:
  1884. self.mc.FSTPL_b(resloc.value) # float return
  1885. elif descr.get_result_type() == 'S':
  1886. # singlefloat return
  1887. assert resloc is eax
  1888. if IS_X86_32:
  1889. # must convert ST(0) to a 32-bit singlefloat and load it into EAX
  1890. # mess mess mess
  1891. self.mc.SUB_ri(esp.value, 4)
  1892. self.mc.FSTPS_s(0)
  1893. self.mc.POP_r(eax.value)
  1894. elif IS_X86_64:
  1895. # must copy from the lower 32 bits of XMM0 into eax
  1896. self.mc.MOVD_rx(eax.value, xmm0.value)
  1897. elif size == WORD:
  1898. assert resloc is eax or resloc is xmm0 # a full word
  1899. elif size == 0:
  1900. pass # void return
  1901. else:
  1902. # use the code in load_from_mem to do the zero- or sign-extension
  1903. assert resloc is eax
  1904. if size == 1:
  1905. srcloc = eax.lowest8bits()
  1906. else:
  1907. srcloc = eax
  1908. self.load_from_mem(eax, srcloc, sizeloc, signloc)
  1909. def genop_guard_call_may_force(self, op, guard_op, guard_token,
  1910. arglocs, result_loc):
  1911. faildescr = guard_op.getdescr()
  1912. fail_index = self.cpu.get_fail_descr_number(faildescr)
  1913. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  1914. self._genop_call(op, arglocs, result_loc, fail_index)
  1915. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  1916. self.implement_guard(guard_token, 'L')
  1917. def genop_guard_call_release_gil(self, op, guard_op, guard_token,
  1918. arglocs, result_loc):
  1919. # first, close the stack in the sense of the asmgcc GC root tracker
  1920. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  1921. if gcrootmap:
  1922. self.call_release_gil(gcrootmap, arglocs)
  1923. # do the call
  1924. faildescr = guard_op.getdescr()
  1925. fail_index = self.cpu.get_fail_descr_number(faildescr)
  1926. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  1927. self._genop_call(op, arglocs, result_loc, fail_index)
  1928. # then reopen the stack
  1929. if gcrootmap:
  1930. self.call_reacquire_gil(gcrootmap, result_loc)
  1931. # finally, the guard_not_forced
  1932. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  1933. self.implement_guard(guard_token, 'L')
  1934. def call_release_gil(self, gcrootmap, save_registers):
  1935. # First, we need to save away the registers listed in
  1936. # 'save_registers' that are not callee-save. XXX We assume that
  1937. # the XMM registers won't be modified. We store them in
  1938. # [ESP+4], [ESP+8], etc.; on x86-32 we leave enough room in [ESP]
  1939. # for the single argument to closestack_addr below.
  1940. if IS_X86_32:
  1941. p = WORD
  1942. elif IS_X86_64:
  1943. p = 0
  1944. for reg in self._regalloc.rm.save_around_call_regs:
  1945. if reg in save_registers:
  1946. self.mc.MOV_sr(p, reg.value)
  1947. p += WORD
  1948. #
  1949. if gcrootmap.is_shadow_stack:
  1950. args = []
  1951. else:
  1952. # note that regalloc.py used save_all_regs=True to save all
  1953. # registers, so we don't have to care about saving them (other
  1954. # than ebp) in the close_stack_struct. But if they are registers
  1955. # like %eax that would be destroyed by this call, *and* they are
  1956. # used by arglocs for the *next* call, then trouble; for now we
  1957. # will just push/pop them.
  1958. from pypy.rpython.memory.gctransform import asmgcroot
  1959. css = self._regalloc.close_stack_struct
  1960. if css == 0:
  1961. use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
  1962. asmgcroot.FRAME_PTR) + 1)
  1963. pos = self._regalloc.fm.reserve_location_in_frame(use_words)
  1964. css = get_ebp_ofs(pos + use_words - 1)
  1965. self._regalloc.close_stack_struct = css
  1966. # The location where the future CALL will put its return address
  1967. # will be [ESP-WORD]. But we can't use that as the next frame's
  1968. # top address! As the code after releasegil() runs without the
  1969. # GIL, it might not be set yet by the time we need it (very
  1970. # unlikely), or it might be overwritten by the following call
  1971. # to reacquiregil() (much more likely). So we hack even more
  1972. # and use a dummy location containing a dummy value (a pointer
  1973. # to itself) which we pretend is the return address :-/ :-/ :-/
  1974. # It prevents us from storing any %esp-based stack locations, but
  1975. # we don't need any so far.
  1976. adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
  1977. rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
  1978. self.gcrootmap_retaddr_forced = adr
  1979. frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
  1980. if rx86.fits_in_32bits(adr):
  1981. self.mc.MOV_bi(frame_ptr, adr) # MOV [css.frame], adr
  1982. else:
  1983. self.mc.MOV_ri(eax.value, adr) # MOV EAX, adr
  1984. self.mc.MOV_br(frame_ptr, eax.value) # MOV [css.frame], EAX
  1985. # Save ebp
  1986. index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
  1987. self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
  1988. # Call the closestack() function (also releasing the GIL)
  1989. if IS_X86_32:
  1990. reg = eax
  1991. elif IS_X86_64:
  1992. reg = edi
  1993. self.mc.LEA_rb(reg.value, css)
  1994. args = [reg]
  1995. #
  1996. self._emit_call(-1, imm(self.releasegil_addr), args)
  1997. # Finally, restore the registers saved above.
  1998. if IS_X86_32:
  1999. p = WORD
  2000. elif IS_X86_64:
  2001. p = 0
  2002. for reg in self._regalloc.rm.save_around_call_regs:
  2003. if reg in save_registers:
  2004. self.mc.MOV_rs(reg.value, p)
  2005. p += WORD
  2006. self._regalloc.needed_extra_stack_locations(p//WORD)
  2007. def call_reacquire_gil(self, gcrootmap, save_loc):
  2008. # save the previous result (eax/xmm0) into the stack temporarily.
  2009. # XXX like with call_release_gil(), we assume that we don't need
  2010. # to save xmm0 in this case.
  2011. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  2012. self.mc.MOV_sr(WORD, save_loc.value)
  2013. # call the reopenstack() function (also reacquiring the GIL)
  2014. if gcrootmap.is_shadow_stack:
  2015. args = []
  2016. else:
  2017. assert self.gcrootmap_retaddr_forced == -1, (
  2018. "missing mark_gc_roots() in CALL_RELEASE_GIL")
  2019. self.gcrootmap_retaddr_forced = 0
  2020. css = self._regalloc.close_stack_struct
  2021. assert css != 0
  2022. if IS_X86_32:
  2023. reg = eax
  2024. elif IS_X86_64:
  2025. reg = edi
  2026. self.mc.LEA_rb(reg.value, css)
  2027. args = [reg]
  2028. self._emit_call(-1, imm(self.reacqgil_addr), args)
  2029. # restore the result from the stack
  2030. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  2031. self.mc.MOV_rs(save_loc.value, WORD)
  2032. self._regalloc.needed_extra_stack_locations(2)
  2033. def genop_guard_call_assembler(self, op, guard_op, guard_token,
  2034. arglocs, result_loc):
  2035. faildescr = guard_op.getdescr()
  2036. fail_index = self.cpu.get_fail_descr_number(faildescr)
  2037. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  2038. descr = op.getdescr()
  2039. assert isinstance(descr, JitCellToken)
  2040. assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
  2041. #
  2042. # Write a call to the target assembler
  2043. self._emit_call(fail_index, imm(descr._x86_function_addr),
  2044. arglocs, 2, tmp=eax)
  2045. if op.result is None:
  2046. assert result_loc is None
  2047. value = self.cpu.done_with_this_frame_void_v
  2048. else:
  2049. kind = op.result.type
  2050. if kind == INT:
  2051. assert result_loc is eax
  2052. value = self.cpu.done_with_this_frame_int_v
  2053. elif kind == REF:
  2054. assert result_loc is eax
  2055. value = self.cpu.done_with_this_frame_ref_v
  2056. elif kind == FLOAT:
  2057. value = self.cpu.done_with_this_frame_float_v
  2058. else:
  2059. raise AssertionError(kind)
  2060. self.mc.CMP_ri(eax.value, value)
  2061. # patched later
  2062. self.mc.J_il8(rx86.Conditions['E'], 0) # goto B if we get 'done_with_this_frame'
  2063. je_location = self.mc.get_relative_pos()
  2064. #
  2065. # Path A: use assembler_helper_adr
  2066. jd = descr.outermost_jitdriver_sd
  2067. assert jd is not None
  2068. asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
  2069. self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
  2070. tmp=ecx)
  2071. if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
  2072. self.mc.FSTPL_b(result_loc.value)
  2073. #else: result_loc is already either eax or None, checked below
  2074. self.mc.JMP_l8(0) # jump to done, patched later
  2075. jmp_location = self.mc.get_relative_pos()
  2076. #
  2077. # Path B: fast path. Must load the return value, and reset the token
  2078. offset = jmp_location - je_location
  2079. assert 0 < offset <= 127
  2080. self.mc.overwrite(je_location - 1, chr(offset))
  2081. #
  2082. # Reset the vable token --- XXX really too much special logic here:-(
  2083. if jd.index_of_virtualizable >= 0:
  2084. from pypy.jit.backend.llsupport.descr import FieldDescr
  2085. fielddescr = jd.vable_token_descr
  2086. assert isinstance(fielddescr, FieldDescr)
  2087. ofs = fielddescr.offset
  2088. self.mc.MOV(eax, arglocs[1])
  2089. self.mc.MOV_mi((eax.value, ofs), 0)
  2090. # in the line above, TOKEN_NONE = 0
  2091. #
  2092. if op.result is not None:
  2093. # load the return value from fail_boxes_xxx[0]
  2094. kind = op.result.type
  2095. if kind == FLOAT:
  2096. xmmtmp = xmm0
  2097. adr = self.fail_boxes_float.get_addr_for_num(0)
  2098. self.mc.MOVSD(xmmtmp, heap(adr))
  2099. self.mc.MOVSD(result_loc, xmmtmp)
  2100. else:
  2101. assert result_loc is eax
  2102. if kind == INT:
  2103. adr = self.fail_boxes_int.get_addr_for_num(0)
  2104. self.mc.MOV(eax, heap(adr))
  2105. elif kind == REF:
  2106. adr = self.fail_boxes_ptr.get_addr_for_num(0)
  2107. self.mc.MOV(eax, heap(adr))
  2108. self.mc.MOV(heap(adr), imm0)
  2109. else:
  2110. raise AssertionError(kind)
  2111. #
  2112. # Here we join Path A and Path B again
  2113. offset = self.mc.get_relative_pos() - jmp_location
  2114. assert 0 <= offset <= 127
  2115. self.mc.overwrite(jmp_location - 1, chr(offset))
  2116. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  2117. self.implement_guard(guard_token, 'L')
  2118. def genop_discard_cond_call_gc_wb(self, op, arglocs):
  2119. # Write code equivalent to write_barrier() in the GC: it checks
  2120. # a flag in the object at arglocs[0], and if set, it calls the
  2121. # function remember_young_pointer() from the GC. The arguments
  2122. # to the call are in arglocs[:N]. The rest, arglocs[N:], contains
  2123. # registers that need to be saved and restored across the call.
  2124. # N is either 2 (regular write barrier) or 3 (array write barrier).
  2125. descr = op.getdescr()
  2126. if we_are_translated():
  2127. cls = self.cpu.gc_ll_descr.has_write_barrier_class()
  2128. assert cls is not None and isinstance(descr, cls)
  2129. #
  2130. opnum = op.getopnum()
  2131. if opnum == rop.COND_CALL_GC_WB:
  2132. N = 2
  2133. func = descr.get_write_barrier_fn(self.cpu)
  2134. card_marking = False
  2135. elif opnum == rop.COND_CALL_GC_WB_ARRAY:
  2136. N = 3
  2137. func = descr.get_write_barrier_from_array_fn(self.cpu)
  2138. assert func != 0
  2139. card_marking = descr.jit_wb_cards_set != 0
  2140. else:
  2141. raise AssertionError(opnum)
  2142. #
  2143. loc_base = arglocs[0]
  2144. self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
  2145. imm(descr.jit_wb_if_flag_singlebyte))
  2146. self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
  2147. jz_location = self.mc.get_relative_pos()
  2148. # for cond_call_gc_wb_array, also add another fast path:
  2149. # if GCFLAG_CARDS_SET, then we can just set one bit and be done
  2150. if card_marking:
  2151. self.mc.TEST8(addr_add_const(loc_base,
  2152. descr.jit_wb_cards_set_byteofs),
  2153. imm(descr.jit_wb_cards_set_singlebyte))
  2154. self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
  2155. jnz_location = self.mc.get_relative_pos()
  2156. else:
  2157. jnz_location = 0
  2158. # the following is supposed to be the slow path, so whenever possible
  2159. # we choose the most compact encoding over the most efficient one.
  2160. if IS_X86_32:
  2161. limit = -1 # push all arglocs on the stack
  2162. elif IS_X86_64:
  2163. limit = N - 1 # push only arglocs[N:] on the stack
  2164. for i in range(len(arglocs)-1, limit, -1):
  2165. loc = arglocs[i]
  2166. if isinstance(loc, RegLoc):
  2167. self.mc.PUSH_r(loc.value)
  2168. else:
  2169. assert not IS_X86_64 # there should only be regs in arglocs[N:]
  2170. self.mc.PUSH_i32(loc.getint())
  2171. if IS_X86_64:
  2172. # We clobber these registers to pass the arguments, but that's
  2173. # okay, because consider_cond_call_gc_wb makes sure that any
  2174. # caller-save registers with values in them are present in
  2175. # arglocs[N:] too, so they are saved on the stack above and
  2176. # restored below.
  2177. if N == 2:
  2178. callargs = [edi, esi]
  2179. else:
  2180. callargs = [edi, esi, edx]
  2181. remap_frame_layout(self, arglocs[:N], callargs,
  2182. X86_64_SCRATCH_REG)
  2183. #
  2184. # misaligned stack in the call, but it's ok because the write barrier
  2185. # is not going to call anything more. Also, this assumes that the
  2186. # write barrier does not touch the xmm registers. (Slightly delicate
  2187. # assumption, given that the write barrier can end up calling the
  2188. # platform's malloc() from AddressStack.append(). XXX may need to
  2189. # be done properly)
  2190. self.mc.CALL(imm(func))
  2191. if IS_X86_32:
  2192. self.mc.ADD_ri(esp.value, N*WORD)
  2193. for i in range(N, len(arglocs)):
  2194. loc = arglocs[i]
  2195. assert isinstance(loc, RegLoc)
  2196. self.mc.POP_r(loc.value)
  2197. # if GCFLAG_CARDS_SET, then we can do the whole thing that would
  2198. # be done in the CALL above with just four instructions, so here
  2199. # is an inline copy of them
  2200. if card_marking:
  2201. self.mc.JMP_l8(0) # jump to the exit, patched later
  2202. jmp_location = self.mc.get_relative_pos()
  2203. # patch the JNZ above
  2204. offset = self.mc.get_relative_pos() - jnz_location
  2205. assert 0 < offset <= 127
  2206. self.mc.overwrite(jnz_location-1, chr(offset))
  2207. #
  2208. loc_index = arglocs[1]
  2209. if isinstance(loc_index, RegLoc):
  2210. # reuse loc_index itself as a scratch register, saving its value on the stack
  2211. tmp1 = loc_index
  2212. self.mc.PUSH_r(tmp1.value)
  2213. # SHR tmp, card_page_shift
  2214. self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
  2215. # XOR tmp, -8
  2216. self.mc.XOR_ri(tmp1.value, -8)
  2217. # BTS [loc_base], tmp
  2218. self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
  2219. # done
  2220. self.mc.POP_r(tmp1.value)
  2221. elif isinstance(loc_index, ImmedLoc):
  2222. byte_index = loc_index.value >> descr.jit_wb_card_page_shift
  2223. byte_ofs = ~(byte_index >> 3)
  2224. byte_val = 1 << (byte_index & 7)
  2225. self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
  2226. else:
  2227. raise AssertionError("index is neither RegLoc nor ImmedLoc")
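# Added note on the XOR -8 trick above: with i = index >> card_page_shift,
# the byte to set lives at signed offset ~(i >> 3) from loc_base and the
# bit within it is i & 7, giving a combined signed bit offset of
# 8 * ~(i >> 3) + (i & 7) == i ^ -8, which BTS with a register operand
# accepts directly.  The ImmedLoc branch computes the same
# byte_ofs/byte_val pair at compile time.  A quick check of the
# identity (illustrative):
#
#     for i in range(256):
#         assert 8 * ~(i >> 3) + (i & 7) == i ^ -8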
  2228. # patch the JMP above
  2229. offset = self.mc.get_relative_pos() - jmp_location
  2230. assert 0 < offset <= 127
  2231. self.mc.overwrite(jmp_location-1, chr(offset))
  2232. #
  2233. # patch the JZ above
  2234. offset = self.mc.get_relative_pos() - jz_location
  2235. assert 0 < offset <= 127
  2236. self.mc.overwrite(jz_location-1, chr(offset))
  2237. genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
  2238. def not_implemented_op_discard(self, op, arglocs):
  2239. not_implemented("not implemented operation: %s" % op.getopname())
  2240. def not_implemented_op(self, op, arglocs, resloc):
  2241. not_implemented("not implemented operation with res: %s" %
  2242. op.getopname())
  2243. def not_implemented_op_guard(self, op, guard_op,
  2244. failaddr, arglocs, resloc):
  2245. not_implemented("not implemented operation (guard): %s" %
  2246. op.getopname())
  2247. def mark_gc_roots(self, force_index, use_copy_area=False):
  2248. if force_index < 0:
  2249. return # not needed
  2250. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2251. if gcrootmap:
  2252. mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
  2253. if gcrootmap.is_shadow_stack:
  2254. gcrootmap.write_callshape(mark, force_index)
  2255. else:
  2256. if self.gcrootmap_retaddr_forced == 0:
  2257. self.mc.insert_gcroot_marker(mark) # common case
  2258. else:
  2259. assert self.gcrootmap_retaddr_forced != -1, (
  2260. "two mark_gc_roots() in a CALL_RELEASE_GIL")
  2261. gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
  2262. self.gcrootmap_retaddr_forced = -1
  2263. def closing_jump(self, target_token):
  2264. # The backend's logic assumes that the target code is in a piece of
  2265. # assembler that was also called with the same number of arguments,
  2266. # so that the locations [ebp+8..] of the input arguments are valid
  2267. # stack locations both before and after the jump.
  2268. my_nbargs = self.current_clt._debug_nbargs
  2269. target_nbargs = target_token._x86_clt._debug_nbargs
  2270. assert my_nbargs == target_nbargs
  2271. #
  2272. target = target_token._x86_loop_code
  2273. if target_token in self.target_tokens_currently_compiling:
  2274. curpos = self.mc.get_relative_pos() + 5
  2275. self.mc.JMP_l(target - curpos)
  2276. else:
  2277. self.mc.JMP(imm(target))
  2278. def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
  2279. assert size & (WORD-1) == 0 # must be correctly aligned
  2280. self.mc.MOV(eax, heap(nursery_free_adr))
  2281. self.mc.LEA_rm(edx.value, (eax.value, size))
  2282. self.mc.CMP(edx, heap(nursery_top_adr))
  2283. self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
  2284. jmp_adr = self.mc.get_relative_pos()
  2285. # See comments in _build_malloc_slowpath for the
  2286. # details of the two helper functions that we are calling below.
  2287. # First, we need to call two of them and not just one because we
  2288. # need to have a mark_gc_roots() in between. Then the calling
  2289. # convention of slowpath_addr{1,2} are tweaked a lot to allow
  2290. # the code here to be just two CALLs: slowpath_addr1 gets the
  2291. # size of the object to allocate from (EDX-EAX) and returns the
  2292. # result in EAX; slowpath_addr2 additionally returns in EDX a
  2293. # copy of heap(nursery_free_adr), so that the final MOV below is
  2294. # a no-op.
  2295. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2296. shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
  2297. if not shadow_stack:
  2298. # there are two helpers to call only with asmgcc
  2299. slowpath_addr1 = self.malloc_slowpath1
  2300. self.mc.CALL(imm(slowpath_addr1))
  2301. self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
  2302. slowpath_addr2 = self.malloc_slowpath2
  2303. self.mc.CALL(imm(slowpath_addr2))
  2304. # reserve room for the argument to the real malloc and the
  2305. # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
  2306. # word)
  2307. self._regalloc.needed_extra_stack_locations(1+16)
  2308. offset = self.mc.get_relative_pos() - jmp_adr
  2309. assert 0 < offset <= 127
  2310. self.mc.overwrite(jmp_adr-1, chr(offset))
  2311. self.mc.MOV(heap(nursery_free_adr), edx)
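# Added sketch of the fast path above (illustrative Python, assuming a
# plain bump-pointer nursery):
#
#     def malloc_cond_fastpath(nursery_free, nursery_top, size):
#         result = nursery_free           # MOV eax, [nursery_free_adr]
#         new_free = result + size        # LEA edx, [eax + size]
#         if new_free > nursery_top:      # CMP edx, [top]; fall into
#             raise NotImplementedError("slow path: call the GC helpers")
#         return result, new_free         # MOV [nursery_free_adr], edx
#
# Because the slow path also returns the result in EAX and the new free
# pointer in EDX, the final MOV is correct in either case.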
  2312. genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
  2313. genop_list = [Assembler386.not_implemented_op] * rop._LAST
  2314. genop_llong_list = {}
  2315. genop_math_list = {}
  2316. genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST
  2317. for name, value in Assembler386.__dict__.iteritems():
  2318. if name.startswith('genop_discard_'):
  2319. opname = name[len('genop_discard_'):]
  2320. num = getattr(rop, opname.upper())
  2321. genop_discard_list[num] = value
  2322. elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
  2323. opname = name[len('genop_guard_'):]
  2324. num = getattr(rop, opname.upper())
  2325. genop_guard_list[num] = value
  2326. elif name.startswith('genop_llong_'):
  2327. opname = name[len('genop_llong_'):]
  2328. num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
  2329. genop_llong_list[num] = value
  2330. elif name.startswith('genop_math_'):
  2331. opname = name[len('genop_math_'):]
  2332. num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
  2333. genop_math_list[num] = value
  2334. elif name.startswith('genop_'):
  2335. opname = name[len('genop_'):]
  2336. num = getattr(rop, opname.upper())
  2337. genop_list[num] = value
  2338. # XXX: ri386 migration shims:
  2339. def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
  2340. return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
  2341. def addr_add_const(reg_or_imm1, offset):
  2342. return AddressLoc(reg_or_imm1, ImmedLoc(0), 0, offset)
  2343. def mem(loc, offset):
  2344. return AddressLoc(loc, ImmedLoc(0), 0, offset)
  2345. def heap(addr):
  2346. return AddressLoc(ImmedLoc(addr), ImmedLoc(0), 0, 0)
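# Added note: the shims above all build an AddressLoc computing
# [reg_or_imm1 + (reg_or_imm2 << scale) + offset]; e.g. mem(loc, ofs)
# denotes [loc + ofs] and heap(addr) the absolute location [addr].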
  2347. def not_implemented(msg):
  2348. os.write(2, '[x86/asm] %s\n' % msg)
  2349. raise NotImplementedError(msg)
  2350. class BridgeAlreadyCompiled(Exception):
  2351. pass