
/pypy/jit/backend/x86/assembler.py

https://bitbucket.org/quangquach/pypy

import sys, os
from pypy.jit.backend.llsupport import symbolic
from pypy.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from pypy.jit.metainterp.history import Const, Box, BoxInt, ConstInt
from pypy.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from pypy.jit.metainterp.history import JitCellToken
from pypy.rpython.lltypesystem import lltype, rffi, rstr, llmemory
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.jit import AsmInfo
from pypy.jit.backend.model import CompiledLoopToken
from pypy.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
    gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
from pypy.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
                                       IS_X86_32, IS_X86_64)
from pypy.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
                                         esp, ebp, esi, edi,
                                         xmm0, xmm1, xmm2, xmm3,
                                         xmm4, xmm5, xmm6, xmm7,
                                         r8, r9, r10, r11,
                                         r12, r13, r14, r15,
                                         X86_64_SCRATCH_REG,
                                         X86_64_XMM_SCRATCH_REG,
                                         RegLoc, StackLoc, ConstFloatLoc,
                                         ImmedLoc, AddressLoc, imm,
                                         imm0, imm1, FloatImmedLoc)
from pypy.rlib.objectmodel import we_are_translated, specialize
from pypy.jit.backend.x86 import rx86, regloc, codebuf
from pypy.jit.metainterp.resoperation import rop, ResOperation
from pypy.jit.backend.x86.support import values_array
from pypy.jit.backend.x86 import support
from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
                             have_debug_prints)
from pypy.rlib import rgc
from pypy.rlib.clibffi import FFI_DEFAULT_ABI
from pypy.jit.backend.x86.jump import remap_frame_layout
from pypy.jit.codewriter.effectinfo import EffectInfo
from pypy.jit.codewriter import longlong
from pypy.rlib.rarithmetic import intmask
from pypy.rlib.objectmodel import compute_unique_id

# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
CALL_ALIGN = 16 // WORD


def align_stack_words(words):
    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
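
# Worked example (illustrative comment, not part of the original source):
# with WORD == 4, CALL_ALIGN == 16 // 4 == 4, so align_stack_words(5) ==
# (5 + 3) & ~3 == 8 words, i.e. 32 bytes; rounding every frame up to a
# multiple of CALL_ALIGN words is what keeps ESP 16-byte aligned at CALLs.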

class GuardToken(object):
    def __init__(self, faildescr, failargs, fail_locs, exc,
                 is_guard_not_invalidated):
        self.faildescr = faildescr
        self.failargs = failargs
        self.fail_locs = fail_locs
        self.exc = exc
        self.is_guard_not_invalidated = is_guard_not_invalidated

DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char), # 'b'ridge, 'l'abel or
                                                     # 'e'ntry point
                              ('number', lltype.Signed))
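
# Added cross-reference: one DEBUG_COUNTER is allocated per loop entry,
# label or bridge by _register_counter() below; the generated code bumps
# the 'i' field through the GETFIELD_RAW/INT_ADD/SETFIELD_RAW triple built
# in _append_debugging_code(), and finish_once() prints the totals.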

class Assembler386(object):
    _regalloc = None
    _output_loop_log = None

    def __init__(self, cpu, translate_support_code=False,
                 failargs_limit=1000):
        self.cpu = cpu
        self.verbose = False
        self.rtyper = cpu.rtyper
        self.fail_boxes_int = values_array(lltype.Signed, failargs_limit)
        self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
        self.fail_boxes_float = values_array(longlong.FLOATSTORAGE,
                                             failargs_limit)
        self.fail_ebp = 0
        self.loop_run_counters = []
        self.float_const_neg_addr = 0
        self.float_const_abs_addr = 0
        self.malloc_slowpath1 = 0
        self.malloc_slowpath2 = 0
        self.wb_slowpath = [0, 0, 0, 0]
        self.memcpy_addr = 0
        self.setup_failure_recovery()
        self._debug = False
        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
        self.fail_boxes_count = 0
        self.datablockwrapper = None
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        self.gcrootmap_retaddr_forced = 0
        self.teardown()

    def leave_jitted_hook(self):
        ptrs = self.fail_boxes_ptr.ar
        llop.gc_assume_young_pointers(lltype.Void,
                                      llmemory.cast_ptr_to_adr(ptrs))

    def set_debug(self, v):
        r = self._debug
        self._debug = v
        return r

    def setup_once(self):
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
        self._build_failure_recovery(False)
        self._build_failure_recovery(True)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
            support.ensure_sse2_floats()
            self._build_float_constants()
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            self._build_malloc_slowpath()
        self._build_stack_check_slowpath()
        if gc_ll_descr.gcrootmap:
            self._build_release_gil(gc_ll_descr.gcrootmap)
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            debug_start('jit-backend-counts')
            self.set_debug(have_debug_prints())
            debug_stop('jit-backend-counts')

    def setup(self, looptoken):
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = []
            self.error_trampoline_64 = 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}

    def teardown(self):
        self.pending_guard_tokens = None
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = None
        self.mc = None
        self.current_clt = None

    def finish_once(self):
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                elif struct.type == 'b':
                    prefix = 'bridge ' + str(struct.number)
                else:
                    prefix = 'entry ' + str(struct.number)
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    def _build_float_constants(self):
        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
        float_constants = datablockwrapper.malloc_aligned(32, alignment=16)
        datablockwrapper.done()
        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
        qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
        # 0x8000000000000000
        neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
        # 0x7FFFFFFFFFFFFFFF
        abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
        data = neg_const + qword_padding + abs_const + qword_padding
        for i in range(len(data)):
            addr[i] = data[i]
        self.float_const_neg_addr = float_constants
        self.float_const_abs_addr = float_constants + 16
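
    # Added note: the two 16-byte constants built above are the classic
    # SSE2 sign-manipulation masks.  genop_float_neg() below XORPDs a
    # double with 0x8000000000000000 to flip its sign bit, and
    # genop_float_abs() ANDPDs with 0x7FFFFFFFFFFFFFFF to clear it; the
    # qword of zero padding makes each mask a full 16-byte XMM operand.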

    def _build_malloc_slowpath(self):
        # With asmgcc, we need two helpers, so that we can write two CALL
        # instructions in assembler, with a mark_gc_roots in between.
        # With shadowstack, this is not needed, so we produce a single helper.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
        #
        # ---------- first helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        if self.cpu.supports_floats:          # save the XMM registers in
            for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
                mc.MOVSD_sx((WORD*2)+8*i, i)
        mc.SUB_rr(edx.value, eax.value)       # compute the size we want
        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        #
        # The registers to save in the copy area: with shadowstack, most
        # registers need to be saved.  With asmgcc, the callee-saved registers
        # don't need to.
        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
        if not shadow_stack:
            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
                   if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_br(ofs, reg.value)
        #
        if shadow_stack:
            # ---- shadowstack ----
            mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
            if IS_X86_32:
                mc.MOV_sr(0, edx.value)          # push argument
            elif IS_X86_64:
                mc.MOV_rr(edi.value, edx.value)
            mc.CALL(imm(addr))
            mc.ADD_ri(esp.value, 16 - WORD)
        else:
            # ---- asmgcc ----
            if IS_X86_32:
                mc.MOV_sr(WORD, edx.value)       # save it as the new argument
            elif IS_X86_64:
                # rdi can be clobbered: its content was saved in the
                # copy area of the stack
                mc.MOV_rr(edi.value, edx.value)
            mc.JMP(imm(addr))                    # tail call to the real malloc
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath1 = rawstart
        # ---------- second helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_rb(reg.value, ofs)
            assert reg is not eax and reg is not edx
        #
        if self.cpu.supports_floats:          # restore the XMM registers
            for i in range(self.cpu.NUM_REGS):# from where they were saved
                mc.MOVSD_xs(i, (WORD*2)+8*i)
        #
        # Note: we check this after the code above, just because the code
        # above is more than 127 bytes on 64-bits...
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['Z'], 0) # patched later
        jz_location = mc.get_relative_pos()
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
        mc.RET()
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.  Also note that we didn't RET from this helper;
        # but the code we jump to will actually restore the stack
        # position based on EBP, which will get us out of here for free.
        offset = mc.get_relative_pos() - jz_location
        assert 0 < offset <= 127
        mc.overwrite(jz_location-1, chr(offset))
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath2 = rawstart
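
    # Added note on the rel8 patching idiom used above and elsewhere:
    # J_il8(cond, 0) emits a 2-byte short jump with a placeholder
    # displacement; get_relative_pos() taken right after it marks the end
    # of that instruction, and overwrite(pos-1, chr(offset)) later fills
    # in the 8-bit displacement, which the CPU measures from the end of
    # the jump -- hence the 'assert 0 < offset <= 127' range check.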

    def _build_propagate_exception_path(self):
        if self.cpu.propagate_exception_v < 0:
            return      # not supported (for tests, or non-translated)
        #
        self.mc = codebuf.MachineCodeBlockWrapper()
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True,
                                                default_to_memoryerror=True)
        self.mc.CALL(imm(addr))
        self.mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
        self.propagate_exception_path = rawstart
        self.mc = None

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  ...                |    <-- aligned to a multiple of 16
        #    |  retaddr of caller  |
        #    |  my own retaddr     |    <-- esp
        #    +---------------------+
        #
        mc = codebuf.MachineCodeBlockWrapper()
        #
        stack_size = WORD
        if IS_X86_64:
            # on the x86_64, we have to save all the registers that may
            # have been used to pass arguments
            stack_size += 6*WORD + 8*8
            for reg in [edi, esi, edx, ecx, r8, r9]:
                mc.PUSH_r(reg.value)
            mc.SUB_ri(esp.value, 8*8)
            for i in range(8):
                mc.MOVSD_sx(8*i, i)     # xmm0 to xmm7
        #
        if IS_X86_32:
            stack_size += 2*WORD
            mc.PUSH_r(eax.value)        # alignment
            mc.PUSH_r(esp.value)
        elif IS_X86_64:
            mc.MOV_rr(edi.value, esp.value)
        #
        # esp is now aligned to a multiple of 16 again
        mc.CALL(imm(slowpathaddr))
        #
        mc.MOV(eax, heap(self.cpu.pos_exception()))
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['NZ'], 0)
        jnz_location = mc.get_relative_pos()
        #
        if IS_X86_32:
            mc.ADD_ri(esp.value, 2*WORD)    # cancel the two PUSHes above
        elif IS_X86_64:
            # restore the registers
            for i in range(7, -1, -1):
                mc.MOVSD_xs(i, 8*i)
            mc.ADD_ri(esp.value, 8*8)
            for reg in [r9, r8, ecx, edx, esi, edi]:
                mc.POP_r(reg.value)
        #
        mc.RET()
        #
        # patch the JNZ above
        offset = mc.get_relative_pos() - jnz_location
        assert 0 < offset <= 127
        mc.overwrite(jnz_location-1, chr(offset))
        # call on_leave_jitted_save_exc()
        addr = self.cpu.get_on_leave_jitted_int(save_exception=True)
        mc.CALL(imm(addr))
        #
        mc.MOV_ri(eax.value, self.cpu.propagate_exception_v)
        #
        # footer -- note the ADD, which skips the return address of this
        # function, and will instead return to the caller's caller.  Note
        # also that we completely ignore the saved arguments, because we
        # are interrupting the function.
        mc.ADD_ri(esp.value, stack_size)
        mc.RET()
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.stack_check_slowpath = rawstart
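
    # Added note: on x86-64 the registers saved above -- edi, esi, edx,
    # ecx, r8, r9 plus xmm0-xmm7 -- are exactly the argument registers of
    # the SysV AMD64 calling convention, so calling the slowpath cannot
    # clobber arguments that the surrounding function was about to use.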

    def _build_wb_slowpath(self, withcards, withfloats=False):
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all XMM registers.  It takes a single argument just pushed
        # on the stack even on X86_64.  It must restore stack alignment
        # accordingly.
        mc = codebuf.MachineCodeBlockWrapper()
        #
        frame_size = (1 +     # my argument, considered part of my frame
                      1 +     # my return address
                      len(gpr_reg_mgr_cls.save_around_call_regs))
        if withfloats:
            frame_size += 16     # X86_32: 16 words for 8 registers;
                                 # X86_64: just 16 registers
        if IS_X86_32:
            frame_size += 1      # argument to pass to the call
        #
        # align to a multiple of 16 bytes
        frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1)
        #
        correct_esp_by = (frame_size - 2) * WORD
        mc.SUB_ri(esp.value, correct_esp_by)
        #
        ofs = correct_esp_by
        if withfloats:
            for reg in xmm_reg_mgr_cls.save_around_call_regs:
                ofs -= 8
                mc.MOVSD_sx(ofs, reg.value)
        for reg in gpr_reg_mgr_cls.save_around_call_regs:
            ofs -= WORD
            mc.MOV_sr(ofs, reg.value)
        #
        if IS_X86_32:
            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
            mc.MOV_sr(0, eax.value)
        elif IS_X86_64:
            mc.MOV_rs(edi.value, (frame_size - 1) * WORD)
        mc.CALL(imm(func))
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
            mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
                     imm(-0x80))
        #
        ofs = correct_esp_by
        if withfloats:
            for reg in xmm_reg_mgr_cls.save_around_call_regs:
                ofs -= 8
                mc.MOVSD_xs(reg.value, ofs)
        for reg in gpr_reg_mgr_cls.save_around_call_regs:
            ofs -= WORD
            mc.MOV_rs(reg.value, ofs)
        #
        # ADD esp, correct_esp_by --- but cannot use ADD, because
        # of its effects on the CPU flags
        mc.LEA_rs(esp.value, correct_esp_by)
        mc.RET16_i(WORD)
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
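
    # Added note: the index 'withcards + 2 * withfloats' maps the four
    # generated variants into self.wb_slowpath, i.e. [0]=plain, [1]=cards,
    # [2]=floats, [3]=cards+floats; setup_once() only builds the two
    # float variants when the CPU supports floats.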

    @staticmethod
    @rgc.no_collect
    def _release_gil_asmgcc(css):
        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
        from pypy.rpython.memory.gctransform import asmgcroot
        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        next = asmgcroot.gcrootanchor.next
        new.next = next
        new.prev = asmgcroot.gcrootanchor
        asmgcroot.gcrootanchor.next = new
        next.prev = new
        # and now release the GIL
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css):
        # first reacquire the GIL
        after = rffi.aroundstate.after
        if after:
            after()
        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
        from pypy.rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _release_gil_shadowstack():
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        after = rffi.aroundstate.after
        if after:
            after()

    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
                                                  lltype.Void))

    def _build_release_gil(self, gcrootmap):
        if gcrootmap.is_shadow_stack:
            releasegil_func = llhelper(self._NOARG_FUNC,
                                       self._release_gil_shadowstack)
            reacqgil_func = llhelper(self._NOARG_FUNC,
                                     self._reacquire_gil_shadowstack)
        else:
            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
                                       self._release_gil_asmgcc)
            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
                                     self._reacquire_gil_asmgcc)
        self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func)
        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        '''adds the following attributes to looptoken:
               _x86_function_addr   (address of the generated func, as an int)
               _x86_loop_code       (debug: addr of the start of the ResOps)
               _x86_fullsize        (debug: full size including failure)
               _x86_debug_checksum
        '''
        # XXX this function is too long and contains some code
        # duplication with assemble_bridge().  Also, we should think
        # about not storing on 'self' attributes that will live only
        # for the duration of compiling one loop or one bridge.
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        self.setup(looptoken)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        #
        self._call_header_with_stack_check()
        stackadjustpos = self._patchable_stackadjust()
        clt._debug_nbargs = len(inputargs)
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, clt.allgcrefs)
        looppos = self.mc.get_relative_pos()
        looptoken._x86_loop_code = looppos
        clt.frame_depth = -1     # temporarily
        frame_depth = self._assemble(regalloc, operations)
        clt.frame_depth = frame_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(looptoken)
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + looppos,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._x86_rawstart = rawstart
            looptoken._x86_fullsize = full_size
            looptoken._x86_ops_offset = ops_offset
        looptoken._x86_function_addr = rawstart
        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Loop # %s: %s" % (looptoken.number, loopname)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos)

    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        descr_number = self.cpu.get_fail_descr_number(faildescr)
        failure_recovery = self._find_failure_recovery_bytecode(faildescr)
        self.setup(original_loop_token)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)
        arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
        if not we_are_translated():
            assert ([loc.assembler() for loc in arglocs] ==
                    [loc.assembler() for loc in faildescr._x86_debug_faillocs])
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        startpos = self.mc.get_relative_pos()
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             self.current_clt.allgcrefs)
        stackadjustpos = self._patchable_stackadjust()
        frame_depth = self._assemble(regalloc, operations)
        codeendpos = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(original_loop_token)
        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        if not we_are_translated():
            # for the benefit of tests
            faildescr._x86_bridge_frame_depth = frame_depth
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart)
        ops_offset = self.mc.ops_offset
        self.fixup_target_tokens(rawstart)
        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
                                           frame_depth)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Bridge # %s" % (descr_number,)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, fullsize)
        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def write_pending_failure_recoveries(self):
        # for each pending guard, generate the code of the recovery stub
        # at the end of self.mc.
        for tok in self.pending_guard_tokens:
            tok.pos_recovery_stub = self.generate_quick_failure(tok)
        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
            self.error_trampoline_64 = self.generate_propagate_error_64()

    def patch_pending_failure_recoveries(self, rawstart):
        # after we wrote the assembler to raw memory, set up
        # tok.faildescr._x86_adr_jump_offset to contain the raw address of
        # the 4-byte target field in the JMP/Jcond instruction, and patch
        # the field in question to point (initially) to the recovery stub
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            addr = rawstart + tok.pos_jump_offset
            tok.faildescr._x86_adr_jump_offset = addr
            relative_target = tok.pos_recovery_stub - (tok.pos_jump_offset + 4)
            assert rx86.fits_in_32bits(relative_target)
            #
            if not tok.is_guard_not_invalidated:
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(relative_target)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                #     (addr-in-the-code-of-the-not-yet-written-jump-target,
                #      relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
                # General idea: Although no code was generated by this
                # guard, the code might be patched with a "JMP rel32" to
                # the guard recovery code.  This recovery code is
                # already generated, and looks like the recovery code
                # for any guard, even if at first it has no jump to it.
                # So we may later write 5 bytes overriding the existing
                # instructions; this works because a CALL instruction
                # would also take at least 5 bytes.  If it could take
                # less, we would run into the issue that overwriting the
                # 5 bytes here might get a few nonsense bytes at the
                # return address of the following CALL.
        if WORD == 8:
            for pos_after_jz in self.pending_memoryerror_trampoline_from:
                assert self.error_trampoline_64 != 0     # only if non-empty
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)

    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)

    def _register_counter(self, tp, number, token):
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def _find_failure_recovery_bytecode(self, faildescr):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        if adr_jump_offset == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # follow the JMP/Jcond
        p = rffi.cast(rffi.INTP, adr_jump_offset)
        adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
        # skip the CALL
        if WORD == 4:
            adr_target += 5     # CALL imm
        else:
            adr_target += 13    # MOV r11, imm-as-8-bytes; CALL *r11 xxxxxxxxxx
        return adr_target
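
    # Added note: the 5/13 skip sizes match the instruction encodings:
    # on 32-bit a 'CALL imm32' is 5 bytes (E8 + rel32); on 64-bit the pair
    # 'MOV r11, imm64' (49 BB + 8 bytes) plus 'CALL *r11' (41 FF D3) is
    # 10 + 3 = 13 bytes.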

    def patch_jump_for_descr(self, faildescr, adr_new_target):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        assert adr_jump_offset != 0
        offset = adr_new_target - (adr_jump_offset + 4)
        # If the new target fits within a rel32 of the jump, just patch
        # that.  Otherwise, leave the original rel32 to the recovery stub in
        # place, but clobber the recovery stub with a jump to the real
        # target.
        mc = codebuf.MachineCodeBlockWrapper()
        if rx86.fits_in_32bits(offset):
            mc.writeimm32(offset)
            mc.copy_to_raw_memory(adr_jump_offset)
        else:
            # "mov r11, addr; jmp r11" is up to 13 bytes, which fits in there
            # because we always write "mov r11, imm-as-8-bytes; call *r11" in
            # the first place.
            mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
            mc.JMP_r(X86_64_SCRATCH_REG.value)
            p = rffi.cast(rffi.INTP, adr_jump_offset)
            adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
            mc.copy_to_raw_memory(adr_target)
        faildescr._x86_adr_jump_offset = 0    # means "patched"

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._x86_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        box = BoxInt()
        box2 = BoxInt()
        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
                            box, descr=self.debug_counter_descr),
               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                            None, descr=self.debug_counter_descr)]
        operations.extend(ops)

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug:
            s = 0
            for op in operations:
                s += op.getopnum()
            looptoken._x86_debug_checksum = s
            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _assemble(self, regalloc, operations):
        self._regalloc = regalloc
        regalloc.compute_hint_frame_locations(operations)
        regalloc.walk_operations(operations)
        if we_are_translated() or self.cpu.dont_keepalive_stuff:
            self._regalloc = None   # else keep it around for debugging
        frame_depth = regalloc.get_final_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            target_frame_depth = jump_target_descr._x86_clt.frame_depth
            frame_depth = max(frame_depth, target_frame_depth)
        return frame_depth

    def _patchable_stackadjust(self):
        # stack adjustment LEA
        self.mc.LEA32_rb(esp.value, 0)
        return self.mc.get_relative_pos() - 4

    def _patch_stackadjust(self, adr_lea, allocated_depth):
        # patch stack adjustment LEA
        mc = codebuf.MachineCodeBlockWrapper()
        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words]
        mc.writeimm32(self._get_offset_of_ebp_from_esp(allocated_depth))
        mc.copy_to_raw_memory(adr_lea)

    def _get_offset_of_ebp_from_esp(self, allocated_depth):
        # Given that [EBP] is where we saved EBP, i.e. in the last word
        # of our fixed frame, then the 'words' value is:
        words = (FRAME_FIXED_SIZE - 1) + allocated_depth
        # align, e.g. for Mac OS X
        aligned_words = align_stack_words(words+2)-2 # 2 = EIP+EBP
        return -WORD * aligned_words
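
    # Worked example (illustrative; assumes WORD == 4 and, hypothetically,
    # FRAME_FIXED_SIZE == 5): for allocated_depth == 10, words == 4 + 10
    # == 14, align_stack_words(16) == 16, so aligned_words == 14 and the
    # patched instruction becomes 'LEA ESP, [EBP-56]'.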

    def _call_header(self):
        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
        self.mc.PUSH_r(ebp.value)
        self.mc.MOV_rr(ebp.value, esp.value)
        for loc in self.cpu.CALLEE_SAVE_REGISTERS:
            self.mc.PUSH_r(loc.value)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)

    def _call_header_with_stack_check(self):
        if self.stack_check_slowpath == 0:
            pass                      # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.MOV(eax, heap(endaddr))              # MOV eax, [start]
            self.mc.SUB(eax, esp)                        # SUB eax, current
            self.mc.CMP(eax, heap(lengthaddr))           # CMP eax, [length]
            self.mc.J_il8(rx86.Conditions['BE'], 0)      # JBE .skip
            jb_location = self.mc.get_relative_pos()
            self.mc.CALL(imm(self.stack_check_slowpath)) # CALL slowpath
            # patch the JB above                         # .skip:
            offset = self.mc.get_relative_pos() - jb_location
            assert 0 < offset <= 127
            self.mc.overwrite(jb_location-1, chr(offset))
            #
        self._call_header()

    def _call_footer(self):
        self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)
        for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
            self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)
        self.mc.POP_r(ebp.value)
        self.mc.RET()

    def _call_header_shadowstack(self, gcrootmap):
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (ebp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
        else:
            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
        self.mc.MOV_mi((eax.value, WORD), MARKER)       # MOV [eax+WORD], MARKER
        self.mc.MOV_mr((eax.value, 0), ebp.value)       # MOV [eax], ebp
        #
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
        else:
            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx
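
    # Added sketch of the two shadowstack words written above:
    #     [eax]          <- ebp          (the frame address)
    #     [eax + WORD]   <- MARKER_FRAME
    #     rootstacktop   <- ebx == eax + 2*WORD   (the new top)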

    def _call_footer_shadowstack(self, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.SUB_ji8(rst, 2*WORD)             # SUB [rootstacktop], 2*WORD
        else:
            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD

    def redirect_call_assembler(self, oldlooptoken, newlooptoken):
        # some minimal sanity checking
        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
        assert old_nbargs == new_nbargs
        # we overwrite the instructions at the old _x86_direct_bootstrap_code
        # to start with a JMP to the new _x86_direct_bootstrap_code.
        # Ideally we should rather patch all existing CALLs, but well.
        oldadr = oldlooptoken._x86_function_addr
        target = newlooptoken._x86_function_addr
        mc = codebuf.MachineCodeBlockWrapper()
        mc.JMP(imm(target))
        if WORD == 4:         # keep in sync with prepare_loop()
            assert mc.get_relative_pos() == 5
        else:
            assert mc.get_relative_pos() <= 13
        mc.copy_to_raw_memory(oldadr)

    def dump(self, text):
        if not self.verbose:
            return
        _prev = Box._extended_display
        try:
            Box._extended_display = False
            pos = self.mc.get_relative_pos()
            print >> sys.stderr, ' 0x%x  %s' % (pos, text)
        finally:
            Box._extended_display = _prev

    # ------------------------------------------------------------

    def mov(self, from_loc, to_loc):
        if ((isinstance(from_loc, RegLoc) and from_loc.is_xmm) or
            (isinstance(to_loc, RegLoc) and to_loc.is_xmm)):
            self.mc.MOVSD(to_loc, from_loc)
        else:
            assert to_loc is not ebp
            self.mc.MOV(to_loc, from_loc)

    regalloc_mov = mov # legacy interface

    def regalloc_push(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
            self.mc.MOVSD_sx(0, loc.value)
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.PUSH_b(loc.value + 4)
            self.mc.PUSH_b(loc.value)
        else:
            self.mc.PUSH(loc)

    def regalloc_pop(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.MOVSD_xs(loc.value, 0)
            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.POP_b(loc.value)
            self.mc.POP_b(loc.value + 4)
        else:
            self.mc.POP(loc)

    def regalloc_immedmem2mem(self, from_loc, to_loc):
        # move a ConstFloatLoc directly to a StackLoc, as two MOVs
        # (even on x86-64, because the immediates are encoded as 32 bits)
        assert isinstance(from_loc, ConstFloatLoc)
        assert isinstance(to_loc, StackLoc)
        low_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[0]
        high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
        low_part = intmask(low_part)
        high_part = intmask(high_part)
        self.mc.MOV32_bi(to_loc.value, low_part)
        self.mc.MOV32_bi(to_loc.value + 4, high_part)
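
    # Added note: two 32-bit stores are required even on x86-64 because
    # there is no MOV encoding that takes both a memory operand and a
    # 64-bit immediate; MOV m64, imm32 would sign-extend the value instead.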

    def regalloc_perform(self, op, arglocs, resloc):
        genop_list[op.getopnum()](self, op, arglocs, resloc)

    def regalloc_perform_discard(self, op, arglocs):
        genop_discard_list[op.getopnum()](self, op, arglocs)

    def regalloc_perform_llong(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_llong_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_math(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_math_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_with_guard(self, op, guard_op, faillocs,
                                    arglocs, resloc):
        faildescr = guard_op.getdescr()
        assert isinstance(faildescr, AbstractFailDescr)
        failargs = guard_op.getfailargs()
        guard_opnum = guard_op.getopnum()
        guard_token = self.implement_guard_recovery(guard_opnum,
                                                    faildescr, failargs,
                                                    faillocs)
        if op is None:
            dispatch_opnum = guard_opnum
        else:
            dispatch_opnum = op.getopnum()
        genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
                                         arglocs, resloc)
        if not we_are_translated():
            # must be added by the genop_guard_list[]()
            assert guard_token is self.pending_guard_tokens[-1]

    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
        self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
                                         resloc)

    def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
        self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))

    def _unaryop(asmop):
        def genop_unary(self, op, arglocs, resloc):
            getattr(self.mc, asmop)(arglocs[0])
        return genop_unary

    def _binaryop(asmop, can_swap=False):
        def genop_binary(self, op, arglocs, result_loc):
            getattr(self.mc, asmop)(arglocs[0], arglocs[1])
        return genop_binary

    def _binaryop_or_lea(asmop, is_add):
        def genop_binary_or_lea(self, op, arglocs, result_loc):
            # use a regular ADD or SUB if result_loc is arglocs[0],
            # and a LEA only if different.
            if result_loc is arglocs[0]:
                getattr(self.mc, asmop)(arglocs[0], arglocs[1])
            else:
                loc = arglocs[0]
                argloc = arglocs[1]
                assert isinstance(loc, RegLoc)
                assert isinstance(argloc, ImmedLoc)
                assert isinstance(result_loc, RegLoc)
                delta = argloc.value
                if not is_add:    # subtraction
                    delta = -delta
                self.mc.LEA_rm(result_loc.value, (loc.value, delta))
        return genop_binary_or_lea

    def _cmpop(cond, rev_cond):
        def genop_cmp(self, op, arglocs, result_loc):
            rl = result_loc.lowest8bits()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                self.mc.SET_ir(rx86.Conditions[cond], rl.value)
            self.mc.MOVZX8_rr(result_loc.value, rl.value)
        return genop_cmp

    def _cmpop_float(cond, rev_cond, is_ne=False):
        def genop_cmp(self, op, arglocs, result_loc):
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond
            tmp1 = result_loc.lowest8bits()
            if IS_X86_32:
                tmp2 = result_loc.higher8bits()
            elif IS_X86_64:
                tmp2 = X86_64_SCRATCH_REG.lowest8bits()
            self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value)
            if is_ne:
                self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
                self.mc.OR8_rr(tmp1.value, tmp2.value)
            else:
                self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
                self.mc.AND8_rr(tmp1.value, tmp2.value)
            self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
        return genop_cmp

    def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
        def genop_cmp_guard(self, op, guard_op, guard_token, arglocs,
                            result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, rev_cond)
                else:
                    self.implement_guard(guard_token, false_rev_cond)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, cond)
                else:
                    self.implement_guard(guard_token, false_cond)
        return genop_cmp_guard

    def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond):
        need_direct_jp = 'A' not in cond
        need_rev_jp = 'A' not in rev_cond
        def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
                                  result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
                checkfalsecond = false_cond
                need_jp = need_direct_jp
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond
                checkfalsecond = false_rev_cond
                need_jp = need_rev_jp
            if guard_opnum == rop.GUARD_FALSE:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 6)
                self.implement_guard(guard_token, checkcond)
            else:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 2)
                    self.mc.J_il8(rx86.Conditions[checkcond], 5)
                    self.implement_guard(guard_token)
                else:
                    self.implement_guard(guard_token, checkfalsecond)
        return genop_cmp_guard_float
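
    # Added note: UCOMISD sets the parity flag on an unordered result
    # (NaN), which the 'A'/'AE' conditions already treat as false; the
    # short-jump distances used above are exact instruction sizes -- a
    # 'Jcond rel8' is 2 bytes, a 'JMP rel32' is 5 and a 'Jcond rel32' is
    # 6 -- so each J_il8 skips precisely the instruction that follows it.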

    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
                   argtypes=None, callconv=FFI_DEFAULT_ABI):
        if IS_X86_64:
            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
        p = 0
        n = len(arglocs)
        for i in range(start, n):
            loc = arglocs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    self.mc.MOVSD_sx(p, loc.value)
                else:
                    self.mc.MOV_sr(p, loc.value)
            p += loc.get_width()
        p = 0
        for i in range(start, n):
            loc = arglocs[i]
            if not isinstance(loc, RegLoc):
                if loc.get_width() == 8:
                    self.mc.MOVSD(xmm0, loc)
                    self.mc.MOVSD_sx(p, xmm0.value)
                else:
                    self.mc.MOV(tmp, loc)
                    self.mc.MOV_sr(p, tmp.value)
            p += loc.get_width()
        # x is a location
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        #
        if callconv != FFI_DEFAULT_ABI:
            self._fix_stdcall(callconv, p)
        #
        self._regalloc.needed_extra_stack_locations(p//WORD)

    def _fix_stdcall(self, callconv, p):
        from pypy.rlib.clibffi import FFI_STDCALL
        assert callconv == FFI_STDCALL
        # it's a bit stupid, but we're just going to cancel the fact that
        # the called function just added 'p' to ESP, by subtracting it again.
        self.mc.SUB_ri(esp.value, p)

    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
        src_locs = []
        dst_locs = []
        xmm_src_locs = []
        xmm_dst_locs = []
        pass_on_stack = []
        singlefloats = None
        # In reverse order for use with pop()
        unused_gpr = [r9, r8, ecx, edx, esi, edi]
        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
        for i in range(start, len(arglocs)):
            loc = arglocs[i]
            # XXX: Should be much simpler to tell whether a location is a
            # float!  It's so ugly because we have to "guard" the access to
            # .type with isinstance, since not all AssemblerLocation classes
            # are "typed"
            if ((isinstance(loc, RegLoc) and loc.is_xmm) or
                (isinstance(loc, StackLoc) and loc.type == FLOAT) or
                (isinstance(loc, ConstFloatLoc))):
                if len(unused_xmm) > 0:
                    xmm_src_locs.append(loc)
                    xmm_dst_locs.append(unused_xmm.pop())
                else:
                    pass_on_stack.append(loc)
            elif argtypes is not None and argtypes[i-start] == 'S':
                # Singlefloat argument
                if len(unused_xmm) > 0:
                    if singlefloats is None: singlefloats = []
                    singlefloats.append((loc, unused_xmm.pop()))
                else:
                    pass_on_stack.append(loc)
            else:
                if len(unused_gpr) > 0:
                    src_locs.append(loc)
                    dst_locs.append(unused_gpr.pop())
                else:
                    pass_on_stack.append(loc)
        # Emit instructions to pass the stack arguments
        # XXX: Would be nice to let remap_frame_layout take care of this, but
        # we'd need to create something like StackLoc, but relative to esp,
        # and I don't know if it's worth it.
        for i in range(len(pass_on_stack)):
            loc = pass_on_stack[i]
            if not isinstance(loc, RegLoc):
                if isinstance(loc, StackLoc) and loc.type == FLOAT:
                    self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
                    self.mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
                else:
                    self.mc.MOV(X86_64_SCRATCH_REG, loc)
                    self.mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
            else:
                # It's a register
                if loc.is_xmm:
                    self.mc.MOVSD_sx(i*WORD, loc.value)
                else:
                    self.mc.MOV_sr(i*WORD, loc.value)
        # Handle register arguments: first remap the xmm arguments
        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
                           X86_64_XMM_SCRATCH_REG)
        # Load the singlefloat arguments from main regs or stack to xmm regs
        if singlefloats is not None:
            for src, dst in singlefloats:
                if isinstance(src, ImmedLoc):
                    self.mc.MOV(X86_64_SCRATCH_REG, src)
                    src = X86_64_SCRATCH_REG
                self.mc.MOVD(dst, src)
        # Finally remap the arguments in the main regs
        # If x is a register and is in dst_locs, then oops, it needs to
        # be moved away:
        if x in dst_locs:
            src_locs.append(x)
            dst_locs.append(r10)
            x = r10
        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
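
    # Added note: popping from the reversed lists above hands out the
    # SysV AMD64 argument registers in their conventional order -- rdi,
    # rsi, rdx, rcx, r8, r9 for integers/pointers and xmm0-xmm7 for
    # floats; anything left over is passed on the stack.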

    def call(self, addr, args, res):
        force_index = self.write_new_force_index()
        self._emit_call(force_index, imm(addr), args)
        assert res is eax

    def write_new_force_index(self):
        # for shadowstack only: get a new, unused force_index number and
        # write it to FORCE_INDEX_OFS.  Used to record the call shape
        # (i.e. where the GC pointers are in the stack) around a CALL
        # instruction that doesn't already have a force_index.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            clt = self.current_clt
            force_index = clt.reserve_and_record_some_faildescr_index()
            self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
            return force_index
        else:
            # the return value is ignored, apart from the fact that it
            # is not negative.
            return 0

    genop_int_neg = _unaryop("NEG")
    genop_int_invert = _unaryop("NOT")

    genop_int_add = _binaryop_or_lea("ADD", True)
    genop_int_sub = _binaryop_or_lea("SUB", False)
    genop_int_mul = _binaryop("IMUL", True)
    genop_int_and = _binaryop("AND", True)
    genop_int_or = _binaryop("OR", True)
    genop_int_xor = _binaryop("XOR", True)
    genop_int_lshift = _binaryop("SHL")
    genop_int_rshift = _binaryop("SAR")
    genop_uint_rshift = _binaryop("SHR")
    genop_float_add = _binaryop("ADDSD", True)
    genop_float_sub = _binaryop('SUBSD')
    genop_float_mul = _binaryop('MULSD', True)
    genop_float_truediv = _binaryop('DIVSD')

    genop_int_lt = _cmpop("L", "G")
    genop_int_le = _cmpop("LE", "GE")
    genop_int_eq = _cmpop("E", "E")
    genop_int_ne = _cmpop("NE", "NE")
    genop_int_gt = _cmpop("G", "L")
    genop_int_ge = _cmpop("GE", "LE")
    genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
    genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne

    genop_float_lt = _cmpop_float('B', 'A')
    genop_float_le = _cmpop_float('BE', 'AE')
    genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True)
    genop_float_eq = _cmpop_float('E', 'E')
    genop_float_gt = _cmpop_float('A', 'B')
    genop_float_ge = _cmpop_float('AE', 'BE')

    genop_uint_gt = _cmpop("A", "B")
    genop_uint_lt = _cmpop("B", "A")
    genop_uint_le = _cmpop("BE", "AE")
    genop_uint_ge = _cmpop("AE", "BE")

    genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE")
    genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L")
    genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE")
    genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
    genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
    genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
    genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
    genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne

    genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
    genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
    genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
    genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")

    genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE", "BE")
    genop_guard_float_le = _cmpop_guard_float("BE", "AE", "A", "B")
    genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE", "NE")
    genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE", "AE")
    genop_guard_float_ge = _cmpop_guard_float("AE", "BE", "B", "A")
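
    # Added note: signed integer comparisons use the L/G family of
    # condition codes while the uint_* variants use the unsigned B/A
    # family; the float comparisons also use B/A because UCOMISD sets
    # CF/ZF like an unsigned compare (with PF flagging NaNs, handled in
    # _cmpop_float and _cmpop_guard_float above).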

    def genop_math_sqrt(self, op, arglocs, resloc):
        self.mc.SQRTSD(arglocs[0], resloc)

    def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs,
                             result_loc):
        guard_opnum = guard_op.getopnum()
        if isinstance(arglocs[0], RegLoc):
            self.mc.UCOMISD(arglocs[0], arglocs[1])
        else:
            self.mc.UCOMISD(arglocs[1], arglocs[0])
        if guard_opnum == rop.GUARD_TRUE:
            self.mc.J_il8(rx86.Conditions['P'], 6)
            self.implement_guard(guard_token, 'E')
        else:
            self.mc.J_il8(rx86.Conditions['P'], 2)
            self.mc.J_il8(rx86.Conditions['E'], 5)
            self.implement_guard(guard_token)

    def genop_float_neg(self, op, arglocs, resloc):
        # Following what gcc does: res = x ^ 0x8000000000000000
        self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))

    def genop_float_abs(self, op, arglocs, resloc):
        # Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
        self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))

    def genop_cast_float_to_int(self, op, arglocs, resloc):
        self.mc.CVTTSD2SI(resloc, arglocs[0])

    def genop_cast_int_to_float(self, op, arglocs, resloc):
        self.mc.CVTSI2SD(resloc, arglocs[0])

    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
        loc0, loctmp = arglocs
        self.mc.CVTSD2SS(loctmp, loc0)
        assert isinstance(resloc, RegLoc)
        assert isinstance(loctmp, RegLoc)
        self.mc.MOVD_rx(resloc.value, loctmp.value)

    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc0, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc0.value)
        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)

    def genop_convert_float_bytes_to_longlong(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)

    def genop_convert_longlong_bytes_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)
  1258. def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
  1259. guard_opnum = guard_op.getopnum()
  1260. self.mc.CMP(arglocs[0], imm0)
  1261. if guard_opnum == rop.GUARD_TRUE:
  1262. self.implement_guard(guard_token, 'Z')
  1263. else:
  1264. self.implement_guard(guard_token, 'NZ')
  1265. def genop_int_is_true(self, op, arglocs, resloc):
  1266. self.mc.CMP(arglocs[0], imm0)
  1267. rl = resloc.lowest8bits()
  1268. self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
  1269. self.mc.MOVZX8(resloc, rl)
  1270. def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc):
  1271. guard_opnum = guard_op.getopnum()
  1272. self.mc.CMP(arglocs[0], imm0)
  1273. if guard_opnum == rop.GUARD_TRUE:
  1274. self.implement_guard(guard_token, 'NZ')
  1275. else:
  1276. self.implement_guard(guard_token, 'Z')
  1277. def genop_int_is_zero(self, op, arglocs, resloc):
  1278. self.mc.CMP(arglocs[0], imm0)
  1279. rl = resloc.lowest8bits()
  1280. self.mc.SET_ir(rx86.Conditions['E'], rl.value)
  1281. self.mc.MOVZX8(resloc, rl)
  1282. def genop_same_as(self, op, arglocs, resloc):
  1283. self.mov(arglocs[0], resloc)
  1284. genop_cast_ptr_to_int = genop_same_as
  1285. genop_cast_int_to_ptr = genop_same_as
  1286. def genop_int_force_ge_zero(self, op, arglocs, resloc):
  1287. self.mc.TEST(arglocs[0], arglocs[0])
  1288. self.mov(imm0, resloc)
  1289. self.mc.CMOVNS(resloc, arglocs[0])

    def genop_int_mod(self, op, arglocs, resloc):
        if IS_X86_32:
            self.mc.CDQ()
        elif IS_X86_64:
            self.mc.CQO()
        self.mc.IDIV_r(ecx.value)
    genop_int_floordiv = genop_int_mod

    def genop_uint_floordiv(self, op, arglocs, resloc):
        self.mc.XOR_rr(edx.value, edx.value)
        self.mc.DIV_r(ecx.value)
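
    # Note on the two division helpers above: IDIV/DIV take the dividend
    # in edx:eax, so CDQ/CQO first sign-extend eax into edx (signed case)
    # while XOR edx,edx zeroes it (unsigned case).  Afterwards eax holds
    # the quotient and edx the remainder; presumably the register
    # allocator pins the dividend and divisor to eax/ecx and picks eax or
    # edx as resloc depending on the operation.  E.g. 7 // 2 leaves
    # eax == 3 and edx == 1.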

    genop_llong_add = _binaryop("PADDQ", True)
    genop_llong_sub = _binaryop("PSUBQ")
    genop_llong_and = _binaryop("PAND", True)
    genop_llong_or = _binaryop("POR", True)
    genop_llong_xor = _binaryop("PXOR", True)

    def genop_llong_to_int(self, op, arglocs, resloc):
        loc = arglocs[0]
        assert isinstance(resloc, RegLoc)
        if isinstance(loc, RegLoc):
            self.mc.MOVD_rx(resloc.value, loc.value)
        elif isinstance(loc, StackLoc):
            self.mc.MOV_rb(resloc.value, loc.value)
        else:
            not_implemented("llong_to_int: %s" % (loc,))

    def genop_llong_from_int(self, op, arglocs, resloc):
        loc1, loc2 = arglocs
        if isinstance(loc1, ConstFloatLoc):
            assert loc2 is None
            self.mc.MOVSD(resloc, loc1)
        else:
            assert isinstance(loc1, RegLoc)
            assert isinstance(loc2, RegLoc)
            assert isinstance(resloc, RegLoc)
            self.mc.MOVD_xr(loc2.value, loc1.value)
            self.mc.PSRAD_xi(loc2.value, 31)    # -> 0 or -1
            self.mc.MOVD_xr(resloc.value, loc1.value)
            self.mc.PUNPCKLDQ_xx(resloc.value, loc2.value)
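            # The four instructions above sign-extend the 32-bit int to
            # 64 bits inside an xmm register: loc2 becomes the sign mask
            # (PSRAD by 31 turns each dword into 0 or -1), and PUNPCKLDQ
            # interleaves the low dwords so that resloc ends up holding
            # [low = x, high = sign(x)].  E.g. x == -5 yields
            # 0xFFFFFFFF_FFFFFFFB in the low quadword.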

    def genop_llong_from_uint(self, op, arglocs, resloc):
        loc1, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc1, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc1.value)

    def genop_llong_eq(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 1, and 0x00, 0x0F and 0xF0 to 0.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.ADD_ri(resloc.value, 1)
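        # How the three instructions above do that mapping: CMP8 with -1
        # sets the carry flag exactly when the low byte is not 0xFF
        # (unsigned borrow), SBB reg,reg then yields -1 on carry and 0
        # otherwise, and ADD 1 turns that into 0 ("some dword differed")
        # or 1 ("both dwords equal").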

    def genop_llong_ne(self, op, arglocs, resloc):
        loc1, loc2, locxtmp = arglocs
        self.mc.MOVSD(locxtmp, loc1)
        self.mc.PCMPEQD(locxtmp, loc2)
        self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
        # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
        # depending on the result of the comparison of each of the two
        # double-words of loc1 and loc2.  The higher 8 bits contain random
        # results.  We want to map 0xFF to 0, and 0x00, 0x0F and 0xF0 to 1.
        self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
        self.mc.SBB_rr(resloc.value, resloc.value)
        self.mc.NEG_r(resloc.value)
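        # Same carry trick as in genop_llong_eq, with the polarity
        # flipped: SBB leaves -1 exactly when the low byte is not 0xFF,
        # and NEG turns that -1 into the final answer 1 for "not equal"
        # (and leaves 0 unchanged for "equal").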

    def genop_llong_lt(self, op, arglocs, resloc):
        # XXX just a special case for now: "x < 0"
        loc1, = arglocs
        self.mc.PMOVMSKB_rx(resloc.value, loc1.value)
        self.mc.SHR_ri(resloc.value, 7)
        self.mc.AND_ri(resloc.value, 1)
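        # PMOVMSKB gathers the sign bit of each of the 16 bytes of the
        # xmm register; bit 7 of the mask is the sign bit of byte 7,
        # i.e. of the most significant byte of the low 64-bit word.  SHR
        # by 7 and AND 1 extract it, giving 1 exactly when the long long
        # is negative.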

    # ----------

    def genop_call_malloc_gc(self, op, arglocs, result_loc):
        self.genop_call(op, arglocs, result_loc)
        self.propagate_memoryerror_if_eax_is_null()

    def propagate_memoryerror_if_eax_is_null(self):
        # if self.propagate_exception_path == 0 (tests), this may jump to 0
        # and segfault.  too bad.  the alternative is to continue anyway
        # with eax == 0, but that will segfault too.
        self.mc.TEST_rr(eax.value, eax.value)
        if WORD == 4:
            self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
            self.mc.add_pending_relocation()
        elif WORD == 8:
            self.mc.J_il(rx86.Conditions['Z'], 0)
            pos = self.mc.get_relative_pos()
            self.pending_memoryerror_trampoline_from.append(pos)

    # ----------

    def load_from_mem(self, resloc, source_addr, size_loc, sign_loc):
        assert isinstance(resloc, RegLoc)
        size = size_loc.value
        sign = sign_loc.value
        if resloc.is_xmm:
            self.mc.MOVSD(resloc, source_addr)
        elif size == WORD:
            self.mc.MOV(resloc, source_addr)
        elif size == 1:
            if sign:
                self.mc.MOVSX8(resloc, source_addr)
            else:
                self.mc.MOVZX8(resloc, source_addr)
        elif size == 2:
            if sign:
                self.mc.MOVSX16(resloc, source_addr)
            else:
                self.mc.MOVZX16(resloc, source_addr)
        elif IS_X86_64 and size == 4:
            if sign:
                self.mc.MOVSX32(resloc, source_addr)
            else:
                self.mc.MOV32(resloc, source_addr)    # zero-extending
        else:
            not_implemented("load_from_mem size = %d" % size)

    def save_into_mem(self, dest_addr, value_loc, size_loc):
        size = size_loc.value
        if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
            self.mc.MOVSD(dest_addr, value_loc)
        elif size == 1:
            self.mc.MOV8(dest_addr, value_loc.lowest8bits())
        elif size == 2:
            self.mc.MOV16(dest_addr, value_loc)
        elif size == 4:
            self.mc.MOV32(dest_addr, value_loc)
        elif size == 8:
            if IS_X86_64:
                self.mc.MOV(dest_addr, value_loc)
            else:
                assert isinstance(value_loc, FloatImmedLoc)
                self.mc.MOV(dest_addr, value_loc.low_part_loc())
                self.mc.MOV(dest_addr.add_offset(4), value_loc.high_part_loc())
        else:
            not_implemented("save_into_mem size = %d" % size)

    def genop_getfield_gc(self, op, arglocs, resloc):
        base_loc, ofs_loc, size_loc, sign_loc = arglocs
        assert isinstance(size_loc, ImmedLoc)
        source_addr = AddressLoc(base_loc, ofs_loc)
        self.load_from_mem(resloc, source_addr, size_loc, sign_loc)

    genop_getfield_raw = genop_getfield_gc
    genop_getfield_raw_pure = genop_getfield_gc
    genop_getfield_gc_pure = genop_getfield_gc

    def genop_getarrayitem_gc(self, op, arglocs, resloc):
        base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
        assert isinstance(ofs, ImmedLoc)
        assert isinstance(size_loc, ImmedLoc)
        scale = _get_scale(size_loc.value)
        src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
        self.load_from_mem(resloc, src_addr, size_loc, sign_loc)

    genop_getarrayitem_gc_pure = genop_getarrayitem_gc
    genop_getarrayitem_raw = genop_getarrayitem_gc
    genop_getarrayitem_raw_pure = genop_getarrayitem_gc

    def genop_raw_load(self, op, arglocs, resloc):
        base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
        assert isinstance(ofs, ImmedLoc)
        src_addr = addr_add(base_loc, ofs_loc, ofs.value, 0)
        self.load_from_mem(resloc, src_addr, size_loc, sign_loc)

    def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
                                base_loc, ofs_loc):
        assert isinstance(itemsize_loc, ImmedLoc)
        if isinstance(index_loc, ImmedLoc):
            temp_loc = imm(index_loc.value * itemsize_loc.value)
        elif _valid_addressing_size(itemsize_loc.value):
            return AddressLoc(base_loc, index_loc,
                              _get_scale(itemsize_loc.value), ofs_loc.value)
        else:
            # XXX should not use IMUL in more cases, it can use a clever LEA
            assert isinstance(temp_loc, RegLoc)
            assert isinstance(index_loc, RegLoc)
            assert not temp_loc.is_xmm
            self.mc.IMUL_rri(temp_loc.value, index_loc.value,
                             itemsize_loc.value)
        assert isinstance(ofs_loc, ImmedLoc)
        return AddressLoc(base_loc, temp_loc, 0, ofs_loc.value)

    def genop_getinteriorfield_gc(self, op, arglocs, resloc):
        (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
            index_loc, temp_loc, sign_loc) = arglocs
        src_addr = self._get_interiorfield_addr(temp_loc, index_loc,
                                                itemsize_loc, base_loc,
                                                ofs_loc)
        self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)

    def genop_discard_setfield_gc(self, op, arglocs):
        base_loc, ofs_loc, size_loc, value_loc = arglocs
        assert isinstance(size_loc, ImmedLoc)
        dest_addr = AddressLoc(base_loc, ofs_loc)
        self.save_into_mem(dest_addr, value_loc, size_loc)

    def genop_discard_setinteriorfield_gc(self, op, arglocs):
        (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
            index_loc, temp_loc, value_loc) = arglocs
        dest_addr = self._get_interiorfield_addr(temp_loc, index_loc,
                                                 itemsize_loc, base_loc,
                                                 ofs_loc)
        self.save_into_mem(dest_addr, value_loc, fieldsize_loc)

    genop_discard_setinteriorfield_raw = genop_discard_setinteriorfield_gc

    def genop_discard_setarrayitem_gc(self, op, arglocs):
        base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
        assert isinstance(baseofs, ImmedLoc)
        assert isinstance(size_loc, ImmedLoc)
        scale = _get_scale(size_loc.value)
        dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
        self.save_into_mem(dest_addr, value_loc, size_loc)

    def genop_discard_raw_store(self, op, arglocs):
        base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
        assert isinstance(baseofs, ImmedLoc)
        dest_addr = AddressLoc(base_loc, ofs_loc, 0, baseofs.value)
        self.save_into_mem(dest_addr, value_loc, size_loc)

    def genop_discard_strsetitem(self, op, arglocs):
        base_loc, ofs_loc, val_loc = arglocs
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
                                        self.cpu.translate_support_code)
        assert itemsize == 1
        dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
        self.mc.MOV8(dest_addr, val_loc.lowest8bits())

    def genop_discard_unicodesetitem(self, op, arglocs):
        base_loc, ofs_loc, val_loc = arglocs
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
                                        self.cpu.translate_support_code)
        if itemsize == 4:
            self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
        elif itemsize == 2:
            self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
        else:
            assert 0, itemsize

    genop_discard_setfield_raw = genop_discard_setfield_gc
    genop_discard_setarrayitem_raw = genop_discard_setarrayitem_gc

    def genop_strlen(self, op, arglocs, resloc):
        base_loc = arglocs[0]
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
                                        self.cpu.translate_support_code)
        self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))

    def genop_unicodelen(self, op, arglocs, resloc):
        base_loc = arglocs[0]
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
                                        self.cpu.translate_support_code)
        self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))

    def genop_arraylen_gc(self, op, arglocs, resloc):
        base_loc, ofs_loc = arglocs
        assert isinstance(ofs_loc, ImmedLoc)
        self.mc.MOV(resloc, addr_add_const(base_loc, ofs_loc.value))

    def genop_strgetitem(self, op, arglocs, resloc):
        base_loc, ofs_loc = arglocs
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
                                        self.cpu.translate_support_code)
        assert itemsize == 1
        self.mc.MOVZX8(resloc, AddressLoc(base_loc, ofs_loc, 0, basesize))

    def genop_unicodegetitem(self, op, arglocs, resloc):
        base_loc, ofs_loc = arglocs
        basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
                                        self.cpu.translate_support_code)
        if itemsize == 4:
            self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
        elif itemsize == 2:
            self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
        else:
            assert 0, itemsize

    def genop_read_timestamp(self, op, arglocs, resloc):
        self.mc.RDTSC()
        if longlong.is_64_bit:
            self.mc.SHL_ri(edx.value, 32)
            self.mc.OR_rr(edx.value, eax.value)
        else:
            loc1, = arglocs
            self.mc.MOVD_xr(loc1.value, edx.value)
            self.mc.MOVD_xr(resloc.value, eax.value)
            self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
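        # RDTSC returns the 64-bit timestamp split across edx:eax.  On
        # 64-bit the two halves are merged with SHL/OR into a single
        # register; on 32-bit they are packed into an xmm register with
        # PUNPCKLDQ, the same way genop_llong_from_int builds its result.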

    def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
        loc = locs[0]
        self.mc.TEST(loc, loc)
        self.implement_guard(guard_token, 'Z')
    genop_guard_guard_nonnull = genop_guard_guard_true

    def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
                                       locs, ign_2):
        self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
        self.implement_guard(guard_token, 'NZ')

    def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
                                          locs, ign_2):
        pos = self.mc.get_relative_pos() + 1    # after potential jmp
        guard_token.pos_jump_offset = pos
        self.pending_guard_tokens.append(guard_token)

    def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
                                    locs, resloc):
        loc = locs[0]
        loc1 = locs[1]
        self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
        self.mc.CMP(loc1, loc)
        self.implement_guard(guard_token, 'NE')
        if resloc is not None:
            self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
        self.mc.MOV(heap(self.cpu.pos_exception()), imm0)
        self.mc.MOV(heap(self.cpu.pos_exc_value()), imm0)

    def _gen_guard_overflow(self, guard_op, guard_token):
        guard_opnum = guard_op.getopnum()
        if guard_opnum == rop.GUARD_NO_OVERFLOW:
            self.implement_guard(guard_token, 'O')
        elif guard_opnum == rop.GUARD_OVERFLOW:
            self.implement_guard(guard_token, 'NO')
        else:
            not_implemented("int_xxx_ovf followed by %s" %
                            guard_op.getopname())

    def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
        self.mc.ADD(arglocs[0], arglocs[1])
        return self._gen_guard_overflow(guard_op, guard_token)

    def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
        self.mc.SUB(arglocs[0], arglocs[1])
        return self._gen_guard_overflow(guard_op, guard_token)

    def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
        self.mc.IMUL(arglocs[0], arglocs[1])
        return self._gen_guard_overflow(guard_op, guard_token)

    def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
        loc = locs[0]
        self.mc.TEST(loc, loc)
        self.implement_guard(guard_token, 'NZ')
    genop_guard_guard_isnull = genop_guard_guard_false

    def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
        if guard_op.getarg(0).type == FLOAT:
            assert guard_op.getarg(1).type == FLOAT
            self.mc.UCOMISD(locs[0], locs[1])
        else:
            self.mc.CMP(locs[0], locs[1])
        self.implement_guard(guard_token, 'NE')

    def _cmp_guard_class(self, locs):
        offset = self.cpu.vtable_offset
        if offset is not None:
            self.mc.CMP(mem(locs[0], offset), locs[1])
        else:
            # XXX hard-coded assumption: to go from an object to its class
            # we use the following algorithm:
            #   - read the typeid from mem(locs[0]), i.e. at offset 0;
            #     this is a complete word (N=4 bytes on 32-bit, N=8 on
            #     64-bits)
            #   - keep the lower half of what is read there (i.e.
            #     truncate to an unsigned 'N / 2' bytes value)
            #   - multiply by 4 (on 32-bits only) and use it as an
            #     offset in type_info_group
            #   - add 16/32 bytes, to go past the TYPE_INFO structure
            loc = locs[1]
            assert isinstance(loc, ImmedLoc)
            classptr = loc.value
            # here, we have to go back from 'classptr' to the value expected
            # from reading the half-word in the object header.  Note that
            # this half-word is at offset 0 on a little-endian machine;
            # it would be at offset 2 or 4 on a big-endian machine.
            from pypy.rpython.memory.gctypelayout import GCData
            sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
            type_info_group = llop.gc_get_type_info_group(llmemory.Address)
            type_info_group = rffi.cast(lltype.Signed, type_info_group)
            expected_typeid = classptr - sizeof_ti - type_info_group
            if IS_X86_32:
                expected_typeid >>= 2
                self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
            elif IS_X86_64:
                self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
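            # The arithmetic above simply inverts the object-to-class
            # mapping described at the top of this 'else' branch: from
            # the known class pointer we subtract the type_info_group
            # base and the TYPE_INFO header size, and on 32-bit shift
            # right by 2 to undo the 'multiply by 4' step.  What remains
            # is the half-word typeid that any live object of this class
            # must carry at offset 0.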

    def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
        self._cmp_guard_class(locs)
        self.implement_guard(guard_token, 'NE')

    def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
                                        guard_token, locs, ign_2):
        self.mc.CMP(locs[0], imm1)
        # Patched below
        self.mc.J_il8(rx86.Conditions['B'], 0)
        jb_location = self.mc.get_relative_pos()
        self._cmp_guard_class(locs)
        # patch the JB above
        offset = self.mc.get_relative_pos() - jb_location
        assert 0 < offset <= 127
        self.mc.overwrite(jb_location-1, chr(offset))
        #
        self.implement_guard(guard_token, 'NE')

    def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
                                 fail_locs):
        exc = (guard_opnum == rop.GUARD_EXCEPTION or
               guard_opnum == rop.GUARD_NO_EXCEPTION or
               guard_opnum == rop.GUARD_NOT_FORCED)
        is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
        return GuardToken(faildescr, failargs, fail_locs, exc,
                          is_guard_not_invalidated)

    def generate_propagate_error_64(self):
        assert WORD == 8
        startpos = self.mc.get_relative_pos()
        self.mc.JMP(imm(self.propagate_exception_path))
        return startpos

    def generate_quick_failure(self, guardtok):
        """Generate the initial code for handling a failure.  We try to
        keep it as compact as possible.
        """
        fail_index = self.cpu.get_fail_descr_number(guardtok.faildescr)
        mc = self.mc
        startpos = mc.get_relative_pos()
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and box.type == FLOAT:
                withfloats = True
                break
        exc = guardtok.exc
        target = self.failure_recovery_code[exc + 2 * withfloats]
        if WORD == 4:
            mc.CALL(imm(target))
        else:
            # Generate exactly 13 bytes:
            #    MOV r11, target-as-8-bytes
            #    CALL *r11
            # Keep the number 13 in sync with _find_failure_recovery_bytecode.
            start = mc.get_relative_pos()
            mc.MOV_ri64(X86_64_SCRATCH_REG.value, target)
            mc.CALL_r(X86_64_SCRATCH_REG.value)
            assert mc.get_relative_pos() == start + 13
        # write tight data that describes the failure recovery
        self.write_failure_recovery_description(mc, guardtok.failargs,
                                                guardtok.fail_locs)
        # write the fail_index too
        mc.writeimm32(fail_index)
        # for testing the decoding, write a final byte 0xCC
        if not we_are_translated():
            mc.writechar('\xCC')
            faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
            guardtok.faildescr._x86_debug_faillocs = faillocs
        return startpos

    DESCR_REF       = 0x00
    DESCR_INT       = 0x01
    DESCR_FLOAT     = 0x02
    DESCR_SPECIAL   = 0x03
    CODE_FROMSTACK  = 4 * (8 + 8*IS_X86_64)
    CODE_STOP       = 0 | DESCR_SPECIAL
    CODE_HOLE       = 4 | DESCR_SPECIAL
    CODE_INPUTARG   = 8 | DESCR_SPECIAL
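
    # A sketch of the failure-recovery bytecode implied by the constants
    # above and by write_failure_recovery_description() below: each fail
    # argument is encoded as n = kind + 4*n', where the two low bits give
    # the kind (REF/INT/FLOAT, or SPECIAL for the CODE_* markers) and n'
    # is either a register number (when the result stays below
    # CODE_FROMSTACK) or CODE_FROMSTACK//4 plus the stack position.  The
    # number is written as a little-endian base-128 varint: 7 payload
    # bits per byte, high bit set on every byte except the last.  E.g. on
    # 32-bit, an INT spilled to stack position 3 encodes as
    # n = 1 + 4*(8+3) = 45, a single byte.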

    def write_failure_recovery_description(self, mc, failargs, locs):
        for i in range(len(failargs)):
            arg = failargs[i]
            if arg is not None:
                if arg.type == REF:
                    kind = self.DESCR_REF
                elif arg.type == INT:
                    kind = self.DESCR_INT
                elif arg.type == FLOAT:
                    kind = self.DESCR_FLOAT
                else:
                    raise AssertionError("bogus kind")
                loc = locs[i]
                if isinstance(loc, StackLoc):
                    pos = loc.position
                    if pos < 0:
                        mc.writechar(chr(self.CODE_INPUTARG))
                        pos = ~pos
                    n = self.CODE_FROMSTACK//4 + pos
                else:
                    assert isinstance(loc, RegLoc)
                    n = loc.value
                n = kind + 4*n
                while n > 0x7F:
                    mc.writechar(chr((n & 0x7F) | 0x80))
                    n >>= 7
            else:
                n = self.CODE_HOLE
            mc.writechar(chr(n))
        mc.writechar(chr(self.CODE_STOP))
        # assert that the fail_boxes lists are big enough
        assert len(failargs) <= self.fail_boxes_int.SIZE

    def rebuild_faillocs_from_descr(self, bytecode):
        from pypy.jit.backend.x86.regalloc import X86FrameManager
        descr_to_box_type = [REF, INT, FLOAT]
        bytecode = rffi.cast(rffi.UCHARP, bytecode)
        arglocs = []
        code_inputarg = False
        while 1:
            # decode the next instruction from the bytecode
            code = rffi.cast(lltype.Signed, bytecode[0])
            bytecode = rffi.ptradd(bytecode, 1)
            if code >= self.CODE_FROMSTACK:
                # 'code' identifies a stack location
                if code > 0x7F:
                    shift = 7
                    code &= 0x7F
                    while True:
                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
                        bytecode = rffi.ptradd(bytecode, 1)
                        code |= (nextcode & 0x7F) << shift
                        shift += 7
                        if nextcode <= 0x7F:
                            break
                kind = code & 3
                code = (code - self.CODE_FROMSTACK) >> 2
                if code_inputarg:
                    code = ~code
                    code_inputarg = False
                loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
            elif code == self.CODE_STOP:
                break
            elif code == self.CODE_HOLE:
                continue
            elif code == self.CODE_INPUTARG:
                code_inputarg = True
                continue
            else:
                # 'code' identifies a register
                kind = code & 3
                code >>= 2
                if kind == self.DESCR_FLOAT:
                    loc = regloc.XMMREGLOCS[code]
                else:
                    loc = regloc.REGLOCS[code]
            arglocs.append(loc)
        return arglocs[:]

    @rgc.no_collect
    def grab_frame_values(self, bytecode, frame_addr, allregisters):
        # no malloc allowed here!!
        self.fail_ebp = allregisters[16 + ebp.value]
        code_inputarg = False
        num = 0
        value_hi = 0
        while 1:
            # decode the next instruction from the bytecode
            code = rffi.cast(lltype.Signed, bytecode[0])
            bytecode = rffi.ptradd(bytecode, 1)
            if code >= self.CODE_FROMSTACK:
                if code > 0x7F:
                    shift = 7
                    code &= 0x7F
                    while True:
                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
                        bytecode = rffi.ptradd(bytecode, 1)
                        code |= (nextcode & 0x7F) << shift
                        shift += 7
                        if nextcode <= 0x7F:
                            break
                # load the value from the stack
                kind = code & 3
                code = (code - self.CODE_FROMSTACK) >> 2
                if code_inputarg:
                    code = ~code
                    code_inputarg = False
                stackloc = frame_addr + get_ebp_ofs(code)
                value = rffi.cast(rffi.LONGP, stackloc)[0]
                if kind == self.DESCR_FLOAT and WORD == 4:
                    value_hi = value
                    value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
            else:
                # 'code' identifies a register: load its value
                kind = code & 3
                if kind == self.DESCR_SPECIAL:
                    if code == self.CODE_HOLE:
                        num += 1
                        continue
                    if code == self.CODE_INPUTARG:
                        code_inputarg = True
                        continue
                    assert code == self.CODE_STOP
                    break
                code >>= 2
                if kind == self.DESCR_FLOAT:
                    if WORD == 4:
                        value = allregisters[2*code]
                        value_hi = allregisters[2*code + 1]
                    else:
                        value = allregisters[code]
                else:
                    value = allregisters[16 + code]
            # store the loaded value into fail_boxes_<type>
            if kind == self.DESCR_INT:
                tgt = self.fail_boxes_int.get_addr_for_num(num)
            elif kind == self.DESCR_REF:
                tgt = self.fail_boxes_ptr.get_addr_for_num(num)
            elif kind == self.DESCR_FLOAT:
                tgt = self.fail_boxes_float.get_addr_for_num(num)
                if WORD == 4:
                    rffi.cast(rffi.LONGP, tgt)[1] = value_hi
            else:
                assert 0, "bogus kind"
            rffi.cast(rffi.LONGP, tgt)[0] = value
            num += 1
        #
        if not we_are_translated():
            assert bytecode[4] == 0xCC
        self.fail_boxes_count = num
        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
        fail_index = rffi.cast(lltype.Signed, fail_index)
        return fail_index

    def setup_failure_recovery(self):

        @rgc.no_collect
        def failure_recovery_func(registers):
            # 'registers' is a pointer to a structure containing the
            # original value of the registers, optionally the original
            # value of XMM registers, and finally a reference to the
            # recovery bytecode.  See _build_failure_recovery() for details.
            stack_at_ebp = registers[ebp.value]
            bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
            allregisters = rffi.ptradd(registers, -16)
            return self.grab_frame_values(bytecode, stack_at_ebp, allregisters)

        self.failure_recovery_func = failure_recovery_func
        self.failure_recovery_code = [0, 0, 0, 0]

    _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
                                                        lltype.Signed))

    def _build_failure_recovery(self, exc, withfloats=False):
        failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
                                         self.failure_recovery_func)
        failure_recovery_func = rffi.cast(lltype.Signed,
                                          failure_recovery_func)
        mc = codebuf.MachineCodeBlockWrapper()
        self.mc = mc

        # Push all general purpose registers
        for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
            mc.PUSH_r(gpr)

        # ebx/rbx is callee-save in both i386 and x86-64
        mc.MOV_rr(ebx.value, esp.value)

        if withfloats:
            # Push all float registers
            mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
            for i in range(self.cpu.NUM_REGS):
                mc.MOVSD_sx(8*i, i)

        # we call a provided function that will
        # - call our on_leave_jitted_hook which will mark
        #   the fail_boxes_ptr array as pointing to young objects to
        #   avoid unwarranted freeing
        # - optionally save exception depending on the flag
        addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
        mc.CALL(imm(addr))

        # the following call saves all values from the stack and from
        # registers to the right 'fail_boxes_<type>' location.
        # Note that the registers are saved so far in ebx[0] to ebx[7],
        # as pushed above (ebx holds the old value of esp), plus
        # optionally in ebx[-16] to ebx[-1] for the XMM registers.
        # Moreover, ebx[8] is a pointer to the recovery bytecode, pushed
        # just before by the CALL instruction written by
        # generate_quick_failure().  XXX misaligned stack in the call, but
        # it's ok because failure_recovery_func is not calling anything more

        # XXX
        if IS_X86_32:
            mc.PUSH_r(ebx.value)
        elif IS_X86_64:
            mc.MOV_rr(edi.value, ebx.value)
        else:
            raise AssertionError("Shouldn't happen")

        mc.CALL(imm(failure_recovery_func))
        # returns in eax the fail_index

        # now we return from the complete frame, which starts from
        # _call_header_with_stack_check().  The LEA in _call_footer below
        # throws away most of the frame, including all the PUSHes that we
        # did just above.
        self._call_footer()
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart
        self.mc = None

    def generate_failure(self, fail_index, locs, exc, locs_are_ref):
        self.mc.begin_reuse_scratch_register()
        for i in range(len(locs)):
            loc = locs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    adr = self.fail_boxes_float.get_addr_for_num(i)
                    self.mc.MOVSD(heap(adr), loc)
                else:
                    if locs_are_ref[i]:
                        adr = self.fail_boxes_ptr.get_addr_for_num(i)
                    else:
                        adr = self.fail_boxes_int.get_addr_for_num(i)
                    self.mc.MOV(heap(adr), loc)
        for i in range(len(locs)):
            loc = locs[i]
            if not isinstance(loc, RegLoc):
                if ((isinstance(loc, StackLoc) and loc.type == FLOAT) or
                        isinstance(loc, ConstFloatLoc)):
                    self.mc.MOVSD(xmm0, loc)
                    adr = self.fail_boxes_float.get_addr_for_num(i)
                    self.mc.MOVSD(heap(adr), xmm0)
                else:
                    if locs_are_ref[i]:
                        adr = self.fail_boxes_ptr.get_addr_for_num(i)
                    else:
                        adr = self.fail_boxes_int.get_addr_for_num(i)
                    self.mc.MOV(eax, loc)
                    self.mc.MOV(heap(adr), eax)
        self.mc.end_reuse_scratch_register()

        # we call a provided function that will
        # - call our on_leave_jitted_hook which will mark
        #   the fail_boxes_ptr array as pointing to young objects to
        #   avoid unwarranted freeing
        # - optionally save exception depending on the flag
        addr = self.cpu.get_on_leave_jitted_int(save_exception=exc)
        self.mc.CALL(imm(addr))

        self.mc.MOV_ri(eax.value, fail_index)

        # exit function
        self._call_footer()

    def implement_guard(self, guard_token, condition=None):
        # These jumps are patched later.
        if condition:
            self.mc.J_il(rx86.Conditions[condition], 0)
        else:
            self.mc.JMP_l(0)
        guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
        self.pending_guard_tokens.append(guard_token)

    def genop_call(self, op, arglocs, resloc):
        force_index = self.write_new_force_index()
        self._genop_call(op, arglocs, resloc, force_index)

    def _genop_call(self, op, arglocs, resloc, force_index):
        from pypy.jit.backend.llsupport.descr import CallDescr
        sizeloc = arglocs[0]
        assert isinstance(sizeloc, ImmedLoc)
        size = sizeloc.value
        signloc = arglocs[1]
        x = arglocs[2]     # the function address
        if x is eax:
            tmp = ecx
        else:
            tmp = eax
        descr = op.getdescr()
        assert isinstance(descr, CallDescr)
        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
                        argtypes=descr.get_arg_types(),
                        callconv=descr.get_call_conv())
        if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
            # a float or a long long return
            if descr.get_result_type() == 'L':
                self.mc.MOV_br(resloc.value, eax.value)      # long long
                self.mc.MOV_br(resloc.value + 4, edx.value)
                # XXX should ideally not move the result on the stack,
                #     but it's a mess to load eax/edx into a xmm register
                #     and this way is simpler also because the result loc
                #     can just be always a stack location
            else:
                self.mc.FSTPL_b(resloc.value)   # float return
        elif descr.get_result_type() == 'S':
            # singlefloat return
            assert resloc is eax
            if IS_X86_32:
                # must convert ST(0) to a 32-bit singlefloat and load it
                # into EAX.  mess mess mess
                self.mc.SUB_ri(esp.value, 4)
                self.mc.FSTPS_s(0)
                self.mc.POP_r(eax.value)
            elif IS_X86_64:
                # must copy from the lower 32 bits of XMM0 into eax
                self.mc.MOVD_rx(eax.value, xmm0.value)
        elif size == WORD:
            assert resloc is eax or resloc is xmm0    # a full word
        elif size == 0:
            pass    # void return
        else:
            # use the code in load_from_mem to do the zero- or sign-extension
            assert resloc is eax
            if size == 1:
                srcloc = eax.lowest8bits()
            else:
                srcloc = eax
            self.load_from_mem(eax, srcloc, sizeloc, signloc)

    def genop_guard_call_may_force(self, op, guard_op, guard_token,
                                   arglocs, result_loc):
        faildescr = guard_op.getdescr()
        fail_index = self.cpu.get_fail_descr_number(faildescr)
        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
        self._genop_call(op, arglocs, result_loc, fail_index)
        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
        self.implement_guard(guard_token, 'L')

    def genop_guard_call_release_gil(self, op, guard_op, guard_token,
                                     arglocs, result_loc):
        # first, close the stack in the sense of the asmgcc GC root tracker
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            self.call_release_gil(gcrootmap, arglocs)
        # do the call
        faildescr = guard_op.getdescr()
        fail_index = self.cpu.get_fail_descr_number(faildescr)
        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
        self._genop_call(op, arglocs, result_loc, fail_index)
        # then reopen the stack
        if gcrootmap:
            self.call_reacquire_gil(gcrootmap, result_loc)
        # finally, the guard_not_forced
        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
        self.implement_guard(guard_token, 'L')

    def call_release_gil(self, gcrootmap, save_registers):
        # First, we need to save away the registers listed in
        # 'save_registers' that are not callee-save.  XXX We assume that
        # the XMM registers won't be modified.  We store them in
        # [ESP+4], [ESP+8], etc.; on x86-32 we leave enough room in [ESP]
        # for the single argument to closestack_addr below.
        if IS_X86_32:
            p = WORD
        elif IS_X86_64:
            p = 0
        for reg in self._regalloc.rm.save_around_call_regs:
            if reg in save_registers:
                self.mc.MOV_sr(p, reg.value)
                p += WORD
        #
        if gcrootmap.is_shadow_stack:
            args = []
        else:
            # note that regalloc.py used save_all_regs=True to save all
            # registers, so we don't have to care about saving them (other
            # than ebp) in the close_stack_struct.  But if they are registers
            # like %eax that would be destroyed by this call, *and* they are
            # used by arglocs for the *next* call, then trouble; for now we
            # will just push/pop them.
            from pypy.rpython.memory.gctransform import asmgcroot
            css = self._regalloc.close_stack_struct
            if css == 0:
                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
                                     asmgcroot.FRAME_PTR) + 1)
                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
                css = get_ebp_ofs(pos + use_words - 1)
                self._regalloc.close_stack_struct = css
            # The location where the future CALL will put its return address
            # will be [ESP-WORD].  But we can't use that as the next frame's
            # top address!  As the code after releasegil() runs without the
            # GIL, it might not be set yet by the time we need it (very
            # unlikely), or it might be overwritten by the following call
            # to reacquiregil() (much more likely).  So we hack even more
            # and use a dummy location containing a dummy value (a pointer
            # to itself) which we pretend is the return address :-/ :-/ :-/
            # It prevents us from storing any %esp-based stack locations,
            # but we don't do that so far.
            adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
            rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
            self.gcrootmap_retaddr_forced = adr
            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
            if rx86.fits_in_32bits(adr):
                self.mc.MOV_bi(frame_ptr, adr)          # MOV [css.frame], adr
            else:
                self.mc.MOV_ri(eax.value, adr)          # MOV EAX, adr
                self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
            # Save ebp
            index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
            self.mc.MOV_br(index_of_ebp, ebp.value)     # MOV [css.ebp], EBP
            # Call the closestack() function (also releasing the GIL)
            if IS_X86_32:
                reg = eax
            elif IS_X86_64:
                reg = edi
            self.mc.LEA_rb(reg.value, css)
            args = [reg]
        #
        self._emit_call(-1, imm(self.releasegil_addr), args)
        # Finally, restore the registers saved above.
        if IS_X86_32:
            p = WORD
        elif IS_X86_64:
            p = 0
        for reg in self._regalloc.rm.save_around_call_regs:
            if reg in save_registers:
                self.mc.MOV_rs(reg.value, p)
                p += WORD
        self._regalloc.needed_extra_stack_locations(p//WORD)

    def call_reacquire_gil(self, gcrootmap, save_loc):
        # save the previous result (eax/xmm0) into the stack temporarily.
        # XXX like with call_release_gil(), we assume that we don't need
        # to save xmm0 in this case.
        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
            self.mc.MOV_sr(WORD, save_loc.value)
        # call the reopenstack() function (also reacquiring the GIL)
        if gcrootmap.is_shadow_stack:
            args = []
        else:
            assert self.gcrootmap_retaddr_forced == -1, (
                      "missing mark_gc_roots() in CALL_RELEASE_GIL")
            self.gcrootmap_retaddr_forced = 0
            css = self._regalloc.close_stack_struct
            assert css != 0
            if IS_X86_32:
                reg = eax
            elif IS_X86_64:
                reg = edi
            self.mc.LEA_rb(reg.value, css)
            args = [reg]
        self._emit_call(-1, imm(self.reacqgil_addr), args)
        # restore the result from the stack
        if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
            self.mc.MOV_rs(save_loc.value, WORD)
            self._regalloc.needed_extra_stack_locations(2)

    def genop_guard_call_assembler(self, op, guard_op, guard_token,
                                   arglocs, result_loc):
        faildescr = guard_op.getdescr()
        fail_index = self.cpu.get_fail_descr_number(faildescr)
        self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
        #
        # Write a call to the target assembler
        self._emit_call(fail_index, imm(descr._x86_function_addr),
                        arglocs, 2, tmp=eax)
        if op.result is None:
            assert result_loc is None
            value = self.cpu.done_with_this_frame_void_v
        else:
            kind = op.result.type
            if kind == INT:
                assert result_loc is eax
                value = self.cpu.done_with_this_frame_int_v
            elif kind == REF:
                assert result_loc is eax
                value = self.cpu.done_with_this_frame_ref_v
            elif kind == FLOAT:
                value = self.cpu.done_with_this_frame_float_v
            else:
                raise AssertionError(kind)
        self.mc.CMP_ri(eax.value, value)
        # patched later
        self.mc.J_il8(rx86.Conditions['E'], 0)  # goto B if we get 'done_with_this_frame'
        je_location = self.mc.get_relative_pos()
        #
        # Path A: use assembler_helper_adr
        jd = descr.outermost_jitdriver_sd
        assert jd is not None
        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
        self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                        tmp=ecx)
        if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
            self.mc.FSTPL_b(result_loc.value)
        #else: result_loc is already either eax or None, checked below
        self.mc.JMP_l8(0)  # jump to done, patched later
        jmp_location = self.mc.get_relative_pos()
        #
        # Path B: fast path.  Must load the return value, and reset the token
        offset = jmp_location - je_location
        assert 0 < offset <= 127
        self.mc.overwrite(je_location - 1, chr(offset))
        #
        # Reset the vable token --- XXX really too much special logic here:-(
        if jd.index_of_virtualizable >= 0:
            from pypy.jit.backend.llsupport.descr import FieldDescr
            fielddescr = jd.vable_token_descr
            assert isinstance(fielddescr, FieldDescr)
            ofs = fielddescr.offset
            self.mc.MOV(eax, arglocs[1])
            self.mc.MOV_mi((eax.value, ofs), 0)
            # in the line above, TOKEN_NONE = 0
        #
        if op.result is not None:
            # load the return value from fail_boxes_xxx[0]
            kind = op.result.type
            if kind == FLOAT:
                xmmtmp = xmm0
                adr = self.fail_boxes_float.get_addr_for_num(0)
                self.mc.MOVSD(xmmtmp, heap(adr))
                self.mc.MOVSD(result_loc, xmmtmp)
            else:
                assert result_loc is eax
                if kind == INT:
                    adr = self.fail_boxes_int.get_addr_for_num(0)
                    self.mc.MOV(eax, heap(adr))
                elif kind == REF:
                    adr = self.fail_boxes_ptr.get_addr_for_num(0)
                    self.mc.MOV(eax, heap(adr))
                    self.mc.MOV(heap(adr), imm0)
                else:
                    raise AssertionError(kind)
        #
        # Here we join Path A and Path B again
        offset = self.mc.get_relative_pos() - jmp_location
        assert 0 <= offset <= 127
        self.mc.overwrite(jmp_location - 1, chr(offset))
        self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
        self.implement_guard(guard_token, 'L')

    def genop_discard_cond_call_gc_wb(self, op, arglocs):
        # Write code equivalent to write_barrier() in the GC: it checks
        # a flag in the object at arglocs[0], and if set, it calls a
        # helper piece of assembler.  The latter saves registers as needed
        # and calls the function jit_remember_young_pointer() from the GC.
        descr = op.getdescr()
        if we_are_translated():
            cls = self.cpu.gc_ll_descr.has_write_barrier_class()
            assert cls is not None and isinstance(descr, cls)
        #
        opnum = op.getopnum()
        card_marking = False
        mask = descr.jit_wb_if_flag_singlebyte
        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
            # assumptions the rest of the function depends on:
            assert (descr.jit_wb_cards_set_byteofs ==
                    descr.jit_wb_if_flag_byteofs)
            assert descr.jit_wb_cards_set_singlebyte == -0x80
            card_marking = True
            mask = descr.jit_wb_if_flag_singlebyte | -0x80
        #
        loc_base = arglocs[0]
        self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
                      imm(mask))
        self.mc.J_il8(rx86.Conditions['Z'], 0)  # patched later
        jz_location = self.mc.get_relative_pos()
        # for cond_call_gc_wb_array, also add another fast path:
        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
        if card_marking:
            # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
            # be checked by the status flags of the previous TEST8
            self.mc.J_il8(rx86.Conditions['S'], 0)  # patched later
            js_location = self.mc.get_relative_pos()
        else:
            js_location = 0
        # Write only a CALL to the helper prepared in advance, passing it as
        # argument the address of the structure we are writing into
        # (the first argument to COND_CALL_GC_WB).
        helper_num = card_marking
        if self._regalloc.xrm.reg_bindings:
            helper_num += 2
        if self.wb_slowpath[helper_num] == 0:    # tests only
            assert not we_are_translated()
            self.cpu.gc_ll_descr.write_barrier_descr = descr
            self._build_wb_slowpath(card_marking,
                                    bool(self._regalloc.xrm.reg_bindings))
            assert self.wb_slowpath[helper_num] != 0
        #
        self.mc.PUSH(loc_base)
        self.mc.CALL(imm(self.wb_slowpath[helper_num]))

        if card_marking:
            # The helper ends again with a check of the flag in the object.
            # So here, we can simply write again a 'JNS', which will be
            # taken if GCFLAG_CARDS_SET is still not set.
            self.mc.J_il8(rx86.Conditions['NS'], 0)  # patched later
            jns_location = self.mc.get_relative_pos()
            #
            # patch the JS above
            offset = self.mc.get_relative_pos() - js_location
            assert 0 < offset <= 127
            self.mc.overwrite(js_location-1, chr(offset))
            #
            # case GCFLAG_CARDS_SET: emit a few instructions to do
            # directly the card flag setting
            loc_index = arglocs[1]
            if isinstance(loc_index, RegLoc):
                if IS_X86_64 and isinstance(loc_base, RegLoc):
                    # copy loc_index into r11
                    tmp1 = X86_64_SCRATCH_REG
                    self.mc.MOV_rr(tmp1.value, loc_index.value)
                    final_pop = False
                else:
                    # must save the register loc_index before it is mutated
                    self.mc.PUSH_r(loc_index.value)
                    tmp1 = loc_index
                    final_pop = True
                # SHR tmp, card_page_shift
                self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
                # XOR tmp, -8
                self.mc.XOR_ri(tmp1.value, -8)
                # BTS [loc_base], tmp
                self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
                # done
                if final_pop:
                    self.mc.POP_r(loc_index.value)
                #
            elif isinstance(loc_index, ImmedLoc):
                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
                byte_ofs = ~(byte_index >> 3)
                byte_val = 1 << (byte_index & 7)
                self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
            else:
                raise AssertionError("index is neither RegLoc nor ImmedLoc")
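            # Worked example of the card computation above (assuming,
            # say, jit_wb_card_page_shift == 7): for index 1000 the card
            # number is 1000 >> 7 == 7.  The cards live as a bitmap
            # *before* the object, so the ImmedLoc path sets bit
            # 7 & 7 == 7 of the byte at offset ~(7 >> 3) == -1.  The
            # RegLoc path reaches the same bit with a single BTS: XOR
            # with -8 turns the card number i into the negative bit
            # offset whose byte is ~(i >> 3) and whose bit is i & 7;
            # here 7 ^ -8 == -1, i.e. bit 7 of byte -1.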
            #
            # patch the JNS above
            offset = self.mc.get_relative_pos() - jns_location
            assert 0 < offset <= 127
            self.mc.overwrite(jns_location-1, chr(offset))

        # patch the JZ above
        offset = self.mc.get_relative_pos() - jz_location
        assert 0 < offset <= 127
        self.mc.overwrite(jz_location-1, chr(offset))

    genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb

    def not_implemented_op_discard(self, op, arglocs):
        not_implemented("not implemented operation: %s" % op.getopname())

    def not_implemented_op(self, op, arglocs, resloc):
        not_implemented("not implemented operation with res: %s" %
                        op.getopname())

    def not_implemented_op_guard(self, op, guard_op,
                                 failaddr, arglocs, resloc):
        not_implemented("not implemented operation (guard): %s" %
                        op.getopname())

    def mark_gc_roots(self, force_index, use_copy_area=False):
        if force_index < 0:
            return     # not needed
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
            if gcrootmap.is_shadow_stack:
                gcrootmap.write_callshape(mark, force_index)
            else:
                if self.gcrootmap_retaddr_forced == 0:
                    self.mc.insert_gcroot_marker(mark)   # common case
                else:
                    assert self.gcrootmap_retaddr_forced != -1, (
                              "two mark_gc_roots() in a CALL_RELEASE_GIL")
                    gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
                    self.gcrootmap_retaddr_forced = -1

    def closing_jump(self, target_token):
        # The backend's logic assumes that the target code is in a piece of
        # assembler that was also called with the same number of arguments,
        # so that the locations [ebp+8..] of the input arguments are valid
        # stack locations both before and after the jump.
        my_nbargs = self.current_clt._debug_nbargs
        target_nbargs = target_token._x86_clt._debug_nbargs
        assert my_nbargs == target_nbargs
        #
        target = target_token._x86_loop_code
        if target_token in self.target_tokens_currently_compiling:
            curpos = self.mc.get_relative_pos() + 5
            self.mc.JMP_l(target - curpos)
        else:
            self.mc.JMP(imm(target))

    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
        assert size & (WORD-1) == 0     # must be correctly aligned
        self.mc.MOV(eax, heap(nursery_free_adr))
        self.mc.LEA_rm(edx.value, (eax.value, size))
        self.mc.CMP(edx, heap(nursery_top_adr))
        self.mc.J_il8(rx86.Conditions['NA'], 0)  # patched later
        jmp_adr = self.mc.get_relative_pos()
        # See comments in _build_malloc_slowpath for the
        # details of the two helper functions that we are calling below.
        # First, we need to call two of them and not just one because we
        # need to have a mark_gc_roots() in between.  Then the calling
        # convention of slowpath_addr{1,2} are tweaked a lot to allow
        # the code here to be just two CALLs: slowpath_addr1 gets the
        # size of the object to allocate from (EDX-EAX) and returns the
        # result in EAX; slowpath_addr2 additionally returns in EDX a
        # copy of heap(nursery_free_adr), so that the final MOV below is
        # a no-op.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
        if not shadow_stack:
            # there are two helpers to call only with asmgcc
            slowpath_addr1 = self.malloc_slowpath1
            self.mc.CALL(imm(slowpath_addr1))
        self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
        slowpath_addr2 = self.malloc_slowpath2
        self.mc.CALL(imm(slowpath_addr2))

        # reserve room for the argument to the real malloc and the
        # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
        # word)
        self._regalloc.needed_extra_stack_locations(1+16)

        offset = self.mc.get_relative_pos() - jmp_adr
        assert 0 < offset <= 127
        self.mc.overwrite(jmp_adr-1, chr(offset))
        self.mc.MOV(heap(nursery_free_adr), edx)


genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
genop_llong_list = {}
genop_math_list = {}
genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST

for name, value in Assembler386.__dict__.iteritems():
    if name.startswith('genop_discard_'):
        opname = name[len('genop_discard_'):]
        num = getattr(rop, opname.upper())
        genop_discard_list[num] = value
    elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
        opname = name[len('genop_guard_'):]
        num = getattr(rop, opname.upper())
        genop_guard_list[num] = value
    elif name.startswith('genop_llong_'):
        opname = name[len('genop_llong_'):]
        num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
        genop_llong_list[num] = value
    elif name.startswith('genop_math_'):
        opname = name[len('genop_math_'):]
        num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
        genop_math_list[num] = value
    elif name.startswith('genop_'):
        opname = name[len('genop_'):]
        num = getattr(rop, opname.upper())
        genop_list[num] = value
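
# For example, genop_int_is_true defined above ends up in
# genop_list[rop.INT_IS_TRUE], and genop_guard_int_is_true in
# genop_guard_list[rop.INT_IS_TRUE], so dispatching on an operation at
# assembly time is a plain indexed lookup on the operation number rather
# than a string lookup by method name.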

# XXX: ri386 migration shims:
def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
    return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)

def addr_add_const(reg_or_imm1, offset):
    return AddressLoc(reg_or_imm1, imm0, 0, offset)

def mem(loc, offset):
    return AddressLoc(loc, imm0, 0, offset)

def heap(addr):
    return AddressLoc(ImmedLoc(addr), imm0, 0, 0)

def not_implemented(msg):
    os.write(2, '[x86/asm] %s\n' % msg)
    raise NotImplementedError(msg)

class BridgeAlreadyCompiled(Exception):
    pass