
/rpython/jit/backend/x86/assembler.py

https://bitbucket.org/bwesterb/pypy
import sys, os
from rpython.jit.backend.llsupport import symbolic, jitframe
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.metainterp.history import Const, Box, BoxInt, ConstInt
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.jit.metainterp.history import JitCellToken
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.annlowlevel import llhelper
from rpython.rlib.jit import AsmInfo
from rpython.rlib import longlong2float
from rpython.jit.backend.model import CompiledLoopToken
from rpython.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs, _get_scale,
    gpr_reg_mgr_cls, xmm_reg_mgr_cls, _valid_addressing_size)
from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, FORCE_INDEX_OFS, WORD,
    IS_X86_32, IS_X86_64)
from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx,
    esp, ebp, esi, edi,
    xmm0, xmm1, xmm2, xmm3,
    xmm4, xmm5, xmm6, xmm7,
    r8, r9, r10, r11,
    r12, r13, r14, r15,
    X86_64_SCRATCH_REG,
    X86_64_XMM_SCRATCH_REG,
    RegLoc, StackLoc, ConstFloatLoc,
    ImmedLoc, AddressLoc, imm,
    imm0, imm1, FloatImmedLoc)
from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.jit.backend.x86 import rx86, regloc, codebuf
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.backend.x86 import support
from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
    have_debug_prints, fatalerror)
from rpython.rlib import rgc
from rpython.rlib.clibffi import FFI_DEFAULT_ABI
from rpython.jit.backend.x86.jump import remap_frame_layout
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.codewriter import longlong
from rpython.rlib.rarithmetic import intmask
from rpython.rlib.objectmodel import compute_unique_id
# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
# better safe than sorry
CALL_ALIGN = 16 // WORD


def align_stack_words(words):
    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
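# For illustration: on x86-32 (WORD == 4, so CALL_ALIGN == 4),
# align_stack_words(5) == (5 + 3) & ~3 == 8; on x86-64 (WORD == 8,
# so CALL_ALIGN == 2), align_stack_words(5) == 6.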
class GuardToken(object):
    def __init__(self, faildescr, failargs, fail_locs, exc,
                 is_guard_not_invalidated, is_guard_not_forced):
        self.faildescr = faildescr
        self.failargs = failargs
        self.fail_locs = fail_locs
        self.exc = exc
        self.is_guard_not_invalidated = is_guard_not_invalidated
        self.is_guard_not_forced = is_guard_not_forced


DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char),  # 'b'ridge, 'l'abel or
                                                      # 'e'ntry point
                              ('number', lltype.Signed))
class Assembler386(object):
    _regalloc = None
    _output_loop_log = None

    def __init__(self, cpu, translate_support_code=False):
        self.cpu = cpu
        self.verbose = False
        self.rtyper = cpu.rtyper
        self.loop_run_counters = []
        self.float_const_neg_addr = 0
        self.float_const_abs_addr = 0
        self.malloc_slowpath1 = 0
        self.malloc_slowpath2 = 0
        self.wb_slowpath = [0, 0, 0, 0]
        self.memcpy_addr = 0
        self.setup_failure_recovery()
        self._debug = False
        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
        self.datablockwrapper = None
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        self.gcrootmap_retaddr_forced = 0
        self.teardown()
        self.force_token_to_dead_frame = {}    # XXX temporary hack

    def set_debug(self, v):
        r = self._debug
        self._debug = v
        return r

    def setup_once(self):
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        self.memcpy_addr = self.cpu.cast_ptr_to_int(support.memcpy_fn)
        self._build_failure_recovery(False)
        self._build_failure_recovery(True)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
            support.ensure_sse2_floats()
            self._build_float_constants()
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            self._build_malloc_slowpath()
        self._build_stack_check_slowpath()
        if gc_ll_descr.gcrootmap:
            self._build_release_gil(gc_ll_descr.gcrootmap)
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            debug_start('jit-backend-counts')
            self.set_debug(have_debug_prints())
            debug_stop('jit-backend-counts')
    def setup(self, looptoken):
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = []
            self.error_trampoline_64 = 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}

    def teardown(self):
        self.pending_guard_tokens = None
        if WORD == 8:
            self.pending_memoryerror_trampoline_from = None
        self.mc = None
        self.current_clt = None
    def finish_once(self):
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                elif struct.type == 'b':
                    prefix = 'bridge ' + str(struct.number)
                else:
                    prefix = 'entry ' + str(struct.number)
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')
    def _build_float_constants(self):
        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
        float_constants = datablockwrapper.malloc_aligned(32, alignment=16)
        datablockwrapper.done()
        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
        qword_padding = '\x00\x00\x00\x00\x00\x00\x00\x00'
        # 0x8000000000000000
        neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80'
        # 0x7FFFFFFFFFFFFFFF
        abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
        data = neg_const + qword_padding + abs_const + qword_padding
        for i in range(len(data)):
            addr[i] = data[i]
        self.float_const_neg_addr = float_constants
        self.float_const_abs_addr = float_constants + 16
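    # Note: the byte strings above are little-endian, so neg_const is the
    # sign-bit mask 0x8000000000000000 and abs_const is 0x7FFFFFFFFFFFFFFF.
    # genop_float_neg and genop_float_abs below XORPD/ANDPD against these
    # 16-byte-aligned constants to flip or clear the sign bit.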
    def _build_malloc_slowpath(self):
        # With asmgcc, we need two helpers, so that we can write two CALL
        # instructions in assembler, with a mark_gc_roots in between.
        # With shadowstack, this is not needed, so we produce a single helper.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
        #
        # ---------- first helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        if self.cpu.supports_floats:          # save the XMM registers in
            for i in range(self.cpu.NUM_REGS):# the *caller* frame, from esp+8
                mc.MOVSD_sx((WORD*2)+8*i, i)
        mc.SUB_rr(edx.value, eax.value)       # compute the size we want
        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        #
        # The registers to save in the copy area: with shadowstack, most
        # registers need to be saved.  With asmgcc, the callee-saved registers
        # don't need to.
        save_in_copy_area = gpr_reg_mgr_cls.REGLOC_TO_COPY_AREA_OFS.items()
        if not shadow_stack:
            save_in_copy_area = [(reg, ofs) for (reg, ofs) in save_in_copy_area
                  if reg not in gpr_reg_mgr_cls.REGLOC_TO_GCROOTMAP_REG_INDEX]
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_br(ofs, reg.value)
        #
        if shadow_stack:
            # ---- shadowstack ----
            mc.SUB_ri(esp.value, 16 - WORD)      # stack alignment of 16 bytes
            if IS_X86_32:
                mc.MOV_sr(0, edx.value)          # push argument
            elif IS_X86_64:
                mc.MOV_rr(edi.value, edx.value)
            mc.CALL(imm(addr))
            mc.ADD_ri(esp.value, 16 - WORD)
        else:
            # ---- asmgcc ----
            if IS_X86_32:
                mc.MOV_sr(WORD, edx.value)       # save it as the new argument
            elif IS_X86_64:
                # rdi can be clobbered: its content was saved in the
                # copy area of the stack
                mc.MOV_rr(edi.value, edx.value)
            mc.JMP(imm(addr))                    # tail call to the real malloc
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath1 = rawstart

        # ---------- second helper for the slow path of malloc ----------
        mc = codebuf.MachineCodeBlockWrapper()
        #
        for reg, ofs in save_in_copy_area:
            mc.MOV_rb(reg.value, ofs)
            assert reg is not eax and reg is not edx
        #
        if self.cpu.supports_floats:          # restore the XMM registers
            for i in range(self.cpu.NUM_REGS):# from where they were saved
                mc.MOVSD_xs(i, (WORD*2)+8*i)
        #
        # Note: we check this after the code above, just because the code
        # above is more than 127 bytes on 64-bits...
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['Z'], 0)     # patched later
        jz_location = mc.get_relative_pos()
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
        mc.RET()
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.  Also note that we didn't RET from this helper;
        # but the code we jump to will actually restore the stack
        # position based on EBP, which will get us out of here for free.
        offset = mc.get_relative_pos() - jz_location
        assert 0 < offset <= 127
        mc.overwrite(jz_location-1, chr(offset))
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath2 = rawstart
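    # On entry to malloc_slowpath1, eax holds the old nursery-free pointer
    # and edx the requested new one (as set up by the inline allocation
    # fast path elsewhere in this backend), so "SUB edx, eax" recovers the
    # allocation size before calling the GC's slow-path malloc.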
    def _build_propagate_exception_path(self):
        if self.cpu.propagate_exception_v < 0:
            return      # not supported (for tests, or non-translated)
        #
        self.mc = codebuf.MachineCodeBlockWrapper()
        #
        # Call the helper, which will return a dead frame object with
        # the correct exception set, or MemoryError by default
        addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception())
        self.mc.CALL(imm(addr))
        #
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu.asmmemmgr, [])
        self.propagate_exception_path = rawstart
        self.mc = None
    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  ...                |    <-- aligned to a multiple of 16
        #    |  retaddr of caller  |
        #    |  my own retaddr     |    <-- esp
        #    +---------------------+
        #
        mc = codebuf.MachineCodeBlockWrapper()
        #
        stack_size = WORD
        if IS_X86_64:
            # on the x86_64, we have to save all the registers that may
            # have been used to pass arguments
            stack_size += 6*WORD + 8*8
            for reg in [edi, esi, edx, ecx, r8, r9]:
                mc.PUSH_r(reg.value)
            mc.SUB_ri(esp.value, 8*8)
            for i in range(8):
                mc.MOVSD_sx(8*i, i)     # xmm0 to xmm7
        #
        if IS_X86_32:
            stack_size += 2*WORD
            mc.PUSH_r(eax.value)        # alignment
            mc.PUSH_r(esp.value)
        elif IS_X86_64:
            mc.MOV_rr(edi.value, esp.value)
        #
        # esp is now aligned to a multiple of 16 again
        mc.CALL(imm(slowpathaddr))
        #
        mc.MOV(eax, heap(self.cpu.pos_exception()))
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['NZ'], 0)
        jnz_location = mc.get_relative_pos()
        #
        if IS_X86_32:
            mc.ADD_ri(esp.value, 2*WORD)    # cancel the two PUSHes above
        elif IS_X86_64:
            # restore the registers
            for i in range(7, -1, -1):
                mc.MOVSD_xs(i, 8*i)
            mc.ADD_ri(esp.value, 8*8)
            for reg in [r9, r8, ecx, edx, esi, edi]:
                mc.POP_r(reg.value)
        #
        mc.RET()
        #
        # patch the JNZ above
        offset = mc.get_relative_pos() - jnz_location
        assert 0 < offset <= 127
        mc.overwrite(jnz_location-1, chr(offset))
        #
        # Call the helper, which will return a dead frame object with
        # the correct exception set, or MemoryError by default
        addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception())
        mc.CALL(imm(addr))
        #
        # footer -- note the ADD, which skips the return address of this
        # function, and will instead return to the caller's caller.  Note
        # also that we completely ignore the saved arguments, because we
        # are interrupting the function.
        mc.ADD_ri(esp.value, stack_size)
        mc.RET()
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.stack_check_slowpath = rawstart
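    # The six GPRs pushed above are the x86-64 System V argument registers
    # (rdi, rsi, rdx, rcx, r8, r9); together with the 8*8 bytes reserved for
    # xmm0-xmm7 this preserves every register that can carry a call argument.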
    def _build_wb_slowpath(self, withcards, withfloats=False):
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all XMM registers.  It takes a single argument just pushed
        # on the stack even on X86_64.  It must restore stack alignment
        # accordingly.
        mc = codebuf.MachineCodeBlockWrapper()
        #
        frame_size = (1 +     # my argument, considered part of my frame
                      1 +     # my return address
                      len(gpr_reg_mgr_cls.save_around_call_regs))
        if withfloats:
            frame_size += 16     # X86_32: 16 words for 8 registers;
                                 # X86_64: just 16 registers
        if IS_X86_32:
            frame_size += 1      # argument to pass to the call
        #
        # align to a multiple of 16 bytes
        frame_size = (frame_size + (CALL_ALIGN-1)) & ~(CALL_ALIGN-1)
        #
        correct_esp_by = (frame_size - 2) * WORD
        mc.SUB_ri(esp.value, correct_esp_by)
        #
        ofs = correct_esp_by
        if withfloats:
            for reg in xmm_reg_mgr_cls.save_around_call_regs:
                ofs -= 8
                mc.MOVSD_sx(ofs, reg.value)
        for reg in gpr_reg_mgr_cls.save_around_call_regs:
            ofs -= WORD
            mc.MOV_sr(ofs, reg.value)
        #
        if IS_X86_32:
            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
            mc.MOV_sr(0, eax.value)
        elif IS_X86_64:
            mc.MOV_rs(edi.value, (frame_size - 1) * WORD)
        mc.CALL(imm(func))
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            mc.MOV_rs(eax.value, (frame_size - 1) * WORD)
            mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
                     imm(-0x80))
        #
        ofs = correct_esp_by
        if withfloats:
            for reg in xmm_reg_mgr_cls.save_around_call_regs:
                ofs -= 8
                mc.MOVSD_xs(reg.value, ofs)
        for reg in gpr_reg_mgr_cls.save_around_call_regs:
            ofs -= WORD
            mc.MOV_rs(reg.value, ofs)
        #
        # ADD esp, correct_esp_by --- but cannot use ADD, because
        # of its effects on the CPU flags
        mc.LEA_rs(esp.value, correct_esp_by)
        mc.RET16_i(WORD)
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
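    # The four helper variants are indexed as withcards + 2 * withfloats:
    # wb_slowpath[0] is the plain barrier, [1] adds card marking, [2] adds
    # XMM saving, and [3] combines both.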
    @staticmethod
    @rgc.no_collect
    def _release_gil_asmgcc(css):
        # similar to trackgcroot.py:pypy_asm_stackwalk, first part
        from rpython.rtyper.memory.gctransform import asmgcroot
        new = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        next = asmgcroot.gcrootanchor.next
        new.next = next
        new.prev = asmgcroot.gcrootanchor
        asmgcroot.gcrootanchor.next = new
        next.prev = new
        # and now release the GIL
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css):
        # first reacquire the GIL
        after = rffi.aroundstate.after
        if after:
            after()
        # similar to trackgcroot.py:pypy_asm_stackwalk, second part
        from rpython.rtyper.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _release_gil_shadowstack():
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        after = rffi.aroundstate.after
        if after:
            after()

    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _CLOSESTACK_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
                                                  lltype.Void))

    def _build_release_gil(self, gcrootmap):
        if gcrootmap.is_shadow_stack:
            releasegil_func = llhelper(self._NOARG_FUNC,
                                       self._release_gil_shadowstack)
            reacqgil_func = llhelper(self._NOARG_FUNC,
                                     self._reacquire_gil_shadowstack)
        else:
            releasegil_func = llhelper(self._CLOSESTACK_FUNC,
                                       self._release_gil_asmgcc)
            reacqgil_func = llhelper(self._CLOSESTACK_FUNC,
                                     self._reacquire_gil_asmgcc)
        self.releasegil_addr = self.cpu.cast_ptr_to_int(releasegil_func)
        self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        '''adds the following attributes to looptoken:
               _x86_function_addr   (address of the generated func, as an int)
               _x86_loop_code       (debug: addr of the start of the ResOps)
               _x86_fullsize        (debug: full size including failure)
               _x86_debug_checksum
        '''
        # XXX this function is too longish and contains some code
        # duplication with assemble_bridge().  Also, we should think
        # about not storing on 'self' attributes that will live only
        # for the duration of compiling one loop or one bridge.
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        #
        self._call_header_with_stack_check()
        stackadjustpos = self._patchable_stackadjust()
        clt._debug_nbargs = len(inputargs)
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, clt.allgcrefs)
        looppos = self.mc.get_relative_pos()
        looptoken._x86_loop_code = looppos
        clt.frame_depth = -1     # temporarily
        frame_depth = self._assemble(regalloc, operations)
        clt.frame_depth = frame_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(looptoken)
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + looppos,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._x86_rawstart = rawstart
            looptoken._x86_fullsize = full_size
            looptoken._x86_ops_offset = ops_offset
        looptoken._x86_function_addr = rawstart

        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Loop # %s: %s" % (looptoken.number, loopname)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos)
    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        descr_number = self.cpu.get_fail_descr_number(faildescr)
        failure_recovery = self._find_failure_recovery_bytecode(faildescr)

        self.setup(original_loop_token)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
        if not we_are_translated():
            assert ([loc.assembler() for loc in arglocs] ==
                    [loc.assembler() for loc in faildescr._x86_debug_faillocs])
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        startpos = self.mc.get_relative_pos()
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             self.current_clt.allgcrefs)

        stackadjustpos = self._patchable_stackadjust()
        frame_depth = self._assemble(regalloc, operations)
        codeendpos = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(original_loop_token)
        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")
        self._patch_stackadjust(rawstart + stackadjustpos, frame_depth)
        self.patch_pending_failure_recoveries(rawstart)
        if not we_are_translated():
            # for the benefit of tests
            faildescr._x86_bridge_frame_depth = frame_depth
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart)
        ops_offset = self.mc.ops_offset
        self.fixup_target_tokens(rawstart)
        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
                                           frame_depth)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Bridge # %s" % (descr_number,)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, fullsize)
        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
    def write_pending_failure_recoveries(self):
        # for each pending guard, generate the code of the recovery stub
        # at the end of self.mc.
        for tok in self.pending_guard_tokens:
            tok.pos_recovery_stub = self.generate_quick_failure(tok)
        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
            self.error_trampoline_64 = self.generate_propagate_error_64()

    def patch_pending_failure_recoveries(self, rawstart):
        # after we wrote the assembler to raw memory, set up
        # tok.faildescr._x86_adr_jump_offset to contain the raw address of
        # the 4-byte target field in the JMP/Jcond instruction, and patch
        # the field in question to point (initially) to the recovery stub
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            addr = rawstart + tok.pos_jump_offset
            tok.faildescr._x86_adr_jump_offset = addr
            relative_target = tok.pos_recovery_stub - (tok.pos_jump_offset + 4)
            assert rx86.fits_in_32bits(relative_target)
            #
            if not tok.is_guard_not_invalidated:
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(relative_target)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                #     (addr-in-the-code-of-the-not-yet-written-jump-target,
                #      relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
                # General idea: Although no code was generated by this
                # guard, the code might be patched with a "JMP rel32" to
                # the guard recovery code.  This recovery code is
                # already generated, and looks like the recovery code
                # for any guard, even if at first it has no jump to it.
                # So we may later write 5 bytes overriding the existing
                # instructions; this works because a CALL instruction
                # would also take at least 5 bytes.  If it could take
                # less, we would run into the issue that overwriting the
                # 5 bytes here might get a few nonsense bytes at the
                # return address of the following CALL.
        if WORD == 8:
            for pos_after_jz in self.pending_memoryerror_trampoline_from:
                assert self.error_trampoline_64 != 0     # only if non-empty
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)

    def _register_counter(self, tp, number, token):
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct
    def _find_failure_recovery_bytecode(self, faildescr):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        if adr_jump_offset == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # follow the JMP/Jcond
        p = rffi.cast(rffi.INTP, adr_jump_offset)
        adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
        # skip the CALL
        if WORD == 4:
            adr_target += 5     # CALL imm
        else:
            adr_target += 13    # MOV r11, imm-as-8-bytes; CALL *r11
        return adr_target

    def patch_jump_for_descr(self, faildescr, adr_new_target):
        adr_jump_offset = faildescr._x86_adr_jump_offset
        assert adr_jump_offset != 0
        offset = adr_new_target - (adr_jump_offset + 4)
        # If the new target fits within a rel32 of the jump, just patch
        # that.  Otherwise, leave the original rel32 to the recovery stub in
        # place, but clobber the recovery stub with a jump to the real
        # target.
        mc = codebuf.MachineCodeBlockWrapper()
        if rx86.fits_in_32bits(offset):
            mc.writeimm32(offset)
            mc.copy_to_raw_memory(adr_jump_offset)
        else:
            # "mov r11, addr; jmp r11" is up to 13 bytes, which fits in there
            # because we always write "mov r11, imm-as-8-bytes; call *r11" in
            # the first place.
            mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
            mc.JMP_r(X86_64_SCRATCH_REG.value)
            p = rffi.cast(rffi.INTP, adr_jump_offset)
            adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
            mc.copy_to_raw_memory(adr_target)
        faildescr._x86_adr_jump_offset = 0    # means "patched"

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._x86_loop_code += rawstart
        self.target_tokens_currently_compiling = None
    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        box = BoxInt()
        box2 = BoxInt()
        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
                            box, descr=self.debug_counter_descr),
               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                            None, descr=self.debug_counter_descr)]
        operations.extend(ops)

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug:
            s = 0
            for op in operations:
                s += op.getopnum()
            looptoken._x86_debug_checksum = s

            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations
    def _assemble(self, regalloc, operations):
        self._regalloc = regalloc
        regalloc.compute_hint_frame_locations(operations)
        regalloc.walk_operations(operations)
        if we_are_translated() or self.cpu.dont_keepalive_stuff:
            self._regalloc = None   # else keep it around for debugging
        frame_depth = regalloc.get_final_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            target_frame_depth = jump_target_descr._x86_clt.frame_depth
            frame_depth = max(frame_depth, target_frame_depth)
        return frame_depth

    def _patchable_stackadjust(self):
        # stack adjustment LEA
        self.mc.LEA32_rb(esp.value, 0)
        return self.mc.get_relative_pos() - 4

    def _patch_stackadjust(self, adr_lea, allocated_depth):
        # patch stack adjustment LEA
        mc = codebuf.MachineCodeBlockWrapper()
        # Compute the correct offset for the instruction LEA ESP, [EBP-4*words]
        mc.writeimm32(self._get_offset_of_ebp_from_esp(allocated_depth))
        mc.copy_to_raw_memory(adr_lea)

    def _get_offset_of_ebp_from_esp(self, allocated_depth):
        # Given that [EBP] is where we saved EBP, i.e. in the last word
        # of our fixed frame, then the 'words' value is:
        words = (FRAME_FIXED_SIZE - 1) + allocated_depth
        # align, e.g. for Mac OS X
        aligned_words = align_stack_words(words+2)-2   # 2 = EIP+EBP
        return -WORD * aligned_words
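    # Rough illustration (FRAME_FIXED_SIZE == 5 is an assumed value here;
    # the real one comes from arch.py): with allocated_depth == 10,
    # words == 14 and align_stack_words(16) - 2 rounds the frame so that
    # esp stays 16-byte aligned counting the saved EIP+EBP; the patched
    # instruction is then LEA esp, [ebp - WORD*aligned_words].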
    def _call_header(self):
        # NB. the shape of the frame is hard-coded in get_basic_shape() too.
        # Also, make sure this is consistent with FRAME_FIXED_SIZE.
        self.mc.PUSH_r(ebp.value)
        self.mc.MOV_rr(ebp.value, esp.value)
        for loc in self.cpu.CALLEE_SAVE_REGISTERS:
            self.mc.PUSH_r(loc.value)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)

    def _call_header_with_stack_check(self):
        if self.stack_check_slowpath == 0:
            pass                # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
            self.mc.SUB(eax, esp)                       # SUB eax, current
            self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
            self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
            jb_location = self.mc.get_relative_pos()
            self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
            # patch the JBE above                       # .skip:
            offset = self.mc.get_relative_pos() - jb_location
            assert 0 < offset <= 127
            self.mc.overwrite(jb_location-1, chr(offset))
            #
        self._call_header()

    def _call_footer(self):
        self.mc.LEA_rb(esp.value, -len(self.cpu.CALLEE_SAVE_REGISTERS) * WORD)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)

        for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
            self.mc.POP_r(self.cpu.CALLEE_SAVE_REGISTERS[i].value)

        self.mc.POP_r(ebp.value)
        self.mc.RET()

    def _call_header_shadowstack(self, gcrootmap):
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (ebp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_rj(eax.value, rst)            # MOV eax, [rootstacktop]
        else:
            self.mc.MOV_ri(r13.value, rst)            # MOV r13, rootstacktop
            self.mc.MOV_rm(eax.value, (r13.value, 0)) # MOV eax, [r13]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.LEA_rm(ebx.value, (eax.value, 2*WORD))  # LEA ebx, [eax+2*WORD]
        self.mc.MOV_mi((eax.value, WORD), MARKER)       # MOV [eax+WORD], MARKER
        self.mc.MOV_mr((eax.value, 0), ebp.value)       # MOV [eax], ebp
        #
        if rx86.fits_in_32bits(rst):
            self.mc.MOV_jr(rst, ebx.value)            # MOV [rootstacktop], ebx
        else:
            self.mc.MOV_mr((r13.value, 0), ebx.value) # MOV [r13], ebx

    def _call_footer_shadowstack(self, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        if rx86.fits_in_32bits(rst):
            self.mc.SUB_ji8(rst, 2*WORD)             # SUB [rootstacktop], 2*WORD
        else:
            self.mc.MOV_ri(ebx.value, rst)           # MOV ebx, rootstacktop
            self.mc.SUB_mi8((ebx.value, 0), 2*WORD)  # SUB [ebx], 2*WORD
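    # The footer simply pops the same two words (MARKER_FRAME and the saved
    # ebp) that _call_header_shadowstack pushed, by moving rootstacktop back
    # down by 2*WORD.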
    def redirect_call_assembler(self, oldlooptoken, newlooptoken):
        # some minimal sanity checking
        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
        assert old_nbargs == new_nbargs
        # we overwrite the instructions at the old _x86_direct_bootstrap_code
        # to start with a JMP to the new _x86_direct_bootstrap_code.
        # Ideally we should rather patch all existing CALLs, but well.
        oldadr = oldlooptoken._x86_function_addr
        target = newlooptoken._x86_function_addr
        mc = codebuf.MachineCodeBlockWrapper()
        mc.JMP(imm(target))
        if WORD == 4:         # keep in sync with prepare_loop()
            assert mc.get_relative_pos() == 5
        else:
            assert mc.get_relative_pos() <= 13
        mc.copy_to_raw_memory(oldadr)

    def dump(self, text):
        if not self.verbose:
            return
        _prev = Box._extended_display
        try:
            Box._extended_display = False
            pos = self.mc.get_relative_pos()
            print >> sys.stderr, ' 0x%x  %s' % (pos, text)
        finally:
            Box._extended_display = _prev
    # ------------------------------------------------------------

    def mov(self, from_loc, to_loc):
        if ((isinstance(from_loc, RegLoc) and from_loc.is_xmm) or
            (isinstance(to_loc, RegLoc) and to_loc.is_xmm)):
            self.mc.MOVSD(to_loc, from_loc)
        else:
            assert to_loc is not ebp
            self.mc.MOV(to_loc, from_loc)

    regalloc_mov = mov # legacy interface

    def regalloc_push(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.SUB_ri(esp.value, 8)   # = size of doubles
            self.mc.MOVSD_sx(0, loc.value)
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.PUSH_b(loc.value + 4)
            self.mc.PUSH_b(loc.value)
        else:
            self.mc.PUSH(loc)

    def regalloc_pop(self, loc):
        if isinstance(loc, RegLoc) and loc.is_xmm:
            self.mc.MOVSD_xs(loc.value, 0)
            self.mc.ADD_ri(esp.value, 8)   # = size of doubles
        elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
            # XXX evil trick
            self.mc.POP_b(loc.value)
            self.mc.POP_b(loc.value + 4)
        else:
            self.mc.POP(loc)

    def regalloc_immedmem2mem(self, from_loc, to_loc):
        # move a ConstFloatLoc directly to a StackLoc, as two MOVs
        # (even on x86-64, because the immediates are encoded as 32 bits)
        assert isinstance(from_loc, ConstFloatLoc)
        assert isinstance(to_loc, StackLoc)
        low_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[0]
        high_part = rffi.cast(rffi.CArrayPtr(rffi.INT), from_loc.value)[1]
        low_part = intmask(low_part)
        high_part = intmask(high_part)
        self.mc.MOV32_bi(to_loc.value, low_part)
        self.mc.MOV32_bi(to_loc.value + 4, high_part)
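    # Each half of the 64-bit constant is written with a 32-bit immediate
    # MOV; x86 has no "MOV mem64, imm64" form, so two stores are needed.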
    def regalloc_perform(self, op, arglocs, resloc):
        genop_list[op.getopnum()](self, op, arglocs, resloc)

    def regalloc_perform_discard(self, op, arglocs):
        genop_discard_list[op.getopnum()](self, op, arglocs)

    def regalloc_perform_llong(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_llong_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_math(self, op, arglocs, resloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        genop_math_list[oopspecindex](self, op, arglocs, resloc)

    def regalloc_perform_with_guard(self, op, guard_op, faillocs,
                                    arglocs, resloc):
        faildescr = guard_op.getdescr()
        assert isinstance(faildescr, AbstractFailDescr)
        failargs = guard_op.getfailargs()
        guard_opnum = guard_op.getopnum()
        guard_token = self.implement_guard_recovery(guard_opnum,
                                                    faildescr, failargs,
                                                    faillocs)
        if op is None:
            dispatch_opnum = guard_opnum
        else:
            dispatch_opnum = op.getopnum()
        genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token,
                                         arglocs, resloc)
        if not we_are_translated():
            # must be added by the genop_guard_list[]()
            assert guard_token is self.pending_guard_tokens[-1]

    def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc):
        self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs,
                                         resloc)
    def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0):
        self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale))

    def _unaryop(asmop):
        def genop_unary(self, op, arglocs, resloc):
            getattr(self.mc, asmop)(arglocs[0])
        return genop_unary

    def _binaryop(asmop, can_swap=False):
        def genop_binary(self, op, arglocs, result_loc):
            getattr(self.mc, asmop)(arglocs[0], arglocs[1])
        return genop_binary

    def _binaryop_or_lea(asmop, is_add):
        def genop_binary_or_lea(self, op, arglocs, result_loc):
            # use a regular ADD or SUB if result_loc is arglocs[0],
            # and a LEA only if different.
            if result_loc is arglocs[0]:
                getattr(self.mc, asmop)(arglocs[0], arglocs[1])
            else:
                loc = arglocs[0]
                argloc = arglocs[1]
                assert isinstance(loc, RegLoc)
                assert isinstance(argloc, ImmedLoc)
                assert isinstance(result_loc, RegLoc)
                delta = argloc.value
                if not is_add:    # subtraction
                    delta = -delta
                self.mc.LEA_rm(result_loc.value, (loc.value, delta))
        return genop_binary_or_lea

    def _cmpop(cond, rev_cond):
        def genop_cmp(self, op, arglocs, result_loc):
            rl = result_loc.lowest8bits()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                self.mc.SET_ir(rx86.Conditions[cond], rl.value)
            self.mc.MOVZX8_rr(result_loc.value, rl.value)
        return genop_cmp
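    # When arg0 is a Const, the CMP operands are swapped, so the reversed
    # condition is used: e.g. genop_int_lt = _cmpop("L", "G") below emits
    # "CMP arg1, const; SETG" to compute "const < arg1".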
    def _cmpop_float(cond, rev_cond, is_ne=False):
        def genop_cmp(self, op, arglocs, result_loc):
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond

            tmp1 = result_loc.lowest8bits()
            if IS_X86_32:
                tmp2 = result_loc.higher8bits()
            elif IS_X86_64:
                tmp2 = X86_64_SCRATCH_REG.lowest8bits()

            self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value)
            if is_ne:
                self.mc.SET_ir(rx86.Conditions['P'], tmp2.value)
                self.mc.OR8_rr(tmp1.value, tmp2.value)
            else:
                self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value)
                self.mc.AND8_rr(tmp1.value, tmp2.value)
            self.mc.MOVZX8_rr(result_loc.value, tmp1.value)
        return genop_cmp

    def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond):
        def genop_cmp_guard(self, op, guard_op, guard_token, arglocs,
                            result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(op.getarg(0), Const):
                self.mc.CMP(arglocs[1], arglocs[0])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, rev_cond)
                else:
                    self.implement_guard(guard_token, false_rev_cond)
            else:
                self.mc.CMP(arglocs[0], arglocs[1])
                if guard_opnum == rop.GUARD_FALSE:
                    self.implement_guard(guard_token, cond)
                else:
                    self.implement_guard(guard_token, false_cond)
        return genop_cmp_guard

    def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond):
        need_direct_jp = 'A' not in cond
        need_rev_jp = 'A' not in rev_cond
        def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs,
                                  result_loc):
            guard_opnum = guard_op.getopnum()
            if isinstance(arglocs[0], RegLoc):
                self.mc.UCOMISD(arglocs[0], arglocs[1])
                checkcond = cond
                checkfalsecond = false_cond
                need_jp = need_direct_jp
            else:
                self.mc.UCOMISD(arglocs[1], arglocs[0])
                checkcond = rev_cond
                checkfalsecond = false_rev_cond
                need_jp = need_rev_jp
            if guard_opnum == rop.GUARD_FALSE:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 6)
                self.implement_guard(guard_token, checkcond)
            else:
                if need_jp:
                    self.mc.J_il8(rx86.Conditions['P'], 2)
                    self.mc.J_il8(rx86.Conditions[checkcond], 5)
                    self.implement_guard(guard_token)
                else:
                    self.implement_guard(guard_token, checkfalsecond)
        return genop_cmp_guard_float
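    # The hard-coded byte offsets above match the instruction sizes emitted
    # by implement_guard: "JP +6" skips a 6-byte Jcc rel32, while the
    # "JP +2" / "Jcc +5" pair routes NaN results (UCOMISD sets the parity
    # flag on an unordered compare) around a 5-byte unconditional JMP rel32
    # into the guard's failure path.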
    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
                   argtypes=None, callconv=FFI_DEFAULT_ABI):
        if IS_X86_64:
            return self._emit_call_64(force_index, x, arglocs, start,
                                      argtypes)
        p = 0
        n = len(arglocs)
        for i in range(start, n):
            loc = arglocs[i]
            if isinstance(loc, RegLoc):
                if loc.is_xmm:
                    self.mc.MOVSD_sx(p, loc.value)
                else:
                    self.mc.MOV_sr(p, loc.value)
            p += loc.get_width()
        p = 0
        for i in range(start, n):
            loc = arglocs[i]
            if not isinstance(loc, RegLoc):
                if loc.get_width() == 8:
                    self.mc.MOVSD(xmm0, loc)
                    self.mc.MOVSD_sx(p, xmm0.value)
                else:
                    self.mc.MOV(tmp, loc)
                    self.mc.MOV_sr(p, tmp.value)
            p += loc.get_width()
        # x is a location
        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        #
        if callconv != FFI_DEFAULT_ABI:
            self._fix_stdcall(callconv, p)
        #
        self._regalloc.needed_extra_stack_locations(p//WORD)

    def _fix_stdcall(self, callconv, p):
        from rpython.rlib.clibffi import FFI_STDCALL
        assert callconv == FFI_STDCALL
        # it's a bit stupid, but we're just going to cancel the fact that
        # the called function just added 'p' to ESP, by subtracting it again.
        self.mc.SUB_ri(esp.value, p)
    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
        src_locs = []
        dst_locs = []
        xmm_src_locs = []
        xmm_dst_locs = []
        pass_on_stack = []
        singlefloats = None

        # In reverse order for use with pop()
        unused_gpr = [r9, r8, ecx, edx, esi, edi]
        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]

        for i in range(start, len(arglocs)):
            loc = arglocs[i]
            # XXX: Should be much simpler to tell whether a location is a
            # float! It's so ugly because we have to "guard" the access to
            # .type with isinstance, since not all AssemblerLocation classes
            # are "typed"
            if ((isinstance(loc, RegLoc) and loc.is_xmm) or
                (isinstance(loc, StackLoc) and loc.type == FLOAT) or
                (isinstance(loc, ConstFloatLoc))):
                if len(unused_xmm) > 0:
                    xmm_src_locs.append(loc)
                    xmm_dst_locs.append(unused_xmm.pop())
                else:
                    pass_on_stack.append(loc)
            elif argtypes is not None and argtypes[i-start] == 'S':
                # Singlefloat argument
                if len(unused_xmm) > 0:
                    if singlefloats is None: singlefloats = []
                    singlefloats.append((loc, unused_xmm.pop()))
                else:
                    pass_on_stack.append(loc)
            else:
                if len(unused_gpr) > 0:
                    src_locs.append(loc)
                    dst_locs.append(unused_gpr.pop())
                else:
                    pass_on_stack.append(loc)

        # Emit instructions to pass the stack arguments
        # XXX: Would be nice to let remap_frame_layout take care of this, but
        # we'd need to create something like StackLoc, but relative to esp,
        # and I don't know if it's worth it.
        for i in range(len(pass_on_stack)):
            loc = pass_on_stack[i]
            if not isinstance(loc, RegLoc):
                if isinstance(loc, StackLoc) and loc.type == FLOAT:
                    self.mc.MOVSD(X86_64_XMM_SCRATCH_REG, loc)
                    self.mc.MOVSD_sx(i*WORD, X86_64_XMM_SCRATCH_REG.value)
                else:
                    self.mc.MOV(X86_64_SCRATCH_REG, loc)
                    self.mc.MOV_sr(i*WORD, X86_64_SCRATCH_REG.value)
            else:
                # It's a register
                if loc.is_xmm:
                    self.mc.MOVSD_sx(i*WORD, loc.value)
                else:
                    self.mc.MOV_sr(i*WORD, loc.value)

        # Handle register arguments: first remap the xmm arguments
        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
                           X86_64_XMM_SCRATCH_REG)

        # Load the singlefloat arguments from main regs or stack to xmm regs
        if singlefloats is not None:
            for src, dst in singlefloats:
                if isinstance(src, ImmedLoc):
                    self.mc.MOV(X86_64_SCRATCH_REG, src)
                    src = X86_64_SCRATCH_REG
                self.mc.MOVD(dst, src)

        # Finally remap the arguments in the main regs
        # If x is a register and is in dst_locs, then oops, it needs to
        # be moved away:
        if x in dst_locs:
            src_locs.append(x)
            dst_locs.append(r10)
            x = r10
        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)

        self.mc.CALL(x)
        self.mark_gc_roots(force_index)
        self._regalloc.needed_extra_stack_locations(len(pass_on_stack))
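    # Register assignment above follows the System V AMD64 calling
    # convention: the first six integer arguments go in rdi, rsi, rdx, rcx,
    # r8, r9 and the first eight float arguments in xmm0-xmm7; everything
    # else is passed on the stack.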
    def call(self, addr, args, res):
        force_index = self.write_new_force_index()
        self._emit_call(force_index, imm(addr), args)
        assert res is eax

    def write_new_force_index(self):
        # for shadowstack only: get a new, unused force_index number and
        # write it to FORCE_INDEX_OFS.  Used to record the call shape
        # (i.e. where the GC pointers are in the stack) around a CALL
        # instruction that doesn't already have a force_index.
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            clt = self.current_clt
            force_index = clt.reserve_and_record_some_faildescr_index()
            self.mc.MOV_bi(FORCE_INDEX_OFS, force_index)
            return force_index
        else:
            # the return value is ignored, apart from the fact that it
            # is not negative.
            return 0
    genop_int_neg = _unaryop("NEG")
    genop_int_invert = _unaryop("NOT")

    genop_int_add = _binaryop_or_lea("ADD", True)
    genop_int_sub = _binaryop_or_lea("SUB", False)
    genop_int_mul = _binaryop("IMUL", True)
    genop_int_and = _binaryop("AND", True)
    genop_int_or = _binaryop("OR", True)
    genop_int_xor = _binaryop("XOR", True)
    genop_int_lshift = _binaryop("SHL")
    genop_int_rshift = _binaryop("SAR")
    genop_uint_rshift = _binaryop("SHR")
    genop_float_add = _binaryop("ADDSD", True)
    genop_float_sub = _binaryop('SUBSD')
    genop_float_mul = _binaryop('MULSD', True)
    genop_float_truediv = _binaryop('DIVSD')

    genop_int_lt = _cmpop("L", "G")
    genop_int_le = _cmpop("LE", "GE")
    genop_int_eq = _cmpop("E", "E")
    genop_int_ne = _cmpop("NE", "NE")
    genop_int_gt = _cmpop("G", "L")
    genop_int_ge = _cmpop("GE", "LE")
    genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq
    genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne

    genop_float_lt = _cmpop_float('B', 'A')
    genop_float_le = _cmpop_float('BE', 'AE')
    genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True)
    genop_float_eq = _cmpop_float('E', 'E')
    genop_float_gt = _cmpop_float('A', 'B')
    genop_float_ge = _cmpop_float('AE', 'BE')

    genop_uint_gt = _cmpop("A", "B")
    genop_uint_lt = _cmpop("B", "A")
    genop_uint_le = _cmpop("BE", "AE")
    genop_uint_ge = _cmpop("AE", "BE")

    genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE")
    genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L")
    genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE")
    genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E")
    genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE")
    genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G")
    genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq
    genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne

    genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE")
    genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE")
    genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B")
    genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A")

    genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE", "BE")
    genop_guard_float_le = _cmpop_guard_float("BE", "AE", "A", "B")
    genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE", "NE")
    genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE", "AE")
    genop_guard_float_ge = _cmpop_guard_float("AE", "BE", "B", "A")
    def genop_math_sqrt(self, op, arglocs, resloc):
        self.mc.SQRTSD(arglocs[0], resloc)

    def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs,
                             result_loc):
        guard_opnum = guard_op.getopnum()
        if isinstance(arglocs[0], RegLoc):
            self.mc.UCOMISD(arglocs[0], arglocs[1])
        else:
            self.mc.UCOMISD(arglocs[1], arglocs[0])
        if guard_opnum == rop.GUARD_TRUE:
            self.mc.J_il8(rx86.Conditions['P'], 6)
            self.implement_guard(guard_token, 'E')
        else:
            self.mc.J_il8(rx86.Conditions['P'], 2)
            self.mc.J_il8(rx86.Conditions['E'], 5)
            self.implement_guard(guard_token)
    def genop_float_neg(self, op, arglocs, resloc):
        # Following what gcc does: res = x ^ 0x8000000000000000
        self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr))

    def genop_float_abs(self, op, arglocs, resloc):
        # Following what gcc does: res = x & 0x7FFFFFFFFFFFFFFF
        self.mc.ANDPD(arglocs[0], heap(self.float_const_abs_addr))

    def genop_cast_float_to_int(self, op, arglocs, resloc):
        self.mc.CVTTSD2SI(resloc, arglocs[0])

    def genop_cast_int_to_float(self, op, arglocs, resloc):
        self.mc.CVTSI2SD(resloc, arglocs[0])

    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
        loc0, loctmp = arglocs
        self.mc.CVTSD2SS(loctmp, loc0)
        assert isinstance(resloc, RegLoc)
        assert isinstance(loctmp, RegLoc)
        self.mc.MOVD_rx(resloc.value, loctmp.value)

    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        assert isinstance(resloc, RegLoc)
        assert isinstance(loc0, RegLoc)
        self.mc.MOVD_xr(resloc.value, loc0.value)
        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)

    def genop_convert_float_bytes_to_longlong(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)

    def genop_convert_longlong_bytes_to_float(self, op, arglocs, resloc):
        loc0, = arglocs
        if longlong.is_64_bit:
            assert isinstance(resloc, RegLoc)
            assert isinstance(loc0, RegLoc)
            self.mc.MOVD(resloc, loc0)
        else:
            self.mov(loc0, resloc)
    def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs,
                                resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'Z')
        else:
            self.implement_guard(guard_token, 'NZ')

    def genop_int_is_true(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['NE'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs,
                                resloc):
        guard_opnum = guard_op.getopnum()
        self.mc.CMP(arglocs[0], imm0)
        if guard_opnum == rop.GUARD_TRUE:
            self.implement_guard(guard_token, 'NZ')
        else:
            self.implement_guard(guard_token, 'Z')

    def genop_int_is_zero(self, op, arglocs, resloc):
        self.mc.CMP(arglocs[0], imm0)
        rl = resloc.lowest8bits()
        self.mc.SET_ir(rx86.Conditions['E'], rl.value)
        self.mc.MOVZX8(resloc, rl)

    def genop_same_as(self, op, arglocs, resloc):
        self.mov(arglocs[0], resloc)
    genop_cast_ptr_to_int = genop_same_as
    genop_cast_int_to_ptr = genop_same_as

    def genop_int_force_ge_zero(self, op, arglocs, resloc):
        self.mc.TEST(arglocs[0], arglocs[0])
        self.mov(imm0, resloc)
        self.mc.CMOVNS(resloc, arglocs[0])

    def genop_int_mod(self, op, arglocs, resloc):
        if IS_X86_32:
            self.mc.CDQ()
        elif IS_X86_64:
            self.mc.CQO()
        self.mc.IDIV_r(ecx.value)

    genop_int_floordiv = genop_int_mod

    def genop_uint_floordiv(self, op, arglocs, resloc):
        self.mc.XOR_rr(edx.value, edx.value)
        self.mc.DIV_r(ecx.value)
  1292. genop_llong_add = _binaryop("PADDQ", True)
  1293. genop_llong_sub = _binaryop("PSUBQ")
  1294. genop_llong_and = _binaryop("PAND", True)
  1295. genop_llong_or = _binaryop("POR", True)
  1296. genop_llong_xor = _binaryop("PXOR", True)
  1297. def genop_llong_to_int(self, op, arglocs, resloc):
  1298. loc = arglocs[0]
  1299. assert isinstance(resloc, RegLoc)
  1300. if isinstance(loc, RegLoc):
  1301. self.mc.MOVD_rx(resloc.value, loc.value)
  1302. elif isinstance(loc, StackLoc):
  1303. self.mc.MOV_rb(resloc.value, loc.value)
  1304. else:
  1305. not_implemented("llong_to_int: %s" % (loc,))
  1306. def genop_llong_from_int(self, op, arglocs, resloc):
  1307. loc1, loc2 = arglocs
  1308. if isinstance(loc1, ConstFloatLoc):
  1309. assert loc2 is None
  1310. self.mc.MOVSD(resloc, loc1)
  1311. else:
  1312. assert isinstance(loc1, RegLoc)
  1313. assert isinstance(loc2, RegLoc)
  1314. assert isinstance(resloc, RegLoc)
  1315. self.mc.MOVD_xr(loc2.value, loc1.value)
  1316. self.mc.PSRAD_xi(loc2.value, 31) # -> 0 or -1
  1317. self.mc.MOVD_xr(resloc.value, loc1.value)
  1318. self.mc.PUNPCKLDQ_xx(resloc.value, loc2.value)
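# The pair of instructions above sign-extends a 32-bit int to 64 bits using
# SSE only: PSRAD by 31 replicates the sign bit of loc2's low doubleword
# across the whole doubleword (giving 0 or -1), and PUNPCKLDQ interleaves
# the low doublewords of resloc and loc2, producing [int, sign] in the xmm
# register. E.g. loc1 = -5 (0xFFFFFFFB) yields 0xFFFFFFFFFFFFFFFB, i.e. -5
# as a long long.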
  1319. def genop_llong_from_uint(self, op, arglocs, resloc):
  1320. loc1, = arglocs
  1321. assert isinstance(resloc, RegLoc)
  1322. assert isinstance(loc1, RegLoc)
  1323. self.mc.MOVD_xr(resloc.value, loc1.value)
  1324. def genop_llong_eq(self, op, arglocs, resloc):
  1325. loc1, loc2, locxtmp = arglocs
  1326. self.mc.MOVSD(locxtmp, loc1)
  1327. self.mc.PCMPEQD(locxtmp, loc2)
  1328. self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
  1329. # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
  1330. # depending on the result of the comparison of each of the two
  1331. # double-words of loc1 and loc2. The higher 8 bits contain random
  1332. # results. We want to map 0xFF to 1, and 0x00, 0x0F and 0xF0 to 0.
  1333. self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
  1334. self.mc.SBB_rr(resloc.value, resloc.value)
  1335. self.mc.ADD_ri(resloc.value, 1)
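# Worked example of the CMP8/SBB/ADD sequence above, which turns
# "low byte == 0xFF" into a boolean without a branch:
#     byte == 0xFF: CMP8 byte, -1 leaves CF=0; SBB r, r -> 0;  ADD 1 -> 1
#     byte == 0xF0: CMP8 byte, -1 leaves CF=1; SBB r, r -> -1; ADD 1 -> 0
# SBB of a register with itself yields 0 - CF, i.e. 0 or -1, which the
# final ADD turns into 1 or 0.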
  1336. def genop_llong_ne(self, op, arglocs, resloc):
  1337. loc1, loc2, locxtmp = arglocs
  1338. self.mc.MOVSD(locxtmp, loc1)
  1339. self.mc.PCMPEQD(locxtmp, loc2)
  1340. self.mc.PMOVMSKB_rx(resloc.value, locxtmp.value)
  1341. # Now the lower 8 bits of resloc contain 0x00, 0x0F, 0xF0 or 0xFF
  1342. # depending on the result of the comparison of each of the two
  1343. # double-words of loc1 and loc2. The higher 8 bits contain random
  1344. # results. We want to map 0xFF to 0, and 0x00, 0x0F and 0xF0 to 1.
  1345. self.mc.CMP8_ri(resloc.value | rx86.BYTE_REG_FLAG, -1)
  1346. self.mc.SBB_rr(resloc.value, resloc.value)
  1347. self.mc.NEG_r(resloc.value)
  1348. def genop_llong_lt(self, op, arglocs, resloc):
  1349. # XXX just a special case for now: "x < 0"
  1350. loc1, = arglocs
  1351. self.mc.PMOVMSKB_rx(resloc.value, loc1.value)
  1352. self.mc.SHR_ri(resloc.value, 7)
  1353. self.mc.AND_ri(resloc.value, 1)
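# PMOVMSKB gathers the top bit of each of the 16 bytes of loc1 into the low
# 16 bits of resloc, so bit 7 of the mask is the sign bit of the low
# quadword; SHR by 7 and AND 1 extract exactly that bit. E.g. for x = -1,
# every byte of the low quadword has its top bit set, so
# (mask >> 7) & 1 == 1.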
  1354. # ----------
  1355. def genop_call_malloc_gc(self, op, arglocs, result_loc):
  1356. self.genop_call(op, arglocs, result_loc)
  1357. self.propagate_memoryerror_if_eax_is_null()
  1358. def propagate_memoryerror_if_eax_is_null(self):
  1359. # if self.propagate_exception_path == 0 (tests), this may jump to 0
1360. # and segfault. Too bad. The alternative is to continue anyway
1361. # with eax == 0, but that would segfault too.
  1362. self.mc.TEST_rr(eax.value, eax.value)
  1363. if WORD == 4:
  1364. self.mc.J_il(rx86.Conditions['Z'], self.propagate_exception_path)
  1365. self.mc.add_pending_relocation()
  1366. elif WORD == 8:
  1367. self.mc.J_il(rx86.Conditions['Z'], 0)
  1368. pos = self.mc.get_relative_pos()
  1369. self.pending_memoryerror_trampoline_from.append(pos)
  1370. # ----------
  1371. def load_from_mem(self, resloc, source_addr, size_loc, sign_loc):
  1372. assert isinstance(resloc, RegLoc)
  1373. size = size_loc.value
  1374. sign = sign_loc.value
  1375. if resloc.is_xmm:
  1376. self.mc.MOVSD(resloc, source_addr)
  1377. elif size == WORD:
  1378. self.mc.MOV(resloc, source_addr)
  1379. elif size == 1:
  1380. if sign:
  1381. self.mc.MOVSX8(resloc, source_addr)
  1382. else:
  1383. self.mc.MOVZX8(resloc, source_addr)
  1384. elif size == 2:
  1385. if sign:
  1386. self.mc.MOVSX16(resloc, source_addr)
  1387. else:
  1388. self.mc.MOVZX16(resloc, source_addr)
  1389. elif IS_X86_64 and size == 4:
  1390. if sign:
  1391. self.mc.MOVSX32(resloc, source_addr)
  1392. else:
  1393. self.mc.MOV32(resloc, source_addr) # zero-extending
  1394. else:
  1395. not_implemented("load_from_mem size = %d" % size)
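# The sign_loc flag selects between sign- and zero-extending loads, which
# matters for any size smaller than a word. E.g. loading the byte 0x80:
#     MOVSX8 -> 0xFF...F80, i.e. -128 (signed char)
#     MOVZX8 -> 0x00...080, i.e. 128 (unsigned char)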
  1396. def save_into_mem(self, dest_addr, value_loc, size_loc):
  1397. size = size_loc.value
  1398. if isinstance(value_loc, RegLoc) and value_loc.is_xmm:
  1399. self.mc.MOVSD(dest_addr, value_loc)
  1400. elif size == 1:
  1401. self.mc.MOV8(dest_addr, value_loc.lowest8bits())
  1402. elif size == 2:
  1403. self.mc.MOV16(dest_addr, value_loc)
  1404. elif size == 4:
  1405. self.mc.MOV32(dest_addr, value_loc)
  1406. elif size == 8:
  1407. if IS_X86_64:
  1408. self.mc.MOV(dest_addr, value_loc)
  1409. else:
  1410. assert isinstance(value_loc, FloatImmedLoc)
  1411. self.mc.MOV(dest_addr, value_loc.low_part_loc())
  1412. self.mc.MOV(dest_addr.add_offset(4), value_loc.high_part_loc())
  1413. else:
  1414. not_implemented("save_into_mem size = %d" % size)
  1415. def genop_getfield_gc(self, op, arglocs, resloc):
  1416. base_loc, ofs_loc, size_loc, sign_loc = arglocs
  1417. assert isinstance(size_loc, ImmedLoc)
  1418. source_addr = AddressLoc(base_loc, ofs_loc)
  1419. self.load_from_mem(resloc, source_addr, size_loc, sign_loc)
  1420. genop_getfield_raw = genop_getfield_gc
  1421. genop_getfield_raw_pure = genop_getfield_gc
  1422. genop_getfield_gc_pure = genop_getfield_gc
  1423. def genop_getarrayitem_gc(self, op, arglocs, resloc):
  1424. base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
  1425. assert isinstance(ofs, ImmedLoc)
  1426. assert isinstance(size_loc, ImmedLoc)
  1427. scale = _get_scale(size_loc.value)
  1428. src_addr = addr_add(base_loc, ofs_loc, ofs.value, scale)
  1429. self.load_from_mem(resloc, src_addr, size_loc, sign_loc)
  1430. genop_getarrayitem_gc_pure = genop_getarrayitem_gc
  1431. genop_getarrayitem_raw = genop_getarrayitem_gc
  1432. genop_getarrayitem_raw_pure = genop_getarrayitem_gc
  1433. def genop_raw_load(self, op, arglocs, resloc):
  1434. base_loc, ofs_loc, size_loc, ofs, sign_loc = arglocs
  1435. assert isinstance(ofs, ImmedLoc)
  1436. src_addr = addr_add(base_loc, ofs_loc, ofs.value, 0)
  1437. self.load_from_mem(resloc, src_addr, size_loc, sign_loc)
  1438. def _get_interiorfield_addr(self, temp_loc, index_loc, itemsize_loc,
  1439. base_loc, ofs_loc):
  1440. assert isinstance(itemsize_loc, ImmedLoc)
  1441. if isinstance(index_loc, ImmedLoc):
  1442. temp_loc = imm(index_loc.value * itemsize_loc.value)
  1443. elif _valid_addressing_size(itemsize_loc.value):
  1444. return AddressLoc(base_loc, index_loc, _get_scale(itemsize_loc.value), ofs_loc.value)
  1445. else:
1446. # XXX should not use IMUL in so many cases; a clever LEA could be used instead
  1447. assert isinstance(temp_loc, RegLoc)
  1448. assert isinstance(index_loc, RegLoc)
  1449. assert not temp_loc.is_xmm
  1450. self.mc.IMUL_rri(temp_loc.value, index_loc.value,
  1451. itemsize_loc.value)
  1452. assert isinstance(ofs_loc, ImmedLoc)
  1453. return AddressLoc(base_loc, temp_loc, 0, ofs_loc.value)
  1454. def genop_getinteriorfield_gc(self, op, arglocs, resloc):
  1455. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1456. index_loc, temp_loc, sign_loc) = arglocs
  1457. src_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1458. itemsize_loc, base_loc,
  1459. ofs_loc)
  1460. self.load_from_mem(resloc, src_addr, fieldsize_loc, sign_loc)
  1461. def genop_discard_setfield_gc(self, op, arglocs):
  1462. base_loc, ofs_loc, size_loc, value_loc = arglocs
  1463. assert isinstance(size_loc, ImmedLoc)
  1464. dest_addr = AddressLoc(base_loc, ofs_loc)
  1465. self.save_into_mem(dest_addr, value_loc, size_loc)
  1466. def genop_discard_setinteriorfield_gc(self, op, arglocs):
  1467. (base_loc, ofs_loc, itemsize_loc, fieldsize_loc,
  1468. index_loc, temp_loc, value_loc) = arglocs
  1469. dest_addr = self._get_interiorfield_addr(temp_loc, index_loc,
  1470. itemsize_loc, base_loc,
  1471. ofs_loc)
  1472. self.save_into_mem(dest_addr, value_loc, fieldsize_loc)
  1473. genop_discard_setinteriorfield_raw = genop_discard_setinteriorfield_gc
  1474. def genop_discard_setarrayitem_gc(self, op, arglocs):
  1475. base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
  1476. assert isinstance(baseofs, ImmedLoc)
  1477. assert isinstance(size_loc, ImmedLoc)
  1478. scale = _get_scale(size_loc.value)
  1479. dest_addr = AddressLoc(base_loc, ofs_loc, scale, baseofs.value)
  1480. self.save_into_mem(dest_addr, value_loc, size_loc)
  1481. def genop_discard_raw_store(self, op, arglocs):
  1482. base_loc, ofs_loc, value_loc, size_loc, baseofs = arglocs
  1483. assert isinstance(baseofs, ImmedLoc)
  1484. dest_addr = AddressLoc(base_loc, ofs_loc, 0, baseofs.value)
  1485. self.save_into_mem(dest_addr, value_loc, size_loc)
  1486. def genop_discard_strsetitem(self, op, arglocs):
  1487. base_loc, ofs_loc, val_loc = arglocs
  1488. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1489. self.cpu.translate_support_code)
  1490. assert itemsize == 1
  1491. dest_addr = AddressLoc(base_loc, ofs_loc, 0, basesize)
  1492. self.mc.MOV8(dest_addr, val_loc.lowest8bits())
  1493. def genop_discard_unicodesetitem(self, op, arglocs):
  1494. base_loc, ofs_loc, val_loc = arglocs
  1495. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1496. self.cpu.translate_support_code)
  1497. if itemsize == 4:
  1498. self.mc.MOV32(AddressLoc(base_loc, ofs_loc, 2, basesize), val_loc)
  1499. elif itemsize == 2:
  1500. self.mc.MOV16(AddressLoc(base_loc, ofs_loc, 1, basesize), val_loc)
  1501. else:
  1502. assert 0, itemsize
  1503. genop_discard_setfield_raw = genop_discard_setfield_gc
  1504. genop_discard_setarrayitem_raw = genop_discard_setarrayitem_gc
  1505. def genop_strlen(self, op, arglocs, resloc):
  1506. base_loc = arglocs[0]
  1507. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1508. self.cpu.translate_support_code)
  1509. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1510. def genop_unicodelen(self, op, arglocs, resloc):
  1511. base_loc = arglocs[0]
  1512. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1513. self.cpu.translate_support_code)
  1514. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_length))
  1515. def genop_arraylen_gc(self, op, arglocs, resloc):
  1516. base_loc, ofs_loc = arglocs
  1517. assert isinstance(ofs_loc, ImmedLoc)
  1518. self.mc.MOV(resloc, addr_add_const(base_loc, ofs_loc.value))
  1519. def genop_strgetitem(self, op, arglocs, resloc):
  1520. base_loc, ofs_loc = arglocs
  1521. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.STR,
  1522. self.cpu.translate_support_code)
  1523. assert itemsize == 1
  1524. self.mc.MOVZX8(resloc, AddressLoc(base_loc, ofs_loc, 0, basesize))
  1525. def genop_unicodegetitem(self, op, arglocs, resloc):
  1526. base_loc, ofs_loc = arglocs
  1527. basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE,
  1528. self.cpu.translate_support_code)
  1529. if itemsize == 4:
  1530. self.mc.MOV32(resloc, AddressLoc(base_loc, ofs_loc, 2, basesize))
  1531. elif itemsize == 2:
  1532. self.mc.MOVZX16(resloc, AddressLoc(base_loc, ofs_loc, 1, basesize))
  1533. else:
  1534. assert 0, itemsize
  1535. def genop_read_timestamp(self, op, arglocs, resloc):
  1536. self.mc.RDTSC()
  1537. if longlong.is_64_bit:
  1538. self.mc.SHL_ri(edx.value, 32)
  1539. self.mc.OR_rr(edx.value, eax.value)
  1540. else:
  1541. loc1, = arglocs
  1542. self.mc.MOVD_xr(loc1.value, edx.value)
  1543. self.mc.MOVD_xr(resloc.value, eax.value)
  1544. self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
  1545. def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
  1546. loc = locs[0]
  1547. self.mc.TEST(loc, loc)
  1548. self.implement_guard(guard_token, 'Z')
  1549. genop_guard_guard_nonnull = genop_guard_guard_true
  1550. def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
  1551. locs, ign_2):
  1552. self.mc.CMP(heap(self.cpu.pos_exception()), imm0)
  1553. self.implement_guard(guard_token, 'NZ')
  1554. def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token,
  1555. locs, ign_2):
  1556. pos = self.mc.get_relative_pos() + 1 # after potential jmp
  1557. guard_token.pos_jump_offset = pos
  1558. self.pending_guard_tokens.append(guard_token)
  1559. def genop_guard_guard_exception(self, ign_1, guard_op, guard_token,
  1560. locs, resloc):
  1561. loc = locs[0]
  1562. loc1 = locs[1]
  1563. self.mc.MOV(loc1, heap(self.cpu.pos_exception()))
  1564. self.mc.CMP(loc1, loc)
  1565. self.implement_guard(guard_token, 'NE')
  1566. if resloc is not None:
  1567. self.mc.MOV(resloc, heap(self.cpu.pos_exc_value()))
  1568. self.mc.MOV(heap(self.cpu.pos_exception()), imm0)
  1569. self.mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
  1570. def _gen_guard_overflow(self, guard_op, guard_token):
  1571. guard_opnum = guard_op.getopnum()
  1572. if guard_opnum == rop.GUARD_NO_OVERFLOW:
  1573. self.implement_guard(guard_token, 'O')
  1574. elif guard_opnum == rop.GUARD_OVERFLOW:
  1575. self.implement_guard(guard_token, 'NO')
  1576. else:
  1577. not_implemented("int_xxx_ovf followed by %s" %
  1578. guard_op.getopname())
  1579. def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1580. self.mc.ADD(arglocs[0], arglocs[1])
  1581. return self._gen_guard_overflow(guard_op, guard_token)
  1582. def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1583. self.mc.SUB(arglocs[0], arglocs[1])
  1584. return self._gen_guard_overflow(guard_op, guard_token)
  1585. def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc):
  1586. self.mc.IMUL(arglocs[0], arglocs[1])
  1587. return self._gen_guard_overflow(guard_op, guard_token)
  1588. def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2):
  1589. loc = locs[0]
  1590. self.mc.TEST(loc, loc)
  1591. self.implement_guard(guard_token, 'NZ')
  1592. genop_guard_guard_isnull = genop_guard_guard_false
  1593. def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2):
  1594. if guard_op.getarg(0).type == FLOAT:
  1595. assert guard_op.getarg(1).type == FLOAT
  1596. self.mc.UCOMISD(locs[0], locs[1])
  1597. else:
  1598. self.mc.CMP(locs[0], locs[1])
  1599. self.implement_guard(guard_token, 'NE')
  1600. def _cmp_guard_class(self, locs):
  1601. offset = self.cpu.vtable_offset
  1602. if offset is not None:
  1603. self.mc.CMP(mem(locs[0], offset), locs[1])
  1604. else:
  1605. # XXX hard-coded assumption: to go from an object to its class
  1606. # we use the following algorithm:
  1607. # - read the typeid from mem(locs[0]), i.e. at offset 0;
  1608. # this is a complete word (N=4 bytes on 32-bit, N=8 on
1609. # 64-bit)
1610. # - keep the lower half of what is read there (i.e.
1611. # truncate to an unsigned 'N / 2' bytes value)
1612. # - multiply by 4 (on 32-bit only) and use it as an
  1613. # offset in type_info_group
  1614. # - add 16/32 bytes, to go past the TYPE_INFO structure
  1615. loc = locs[1]
  1616. assert isinstance(loc, ImmedLoc)
  1617. classptr = loc.value
  1618. # here, we have to go back from 'classptr' to the value expected
  1619. # from reading the half-word in the object header. Note that
  1620. # this half-word is at offset 0 on a little-endian machine;
  1621. # it would be at offset 2 or 4 on a big-endian machine.
  1622. from rpython.rtyper.memory.gctypelayout import GCData
  1623. sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
  1624. type_info_group = llop.gc_get_type_info_group(llmemory.Address)
  1625. type_info_group = rffi.cast(lltype.Signed, type_info_group)
  1626. expected_typeid = classptr - sizeof_ti - type_info_group
  1627. if IS_X86_32:
  1628. expected_typeid >>= 2
  1629. self.mc.CMP16(mem(locs[0], 0), ImmedLoc(expected_typeid))
  1630. elif IS_X86_64:
  1631. self.mc.CMP32_mi((locs[0].value, 0), expected_typeid)
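# Worked example of the typeid arithmetic above (with made-up numbers, for
# illustration): if type_info_group is at 0x1000000, sizeof_ti is 16 and
# the class structure lives at 0x1000418, then expected_typeid =
# 0x1000418 - 16 - 0x1000000 = 0x408; on 32-bit, where the half-word in the
# header stores the offset divided by 4, this is shifted right by 2 and the
# CMP16 compares against 0x102.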
  1632. def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2):
  1633. self._cmp_guard_class(locs)
  1634. self.implement_guard(guard_token, 'NE')
  1635. def genop_guard_guard_nonnull_class(self, ign_1, guard_op,
  1636. guard_token, locs, ign_2):
  1637. self.mc.CMP(locs[0], imm1)
  1638. # Patched below
  1639. self.mc.J_il8(rx86.Conditions['B'], 0)
  1640. jb_location = self.mc.get_relative_pos()
  1641. self._cmp_guard_class(locs)
  1642. # patch the JB above
  1643. offset = self.mc.get_relative_pos() - jb_location
  1644. assert 0 < offset <= 127
  1645. self.mc.overwrite(jb_location-1, chr(offset))
  1646. #
  1647. self.implement_guard(guard_token, 'NE')
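# The CMP with imm1 above folds the NULL check into the class check: a NULL
# pointer compares unsigned-below 1, so the JB skips the class comparison
# and lands directly on the guard with ZF clear (0 != 1), making the 'NE'
# guard fire; any non-NULL pointer falls through to the real class
# comparison.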
  1648. def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
  1649. fail_locs):
  1650. exc = (guard_opnum == rop.GUARD_EXCEPTION or
  1651. guard_opnum == rop.GUARD_NO_EXCEPTION or
  1652. guard_opnum == rop.GUARD_NOT_FORCED)
  1653. is_guard_not_invalidated = guard_opnum == rop.GUARD_NOT_INVALIDATED
  1654. is_guard_not_forced = guard_opnum == rop.GUARD_NOT_FORCED
  1655. return GuardToken(faildescr, failargs, fail_locs, exc,
  1656. is_guard_not_invalidated, is_guard_not_forced)
  1657. def generate_propagate_error_64(self):
  1658. assert WORD == 8
  1659. startpos = self.mc.get_relative_pos()
  1660. self.mc.JMP(imm(self.propagate_exception_path))
  1661. return startpos
  1662. def generate_quick_failure(self, guardtok):
  1663. """Generate the initial code for handling a failure. We try to
  1664. keep it as compact as possible.
  1665. """
  1666. fail_index = self.cpu.get_fail_descr_number(guardtok.faildescr)
  1667. mc = self.mc
  1668. startpos = mc.get_relative_pos()
  1669. withfloats = False
  1670. for box in guardtok.failargs:
  1671. if box is not None and box.type == FLOAT:
  1672. withfloats = True
  1673. break
  1674. exc = guardtok.exc
  1675. target = self.failure_recovery_code[exc + 2 * withfloats]
  1676. if WORD == 4:
  1677. mc.CALL(imm(target))
  1678. else:
  1679. # Generate exactly 13 bytes:
  1680. # MOV r11, target-as-8-bytes
  1681. # CALL *r11
  1682. # Keep the number 13 in sync with _find_failure_recovery_bytecode.
  1683. start = mc.get_relative_pos()
  1684. mc.MOV_ri64(X86_64_SCRATCH_REG.value, target)
  1685. mc.CALL_r(X86_64_SCRATCH_REG.value)
  1686. assert mc.get_relative_pos() == start + 13
  1687. # write tight data that describes the failure recovery
  1688. if guardtok.is_guard_not_forced:
  1689. mc.writechar(chr(self.CODE_FORCED))
  1690. self.write_failure_recovery_description(mc, guardtok.failargs,
  1691. guardtok.fail_locs)
  1692. # write the fail_index too
  1693. mc.writeimm32(fail_index)
  1694. # for testing the decoding, write a final byte 0xCC
  1695. if not we_are_translated():
  1696. mc.writechar('\xCC')
  1697. faillocs = [loc for loc in guardtok.fail_locs if loc is not None]
  1698. guardtok.faildescr._x86_debug_faillocs = faillocs
  1699. return startpos
  1700. DESCR_REF = 0x00
  1701. DESCR_INT = 0x01
  1702. DESCR_FLOAT = 0x02
  1703. DESCR_SPECIAL = 0x03
  1704. CODE_FROMSTACK = 4 * (8 + 8*IS_X86_64)
  1705. CODE_STOP = 0 | DESCR_SPECIAL
  1706. CODE_HOLE = 4 | DESCR_SPECIAL
  1707. CODE_INPUTARG = 8 | DESCR_SPECIAL
  1708. CODE_FORCED = 12 | DESCR_SPECIAL
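# Layout of the failure-recovery bytecode built from the constants above:
# each entry decodes to a value n whose low 2 bits give the kind (DESCR_REF,
# DESCR_INT, DESCR_FLOAT, or DESCR_SPECIAL for the CODE_* markers) and whose
# upper bits give either a register number (n < CODE_FROMSTACK) or a stack
# position (n >= CODE_FROMSTACK). For example, on 64-bit CODE_FROMSTACK is
# 4 * 16 = 64, so an INT in stack position 3 encodes as
# 1 + 4 * (16 + 3) = 77, which fits in a single byte.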
  1709. def write_failure_recovery_description(self, mc, failargs, locs):
  1710. for i in range(len(failargs)):
  1711. arg = failargs[i]
  1712. if arg is not None:
  1713. if arg.type == REF:
  1714. kind = self.DESCR_REF
  1715. elif arg.type == INT:
  1716. kind = self.DESCR_INT
  1717. elif arg.type == FLOAT:
  1718. kind = self.DESCR_FLOAT
  1719. else:
  1720. raise AssertionError("bogus kind")
  1721. loc = locs[i]
  1722. if isinstance(loc, StackLoc):
  1723. pos = loc.position
  1724. if pos < 0:
  1725. mc.writechar(chr(self.CODE_INPUTARG))
  1726. pos = ~pos
  1727. n = self.CODE_FROMSTACK//4 + pos
  1728. else:
  1729. assert isinstance(loc, RegLoc)
  1730. n = loc.value
  1731. n = kind + 4*n
  1732. while n > 0x7F:
  1733. mc.writechar(chr((n & 0x7F) | 0x80))
  1734. n >>= 7
  1735. else:
  1736. n = self.CODE_HOLE
  1737. mc.writechar(chr(n))
  1738. mc.writechar(chr(self.CODE_STOP))
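# A minimal sketch of the 7-bit varint format emitted above, in plain
# Python with hypothetical helper names (intermediate bytes carry the
# continuation bit 0x80; the final byte has it clear):
#
#     def encode(n):
#         out = []
#         while n > 0x7F:
#             out.append((n & 0x7F) | 0x80)   # low 7 bits, continuation set
#             n >>= 7
#         out.append(n)                       # final byte, high bit clear
#         return out
#
#     def decode(data):
#         n = data[0]
#         if n > 0x7F:
#             n &= 0x7F
#             shift = 7
#             for byte in data[1:]:
#                 n |= (byte & 0x7F) << shift
#                 shift += 7
#                 if byte <= 0x7F:
#                     break
#         return n
#
#     assert decode(encode(1000)) == 1000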
  1739. def rebuild_faillocs_from_descr(self, bytecode):
  1740. from rpython.jit.backend.x86.regalloc import X86FrameManager
  1741. descr_to_box_type = [REF, INT, FLOAT]
  1742. bytecode = rffi.cast(rffi.UCHARP, bytecode)
  1743. arglocs = []
  1744. code_inputarg = False
  1745. while 1:
  1746. # decode the next instruction from the bytecode
  1747. code = rffi.cast(lltype.Signed, bytecode[0])
  1748. bytecode = rffi.ptradd(bytecode, 1)
  1749. if code >= self.CODE_FROMSTACK:
  1750. # 'code' identifies a stack location
  1751. if code > 0x7F:
  1752. shift = 7
  1753. code &= 0x7F
  1754. while True:
  1755. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1756. bytecode = rffi.ptradd(bytecode, 1)
  1757. code |= (nextcode & 0x7F) << shift
  1758. shift += 7
  1759. if nextcode <= 0x7F:
  1760. break
  1761. kind = code & 3
  1762. code = (code - self.CODE_FROMSTACK) >> 2
  1763. if code_inputarg:
  1764. code = ~code
  1765. code_inputarg = False
  1766. loc = X86FrameManager.frame_pos(code, descr_to_box_type[kind])
  1767. elif code == self.CODE_STOP:
  1768. break
  1769. elif code == self.CODE_HOLE:
  1770. continue
  1771. elif code == self.CODE_INPUTARG:
  1772. code_inputarg = True
  1773. continue
  1774. else:
  1775. # 'code' identifies a register
  1776. kind = code & 3
  1777. code >>= 2
  1778. if kind == self.DESCR_FLOAT:
  1779. loc = regloc.XMMREGLOCS[code]
  1780. else:
  1781. loc = regloc.REGLOCS[code]
  1782. arglocs.append(loc)
  1783. return arglocs[:]
  1784. @staticmethod
  1785. #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
  1786. def grab_frame_values(cpu, bytecode, frame_addr, allregisters):
1787. # no malloc allowed here!! XXX apart from one, with a lot of hacking
  1788. #self.fail_ebp = allregisters[16 + ebp.value]
  1789. num = 0
  1790. deadframe = lltype.nullptr(jitframe.DEADFRAME)
  1791. # step 1: lots of mess just to count the final value of 'num'
  1792. bytecode1 = bytecode
  1793. while 1:
  1794. code = rffi.cast(lltype.Signed, bytecode1[0])
  1795. bytecode1 = rffi.ptradd(bytecode1, 1)
  1796. if code >= Assembler386.CODE_FROMSTACK:
  1797. while code > 0x7F:
  1798. code = rffi.cast(lltype.Signed, bytecode1[0])
  1799. bytecode1 = rffi.ptradd(bytecode1, 1)
  1800. else:
  1801. kind = code & 3
  1802. if kind == Assembler386.DESCR_SPECIAL:
  1803. if code == Assembler386.CODE_HOLE:
  1804. num += 1
  1805. continue
  1806. if code == Assembler386.CODE_INPUTARG:
  1807. continue
  1808. if code == Assembler386.CODE_FORCED:
  1809. # resuming from a GUARD_NOT_FORCED
  1810. token = allregisters[16 + ebp.value]
  1811. deadframe = (
  1812. cpu.assembler.force_token_to_dead_frame.pop(token))
  1813. deadframe = lltype.cast_opaque_ptr(
  1814. jitframe.DEADFRAMEPTR, deadframe)
  1815. continue
  1816. assert code == Assembler386.CODE_STOP
  1817. break
  1818. num += 1
  1819. # allocate the deadframe
  1820. if not deadframe:
  1821. # Remove the "reserve" at the end of the nursery. This means
  1822. # that it is guaranteed that the following malloc() works
  1823. # without requiring a collect(), but it needs to be re-added
  1824. # as soon as possible.
  1825. cpu.gc_clear_extra_threshold()
  1826. assert num <= cpu.get_failargs_limit()
  1827. try:
  1828. deadframe = lltype.malloc(jitframe.DEADFRAME, num)
  1829. except MemoryError:
  1830. fatalerror("memory usage error in grab_frame_values")
  1831. # fill it
  1832. code_inputarg = False
  1833. num = 0
  1834. value_hi = 0
  1835. while 1:
  1836. # decode the next instruction from the bytecode
  1837. code = rffi.cast(lltype.Signed, bytecode[0])
  1838. bytecode = rffi.ptradd(bytecode, 1)
  1839. if code >= Assembler386.CODE_FROMSTACK:
  1840. if code > 0x7F:
  1841. shift = 7
  1842. code &= 0x7F
  1843. while True:
  1844. nextcode = rffi.cast(lltype.Signed, bytecode[0])
  1845. bytecode = rffi.ptradd(bytecode, 1)
  1846. code |= (nextcode & 0x7F) << shift
  1847. shift += 7
  1848. if nextcode <= 0x7F:
  1849. break
  1850. # load the value from the stack
  1851. kind = code & 3
  1852. code = (code - Assembler386.CODE_FROMSTACK) >> 2
  1853. if code_inputarg:
  1854. code = ~code
  1855. code_inputarg = False
  1856. stackloc = frame_addr + get_ebp_ofs(code)
  1857. value = rffi.cast(rffi.LONGP, stackloc)[0]
  1858. if kind == Assembler386.DESCR_FLOAT and WORD == 4:
  1859. value_hi = value
  1860. value = rffi.cast(rffi.LONGP, stackloc - 4)[0]
  1861. else:
  1862. kind = code & 3
  1863. if kind == Assembler386.DESCR_SPECIAL:
  1864. if code == Assembler386.CODE_HOLE:
  1865. num += 1
  1866. continue
  1867. if code == Assembler386.CODE_INPUTARG:
  1868. code_inputarg = True
  1869. continue
  1870. if code == Assembler386.CODE_FORCED:
  1871. continue
  1872. assert code == Assembler386.CODE_STOP
  1873. break
  1874. # 'code' identifies a register: load its value
  1875. code >>= 2
  1876. if kind == Assembler386.DESCR_FLOAT:
  1877. if WORD == 4:
  1878. value = allregisters[2*code]
  1879. value_hi = allregisters[2*code + 1]
  1880. else:
  1881. value = allregisters[code]
  1882. else:
  1883. value = allregisters[16 + code]
  1884. # store the loaded value into fail_boxes_<type>
  1885. if kind == Assembler386.DESCR_INT:
  1886. deadframe.jf_values[num].int = value
  1887. elif kind == Assembler386.DESCR_REF:
  1888. deadframe.jf_values[num].ref = rffi.cast(llmemory.GCREF, value)
  1889. elif kind == Assembler386.DESCR_FLOAT:
  1890. if WORD == 4:
  1891. assert not longlong.is_64_bit
  1892. floatvalue = rffi.cast(lltype.SignedLongLong, value_hi)
  1893. floatvalue <<= 32
  1894. floatvalue |= rffi.cast(lltype.SignedLongLong,
  1895. rffi.cast(lltype.Unsigned, value))
  1896. else:
  1897. assert longlong.is_64_bit
  1898. floatvalue = longlong2float.longlong2float(value)
  1899. deadframe.jf_values[num].float = floatvalue
  1900. else:
  1901. assert 0, "bogus kind"
  1902. num += 1
  1903. #
  1904. assert num == len(deadframe.jf_values)
  1905. if not we_are_translated():
  1906. assert bytecode[4] == 0xCC
  1907. #self.fail_boxes_count = num
  1908. fail_index = rffi.cast(rffi.INTP, bytecode)[0]
  1909. fail_descr = cpu.get_fail_descr_from_number(fail_index)
  1910. deadframe.jf_descr = fail_descr.hide(cpu)
  1911. return lltype.cast_opaque_ptr(llmemory.GCREF, deadframe)
  1912. def setup_failure_recovery(self):
  1913. #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
  1914. def failure_recovery_func(registers):
  1915. # 'registers' is a pointer to a structure containing the
  1916. # original value of the registers, optionally the original
  1917. # value of XMM registers, and finally a reference to the
  1918. # recovery bytecode. See _build_failure_recovery() for details.
  1919. stack_at_ebp = registers[ebp.value]
  1920. bytecode = rffi.cast(rffi.UCHARP, registers[self.cpu.NUM_REGS])
  1921. allregisters = rffi.ptradd(registers, -16)
  1922. return self.grab_frame_values(self.cpu, bytecode, stack_at_ebp,
  1923. allregisters)
  1924. self.failure_recovery_func = failure_recovery_func
  1925. self.failure_recovery_code = [0, 0, 0, 0]
  1926. _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP],
  1927. llmemory.GCREF))
  1928. def _build_failure_recovery(self, exc, withfloats=False):
  1929. failure_recovery_func = llhelper(self._FAILURE_RECOVERY_FUNC,
  1930. self.failure_recovery_func)
  1931. failure_recovery_func = rffi.cast(lltype.Signed,
  1932. failure_recovery_func)
  1933. mc = codebuf.MachineCodeBlockWrapper()
  1934. self.mc = mc
  1935. # Push all general purpose registers
  1936. for gpr in range(self.cpu.NUM_REGS-1, -1, -1):
  1937. mc.PUSH_r(gpr)
  1938. if exc:
  1939. # We might have an exception pending. Load it into ebx
1940. # (this is a register saved across calls, on both 32- and 64-bit)
  1941. mc.MOV(ebx, heap(self.cpu.pos_exc_value()))
  1942. mc.MOV(heap(self.cpu.pos_exception()), imm0)
  1943. mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
  1944. # Load the current esp value into edi. On 64-bit, this is the
  1945. # argument. On 32-bit, it will be pushed as argument below.
  1946. mc.MOV_rr(edi.value, esp.value)
  1947. if withfloats:
  1948. # Push all float registers
  1949. mc.SUB_ri(esp.value, self.cpu.NUM_REGS*8)
  1950. for i in range(self.cpu.NUM_REGS):
  1951. mc.MOVSD_sx(8*i, i)
  1952. # the following call saves all values from the stack and from
1953. # registers to a fresh deadframe object.
  1954. # Note that the registers are saved so far in esi[0] to esi[7],
  1955. # as pushed above, plus optionally in esi[-16] to esi[-1] for
  1956. # the XMM registers. Moreover, esi[8] is a pointer to the recovery
  1957. # bytecode, pushed just before by the CALL instruction written by
  1958. # generate_quick_failure().
  1959. if IS_X86_32:
  1960. mc.SUB_ri(esp.value, 3*WORD) # for stack alignment
  1961. mc.PUSH_r(edi.value)
  1962. mc.CALL(imm(failure_recovery_func))
  1963. # returns in eax the deadframe object
  1964. if exc:
  1965. # save ebx into 'jf_guard_exc'
  1966. from rpython.jit.backend.llsupport.descr import unpack_fielddescr
  1967. descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
  1968. offset, size, _ = unpack_fielddescr(descrs.jf_guard_exc)
  1969. mc.MOV_mr((eax.value, offset), ebx.value)
  1970. # now we return from the complete frame, which starts from
  1971. # _call_header_with_stack_check(). The LEA in _call_footer below
  1972. # throws away most of the frame, including all the PUSHes that we
  1973. # did just above.
  1974. self._call_footer()
  1975. rawstart = mc.materialize(self.cpu.asmmemmgr, [])
  1976. self.failure_recovery_code[exc + 2 * withfloats] = rawstart
  1977. self.mc = None
  1978. def genop_finish(self, op, arglocs, result_loc):
  1979. [argloc] = arglocs
  1980. if argloc is not eax:
  1981. self.mov(argloc, eax)
  1982. # exit function
  1983. self._call_footer()
  1984. def implement_guard(self, guard_token, condition=None):
  1985. # These jumps are patched later.
  1986. if condition:
  1987. self.mc.J_il(rx86.Conditions[condition], 0)
  1988. else:
  1989. self.mc.JMP_l(0)
  1990. guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
  1991. self.pending_guard_tokens.append(guard_token)
  1992. def genop_call(self, op, arglocs, resloc):
  1993. force_index = self.write_new_force_index()
  1994. self._genop_call(op, arglocs, resloc, force_index)
  1995. def _genop_call(self, op, arglocs, resloc, force_index):
  1996. from rpython.jit.backend.llsupport.descr import CallDescr
  1997. sizeloc = arglocs[0]
  1998. assert isinstance(sizeloc, ImmedLoc)
  1999. size = sizeloc.value
  2000. signloc = arglocs[1]
  2001. x = arglocs[2] # the function address
  2002. if x is eax:
  2003. tmp = ecx
  2004. else:
  2005. tmp = eax
  2006. descr = op.getdescr()
  2007. assert isinstance(descr, CallDescr)
  2008. self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
  2009. argtypes=descr.get_arg_types(),
  2010. callconv=descr.get_call_conv())
  2011. if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.type == FLOAT:
  2012. # a float or a long long return
  2013. if descr.get_result_type() == 'L':
  2014. self.mc.MOV_br(resloc.value, eax.value) # long long
  2015. self.mc.MOV_br(resloc.value + 4, edx.value)
2016. # XXX ideally we should not move the result onto the stack,
2017. # but it's a mess to load eax/edx into an xmm register,
2018. # and this way is simpler also because the result loc
2019. # can then always be a stack location
  2020. else:
  2021. self.mc.FSTPL_b(resloc.value) # float return
  2022. elif descr.get_result_type() == 'S':
  2023. # singlefloat return
  2024. assert resloc is eax
  2025. if IS_X86_32:
  2026. # must convert ST(0) to a 32-bit singlefloat and load it into EAX
  2027. # mess mess mess
  2028. self.mc.SUB_ri(esp.value, 4)
  2029. self.mc.FSTPS_s(0)
  2030. self.mc.POP_r(eax.value)
  2031. elif IS_X86_64:
  2032. # must copy from the lower 32 bits of XMM0 into eax
  2033. self.mc.MOVD_rx(eax.value, xmm0.value)
  2034. elif size == WORD:
  2035. assert resloc is eax or resloc is xmm0 # a full word
  2036. elif size == 0:
  2037. pass # void return
  2038. else:
  2039. # use the code in load_from_mem to do the zero- or sign-extension
  2040. assert resloc is eax
  2041. if size == 1:
  2042. srcloc = eax.lowest8bits()
  2043. else:
  2044. srcloc = eax
  2045. self.load_from_mem(eax, srcloc, sizeloc, signloc)
  2046. def genop_guard_call_may_force(self, op, guard_op, guard_token,
  2047. arglocs, result_loc):
  2048. faildescr = guard_op.getdescr()
  2049. fail_index = self.cpu.get_fail_descr_number(faildescr)
  2050. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  2051. self._genop_call(op, arglocs, result_loc, fail_index)
  2052. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  2053. self.implement_guard(guard_token, 'L')
  2054. def genop_guard_call_release_gil(self, op, guard_op, guard_token,
  2055. arglocs, result_loc):
  2056. # first, close the stack in the sense of the asmgcc GC root tracker
  2057. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2058. if gcrootmap:
  2059. self.call_release_gil(gcrootmap, arglocs)
  2060. # do the call
  2061. faildescr = guard_op.getdescr()
  2062. fail_index = self.cpu.get_fail_descr_number(faildescr)
  2063. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  2064. self._genop_call(op, arglocs, result_loc, fail_index)
  2065. # then reopen the stack
  2066. if gcrootmap:
  2067. self.call_reacquire_gil(gcrootmap, result_loc)
  2068. # finally, the guard_not_forced
  2069. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  2070. self.implement_guard(guard_token, 'L')
  2071. def call_release_gil(self, gcrootmap, save_registers):
  2072. # First, we need to save away the registers listed in
  2073. # 'save_registers' that are not callee-save. XXX We assume that
  2074. # the XMM registers won't be modified. We store them in
  2075. # [ESP+4], [ESP+8], etc.; on x86-32 we leave enough room in [ESP]
  2076. # for the single argument to closestack_addr below.
  2077. if IS_X86_32:
  2078. p = WORD
  2079. elif IS_X86_64:
  2080. p = 0
  2081. for reg in self._regalloc.rm.save_around_call_regs:
  2082. if reg in save_registers:
  2083. self.mc.MOV_sr(p, reg.value)
  2084. p += WORD
  2085. #
  2086. if gcrootmap.is_shadow_stack:
  2087. args = []
  2088. else:
  2089. # note that regalloc.py used save_all_regs=True to save all
  2090. # registers, so we don't have to care about saving them (other
  2091. # than ebp) in the close_stack_struct. But if they are registers
  2092. # like %eax that would be destroyed by this call, *and* they are
2093. # used by arglocs for the *next* call, then we are in trouble; for now we
  2094. # will just push/pop them.
  2095. from rpython.rtyper.memory.gctransform import asmgcroot
  2096. css = self._regalloc.close_stack_struct
  2097. if css == 0:
  2098. use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
  2099. asmgcroot.FRAME_PTR) + 1)
  2100. pos = self._regalloc.fm.reserve_location_in_frame(use_words)
  2101. css = get_ebp_ofs(pos + use_words - 1)
  2102. self._regalloc.close_stack_struct = css
  2103. # The location where the future CALL will put its return address
  2104. # will be [ESP-WORD]. But we can't use that as the next frame's
  2105. # top address! As the code after releasegil() runs without the
  2106. # GIL, it might not be set yet by the time we need it (very
  2107. # unlikely), or it might be overwritten by the following call
2108. # to reacquiregil() (much more likely). So we hack even more
  2109. # and use a dummy location containing a dummy value (a pointer
  2110. # to itself) which we pretend is the return address :-/ :-/ :-/
2111. # This prevents us from storing any %esp-based stack locations, but
2112. # we don't use any so far.
  2113. adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
  2114. rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
  2115. self.gcrootmap_retaddr_forced = adr
  2116. frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
  2117. if rx86.fits_in_32bits(adr):
  2118. self.mc.MOV_bi(frame_ptr, adr) # MOV [css.frame], adr
  2119. else:
  2120. self.mc.MOV_ri(eax.value, adr) # MOV EAX, adr
  2121. self.mc.MOV_br(frame_ptr, eax.value) # MOV [css.frame], EAX
  2122. # Save ebp
  2123. index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
  2124. self.mc.MOV_br(index_of_ebp, ebp.value) # MOV [css.ebp], EBP
  2125. # Call the closestack() function (also releasing the GIL)
  2126. if IS_X86_32:
  2127. reg = eax
  2128. elif IS_X86_64:
  2129. reg = edi
  2130. self.mc.LEA_rb(reg.value, css)
  2131. args = [reg]
  2132. #
  2133. self._emit_call(-1, imm(self.releasegil_addr), args)
  2134. # Finally, restore the registers saved above.
  2135. if IS_X86_32:
  2136. p = WORD
  2137. elif IS_X86_64:
  2138. p = 0
  2139. for reg in self._regalloc.rm.save_around_call_regs:
  2140. if reg in save_registers:
  2141. self.mc.MOV_rs(reg.value, p)
  2142. p += WORD
  2143. self._regalloc.needed_extra_stack_locations(p//WORD)
  2144. def call_reacquire_gil(self, gcrootmap, save_loc):
  2145. # save the previous result (eax/xmm0) into the stack temporarily.
  2146. # XXX like with call_release_gil(), we assume that we don't need
  2147. # to save xmm0 in this case.
  2148. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  2149. self.mc.MOV_sr(WORD, save_loc.value)
  2150. # call the reopenstack() function (also reacquiring the GIL)
  2151. if gcrootmap.is_shadow_stack:
  2152. args = []
  2153. else:
  2154. assert self.gcrootmap_retaddr_forced == -1, (
  2155. "missing mark_gc_roots() in CALL_RELEASE_GIL")
  2156. self.gcrootmap_retaddr_forced = 0
  2157. css = self._regalloc.close_stack_struct
  2158. assert css != 0
  2159. if IS_X86_32:
  2160. reg = eax
  2161. elif IS_X86_64:
  2162. reg = edi
  2163. self.mc.LEA_rb(reg.value, css)
  2164. args = [reg]
  2165. self._emit_call(-1, imm(self.reacqgil_addr), args)
  2166. # restore the result from the stack
  2167. if isinstance(save_loc, RegLoc) and not save_loc.is_xmm:
  2168. self.mc.MOV_rs(save_loc.value, WORD)
  2169. self._regalloc.needed_extra_stack_locations(2)
  2170. def genop_guard_call_assembler(self, op, guard_op, guard_token,
  2171. arglocs, result_loc):
  2172. faildescr = guard_op.getdescr()
  2173. fail_index = self.cpu.get_fail_descr_number(faildescr)
  2174. self.mc.MOV_bi(FORCE_INDEX_OFS, fail_index)
  2175. descr = op.getdescr()
  2176. assert isinstance(descr, JitCellToken)
  2177. assert len(arglocs) - 2 == descr.compiled_loop_token._debug_nbargs
  2178. #
  2179. # Write a call to the target assembler
  2180. self._emit_call(fail_index, imm(descr._x86_function_addr),
  2181. arglocs, 2, tmp=eax)
  2182. if op.result is None:
  2183. assert result_loc is None
  2184. value = self.cpu.done_with_this_frame_void_v
  2185. else:
  2186. kind = op.result.type
  2187. if kind == INT:
  2188. assert result_loc is eax
  2189. value = self.cpu.done_with_this_frame_int_v
  2190. elif kind == REF:
  2191. assert result_loc is eax
  2192. value = self.cpu.done_with_this_frame_ref_v
  2193. elif kind == FLOAT:
  2194. value = self.cpu.done_with_this_frame_float_v
  2195. else:
  2196. raise AssertionError(kind)
  2197. from rpython.jit.backend.llsupport.descr import unpack_fielddescr
  2198. from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr
  2199. descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
  2200. _offset, _size, _ = unpack_fielddescr(descrs.jf_descr)
  2201. fail_descr = self.cpu.get_fail_descr_from_number(value)
  2202. value = fail_descr.hide(self.cpu)
  2203. rgc._make_sure_does_not_move(value)
  2204. value = rffi.cast(lltype.Signed, value)
  2205. if rx86.fits_in_32bits(value):
  2206. self.mc.CMP_mi((eax.value, _offset), value)
  2207. else:
  2208. self.mc.MOV_ri(X86_64_SCRATCH_REG.value, value)
  2209. self.mc.CMP_mr((eax.value, _offset), X86_64_SCRATCH_REG.value)
  2210. # patched later
  2211. self.mc.J_il8(rx86.Conditions['E'], 0) # goto B if we get 'done_with_this_frame'
  2212. je_location = self.mc.get_relative_pos()
  2213. #
  2214. # Path A: use assembler_helper_adr
  2215. jd = descr.outermost_jitdriver_sd
  2216. assert jd is not None
  2217. asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
  2218. self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
  2219. tmp=ecx)
  2220. if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
  2221. self.mc.FSTPL_b(result_loc.value)
  2222. #else: result_loc is already either eax or None, checked below
  2223. self.mc.JMP_l8(0) # jump to done, patched later
  2224. jmp_location = self.mc.get_relative_pos()
  2225. #
  2226. # Path B: fast path. Must load the return value, and reset the token
  2227. offset = jmp_location - je_location
  2228. assert 0 < offset <= 127
  2229. self.mc.overwrite(je_location - 1, chr(offset))
  2230. #
2231. # Reset the vable token --- XXX really too much special logic here :-(
  2232. if jd.index_of_virtualizable >= 0:
  2233. from rpython.jit.backend.llsupport.descr import FieldDescr
  2234. fielddescr = jd.vable_token_descr
  2235. assert isinstance(fielddescr, FieldDescr)
  2236. ofs = fielddescr.offset
  2237. self.mc.MOV(edx, arglocs[1])
  2238. self.mc.MOV_mi((edx.value, ofs), 0)
  2239. # in the line above, TOKEN_NONE = 0
  2240. #
  2241. if op.result is not None:
  2242. # load the return value from the dead frame's value index 0
  2243. kind = op.result.type
  2244. if kind == FLOAT:
  2245. t = unpack_interiorfielddescr(descrs.as_float)
  2246. self.mc.MOVSD_xm(xmm0.value, (eax.value, t[0]))
  2247. if result_loc is not xmm0:
  2248. self.mc.MOVSD(result_loc, xmm0)
  2249. else:
  2250. assert result_loc is eax
  2251. if kind == INT:
  2252. t = unpack_interiorfielddescr(descrs.as_int)
  2253. else:
  2254. t = unpack_interiorfielddescr(descrs.as_ref)
  2255. self.mc.MOV_rm(eax.value, (eax.value, t[0]))
  2256. #
  2257. # Here we join Path A and Path B again
  2258. offset = self.mc.get_relative_pos() - jmp_location
  2259. assert 0 <= offset <= 127
  2260. self.mc.overwrite(jmp_location - 1, chr(offset))
  2261. self.mc.CMP_bi(FORCE_INDEX_OFS, 0)
  2262. self.implement_guard(guard_token, 'L')
  2263. def genop_discard_cond_call_gc_wb(self, op, arglocs):
  2264. # Write code equivalent to write_barrier() in the GC: it checks
  2265. # a flag in the object at arglocs[0], and if set, it calls a
  2266. # helper piece of assembler. The latter saves registers as needed
2267. # and calls the function jit_remember_young_pointer() from the GC.
  2268. descr = op.getdescr()
  2269. if we_are_translated():
  2270. cls = self.cpu.gc_ll_descr.has_write_barrier_class()
  2271. assert cls is not None and isinstance(descr, cls)
  2272. #
  2273. opnum = op.getopnum()
  2274. card_marking = False
  2275. mask = descr.jit_wb_if_flag_singlebyte
  2276. if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
  2277. # assumptions the rest of the function depends on:
  2278. assert (descr.jit_wb_cards_set_byteofs ==
  2279. descr.jit_wb_if_flag_byteofs)
  2280. assert descr.jit_wb_cards_set_singlebyte == -0x80
  2281. card_marking = True
  2282. mask = descr.jit_wb_if_flag_singlebyte | -0x80
  2283. #
  2284. loc_base = arglocs[0]
  2285. self.mc.TEST8(addr_add_const(loc_base, descr.jit_wb_if_flag_byteofs),
  2286. imm(mask))
  2287. self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
  2288. jz_location = self.mc.get_relative_pos()
  2289. # for cond_call_gc_wb_array, also add another fast path:
  2290. # if GCFLAG_CARDS_SET, then we can just set one bit and be done
  2291. if card_marking:
  2292. # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
2293. # be checked by the status flags of the previous TEST8
  2294. self.mc.J_il8(rx86.Conditions['S'], 0) # patched later
  2295. js_location = self.mc.get_relative_pos()
  2296. else:
  2297. js_location = 0
  2298. # Write only a CALL to the helper prepared in advance, passing it as
  2299. # argument the address of the structure we are writing into
  2300. # (the first argument to COND_CALL_GC_WB).
  2301. helper_num = card_marking
  2302. if self._regalloc.xrm.reg_bindings:
  2303. helper_num += 2
  2304. if self.wb_slowpath[helper_num] == 0: # tests only
  2305. assert not we_are_translated()
  2306. self.cpu.gc_ll_descr.write_barrier_descr = descr
  2307. self._build_wb_slowpath(card_marking,
  2308. bool(self._regalloc.xrm.reg_bindings))
  2309. assert self.wb_slowpath[helper_num] != 0
  2310. #
  2311. self.mc.PUSH(loc_base)
  2312. self.mc.CALL(imm(self.wb_slowpath[helper_num]))
  2313. if card_marking:
  2314. # The helper ends again with a check of the flag in the object.
  2315. # So here, we can simply write again a 'JNS', which will be
  2316. # taken if GCFLAG_CARDS_SET is still not set.
  2317. self.mc.J_il8(rx86.Conditions['NS'], 0) # patched later
  2318. jns_location = self.mc.get_relative_pos()
  2319. #
  2320. # patch the JS above
  2321. offset = self.mc.get_relative_pos() - js_location
  2322. assert 0 < offset <= 127
  2323. self.mc.overwrite(js_location-1, chr(offset))
  2324. #
  2325. # case GCFLAG_CARDS_SET: emit a few instructions to do
  2326. # directly the card flag setting
  2327. loc_index = arglocs[1]
  2328. if isinstance(loc_index, RegLoc):
  2329. if IS_X86_64 and isinstance(loc_base, RegLoc):
  2330. # copy loc_index into r11
  2331. tmp1 = X86_64_SCRATCH_REG
  2332. self.mc.MOV_rr(tmp1.value, loc_index.value)
  2333. final_pop = False
  2334. else:
  2335. # must save the register loc_index before it is mutated
  2336. self.mc.PUSH_r(loc_index.value)
  2337. tmp1 = loc_index
  2338. final_pop = True
  2339. # SHR tmp, card_page_shift
  2340. self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift)
  2341. # XOR tmp, -8
  2342. self.mc.XOR_ri(tmp1.value, -8)
  2343. # BTS [loc_base], tmp
  2344. self.mc.BTS(addr_add_const(loc_base, 0), tmp1)
  2345. # done
  2346. if final_pop:
  2347. self.mc.POP_r(loc_index.value)
  2348. #
  2349. elif isinstance(loc_index, ImmedLoc):
  2350. byte_index = loc_index.value >> descr.jit_wb_card_page_shift
  2351. byte_ofs = ~(byte_index >> 3)
  2352. byte_val = 1 << (byte_index & 7)
  2353. self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val))
  2354. else:
  2355. raise AssertionError("index is neither RegLoc nor ImmedLoc")
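# Worked example of the constant-index card computation above, assuming
# jit_wb_card_page_shift == 7 (the real value comes from the GC descr): for
# loc_index == 1000, byte_index = 1000 >> 7 = 7, the card byte lives at
# offset ~(7 >> 3) = -1 from loc_base (the card bytes grow backwards from
# the start of the object), and the bit to set is 1 << (7 & 7) = 0x80.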
  2356. #
  2357. # patch the JNS above
  2358. offset = self.mc.get_relative_pos() - jns_location
  2359. assert 0 < offset <= 127
  2360. self.mc.overwrite(jns_location-1, chr(offset))
  2361. # patch the JZ above
  2362. offset = self.mc.get_relative_pos() - jz_location
  2363. assert 0 < offset <= 127
  2364. self.mc.overwrite(jz_location-1, chr(offset))
  2365. genop_discard_cond_call_gc_wb_array = genop_discard_cond_call_gc_wb
  2366. def not_implemented_op_discard(self, op, arglocs):
  2367. not_implemented("not implemented operation: %s" % op.getopname())
  2368. def not_implemented_op(self, op, arglocs, resloc):
  2369. not_implemented("not implemented operation with res: %s" %
  2370. op.getopname())
  2371. def not_implemented_op_guard(self, op, guard_op,
  2372. failaddr, arglocs, resloc):
  2373. not_implemented("not implemented operation (guard): %s" %
  2374. op.getopname())
  2375. def mark_gc_roots(self, force_index, use_copy_area=False):
  2376. if force_index < 0:
  2377. return # not needed
  2378. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2379. if gcrootmap:
  2380. mark = self._regalloc.get_mark_gc_roots(gcrootmap, use_copy_area)
  2381. if gcrootmap.is_shadow_stack:
  2382. gcrootmap.write_callshape(mark, force_index)
  2383. else:
  2384. if self.gcrootmap_retaddr_forced == 0:
  2385. self.mc.insert_gcroot_marker(mark) # common case
  2386. else:
  2387. assert self.gcrootmap_retaddr_forced != -1, (
  2388. "two mark_gc_roots() in a CALL_RELEASE_GIL")
  2389. gcrootmap.put(self.gcrootmap_retaddr_forced, mark)
  2390. self.gcrootmap_retaddr_forced = -1
  2391. def closing_jump(self, target_token):
  2392. # The backend's logic assumes that the target code is in a piece of
  2393. # assembler that was also called with the same number of arguments,
  2394. # so that the locations [ebp+8..] of the input arguments are valid
  2395. # stack locations both before and after the jump.
  2396. my_nbargs = self.current_clt._debug_nbargs
  2397. target_nbargs = target_token._x86_clt._debug_nbargs
  2398. assert my_nbargs == target_nbargs
  2399. #
  2400. target = target_token._x86_loop_code
  2401. if target_token in self.target_tokens_currently_compiling:
  2402. curpos = self.mc.get_relative_pos() + 5
  2403. self.mc.JMP_l(target - curpos)
  2404. else:
  2405. self.mc.JMP(imm(target))
  2406. def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
  2407. assert size & (WORD-1) == 0 # must be correctly aligned
  2408. self.mc.MOV(eax, heap(nursery_free_adr))
  2409. self.mc.LEA_rm(edx.value, (eax.value, size))
  2410. self.mc.CMP(edx, heap(nursery_top_adr))
  2411. self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
  2412. jmp_adr = self.mc.get_relative_pos()
  2413. # See comments in _build_malloc_slowpath for the
  2414. # details of the two helper functions that we are calling below.
  2415. # First, we need to call two of them and not just one because we
  2416. # need to have a mark_gc_roots() in between. Then the calling
2417. # conventions of slowpath_addr{1,2} are tweaked a lot to allow
  2418. # the code here to be just two CALLs: slowpath_addr1 gets the
  2419. # size of the object to allocate from (EDX-EAX) and returns the
  2420. # result in EAX; slowpath_addr2 additionally returns in EDX a
  2421. # copy of heap(nursery_free_adr), so that the final MOV below is
  2422. # a no-op.
  2423. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  2424. shadow_stack = (gcrootmap is not None and gcrootmap.is_shadow_stack)
  2425. if not shadow_stack:
  2426. # there are two helpers to call only with asmgcc
  2427. slowpath_addr1 = self.malloc_slowpath1
  2428. self.mc.CALL(imm(slowpath_addr1))
  2429. self.mark_gc_roots(self.write_new_force_index(), use_copy_area=True)
  2430. slowpath_addr2 = self.malloc_slowpath2
  2431. self.mc.CALL(imm(slowpath_addr2))
  2432. # reserve room for the argument to the real malloc and the
  2433. # saved XMM regs (on 32 bit: 8 * 2 words; on 64 bit: 16 * 1
  2434. # word)
  2435. self._regalloc.needed_extra_stack_locations(1+16)
  2436. offset = self.mc.get_relative_pos() - jmp_adr
  2437. assert 0 < offset <= 127
  2438. self.mc.overwrite(jmp_adr-1, chr(offset))
  2439. self.mc.MOV(heap(nursery_free_adr), edx)
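# The fast path above is a classic bump-pointer allocation. Roughly, as a
# Python sketch of the logic (hypothetical names; the real slowpath also
# returns a fresh copy of nursery_free in edx so the final MOV is a no-op):
#
#     def malloc_cond(size):
#         result = nursery_free                   # MOV eax, [free]
#         new_free = result + size                # LEA edx, [eax+size]
#         if new_free > nursery_top:              # CMP edx, [top]; JNA skips
#             result, new_free = slowpath(size)   # CALL malloc_slowpath
#         nursery_free = new_free                 # MOV [free], edx
#         return result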
  2440. genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
  2441. genop_list = [Assembler386.not_implemented_op] * rop._LAST
  2442. genop_llong_list = {}
  2443. genop_math_list = {}
  2444. genop_guard_list = [Assembler386.not_implemented_op_guard] * rop._LAST
  2445. for name, value in Assembler386.__dict__.iteritems():
  2446. if name.startswith('genop_discard_'):
  2447. opname = name[len('genop_discard_'):]
  2448. num = getattr(rop, opname.upper())
  2449. genop_discard_list[num] = value
  2450. elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
  2451. opname = name[len('genop_guard_'):]
  2452. num = getattr(rop, opname.upper())
  2453. genop_guard_list[num] = value
  2454. elif name.startswith('genop_llong_'):
  2455. opname = name[len('genop_llong_'):]
  2456. num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper())
  2457. genop_llong_list[num] = value
  2458. elif name.startswith('genop_math_'):
  2459. opname = name[len('genop_math_'):]
  2460. num = getattr(EffectInfo, 'OS_MATH_' + opname.upper())
  2461. genop_math_list[num] = value
  2462. elif name.startswith('genop_'):
  2463. opname = name[len('genop_'):]
  2464. num = getattr(rop, opname.upper())
  2465. genop_list[num] = value
  2466. # XXX: ri386 migration shims:
  2467. def addr_add(reg_or_imm1, reg_or_imm2, offset=0, scale=0):
  2468. return AddressLoc(reg_or_imm1, reg_or_imm2, scale, offset)
  2469. def addr_add_const(reg_or_imm1, offset):
  2470. return AddressLoc(reg_or_imm1, imm0, 0, offset)
  2471. def mem(loc, offset):
  2472. return AddressLoc(loc, imm0, 0, offset)
  2473. def heap(addr):
  2474. return AddressLoc(ImmedLoc(addr), imm0, 0, 0)
  2475. def not_implemented(msg):
  2476. os.write(2, '[x86/asm] %s\n' % msg)
  2477. raise NotImplementedError(msg)
  2478. class BridgeAlreadyCompiled(Exception):
  2479. pass