PageRenderTime 53ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 1ms

/rpython/jit/backend/zarch/assembler.py

https://bitbucket.org/pypy/pypy/
Python | 1554 lines | 1088 code | 189 blank | 277 comment | 170 complexity | 6f4d845b2acc86670133beba16624c8d MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
  2. DEBUG_COUNTER)
  3. from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
  4. from rpython.jit.backend.llsupport import jitframe, rewrite
  5. from rpython.jit.backend.model import CompiledLoopToken
  6. from rpython.jit.backend.zarch import conditions as c
  7. from rpython.jit.backend.zarch import registers as r
  8. from rpython.jit.backend.zarch import locations as l
  9. from rpython.jit.backend.zarch.pool import LiteralPool
  10. from rpython.rtyper.lltypesystem.lloperation import llop
  11. from rpython.jit.backend.zarch.codebuilder import (InstrBuilder,
  12. OverwritingBuilder)
  13. from rpython.jit.backend.zarch.helper.regalloc import check_imm_value
  14. from rpython.jit.backend.zarch.registers import JITFRAME_FIXED_SIZE
  15. from rpython.jit.backend.zarch.regalloc import ZARCHRegisterManager
  16. from rpython.jit.backend.zarch.arch import (WORD,
  17. STD_FRAME_SIZE_IN_BYTES, THREADLOCAL_ADDR_OFFSET,
  18. RECOVERY_GCMAP_POOL_OFFSET, RECOVERY_TARGET_POOL_OFFSET,
  19. JUMPABS_TARGET_ADDR__POOL_OFFSET, JUMPABS_POOL_ADDR_POOL_OFFSET,
  20. THREADLOCAL_ON_ENTER_JIT, JIT_ENTER_EXTRA_STACK_SPACE)
  21. from rpython.jit.backend.zarch.opassembler import OpAssembler
  22. from rpython.jit.backend.zarch.regalloc import Regalloc
  23. from rpython.jit.codewriter.effectinfo import EffectInfo
  24. from rpython.jit.metainterp.resoperation import rop
  25. from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
  26. have_debug_prints)
  27. from rpython.jit.metainterp.history import (INT, REF, FLOAT, TargetToken)
  28. from rpython.rlib.rarithmetic import r_uint
  29. from rpython.rlib.objectmodel import we_are_translated, specialize, compute_unique_id
  30. from rpython.rlib import rgc
  31. from rpython.rlib.longlong2float import float2longlong
  32. from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
  33. from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
  34. from rpython.rlib.jit import AsmInfo
  35. from rpython.rlib.rjitlog import rjitlog as jl
  36. class JitFrameTooDeep(Exception):
  37. pass
  38. class AssemblerZARCH(BaseAssembler, OpAssembler):
  39. def __init__(self, cpu, translate_support_code=False):
  40. BaseAssembler.__init__(self, cpu, translate_support_code)
  41. self.mc = None
  42. self.current_clt = None
  43. self._regalloc = None
  44. self.datablockwrapper = None
  45. self.propagate_exception_path = 0
  46. self.stack_check_slowpath = 0
  47. self.loop_run_counters = []
  48. self.gcrootmap_retaddr_forced = 0
  49. self.failure_recovery_code = [0, 0, 0, 0]
  50. self.wb_slowpath = [0,0,0,0,0]
  51. self.pool = None
    def setup(self, looptoken):
        """Install the per-loop assembly state for 'looptoken'.

        Creates a fresh literal pool, instruction builder and data-block
        wrapper.  Must be balanced by a call to teardown() once the loop
        or bridge has been assembled.
        """
        BaseAssembler.setup(self, looptoken)
        assert self.memcpy_addr != 0, 'setup_once() not called?'
        if we_are_translated():
            self.debug = False
        self.current_clt = looptoken.compiled_loop_token
        self.pool = LiteralPool()
        self.mc = InstrBuilder(None)
        self.pending_guard_tokens = []
        self.pending_guard_tokens_recovered = 0
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.mc.datablockwrapper = self.datablockwrapper
        self.target_tokens_currently_compiling = {}
        self.frame_depth_to_patch = []
  70. def teardown(self):
  71. self.pending_guard_tokens = None
  72. self.current_clt = None
  73. self._regalloc = None
  74. self.mc = None
  75. self.pool = None
  76. def target_arglocs(self, looptoken):
  77. return looptoken._zarch_arglocs
  78. def get_asmmemmgr_blocks(self, looptoken):
  79. clt = looptoken.compiled_loop_token
  80. if clt.asmmemmgr_blocks is None:
  81. clt.asmmemmgr_blocks = []
  82. return clt.asmmemmgr_blocks
    def jmpto(self, register):
        """Emit an unconditional register-indirect jump to 'register'."""
        # unconditional jump: BCR with condition mask 0xf (branch always)
        self.mc.BCR_rr(0xf, register.value)
    def _build_failure_recovery(self, exc, withfloats=False):
        """Build one of the four shared failure-recovery stubs.

        The stub stores the faildescr and gcmap (already loaded into
        SCRATCH/SCRATCH2 by generate_quick_failure()) into the jitframe,
        spills the core (and optionally float) registers, captures a
        pending RPython exception if 'exc', and leaves via _call_footer().
        The stub address is recorded in
        self.failure_recovery_code[exc + 2 * withfloats].
        """
        mc = InstrBuilder(None)
        self.mc = mc
        # fill in the jf_descr and jf_gcmap fields of the frame according
        # to which failure we are resuming from. These are set before
        # this function is called (see generate_quick_failure()).
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        self.mc.STG(r.SCRATCH2, l.addr(ofs2, r.SPP))
        self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
        self._push_core_regs_to_jitframe(mc)
        if withfloats:
            self._push_fp_regs_to_jitframe(mc)
        if exc:
            # We might have an exception pending.
            mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
            # Copy it into 'jf_guard_exc'
            offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            mc.LG(r.SCRATCH2, l.addr(0, r.SCRATCH))
            mc.STG(r.SCRATCH2, l.addr(offset, r.SPP))
            # Zero out the exception fields
            diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
            assert check_imm_value(diff)
            mc.LGHI(r.SCRATCH2, l.imm(0))
            mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
            mc.STG(r.SCRATCH2, l.addr(diff, r.SCRATCH))
        # now we return from the complete frame, which starts from
        # _call_header_with_stack_check(). The _call_footer below does it.
        self._call_footer()
        rawstart = mc.materialize(self.cpu, [])
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart
        self.mc = None
    def generate_quick_failure(self, guardtok):
        """Emit the small out-of-line code a failing guard jumps to.

        Loads the faildescr gcref into SCRATCH and the gcmap into SCRATCH2,
        then branches to the shared failure-recovery stub 'target'.
        Returns the start offset of this code within the current mc.
        """
        startpos = self.mc.currpos()
        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
        assert target != 0
        self.load_gcref_into(r.SCRATCH, faildescrindex)
        self.load_gcmap(self.mc, r.SCRATCH2, gcmap=guardtok.gcmap)
        self.mc.load_imm(r.r14, target)
        self.mc.BCR(c.ANY, r.r14)
        return startpos
  127. def load_gcref_into(self, register, index):
  128. topoff = index * WORD
  129. size = self.pool.gcref_table_size
  130. self.mc.LG(register, l.addr(-size + topoff, r.POOL))
    def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
        """Build one shared write-barrier slowpath helper.

        The helper saves the registers it may clobber, calls the GC's
        write-barrier function with its single argument taken from r0
        (or from SPP if 'for_frame'), then restores everything and
        returns.  The helper address is stored in self.wb_slowpath:
        slot 4 for the 'for_frame' variant, else withcards+2*withfloats.
        """
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers. It must save all registers, and optionally
        # all fp registers. It takes its single argument in r0
        # (or in SPP if 'for_frame').
        if for_frame:
            argument_loc = r.SPP
        else:
            argument_loc = r.r0
        mc = InstrBuilder()
        old_mc = self.mc
        self.mc = mc
        # save the information
        mc.store_link()
        mc.push_std_frame()
        RCS2 = r.r10
        RCS3 = r.r11
        # r9,r10,r11,r2,f0 -> makes exactly 4 words + 8 byte
        extra_stack_size = 4 * WORD + 8
        if for_frame:
            # NOTE: don't save registers on the jitframe here! It might
            # override already-saved values that will be restored
            # later...
            #
            # This 'for_frame' version is called after a CALL. It does not
            # need to save many registers: the registers that are anyway
            # destroyed by the call can be ignored (VOLATILES), and the
            # non-volatile registers won't be changed here. It only needs
            # to save r2 and f0 (possible results of the call),
            # and two more non-volatile registers (used to store
            # the RPython exception that occurred in the CALL, if any).
            #
            # Save those on the machine stack, not the jitframe.
            off = STD_FRAME_SIZE_IN_BYTES
            mc.LG(r.SCRATCH, l.addr(0, r.SP))
            mc.STG(r.SCRATCH, l.addr(-extra_stack_size, r.SP))
            mc.LAY(r.SP, l.addr(-extra_stack_size, r.SP))
            mc.STMG(r.r9, r.r11, l.addr(off, r.SP))
            mc.STG(r.r2, l.addr(off+3*WORD, r.SP))
            # OK to use STD, because offset is not negative
            mc.STD(r.f0, l.addr(off+4*WORD, r.SP))
            saved_regs = None
            saved_fp_regs = None
        else:
            # push all volatile registers, sometimes push RCS2
            if withcards:
                saved_regs = r.VOLATILES + [RCS2]
            else:
                saved_regs = r.VOLATILES
            if withfloats:
                saved_fp_regs = r.MANAGED_FP_REGS
            else:
                saved_fp_regs = []
            self._push_core_regs_to_jitframe(mc, saved_regs)
            self._push_fp_regs_to_jitframe(mc, saved_fp_regs)
        if for_frame:
            # note that it's safe to store the exception in register,
            # since the call to write barrier can't collect
            # (and this is assumed a bit left and right here, like lack
            # of _reload_frame_if_necessary)
            # This trashes r0 and r1, which is fine in this case
            assert argument_loc is not r.r0
            assert argument_loc is not r.r1
            self._store_and_reset_exception(mc, RCS2, RCS3)
        if withcards:
            # preserve the argument in RCS2: it will be reloaded below
            # to test the card-marking flag after the call
            mc.LGR(RCS2, argument_loc)
        func = rffi.cast(lltype.Signed, func)
        # Note: if not 'for_frame', argument_loc is r0, which must carefully
        # not be overwritten above
        mc.load_imm(mc.RAW_CALL_REG, func)
        mc.LGR(r.r2, argument_loc)
        mc.raw_call()
        if for_frame:
            self._restore_exception(mc, RCS2, RCS3)
        if withcards:
            # A final NILL before the return to the caller. Careful to
            # not follow this instruction with another one that changes
            # the status of the condition code
            card_marking_mask = descr.jit_wb_cards_set_singlebyte
            mc.LLGC(RCS2, l.addr(descr.jit_wb_if_flag_byteofs, RCS2))
            mc.NILL(RCS2, l.imm(card_marking_mask & 0xFF))
        if for_frame:
            # undo the machine-stack saves done above
            off = STD_FRAME_SIZE_IN_BYTES
            mc.LMG(r.r9, r.r11, l.addr(off, r.SP))
            mc.LG(r.r2, l.addr(off+3*WORD, r.SP))
            mc.LD(r.f0, l.addr(off+4*WORD, r.SP))
            mc.LAY(r.SP, l.addr(extra_stack_size, r.SP))
        else:
            self._pop_core_regs_from_jitframe(mc, saved_regs)
            self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
        mc.restore_link()
        mc.BCR(c.ANY, r.RETURN)
        self.mc = old_mc
        rawstart = mc.materialize(self.cpu, [])
        if for_frame:
            self.wb_slowpath[4] = rawstart
        else:
            self.wb_slowpath[withcards + 2 * withfloats] = rawstart
    def _store_and_reset_exception(self, mc, excvalloc, exctploc=None):
        """Reset the exception, after fetching it inside the two regs.

        Emits code that loads the current exception value into 'excvalloc'
        (and, if given, the exception type into 'exctploc'), then zeroes
        both global exception fields.  Clobbers SCRATCH and SCRATCH2.
        """
        mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert check_imm_value(diff)
        # Load the exception fields into the two registers
        mc.LG(excvalloc, l.addr(0,r.SCRATCH))
        if exctploc is not None:
            mc.LG(exctploc, l.addr(diff, r.SCRATCH))
        # Zero out the exception fields
        mc.XGR(r.SCRATCH2, r.SCRATCH2)
        mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
        mc.STG(r.SCRATCH2, l.addr(diff, r.SCRATCH))
    def _restore_exception(self, mc, excvalloc, exctploc):
        """Emit code that writes the exception value/type held in
        'excvalloc'/'exctploc' back into the global exception fields
        (inverse of _store_and_reset_exception).  Clobbers SCRATCH."""
        mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert check_imm_value(diff)
        # Store the exception fields from the two registers
        mc.STG(excvalloc, l.addr(0, r.SCRATCH))
        mc.STG(exctploc, l.addr(diff, r.SCRATCH))
    def build_frame_realloc_slowpath(self):
        """Build the shared helper that reallocates a too-shallow jitframe.

        Stored in self._frame_realloc_slowpath; called with the new frame
        depth in SCRATCH2 and the gcmap already pushed by the caller.
        """
        # this code should do the following steps
        # a) store all registers in the jitframe
        # b) fish for the arguments passed by the caller
        # c) store the gcmap in the jitframe
        # d) call realloc_frame
        # e) set the fp to point to the new jitframe
        # f) store the address of the new jitframe in the shadowstack
        # c) set the gcmap field to 0 in the new jitframe
        # g) restore registers and return
        mc = InstrBuilder()
        self.mc = mc
        mc.store_link()
        mc.push_std_frame()
        # signature of this _frame_realloc_slowpath function:
        # * on entry, r0 is the new size
        # * no managed register must be modified
        # caller already did push_gcmap(store=True)
        self._push_core_regs_to_jitframe(mc, r.MANAGED_REGS)
        self._push_fp_regs_to_jitframe(mc)
        # First argument is SPP, which is the jitframe
        mc.LGR(r.r2, r.SPP)
        # no need to move second argument (frame_depth),
        # it is already in register r3!
        # NOTE(review): despite the comment above, the next line copies
        # SCRATCH2 into r3 -- presumably the depth arrives in SCRATCH2;
        # confirm against the call sites.
        mc.LGR(r.r3, r.SCRATCH2)
        RCS2 = r.r10
        RCS3 = r.r11
        self._store_and_reset_exception(mc, RCS2, RCS3)
        # Do the call
        adr = rffi.cast(lltype.Signed, self.cpu.realloc_frame)
        mc.load_imm(mc.RAW_CALL_REG, adr)
        mc.raw_call()
        # The result is stored back into SPP (= r31)
        mc.LGR(r.SPP, r.r2)
        self._restore_exception(mc, RCS2, RCS3)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            # update the shadowstack top entry to point to the new frame
            diff = mc.load_imm(r.r5, gcrootmap.get_root_stack_top_addr())
            mc.load(r.r5, r.r5, 0)
            mc.store(r.r2, r.r5, -WORD)
        self.pop_gcmap(mc) # cancel the push_gcmap(store=True) in the caller
        self._pop_core_regs_from_jitframe(mc, r.MANAGED_REGS)
        self._pop_fp_regs_from_jitframe(mc)
        mc.restore_link()
        mc.BCR(c.ANY, r.RETURN)
        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
        self.mc = None
    def _build_propagate_exception_path(self):
        """Build the shared exit path used when an RPython exception must
        be propagated out of jitted code.

        Fetches and clears the current exception, stores it in
        'jf_guard_exc' and the special propagate_exception_descr in
        'jf_descr', then leaves through _call_footer().  The address is
        recorded in self.propagate_exception_path.
        """
        self.mc = InstrBuilder()
        #
        # read and reset the current exception
        propagate_exception_descr = rffi.cast(lltype.Signed,
                cast_instance_to_gcref(self.cpu.propagate_exception_descr))
        ofs3 = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
        ofs4 = self.cpu.get_ofs_of_frame_field('jf_descr')
        self._store_and_reset_exception(self.mc, r.r2)
        self.mc.load_imm(r.r3, propagate_exception_descr)
        self.mc.STG(r.r2, l.addr(ofs3, r.SPP))
        self.mc.STG(r.r3, l.addr(ofs4, r.SPP))
        #
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu, [])
        self.propagate_exception_path = rawstart
        self.mc = None
    def _build_cond_call_slowpath(self, supports_floats, callee_only):
        """ This builds a general call slowpath, for whatever call happens to
        come.

        Returns the raw address of the materialized helper.
        """
        # signature of these cond_call_slowpath functions:
        # * on entry, r11 contains the function to call
        # * r2, r3, r4, r5 contain arguments for the call
        # * gcmap is pushed
        # * the old value of these regs must already be stored in the jitframe
        # * on exit, all registers are restored from the jitframe
        mc = InstrBuilder()
        self.mc = mc
        mc.store_link()
        mc.push_std_frame()
        # copy registers to the frame, with the exception of r2 to r5 and r11,
        # because these have already been saved by the caller. Note that
        # this is not symmetrical: these 5 registers are saved by the caller
        # but restored here at the end of this function.
        if callee_only:
            saved_regs = ZARCHRegisterManager.save_around_call_regs
        else:
            saved_regs = ZARCHRegisterManager.all_regs
        regs = [reg for reg in saved_regs
                    if reg is not r.r2 and
                       reg is not r.r3 and
                       reg is not r.r4 and
                       reg is not r.r5 and
                       reg is not r.r11]
        # the caller already did push_gcmap(store=True)
        self._push_core_regs_to_jitframe(mc, regs)
        if supports_floats:
            self._push_fp_regs_to_jitframe(mc)
        mc.raw_call(r.r11)
        # Finish
        self._reload_frame_if_necessary(mc)
        self.pop_gcmap(mc) # cancel the push_gcmap(store=True) in the caller
        # restore the full 'saved_regs' set (not just 'regs'): r2-r5/r11
        # were saved by the caller but are restored here (see note above)
        self._pop_core_regs_from_jitframe(mc, saved_regs)
        if supports_floats:
            self._pop_fp_regs_from_jitframe(mc)
        mc.restore_link()
        mc.BCR(c.ANY, r.RETURN)
        self.mc = None
        return mc.materialize(self.cpu, [])
    def _build_malloc_slowpath(self, kind):
        """ While arriving on slowpath, we have a gcmap in r1.
        The arguments are passed in r.RES and r.RSZ, as follows:
        kind == 'fixed': nursery_head in r.RES and the size in r.RSZ - r.RES.
        kind == 'str/unicode': length of the string to allocate in r.RES.
        kind == 'var': itemsize in r.RES, length to allocate in r.RSZ,
        and tid in r.r0.
        This function must preserve all registers apart from r.RES and r.RSZ.
        On return, SCRATCH must contain the address of nursery_free.

        Returns the raw address of the materialized helper.
        """
        assert kind in ['fixed', 'str', 'unicode', 'var']
        mc = InstrBuilder()
        self.mc = mc
        # alloc a frame for the callee
        mc.store_link()
        mc.push_std_frame()
        #
        # spill everything except the two result registers
        saved_regs = [reg for reg in r.MANAGED_REGS
                          if reg is not r.RES and reg is not r.RSZ]
        self._push_core_regs_to_jitframe(mc, saved_regs)
        self._push_fp_regs_to_jitframe(mc)
        # the caller already did push_gcmap(store=True)
        #
        # pick the GC entry point matching 'kind'
        if kind == 'fixed':
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        elif kind == 'str':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
        elif kind == 'unicode':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
        else:
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
        if kind == 'fixed':
            # compute the size we want
            mc.SGRK(r.r2, r.RSZ, r.RES)
            if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
                # for tests only
                mc.LGR(r.r3, r.SPP)
        elif kind == 'str' or kind == 'unicode':
            pass  # length is already in r2
        else:
            # arguments to the called function are [itemsize, tid, length]
            # itemsize is already in r2
            mc.LGR(r.r4, r.RSZ)        # length
            mc.LGR(r.r3, r.r0)         # tid
        # Do the call
        addr = rffi.cast(lltype.Signed, addr)
        mc.load_imm(mc.RAW_CALL_REG, addr)
        mc.raw_call()
        self._reload_frame_if_necessary(mc)
        # Check that we don't get NULL; if we do, we always interrupt the
        # current loop, as a "good enough" approximation (same as
        # emit_call_malloc_gc()).
        self.propagate_memoryerror_if_reg_is_null(r.r2, True)
        self._pop_core_regs_from_jitframe(mc, saved_regs)
        self._pop_fp_regs_from_jitframe(mc)
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        self.mc.load_imm(r.r1, nursery_free_adr)
        # r.r1 is now the address of nursery_free
        # r.RES is still the result of the call done above
        # r.RSZ is loaded from [r1], to make the caller's store a no-op here
        mc.load(r.RSZ, r.r1, 0)
        #
        mc.restore_link()
        mc.BCR(c.ANY, r.r14)
        self.mc = None
        return mc.materialize(self.cpu, [])
    def _build_stack_check_slowpath(self):
        """Build the helper called near function entry when the fast
        stack-depth test fails.

        Calls the CPU's stack-check slowpath; returns normally if no
        StackOverflow was raised, otherwise jumps to the
        propagate_exception_path.  The address is recorded in
        self.stack_check_slowpath (0 means "no check", e.g. in tests).
        """
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
            return      # no stack check (for tests, or non-translated)
        #
        # make a regular function that is called from a point near the start
        # of an assembler function (after it adjusts the stack and saves
        # registers).
        mc = InstrBuilder()
        #
        # store the link backwards
        mc.store_link()
        mc.push_std_frame()
        # pass the current stack pointer as the single argument
        mc.LGR(r.r2, r.SP)
        mc.load_imm(mc.RAW_CALL_REG, slowpathaddr)
        mc.raw_call()
        #
        # Check if it raised StackOverflow
        mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
        mc.LG(r.SCRATCH, l.addr(0, r.SCRATCH))
        # if this comparison is true, then everything is ok,
        # else we have an exception
        mc.cmp_op(r.SCRATCH, l.imm(0), imm=True)
        #
        mc.restore_link()
        # So we return to our caller, conditionally if "EQ"
        mc.BCR(c.EQ, r.r14)
        #
        # Else, jump to propagate_exception_path
        assert self.propagate_exception_path
        mc.branch_absolute(self.propagate_exception_path)
        #
        rawstart = mc.materialize(self.cpu, [])
        self.stack_check_slowpath = rawstart
  468. def new_stack_loc(self, i, tp):
  469. base_ofs = self.cpu.get_baseofs_of_frame_field()
  470. loc = l.StackLocation(i, l.get_fp_offset(base_ofs, i), tp)
  471. return loc
    def _call_header_with_stack_check(self):
        """Emit the function prologue, optionally followed by an inline
        stack-depth check that calls self.stack_check_slowpath on
        overflow."""
        self._call_header()
        if self.stack_check_slowpath == 0:
            pass                # not translated
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            diff = lengthaddr - endaddr
            assert check_imm_value(diff)
            mc = self.mc
            mc.load_imm(r.r1, endaddr)
            mc.load(r.r0, r.r1, 0)    # ld r0, [end]
            mc.load(r.r1, r.r1, diff) # ld r1, [length]
            # r0 := SP - end  (current depth below the recorded end)
            mc.SGRK(r.r0, r.SP, r.r0)
            jmp_pos = self.mc.get_relative_pos()
            # placeholder, patched below once the target offset is known
            mc.reserve_cond_jump()
            mc.load_imm(r.r14, self.stack_check_slowpath)
            mc.BASR(r.r14, r.r14)
            currpos = self.mc.currpos()
            pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
            # skip the slowpath call while depth <= length
            pmc.CLGRJ(r.r0, r.r1, c.LE, l.imm(currpos - jmp_pos))
            pmc.overwrite()
    def _check_frame_depth(self, mc, gcmap):
        """ check if the frame is of enough depth to follow this bridge.
        Otherwise reallocate the frame in a helper.

        The comparison constant is unknown here; 14 bytes are reserved
        and later filled in by patch_stack_checks().
        """
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.LG(r.r1, l.addr(ofs, r.SPP))
        patch_pos = mc.currpos()
        # placeholder for the following instructions
        # CGFI r1, ... (6 bytes)
        # BRC c, ...   (4 bytes)
        # LGHI r0, ... (4 bytes)
        # sum -> (14 bytes)
        mc.write('\x00'*14)
        mc.load_imm(r.RETURN, self._frame_realloc_slowpath)
        self.push_gcmap(mc, gcmap, store=True)
        mc.raw_call()
        # remember where to patch, and where the skip-branch must land
        self.frame_depth_to_patch.append((patch_pos, mc.currpos()))
    def patch_stack_checks(self, frame_depth):
        """Fill in every 14-byte placeholder emitted by _check_frame_depth
        with the now-known 'frame_depth'.

        Raises JitFrameTooDeep if the depth does not fit the signed
        16-bit immediate used by LGHI.
        """
        if frame_depth > 0x7fff:
            raise JitFrameTooDeep
        for traps_pos, jmp_target in self.frame_depth_to_patch:
            pmc = OverwritingBuilder(self.mc, traps_pos, 3)
            # patch 3 instructions as shown above
            pmc.CGFI(r.r1, l.imm(frame_depth))
            pmc.BRC(c.GE, l.imm(jmp_target - (traps_pos + 6)))
            pmc.LGHI(r.r0, l.imm(frame_depth))
            pmc.overwrite()
    @rgc.no_release_gil
    def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
                      operations, looptoken, log):
        """Assemble a complete loop for 'looptoken' and return an AsmInfo.

        Drives the whole pipeline: register allocation, pool
        pre-assembly, prologue emission, body assembly, failure-recovery
        stubs, materialization, and patching of stack checks, gcref
        table and guard jumps.
        """
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        self.setup(looptoken)
        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
        clt.frame_info.clear()       # for now
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)
        regalloc = Regalloc(assembler=self)
        #
        allgcrefs = []
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, allgcrefs)
        # reserve_gcref_table is handled in pool
        self.pool.pre_assemble(self, operations, allgcrefs)
        functionpos = self.mc.get_relative_pos()
        self._call_header_with_stack_check()
        looppos = self.mc.get_relative_pos()
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
                                                   operations)
        self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        #
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        if not we_are_translated():
            self.mc.trap()       # should be never reached
        rawstart = self.materialize_loop(looptoken)
        self.patch_gcref_table(looptoken, rawstart)
        looptoken._ll_function_addr = rawstart + functionpos
        #
        looptoken._ll_loop_code = looppos + rawstart
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
            looptoken.number, loopname,
            r_uint(rawstart + looppos),
            r_uint(rawstart + size_excluding_failure_stuff),
            r_uint(rawstart + functionpos)))
        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + looppos))
        debug_print("       failures: 0x%x" % r_uint(rawstart +
                                                     size_excluding_failure_stuff))
        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
        debug_stop("jit-backend-addr")
        #
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._zarch_rawstart = rawstart
            looptoken._zarch_fullsize = full_size
            looptoken._zarch_ops_offset = ops_offset
        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset=ops_offset)
            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_loop(inputargs, operations, 0,
                                           "rewritten", name=loopname,
                                           ops_offset=ops_offset)
        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        #if self.cpu.profile_agent is not None:
        #    name = "Loop # %s: %s" % (looptoken.number, loopname)
        #    self.cpu.profile_agent.native_code_written(name,
        #                                               rawstart, full_size)
        #print(hex(rawstart+looppos))
        #import pdb; pdb.set_trace()
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos, rawstart)
    @rgc.no_release_gil
    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log, logger):
        """Assemble a bridge out of the guard described by 'faildescr' and
        patch that guard to jump to it.  Returns an AsmInfo."""
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        self.setup(original_loop_token)
        descr_number = compute_unique_id(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)
        arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
        regalloc = Regalloc(assembler=self)
        allgcrefs = []
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations, allgcrefs,
                                             self.current_clt.frame_info)
        # reserve gcref table is handled in pre_assemble
        self.pool.pre_assemble(self, operations, allgcrefs, bridge=True)
        startpos = self.mc.get_relative_pos()
        # the bridge might need a deeper frame than the original loop
        self._check_frame_depth(self.mc, regalloc.get_gcmap())
        bridgestartpos = self.mc.get_relative_pos()
        # re-establish the pool register for this bridge's pool
        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - bridgestartpos))
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
        codeendpos = self.mc.get_relative_pos()
        #self.pool.post_assemble(self)
        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()
        #
        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        rawstart = self.materialize_loop(original_loop_token)
        self.patch_gcref_table(original_loop_token, rawstart)
        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
                    (r_uint(descr_number), r_uint(rawstart + startpos),
                     r_uint(rawstart + codeendpos)))
        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
        debug_stop("jit-backend-addr")
        self.patch_pending_failure_recoveries(rawstart)
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart + startpos)
        ops_offset = self.mc.ops_offset
        frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                          frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset)
            # log that the already written bridge is stitched to a descr!
            logger.log_patch_guard(descr_number, rawstart)
            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
                                             faildescr, ops_offset=ops_offset)
        self.fixup_target_tokens(rawstart)
        self.update_frame_depth(frame_depth)
        self.teardown()
        return AsmInfo(ops_offset, rawstart + startpos, codeendpos - startpos,
                       rawstart + bridgestartpos)
  670. def patch_gcref_table(self, looptoken, rawstart):
  671. self.gc_table_addr = rawstart
  672. tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
  673. self._allgcrefs)
  674. gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
  675. gcreftracers.append(tracer) # keepalive
  676. self.teardown_gcrefs_list()
  677. def get_asmmemmgr_gcreftracers(self, looptoken):
  678. clt = looptoken.compiled_loop_token
  679. if clt.asmmemmgr_gcreftracers is None:
  680. clt.asmmemmgr_gcreftracers = []
  681. return clt.asmmemmgr_gcreftracers
  682. def patch_jump_for_descr(self, faildescr, adr_new_target):
  683. # 'faildescr.adr_jump_offset' is the address of an instruction that is a
  684. # conditional jump. We must patch this conditional jump to go
  685. # to 'adr_new_target'.
  686. mc = InstrBuilder()
  687. mc.b_abs(adr_new_target)
  688. mc.copy_to_raw_memory(faildescr.adr_jump_offset)
  689. assert faildescr.adr_jump_offset != 0
  690. faildescr.adr_jump_offset = 0 # means "patched"
  691. def fixup_target_tokens(self, rawstart):
  692. for targettoken in self.target_tokens_currently_compiling:
  693. assert isinstance(targettoken, TargetToken)
  694. targettoken._ll_loop_code += rawstart
  695. self.target_tokens_currently_compiling = None
    def flush_cc(self, condition, result_loc):
        # After emitting an instruction that leaves a boolean result in
        # a condition code (cc), call this. In the common case, result_loc
        # will be set to 'fp' by the regalloc, which in this case means
        # "propagate it between this operation and the next guard by keeping
        # it in the cc". In the uncommon case, result_loc is another
        # register, and we emit a load from the cc into this register.
        assert self.guard_success_cc == c.cond_none
        if result_loc is r.SPP:
            self.guard_success_cc = condition
        else:
            # sadly we cannot use LOCGHI
            # it is included in some extension that seem to be NOT installed
            # by default.
            #
            # Materialize the cc as 0/1: preload 1, then branch over the
            # XGR (which zeroes the register) when 'condition' holds.
            self.mc.LGHI(result_loc, l.imm(1))
            off = self.mc.XGR_byte_count + self.mc.BRC_byte_count
            self.mc.BRC(condition, l.imm(off)) # branch over XGR
            self.mc.XGR(result_loc, result_loc)
    def propagate_memoryerror_if_reg_is_null(self, reg, pop_one_stackframe=False):
        """Emit code that jumps to the propagate-exception path when
        *reg* is NULL (allocation failure).

        If pop_one_stackframe is true, one standard stack frame is
        popped before jumping, so the exception path unwinds correctly.
        """
        # if self.propagate_exception_path == 0 (tests), this may jump to 0
        # and segfaults. too bad. the alternative is to continue anyway
        # with reg==0, but that will segfault too.
        jmp_pos = self.mc.get_relative_pos()
        # reserve room for the CGIJ patched in below; it will jump *over*
        # the bail-out code when reg != 0, so the bail-out runs only for
        # reg == 0
        self.mc.reserve_cond_jump()
        self.mc.load_imm(r.RETURN, self.propagate_exception_path)
        if pop_one_stackframe:
            self.mc.LAY(r.SP, l.addr(STD_FRAME_SIZE_IN_BYTES, r.SP))
        self.mc.BCR(c.ANY, r.RETURN)
        # now that we know where the bail-out code ends, patch the
        # reserved slot with the forward jump
        curpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
        pmc.CGIJ(reg, l.imm(0), c.NE, l.imm(curpos - jmp_pos))
        pmc.overwrite()
  729. def regalloc_push(self, loc, already_pushed):
  730. """Pushes the value stored in loc to the stack
  731. Can trash the current value of SCRATCH when pushing a stack
  732. loc"""
  733. index = WORD * (~already_pushed)
  734. if loc.type == FLOAT:
  735. if not loc.is_fp_reg():
  736. self.regalloc_mov(loc, r.FP_SCRATCH)
  737. loc = r.FP_SCRATCH
  738. self.mc.STDY(loc, l.addr(index, r.SP))
  739. else:
  740. if not loc.is_core_reg():
  741. self.regalloc_mov(loc, r.SCRATCH)
  742. loc = r.SCRATCH
  743. self.mc.STG(loc, l.addr(index, r.SP))
  744. def regalloc_pop(self, loc, already_pushed):
  745. """Pops the value on top of the stack to loc. Can trash the current
  746. value of SCRATCH when popping to a stack loc"""
  747. index = WORD * (~already_pushed)
  748. if loc.type == FLOAT:
  749. if loc.is_fp_reg():
  750. self.mc.LDY(loc, l.addr(index, r.SP))
  751. else:
  752. self.mc.LDY(r.FP_SCRATCH, l.addr(index, r.SP))
  753. self.regalloc_mov(r.FP_SCRATCH, loc)
  754. else:
  755. if loc.is_core_reg():
  756. self.mc.LG(loc, l.addr(index, r.SP))
  757. else:
  758. self.mc.LG(r.SCRATCH, l.addr(index, r.SP))
  759. self.regalloc_mov(r.SCRATCH, loc)
  760. def regalloc_prepare_move(self, src, dst, tmp):
  761. if dst.is_stack() and src.is_stack():
  762. self.regalloc_mov(src, tmp)
  763. return tmp
  764. if dst.is_stack() and src.is_in_pool():
  765. self.regalloc_mov(src, tmp)
  766. return tmp
  767. return src
  768. def push_gcmap(self, mc, gcmap, store=True):
  769. # (called from callbuilder.py and ../llsupport/callbuilder.py)
  770. assert store is True
  771. self.load_gcmap(mc, r.SCRATCH, gcmap)
  772. ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
  773. mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
  774. def pop_gcmap(self, mc):
  775. ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
  776. mc.LG(r.SCRATCH, l.addr(ofs, r.SPP))
    def break_long_loop(self):
        # If the loop is too long, the guards in it will jump forward
        # more than 32 KB. We use an approximate hack to know if we
        # should break the loop here with an unconditional "b" that
        # jumps over the target code.
        jmp_pos = self.mc.currpos()
        # reserve room for the long branch patched in below
        self.mc.reserve_cond_jump()
        # flush the recovery stubs accumulated so far, while the guards
        # can still reach them with a short branch
        self.write_pending_failure_recoveries()
        currpos = self.mc.currpos()
        # patch the reserved slot with an unconditional long branch
        # jumping over the stubs just written
        pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
        pmc.BRCL(c.ANY, l.imm(currpos - jmp_pos))
        pmc.overwrite()
  789. def _assemble(self, regalloc, inputargs, operations):
  790. self._regalloc = regalloc
  791. self.guard_success_cc = c.cond_none
  792. regalloc.compute_hint_frame_locations(operations)
  793. regalloc.walk_operations(inputargs, operations)
  794. assert self.guard_success_cc == c.cond_none
  795. if we_are_translated() or self.cpu.dont_keepalive_stuff:
  796. self._regalloc = None # else keep it around for debugging
  797. frame_depth = regalloc.get_final_frame_depth()
  798. jump_target_descr = regalloc.jump_target_descr
  799. if jump_target_descr is not None:
  800. tgt_depth = jump_target_descr._zarch_clt.frame_info.jfi_frame_depth
  801. target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
  802. frame_depth = max(frame_depth, target_frame_depth)
  803. return frame_depth
  804. def regalloc_mov(self, prev_loc, loc):
  805. if prev_loc.is_imm():
  806. value = prev_loc.getint()
  807. # move immediate value to register
  808. if loc.is_reg():
  809. self.mc.load_imm(loc, value)
  810. return
  811. # move immediate value to memory
  812. elif loc.is_stack():
  813. offset = loc.value
  814. self.mc.load_imm(r.SCRATCH, prev_loc.value)
  815. self.mc.STG(r.SCRATCH, l.addr(offset, r.SPP))
  816. return
  817. assert 0, "not supported location"
  818. elif prev_loc.is_imm_float():
  819. self.mc.load_imm(r.SCRATCH, prev_loc.value)
  820. if loc.is_fp_reg():
  821. self.mc.LDY(loc, l.addr(0, r.SCRATCH))
  822. return
  823. elif loc.is_stack():
  824. src_adr = l.addr(0, r.SCRATCH)
  825. tgt_adr = l.AddressLocation(r.SPP, None, loc.value, l.imm(7))
  826. self.mc.MVC(tgt_adr, src_adr)
  827. return
  828. elif prev_loc.is_stack():
  829. offset = prev_loc.value
  830. # move from memory to register
  831. if loc.is_reg():
  832. self.mc.load(loc, r.SPP, offset)
  833. return
  834. # move in memory
  835. elif loc.is_stack():
  836. target_offset = loc.value
  837. self.mc.load(r.SCRATCH, r.SPP, offset)
  838. self.mc.store(r.SCRATCH, r.SPP, target_offset)
  839. return
  840. # move from memory to fp register
  841. elif loc.is_fp_reg():
  842. assert prev_loc.type == FLOAT, 'source not float location'
  843. self.mc.LDY(loc, l.addr(offset, r.SPP))
  844. return
  845. assert 0, "not supported location"
  846. elif prev_loc.is_reg():
  847. # move to another register
  848. if loc.is_reg():
  849. self.mc.LGR(loc, prev_loc)
  850. return
  851. # move to memory
  852. elif loc.is_stack():
  853. offset = loc.value
  854. self.mc.STG(prev_loc, l.addr(offset, r.SPP))
  855. return
  856. assert 0, "not supported location"
  857. elif prev_loc.is_in_pool():
  858. if loc.is_core_reg():
  859. self.mc.LG(loc, prev_loc)
  860. return
  861. # move immediate value to fp register
  862. if loc.is_fp_reg():
  863. self.mc.LDY(loc, prev_loc)
  864. return
  865. # move immediate value to memory
  866. elif loc.is_stack():
  867. offset = loc.value
  868. self.mc.LDY(r.FP_SCRATCH, prev_loc)
  869. self.mc.STDY(r.FP_SCRATCH, l.addr(offset, r.SPP))
  870. return
  871. assert 0, "not supported location"
  872. elif prev_loc.is_fp_reg():
  873. # move to another fp register
  874. if loc.is_fp_reg():
  875. self.mc.LDR(loc, prev_loc)
  876. return
  877. # move from fp register to memory
  878. elif loc.is_stack():
  879. assert loc.type == FLOAT, "target not float location"
  880. offset = loc.value
  881. self.mc.STDY(prev_loc, l.addr(offset, r.SPP))
  882. return
  883. assert 0, "not supported location"
  884. assert 0, "not supported location"
  885. def update_frame_depth(self, frame_depth):
  886. if frame_depth > 0x7fff:
  887. raise JitFrameTooDeep
  888. baseofs = self.cpu.get_baseofs_of_frame_field()
  889. self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
  890. def write_pending_failure_recoveries(self):
  891. # for each pending guard, generate the code of the recovery stub
  892. # at the end of self.mc.
  893. for i in range(self.pending_guard_tokens_recovered,
  894. len(self.pending_guard_tokens)):
  895. tok = self.pending_guard_tokens[i]
  896. tok.pos_recovery_stub = self.generate_quick_failure(tok)
  897. self.pending_guard_tokens_recovered = len(self.pending_guard_tokens)
  898. def materialize_loop(self, looptoken):
  899. self.datablockwrapper.done()
  900. self.datablockwrapper = None
  901. allblocks = self.get_asmmemmgr_blocks(looptoken)
  902. start = self.mc.materialize(self.cpu, allblocks,
  903. self.cpu.gc_ll_descr.gcrootmap)
  904. return start
  905. def _reload_frame_if_necessary(self, mc, shadowstack_reg=None):
  906. # might trash the VOLATILE registers different from r2 and f0
  907. gcrootmap = self.cpu.gc_ll_descr.gcrootmap
  908. if gcrootmap:
  909. if gcrootmap.is_shadow_stack:
  910. if shadowstack_reg is None:
  911. diff = mc.load_imm(r.SPP, gcrootmap.get_root_stack_top_addr())
  912. mc.load(r.SPP, r.SPP, 0)
  913. shadowstack_reg = r.SPP
  914. mc.load(r.SPP, shadowstack_reg, -WORD)
  915. wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
  916. if gcrootmap and wbdescr:
  917. # frame never uses card marking, so we enforce this is not
  918. # an array
  919. self._write_barrier_fastpath(mc, wbdescr, [r.SPP], regalloc=None,
  920. array=False, is_frame=True)
    def patch_pending_failure_recoveries(self, rawstart):
        """Now that the code sits in memory at *rawstart*, patch every
        pending guard's conditional jump so it reaches its recovery
        stub."""
        assert (self.pending_guard_tokens_recovered ==
                len(self.pending_guard_tokens))
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            # absolute address of the guard's jump instruction
            addr = rawstart + tok.pos_jump_offset
            #
            tok.faildescr.adr_jump_offset = rawstart + tok.pos_recovery_stub
            relative_target = tok.pos_recovery_stub - tok.pos_jump_offset
            #
            if not tok.guard_not_invalidated():
                # rewrite the jump in raw memory to point at the stub
                mc = InstrBuilder()
                mc.b_cond_offset(relative_target, tok.fcond)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                # (addr-in-the-code-of-the-not-yet-written-jump-target,
                # relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
    def _call_header(self):
        """Prologue of a JIT entry: save non-volatile registers, set up
        the literal pool pointer, and load the jitframe into SPP."""
        # Build a new stackframe of size STD_FRAME_SIZE_IN_BYTES
        fpoff = JIT_ENTER_EXTRA_STACK_SPACE
        # save the non-volatile core registers r6-r15
        self.mc.STMG(r.r6, r.r15, l.addr(-fpoff+6*WORD, r.SP))
        # point POOL at the literal pool of this piece of code
        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
        # f8 through f15 are saved registers (= non volatile)
        for i,reg in enumerate([r.f8, r.f9, r.f10, r.f11,
                                r.f12, r.f13, r.f14, r.f15]):
            off = -fpoff + STD_FRAME_SIZE_IN_BYTES
            assert off > 0
            self.mc.STD(reg, l.addr(off + i*8, r.SP))
        # save r3, the second argument, to the thread local position
        self.mc.STG(r.r3, l.addr(-fpoff+THREADLOCAL_ON_ENTER_JIT, r.SP))
        # push a standard frame for any call within the jit trace
        self.mc.push_std_frame(fpoff)
        # move the first argument to SPP: the jitframe object
        self.mc.LGR(r.SPP, r.r2)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)
    def _call_header_shadowstack(self, gcrootmap):
        """Push the jitframe (SPP) onto the GC shadow stack and bump
        the shadow stack top pointer by one word."""
        # we need to put one word into the shadowstack: the jitframe (SPP)
        # we saved all registers to the stack, so r3-r5 are free here
        RCS1 = r.r3
        RCS2 = r.r4
        RCS3 = r.r5
        mc = self.mc
        mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
        mc.load(RCS2, RCS1, 0) # ld RCS2, [rootstacktop]
        #
        mc.LGR(RCS3, RCS2)
        mc.AGHI(RCS3, l.imm(WORD)) # add RCS3, RCS2, WORD
        mc.store(r.SPP, RCS2, 0) # std SPP, RCS2
        #
        mc.store(RCS3, RCS1, 0) # std RCS3, [rootstacktop]
  978. def _call_footer_shadowstack(self, gcrootmap):
  979. # r6 -> r15 can be used freely, they will be restored by
  980. # _call_footer after this call
  981. RCS1 = r.r8
  982. RCS2 = r.r7
  983. mc = self.mc
  984. mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
  985. mc.load(RCS2, RCS1, 0) # ld RCS2, [rootstacktop]
  986. mc.AGHI(RCS2, l.imm(-WORD)) # sub RCS2, RCS2, WORD
  987. mc.store(RCS2, RCS1, 0) # std RCS2, [rootstacktop]
    def _call_footer(self):
        """Epilogue of a JIT entry: pop the shadow stack entry, restore
        the non-volatile registers, and return the jitframe in r2."""
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)
        # the return value is the jitframe
        self.mc.LGR(r.r2, r.SPP)
        size = STD_FRAME_SIZE_IN_BYTES
        # f8 through f15 are saved registers (= non volatile)
        # NOTE(review): 'size + size' presumably addresses the save area
        # laid out by _call_header above our own standard frame -- verify
        # against JIT_ENTER_EXTRA_STACK_SPACE
        for i,reg in enumerate([r.f8, r.f9, r.f10, r.f11,
                                r.f12, r.f13, r.f14, r.f15]):
            self.mc.LD(reg, l.addr(size + size + i*8, r.SP))
        # restore registers r6-r15
        self.mc.LMG(r.r6, r.r15, l.addr(size+6*WORD, r.SP))
        self.jmpto(r.r14)
    def _push_core_regs_to_jitframe(self, mc, includes=r.MANAGED_REGS):
        # Store the given core registers into their jitframe slots.
        self._multiple_to_or_from_jitframe(mc, includes, store=True)
    @specialize.arg(3)
    def _multiple_to_or_from_jitframe(self, mc, includes, store):
        """Store (store=True) or load (store=False) the core registers
        in *includes* to/from their jitframe slots.

        Runs of consecutively-numbered registers are coalesced into a
        single STMG/LMG instead of one STG/LG per register.
        """
        if len(includes) == 0:
            return
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if len(includes) == 1:
            # single register: plain store/load, no coalescing needed
            iv = includes[0]
            v = r.ALL_REG_INDEXES[iv]
            addr = l.addr(base_ofs + v * WORD, r.SPP)
            if store:
                mc.STG(iv, addr)
            else:
                mc.LG(iv, addr)
            return
        val = includes[0].value
        # includes[i => j]
        # for each continous sequence in the registers are stored
        # with STMG instead of STG, in the best case this only leads
        # to 1 instruction to store r.ri -> r.rj (if it is continuous)
        # i: start of the current run; j: index of the register after
        # 'register' in the loop below; val: register number at the end
        # of the current run
        i = 0
        j = 1
        for register in includes[1:]:
            if i >= j:
                j += 1
                continue
            regval = register.value
            if regval != (val+1):
                # the run ends before 'register': flush includes[i..i+diff]
                iv = includes[i]
                diff = (val - iv.value)
                v = r.ALL_REG_INDEXES[iv]
                addr = l.addr(base_ofs + v * WORD, r.SPP)
                if diff > 0:
                    # run of more than one register: one STMG/LMG
                    if store:
                        mc.STMG(iv, includes[i+diff], addr)
                    else:
                        mc.LMG(iv, includes[i+diff], addr)
                    i = j
                else:
                    # run of length one: plain STG/LG
                    if store:
                        mc.STG(iv, addr)
                    else:
                        mc.LG(iv, addr)
                    i = j
            val = regval
            j += 1
        if i >= len(includes):
            # all have been stored
            return
        # flush the trailing run includes[i..-1]
        diff = (val - includes[i].value)
        iv = includes[i]
        v = r.ALL_REG_INDEXES[iv]
        addr = l.addr(base_ofs + v * WORD, r.SPP)
        if diff > 0:
            if store:
                mc.STMG(iv, includes[-1], addr)
            else:
                mc.LMG(iv, includes[-1], addr)
        else:
            if store:
                mc.STG(iv, addr)
            else:
                mc.LG(iv, addr)
    def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS):
        # Load the given core registers back from their jitframe slots.
        self._multiple_to_or_from_jitframe(mc, includes, store=False)
  1068. def _push_fp_regs_to_jitframe(self, mc, includes=r.MANAGED_FP_REGS):
  1069. if len(includes) == 0:
  1070. return
  1071. base_ofs = self.cpu.get_baseofs_of_frame_field()
  1072. for reg in includes:
  1073. v = r.ALL_REG_INDEXES[reg]
  1074. offset = base_ofs + v * WORD
  1075. mc.STD(reg, l.addr(offset, r.SPP))
  1076. def _pop_fp_regs_from_jitframe(self, mc, includes=r.MANAGED_FP_REGS):
  1077. base_ofs = self.cpu.get_baseofs_of_frame_field()

Large files are truncated, but you can click here to view the full file