PageRenderTime 56ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/rpython/jit/backend/ppc/ppc_assembler.py

https://bitbucket.org/pypy/pypy/
Python | 1385 lines | 953 code | 176 blank | 256 comment | 138 complexity | c74e7d6b4e365d043de2210950320f90 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. from rpython.jit.backend.ppc.regalloc import (PPCFrameManager,
  2. Regalloc, PPCRegisterManager)
  3. from rpython.jit.backend.ppc.opassembler import OpAssembler
  4. from rpython.jit.backend.ppc.codebuilder import (PPCBuilder, OverwritingBuilder,
  5. scratch_reg)
  6. from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
  7. LR_BC_OFFSET, REGISTERS_SAVED,
  8. GPR_SAVE_AREA_OFFSET,
  9. THREADLOCAL_ADDR_OFFSET,
  10. STD_FRAME_SIZE_IN_BYTES,
  11. IS_BIG_ENDIAN,
  12. LOCAL_VARS_OFFSET)
  13. from rpython.jit.backend.ppc.helper.assembler import Saved_Volatiles
  14. from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
  15. import rpython.jit.backend.ppc.register as r
  16. import rpython.jit.backend.ppc.condition as c
  17. from rpython.jit.backend.ppc.register import JITFRAME_FIXED_SIZE
  18. from rpython.jit.metainterp.history import AbstractFailDescr
  19. from rpython.jit.backend.llsupport import jitframe, rewrite
  20. from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
  21. from rpython.jit.backend.llsupport.assembler import (DEBUG_COUNTER, debug_bridge,
  22. BaseAssembler)
  23. from rpython.jit.backend.model import CompiledLoopToken
  24. from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
  25. from rpython.jit.metainterp.resoperation import rop, ResOperation
  26. from rpython.jit.codewriter import longlong
  27. from rpython.jit.metainterp.history import (INT, REF, FLOAT)
  28. from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
  29. have_debug_prints)
  30. from rpython.rlib import rgc
  31. from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
  32. from rpython.rlib.objectmodel import we_are_translated, specialize
  33. from rpython.rtyper.lltypesystem.lloperation import llop
  34. from rpython.jit.backend.ppc.locations import StackLocation, get_fp_offset, imm
  35. from rpython.jit.backend.ppc import callbuilder
  36. from rpython.rlib.jit import AsmInfo
  37. from rpython.rlib.objectmodel import compute_unique_id
  38. from rpython.rlib.rarithmetic import r_uint
  39. from rpython.rlib.rjitlog import rjitlog as jl
# Raw C 'memcpy', callable from RPython: (dst, src, size) -> None.
# _nowrapper=True: no GIL/GC wrapper around the call (used from
# assembler-support code); sandboxsafe=True: allowed under sandboxing.
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
                                       rffi.SIZE_T], lltype.Void,
                            sandboxsafe=True, _nowrapper=True)
# Per-loop/bridge execution counter record used by the debug-counter
# machinery.
# NOTE(review): this shadows the DEBUG_COUNTER imported from
# llsupport.assembler at the top of the file -- presumably a leftover
# duplicate; confirm the two layouts agree before removing either one.
DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char),  # 'b'ridge, 'l'abel or
                                                      # 'e'ntry point
                              ('number', lltype.Signed))
  47. def hi(w):
  48. return w >> 16
  49. def ha(w):
  50. if (w >> 15) & 1:
  51. return (w >> 16) + 1
  52. else:
  53. return w >> 16
  54. def lo(w):
  55. return w & 0x0000FFFF
  56. def la(w):
  57. v = w & 0x0000FFFF
  58. if v & 0x8000:
  59. return -((v ^ 0xFFFF) + 1) # "sign extend" to 32 bits
  60. return v
  61. def highest(w):
  62. return w >> 48
  63. def higher(w):
  64. return (w >> 32) & 0x0000FFFF
  65. def high(w):
  66. return (w >> 16) & 0x0000FFFF
class JitFrameTooDeep(Exception):
    """Raised when a frame depth does not fit the 16-bit immediates used
    by the frame-depth check code (see update_frame_depth())."""
    pass
class AssemblerPPC(OpAssembler, BaseAssembler):
    """Machine-code assembler for the PPC JIT backend.

    Assembles loops and bridges and builds the shared helper stubs
    (failure recovery, frame realloc, malloc slow paths, write barriers,
    stack checks, exception propagation).  Throughout generated code,
    r31 (SPP) holds the jitframe object.
    """
    # Legacy frame-layout constants, kept commented out for reference only:
    #ENCODING_AREA = FORCE_INDEX_OFS
    #OFFSET_SPP_TO_GPR_SAVE_AREA = (FORCE_INDEX + FLOAT_INT_CONVERSION
    #                               + ENCODING_AREA)
    #OFFSET_SPP_TO_FPR_SAVE_AREA = (OFFSET_SPP_TO_GPR_SAVE_AREA
    #                               + GPR_SAVE_AREA)
    #OFFSET_SPP_TO_OLD_BACKCHAIN = (OFFSET_SPP_TO_GPR_SAVE_AREA
    #                               + GPR_SAVE_AREA + FPR_SAVE_AREA)
    #OFFSET_STACK_ARGS = OFFSET_SPP_TO_OLD_BACKCHAIN + BACKCHAIN_SIZE * WORD
    #if IS_PPC_64:
    #    OFFSET_STACK_ARGS += MAX_REG_PARAMS * WORD
    def __init__(self, cpu, translate_support_code=False):
        """Initialize per-assembler state; real setup happens in setup()."""
        BaseAssembler.__init__(self, cpu, translate_support_code)
        self.loop_run_counters = []
        # 5 slots: index [withcards + 2 * withfloats], plus [4] for the
        # 'for_frame' variant (see _build_wb_slowpath())
        self.wb_slowpath = [0, 0, 0, 0, 0]
        self.setup_failure_recovery()
        # addresses of helper stubs; 0 until the stubs are built
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        self.teardown()
    def set_debug(self, v):
        """Set the debug flag (controls debug-counter injection)."""
        self._debug = v
    def _save_nonvolatiles(self):
        """ save nonvolatile GPRs and FPRs in SAVE AREA

        NOTE(review): NONVOLATILES / NONVOLATILES_FLOAT and the
        OFFSET_SPP_TO_*_SAVE_AREA attributes are not defined anywhere in
        this file (the offsets are commented out on the class) -- this
        looks like dead legacy code that would raise NameError if called;
        confirm before relying on it.
        """
        for i, reg in enumerate(NONVOLATILES):
            # save r31 later on
            if reg.value == r.SPP.value:
                continue
            self.mc.store(reg.value, r.SPP.value,
                          self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
        for i, reg in enumerate(NONVOLATILES_FLOAT):
            self.mc.stfd(reg.value, r.SPP.value,
                         self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
    def _restore_nonvolatiles(self, mc, spp_reg):
        """ restore nonvolatile GPRs and FPRs from SAVE AREA

        NOTE(review): same caveat as _save_nonvolatiles() -- the names it
        references are not defined in this file; apparently dead legacy
        code.
        """
        for i, reg in enumerate(NONVOLATILES):
            mc.load(reg.value, spp_reg.value,
                    self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i)
        for i, reg in enumerate(NONVOLATILES_FLOAT):
            mc.lfd(reg.value, spp_reg.value,
                   self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i)
    def _call_header_shadowstack(self, gcrootmap):
        """Push the jitframe (SPP) on the GC shadowstack and bump the
        root-stack-top pointer by one word."""
        # we need to put one word into the shadowstack: the jitframe (SPP)
        mc = self.mc
        diff = mc.load_imm_plus(r.RCS1, gcrootmap.get_root_stack_top_addr())
        mc.load(r.RCS2.value, r.RCS1.value, diff)  # ld RCS2, [rootstacktop]
        #
        mc.addi(r.RCS3.value, r.RCS2.value, WORD)  # add RCS3, RCS2, WORD
        mc.store(r.SPP.value, r.RCS2.value, 0)     # std SPP, RCS2
        #
        mc.store(r.RCS3.value, r.RCS1.value, diff) # std RCS3, [rootstacktop]
    def _call_footer_shadowstack(self, gcrootmap):
        """Pop the one word pushed by _call_header_shadowstack()."""
        mc = self.mc
        diff = mc.load_imm_plus(r.RCS1, gcrootmap.get_root_stack_top_addr())
        mc.load(r.RCS2.value, r.RCS1.value, diff)  # ld RCS2, [rootstacktop]
        mc.subi(r.RCS2.value, r.RCS2.value, WORD)  # sub RCS2, RCS2, WORD
        mc.store(r.RCS2.value, r.RCS1.value, diff) # std RCS2, [rootstacktop]
  127. def new_stack_loc(self, i, tp):
  128. base_ofs = self.cpu.get_baseofs_of_frame_field()
  129. return StackLocation(i, get_fp_offset(base_ofs, i), tp)
  130. def setup_failure_recovery(self):
  131. self.failure_recovery_code = [0, 0, 0, 0]
  132. def _push_core_regs_to_jitframe(self, mc, includes=r.MANAGED_REGS):
  133. base_ofs = self.cpu.get_baseofs_of_frame_field()
  134. for reg in includes:
  135. v = r.ALL_REG_INDEXES[reg]
  136. mc.std(reg.value, r.SPP.value, base_ofs + v * WORD)
  137. def _push_fp_regs_to_jitframe(self, mc, includes=r.MANAGED_FP_REGS):
  138. base_ofs = self.cpu.get_baseofs_of_frame_field()
  139. for reg in includes:
  140. v = r.ALL_REG_INDEXES[reg]
  141. mc.stfd(reg.value, r.SPP.value, base_ofs + v * WORD)
  142. def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS):
  143. base_ofs = self.cpu.get_baseofs_of_frame_field()
  144. for reg in includes:
  145. v = r.ALL_REG_INDEXES[reg]
  146. mc.ld(reg.value, r.SPP.value, base_ofs + v * WORD)
  147. def _pop_fp_regs_from_jitframe(self, mc, includes=r.MANAGED_FP_REGS):
  148. base_ofs = self.cpu.get_baseofs_of_frame_field()
  149. for reg in includes:
  150. v = r.ALL_REG_INDEXES[reg]
  151. mc.lfd(reg.value, r.SPP.value, base_ofs + v * WORD)
    def _build_failure_recovery(self, exc, withfloats=False):
        """Build one shared guard-failure stub and record its address in
        self.failure_recovery_code[exc + 2 * withfloats].

        On entry the stub expects r0 = the failing guard's descr and
        r2 = its gcmap; it saves them plus all managed registers into the
        jitframe, optionally captures a pending exception, and returns
        from the assembler frame via _call_footer().
        """
        mc = PPCBuilder()
        self.mc = mc
        # fill in the jf_descr and jf_gcmap fields of the frame according
        # to which failure we are resuming from. These are set before
        # this function is called (see generate_quick_failure()).
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        mc.store(r.r0.value, r.SPP.value, ofs)
        mc.store(r.r2.value, r.SPP.value, ofs2)
        self._push_core_regs_to_jitframe(mc)
        if withfloats:
            self._push_fp_regs_to_jitframe(mc)
        if exc:
            # We might have an exception pending.
            mc.load_imm(r.r2, self.cpu.pos_exc_value())
            # Copy it into 'jf_guard_exc'
            offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            mc.load(r.r0.value, r.r2.value, 0)
            mc.store(r.r0.value, r.SPP.value, offset)
            # Zero out the exception fields
            diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
            assert _check_imm_arg(diff)
            mc.li(r.r0.value, 0)
            mc.store(r.r0.value, r.r2.value, 0)
            mc.store(r.r0.value, r.r2.value, diff)
        # now we return from the complete frame, which starts from
        # _call_header_with_stack_check(). The _call_footer below does it.
        self._call_footer()
        rawstart = mc.materialize(self.cpu, [])
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart
        self.mc = None
    def build_frame_realloc_slowpath(self):
        """Build the stub that grows the jitframe via cpu.realloc_frame.

        Stores its address in self._frame_realloc_slowpath; called from
        the code emitted by _check_frame_depth().
        """
        mc = PPCBuilder()
        self.mc = mc
        # signature of this _frame_realloc_slowpath function:
        #   * on entry, r0 is the new size
        #   * on entry, r2 is the gcmap
        #   * no managed register must be modified
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        mc.store(r.r2.value, r.SPP.value, ofs2)
        self._push_core_regs_to_jitframe(mc)
        self._push_fp_regs_to_jitframe(mc)
        # Save away the LR inside r30
        mc.mflr(r.RCS1.value)
        # First argument is SPP (= r31), which is the jitframe
        mc.mr(r.r3.value, r.SPP.value)
        # Second argument is the new size, which is still in r0 here
        mc.mr(r.r4.value, r.r0.value)
        # This trashes r0 and r2
        self._store_and_reset_exception(mc, r.RCS2, r.RCS3)
        # Do the call
        adr = rffi.cast(lltype.Signed, self.cpu.realloc_frame)
        mc.load_imm(mc.RAW_CALL_REG, adr)
        mc.raw_call()
        # The result is stored back into SPP (= r31)
        mc.mr(r.SPP.value, r.r3.value)
        self._restore_exception(mc, r.RCS2, r.RCS3)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            # update the shadowstack's top word to the reallocated frame
            diff = mc.load_imm_plus(r.r5, gcrootmap.get_root_stack_top_addr())
            mc.load(r.r5.value, r.r5.value, diff)
            mc.store(r.r3.value, r.r5.value, -WORD)
        mc.mtlr(r.RCS1.value)     # restore LR
        self._pop_core_regs_from_jitframe(mc)
        self._pop_fp_regs_from_jitframe(mc)
        mc.blr()
        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
        self.mc = None
    def _store_and_reset_exception(self, mc, excvalloc, exctploc=None):
        """Reset the exception, after fetching it inside the two regs.

        Loads the exception value into *excvalloc* (and the exception
        type into *exctploc*, if given), then zeroes both fields.
        Clobbers r0 and r2.
        """
        mc.load_imm(r.r2, self.cpu.pos_exc_value())
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert _check_imm_arg(diff)
        # Load the exception fields into the two registers
        mc.load(excvalloc.value, r.r2.value, 0)
        if exctploc is not None:
            mc.load(exctploc.value, r.r2.value, diff)
        # Zero out the exception fields
        mc.li(r.r0.value, 0)
        mc.store(r.r0.value, r.r2.value, 0)
        mc.store(r.r0.value, r.r2.value, diff)
    def _restore_exception(self, mc, excvalloc, exctploc):
        """Write back the exception value/type previously saved by
        _store_and_reset_exception().  Clobbers r2."""
        mc.load_imm(r.r2, self.cpu.pos_exc_value())
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert _check_imm_arg(diff)
        # Store the exception fields from the two registers
        mc.store(excvalloc.value, r.r2.value, 0)
        mc.store(exctploc.value, r.r2.value, diff)
    def _reload_frame_if_necessary(self, mc, shadowstack_reg=None):
        """After a call that may have moved the jitframe, reload SPP from
        the shadowstack and re-run the frame's write barrier."""
        # might trash the VOLATILE registers different from r3 and f1
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            if gcrootmap.is_shadow_stack:
                if shadowstack_reg is None:
                    diff = mc.load_imm_plus(r.SPP,
                                            gcrootmap.get_root_stack_top_addr())
                    mc.load(r.SPP.value, r.SPP.value, diff)
                    shadowstack_reg = r.SPP
                # the jitframe is the word just below rootstacktop
                mc.load(r.SPP.value, shadowstack_reg.value, -WORD)
        wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
        if gcrootmap and wbdescr:
            # frame never uses card marking, so we enforce this is not
            # an array
            self._write_barrier_fastpath(mc, wbdescr, [r.SPP], regalloc=None,
                                         array=False, is_frame=True)
    def _build_cond_call_slowpath(self, supports_floats, callee_only):
        """ This builds a general call slowpath, for whatever call happens to
        come.  Returns the raw address of the stub.
        """
        # signature of these cond_call_slowpath functions:
        #   * on entry, r12 contains the function to call
        #   * r3, r4, r5, r6 contain arguments for the call
        #   * r2 is the gcmap
        #   * the old value of these regs must already be stored in the
        #     jitframe
        #   * on exit, all registers are restored from the jitframe
        mc = PPCBuilder()
        self.mc = mc
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        mc.store(r.r2.value, r.SPP.value, ofs2)
        # copy registers to the frame, with the exception of r3 to r6 and r12,
        # because these have already been saved by the caller.  Note that
        # this is not symmetrical: these 5 registers are saved by the caller
        # but restored here at the end of this function.
        if callee_only:
            saved_regs = PPCRegisterManager.save_around_call_regs
        else:
            saved_regs = PPCRegisterManager.all_regs
        self._push_core_regs_to_jitframe(mc, [reg for reg in saved_regs
                                              if reg is not r.r3 and
                                                 reg is not r.r4 and
                                                 reg is not r.r5 and
                                                 reg is not r.r6 and
                                                 reg is not r.r12])
        if supports_floats:
            self._push_fp_regs_to_jitframe(mc)
        # Save away the LR inside r30
        mc.mflr(r.RCS1.value)
        # Do the call
        mc.raw_call(r.r12)
        # Finish
        self._reload_frame_if_necessary(mc)
        mc.mtlr(r.RCS1.value)     # restore LR
        self._pop_core_regs_from_jitframe(mc, saved_regs)
        if supports_floats:
            self._pop_fp_regs_from_jitframe(mc)
        mc.blr()
        self.mc = None
        return mc.materialize(self.cpu, [])
  302. def _build_malloc_slowpath(self, kind):
  303. """ While arriving on slowpath, we have a gcmap in r2.
  304. The arguments are passed in r.RES and r.RSZ, as follows:
  305. kind == 'fixed': nursery_head in r.RES and the size in r.RSZ - r.RES.
  306. kind == 'str/unicode': length of the string to allocate in r.RES.
  307. kind == 'var': itemsize in r.RES, length to allocate in r.RSZ,
  308. and tid in r.SCRATCH.
  309. This function must preserve all registers apart from r.RES and r.RSZ.
  310. On return, r2 must contain the address of nursery_free.
  311. """
  312. assert kind in ['fixed', 'str', 'unicode', 'var']
  313. mc = PPCBuilder()
  314. self.mc = mc
  315. ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
  316. mc.store(r.r2.value, r.SPP.value, ofs2)
  317. saved_regs = [reg for reg in r.MANAGED_REGS
  318. if reg is not r.RES and reg is not r.RSZ]
  319. self._push_core_regs_to_jitframe(mc, saved_regs)
  320. self._push_fp_regs_to_jitframe(mc)
  321. #
  322. if kind == 'fixed':
  323. addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
  324. elif kind == 'str':
  325. addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
  326. elif kind == 'unicode':
  327. addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
  328. else:
  329. addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
  330. # Save away the LR inside r30
  331. mc.mflr(r.RCS1.value)
  332. if kind == 'fixed':
  333. # compute the size we want
  334. mc.subf(r.r3.value, r.RES.value, r.RSZ.value)
  335. if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
  336. # for tests only
  337. mc.mr(r.r4.value, r.SPP.value)
  338. elif kind == 'str' or kind == 'unicode':
  339. pass # length is already in r3
  340. else:
  341. # arguments to the called function are [itemsize, tid, length]
  342. # itemsize is already in r3
  343. mc.mr(r.r5.value, r.RSZ.value) # length
  344. mc.mr(r.r4.value, r.SCRATCH.value) # tid
  345. # Do the call
  346. addr = rffi.cast(lltype.Signed, addr)
  347. mc.load_imm(mc.RAW_CALL_REG, addr)
  348. mc.raw_call()
  349. self._reload_frame_if_necessary(mc)
  350. # Check that we don't get NULL; if we do, we always interrupt the
  351. # current loop, as a "good enough" approximation (same as
  352. # emit_call_malloc_gc()).
  353. self.propagate_memoryerror_if_reg_is_null(r.r3)
  354. mc.mtlr(r.RCS1.value) # restore LR
  355. self._pop_core_regs_from_jitframe(mc, saved_regs)
  356. self._pop_fp_regs_from_jitframe(mc)
  357. nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
  358. self.mc.load_imm(r.r2, nursery_free_adr)
  359. # r2 is now the address of nursery_free
  360. # r.RES is still the result of the call done above
  361. # r.RSZ is loaded from [r2], to make the caller's store a no-op here
  362. mc.load(r.RSZ.value, r.r2.value, 0)
  363. #
  364. mc.blr()
  365. self.mc = None
  366. return mc.materialize(self.cpu, [])
    def _build_stack_check_slowpath(self):
        """Build the C-stack overflow check helper; stores its address in
        self.stack_check_slowpath (0 when checks are disabled)."""
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
            return      # no stack check (for tests, or non-translated)
        #
        # make a regular function that is called from a point near the start
        # of an assembler function (after it adjusts the stack and saves
        # registers).
        mc = PPCBuilder()
        #
        # Save away the LR inside r30
        mc.mflr(r.RCS1.value)
        #
        # Do the call
        # use SP as single parameter for the call
        mc.mr(r.r3.value, r.SP.value)
        mc.load_imm(mc.RAW_CALL_REG, slowpathaddr)
        mc.raw_call()
        #
        # Restore LR
        mc.mtlr(r.RCS1.value)
        #
        # Check if it raised StackOverflow
        mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
        mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
        # if this comparison is true, then everything is ok,
        # else we have an exception
        mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
        #
        # So we return to LR back to our caller, conditionally if "EQ"
        mc.beqlr()
        #
        # Else, jump to propagate_exception_path
        assert self.propagate_exception_path
        mc.b_abs(self.propagate_exception_path)
        #
        rawstart = mc.materialize(self.cpu, [])
        self.stack_check_slowpath = rawstart
    def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
        """Build one write-barrier slow-path stub and record its address in
        self.wb_slowpath[4] (for_frame) or
        self.wb_slowpath[withcards + 2 * withfloats].

        The stub takes its single argument in r0 (or SPP if 'for_frame')
        and, when 'withcards', leaves the card-marking test result in cr0
        for the caller.
        """
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all fp registers.  It takes its single argument in r0
        # (or in SPP if 'for_frame').
        if for_frame:
            argument_loc = r.SPP
        else:
            argument_loc = r.r0
        mc = PPCBuilder()
        old_mc = self.mc
        self.mc = mc
        # extra C-stack space for the 'for_frame' register saves, rounded
        # up to a 16-byte multiple
        extra_stack_size = LOCAL_VARS_OFFSET + 4 * WORD + 8
        extra_stack_size = (extra_stack_size + 15) & ~15
        if for_frame:
            # NOTE: don't save registers on the jitframe here!  It might
            # override already-saved values that will be restored
            # later...
            #
            # This 'for_frame' version is called after a CALL.  It does not
            # need to save many registers: the registers that are anyway
            # destroyed by the call can be ignored (VOLATILES), and the
            # non-volatile registers won't be changed here.  It only needs
            # to save r.RCS1 (used below), r3 and f1 (possible results of
            # the call), and two more non-volatile registers (used to store
            # the RPython exception that occurred in the CALL, if any).
            #
            # We need to increase our stack frame size a bit to store them.
            #
            self.mc.load(r.SCRATCH.value, r.SP.value, 0)    # SP back chain
            self.mc.store_update(r.SCRATCH.value, r.SP.value,
                                 -extra_stack_size)
            self.mc.std(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 * WORD)
            self.mc.std(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 * WORD)
            self.mc.std(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 * WORD)
            self.mc.std(r.r3.value, r.SP.value, LOCAL_VARS_OFFSET + 3 * WORD)
            self.mc.stfd(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 * WORD)
            saved_regs = None
            saved_fp_regs = None
        else:
            # push all volatile registers, push RCS1, and sometimes push RCS2
            if withcards:
                saved_regs = r.VOLATILES + [r.RCS1, r.RCS2]
            else:
                saved_regs = r.VOLATILES + [r.RCS1]
            if withfloats:
                saved_fp_regs = r.MANAGED_FP_REGS
            else:
                saved_fp_regs = []
            self._push_core_regs_to_jitframe(mc, saved_regs)
            self._push_fp_regs_to_jitframe(mc, saved_fp_regs)
        if for_frame:
            # note that it's safe to store the exception in register,
            # since the call to write barrier can't collect
            # (and this is assumed a bit left and right here, like lack
            # of _reload_frame_if_necessary)
            # This trashes r0 and r2, which is fine in this case
            assert argument_loc is not r.r0
            self._store_and_reset_exception(mc, r.RCS2, r.RCS3)
        if withcards:
            mc.mr(r.RCS2.value, argument_loc.value)
        #
        # Save the lr into r.RCS1
        mc.mflr(r.RCS1.value)
        #
        func = rffi.cast(lltype.Signed, func)
        # Note: if not 'for_frame', argument_loc is r0, which must carefully
        # not be overwritten above
        mc.mr(r.r3.value, argument_loc.value)
        mc.load_imm(mc.RAW_CALL_REG, func)
        mc.raw_call()
        #
        # Restore lr
        mc.mtlr(r.RCS1.value)
        if for_frame:
            self._restore_exception(mc, r.RCS2, r.RCS3)
        if withcards:
            # A final andix before the blr, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of cr0!
            card_marking_mask = descr.jit_wb_cards_set_singlebyte
            mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
            mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
        if for_frame:
            # undo the register saves and stack bump from the top
            self.mc.ld(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 * WORD)
            self.mc.ld(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 * WORD)
            self.mc.ld(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 * WORD)
            self.mc.ld(r.r3.value, r.SP.value, LOCAL_VARS_OFFSET + 3 * WORD)
            self.mc.lfd(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 * WORD)
            self.mc.addi(r.SP.value, r.SP.value, extra_stack_size)
        else:
            self._pop_core_regs_from_jitframe(mc, saved_regs)
            self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
        mc.blr()
        self.mc = old_mc
        rawstart = mc.materialize(self.cpu, [])
        if for_frame:
            self.wb_slowpath[4] = rawstart
        else:
            self.wb_slowpath[withcards + 2 * withfloats] = rawstart
    def _build_propagate_exception_path(self):
        """Build the stub that moves a pending RPython exception into the
        jitframe (jf_guard_exc) with propagate_exception_descr as
        jf_descr, then returns from the assembler frame.  Stores the
        address in self.propagate_exception_path."""
        self.mc = PPCBuilder()
        #
        # read and reset the current exception
        propagate_exception_descr = rffi.cast(lltype.Signed,
                  cast_instance_to_gcref(self.cpu.propagate_exception_descr))
        ofs3 = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
        ofs4 = self.cpu.get_ofs_of_frame_field('jf_descr')
        self._store_and_reset_exception(self.mc, r.r3)
        self.mc.load_imm(r.r4, propagate_exception_descr)
        self.mc.std(r.r3.value, r.SPP.value, ofs3)
        self.mc.std(r.r4.value, r.SPP.value, ofs4)
        #
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu, [])
        self.propagate_exception_path = rawstart
        self.mc = None
    def _call_header(self):
        """Emit the prologue of an assembler function: build the C stack
        frame, save LR and the callee-saved registers, stash the
        thread-local address (second argument) and load the jitframe
        (first argument) into SPP."""
        if IS_PPC_64 and IS_BIG_ENDIAN:
            # Reserve space for a function descriptor, 3 words
            self.mc.write64(0)
            self.mc.write64(0)
            self.mc.write64(0)
        # Build a new stackframe of size STD_FRAME_SIZE_IN_BYTES
        self.mc.store_update(r.SP.value, r.SP.value, -STD_FRAME_SIZE_IN_BYTES)
        # save the LR in the caller's frame's LR save slot
        self.mc.mflr(r.SCRATCH.value)
        self.mc.store(r.SCRATCH.value, r.SP.value,
                      STD_FRAME_SIZE_IN_BYTES + LR_BC_OFFSET)
        # save registers r25 to r31
        for i, reg in enumerate(REGISTERS_SAVED):
            self.mc.store(reg.value, r.SP.value,
                          GPR_SAVE_AREA_OFFSET + i * WORD)
        # save r4, the second argument, to THREADLOCAL_ADDR_OFFSET
        self.mc.store(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
        # move r3, the first argument, to r31 (SPP): the jitframe object
        self.mc.mr(r.SPP.value, r.r3.value)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_header_shadowstack(gcrootmap)
    def _call_header_with_stack_check(self):
        """_call_header() plus a conditional call to stack_check_slowpath
        when the C stack is close to its limit."""
        self._call_header()
        if self.stack_check_slowpath == 0:
            pass                # not translated
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            diff = lengthaddr - endaddr
            assert _check_imm_arg(diff)
            mc = self.mc
            mc.load_imm(r.SCRATCH, self.stack_check_slowpath)
            mc.load_imm(r.SCRATCH2, endaddr)                 # li r2, endaddr
            mc.mtctr(r.SCRATCH.value)
            mc.load(r.SCRATCH.value, r.SCRATCH2.value, 0)    # ld r0, [end]
            mc.load(r.SCRATCH2.value, r.SCRATCH2.value, diff)# ld r2, [length]
            mc.subf(r.SCRATCH.value, r.SP.value, r.SCRATCH.value) # sub r0, SP
            mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value, signed=False)
            # call the slowpath (via CTR) only if end - SP > length
            mc.bgtctrl()
    def _call_footer(self):
        """Emit the epilogue: return the jitframe in r3, pop the
        shadowstack entry, restore callee-saved registers and LR, tear
        down the stack frame and return."""
        # the return value is the jitframe
        self.mc.mr(r.r3.value, r.SPP.value)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._call_footer_shadowstack(gcrootmap)
        # restore registers r25 to r31
        for i, reg in enumerate(REGISTERS_SAVED):
            self.mc.load(reg.value, r.SP.value,
                         GPR_SAVE_AREA_OFFSET + i * WORD)
        # load the return address into r4
        self.mc.load(r.r4.value, r.SP.value,
                     STD_FRAME_SIZE_IN_BYTES + LR_BC_OFFSET)
        # throw away the stack frame and return to r4
        self.mc.addi(r.SP.value, r.SP.value, STD_FRAME_SIZE_IN_BYTES)
        self.mc.mtlr(r.r4.value)     # restore LR
        self.mc.blr()
    def setup(self, looptoken):
        """Per-compilation setup: fresh PPCBuilder, pending-guard
        bookkeeping, and a machine-data block allocator tied to
        *looptoken*."""
        BaseAssembler.setup(self, looptoken)
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        self.pending_guard_tokens_recovered = 0
        #if WORD == 8:
        #    self.pending_memoryerror_trampoline_from = []
        #    self.error_trampoline_64 = 0
        self.mc = PPCBuilder()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}
        # list of (traps_pos, jmp_target) filled by _check_frame_depth()
        self.frame_depth_to_patch = []
    def update_frame_depth(self, frame_depth):
        """Record the final frame depth in the current loop's frame_info.

        Depths above 0x7fff would not fit the 16-bit immediates patched
        in by patch_stack_checks(), hence the JitFrameTooDeep guard.
        """
        if frame_depth > 0x7fff:
            raise JitFrameTooDeep     # XXX
        baseofs = self.cpu.get_baseofs_of_frame_field()
        self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
    def patch_stack_checks(self, frame_depth):
        """Rewrite every trap triple emitted by _check_frame_depth() with
        the final *frame_depth* now that it is known."""
        if frame_depth > 0x7fff:
            raise JitFrameTooDeep     # XXX
        for traps_pos, jmp_target in self.frame_depth_to_patch:
            pmc = OverwritingBuilder(self.mc, traps_pos, 3)
            # three traps, so exactly three instructions to patch here
            pmc.cmpdi(0, r.r2.value, frame_depth)         # 1
            pmc.bc(7, 0, jmp_target - (traps_pos + 4))    # 2   "bge+"
            pmc.li(r.r0.value, frame_depth)               # 3
            pmc.overwrite()
    def _check_frame_depth(self, mc, gcmap):
        """ check if the frame is of enough depth to follow this bridge.
        Otherwise reallocate the frame in a helper.
        """
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.ld(r.r2.value, r.SPP.value, ofs)
        patch_pos = mc.currpos()
        # three placeholders, later rewritten by patch_stack_checks()
        mc.trap()       # placeholder for cmpdi(0, r2, ...)
        mc.trap()       # placeholder for bge
        mc.trap()       # placeholder for li(r0, ...)
        mc.load_imm(r.SCRATCH2, self._frame_realloc_slowpath)
        mc.mtctr(r.SCRATCH2.value)
        self.load_gcmap(mc, r.r2, gcmap)
        mc.bctrl()
        self.frame_depth_to_patch.append((patch_pos, mc.currpos()))
    @rgc.no_release_gil
    def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
                      operations, looptoken, log):
        """Assemble a complete loop and attach the machine code to
        *looptoken*; returns an AsmInfo describing the generated code."""
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
        clt.frame_info.clear()     # for now

        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = Regalloc(assembler=self)
        #
        self._call_header_with_stack_check()
        allgcrefs = []
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, allgcrefs)
        self.reserve_gcref_table(allgcrefs)
        looppos = self.mc.get_relative_pos()
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
                                                   operations)
        self.update_frame_depth(frame_depth_no_fixed_size +
                                JITFRAME_FIXED_SIZE)
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()
        #
        self.patch_stack_checks(frame_depth_no_fixed_size +
                                JITFRAME_FIXED_SIZE)
        rawstart = self.materialize_loop(looptoken)
        if IS_PPC_64 and IS_BIG_ENDIAN:  # fix the function descriptor (3 words)
            rffi.cast(rffi.LONGP, rawstart)[0] = rawstart + 3 * WORD
        #
        looptoken._ll_loop_code = looppos + rawstart
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
            looptoken.number, loopname,
            r_uint(rawstart + looppos),
            r_uint(rawstart + size_excluding_failure_stuff),
            r_uint(rawstart)))
        debug_stop("jit-backend-addr")
        self.patch_gcref_table(looptoken, rawstart)
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._ppc_rawstart = rawstart
            looptoken._ppc_fullsize = full_size
            looptoken._ppc_ops_offset = ops_offset
        looptoken._ll_function_addr = rawstart
        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset=ops_offset)
            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_loop(inputargs, operations, 0,
                                           "rewritten", name=loopname,
                                           ops_offset=ops_offset)

        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        #if self.cpu.profile_agent is not None:
        #    name = "Loop # %s: %s" % (looptoken.number, loopname)
        #    self.cpu.profile_agent.native_code_written(name,
        #                                               rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos)
def _assemble(self, regalloc, inputargs, operations):
    """Run register allocation over 'operations' and emit the machine code.

    Returns the frame depth needed by the emitted code, *excluding* the
    fixed-size part of the jitframe (JITFRAME_FIXED_SIZE).
    """
    self._regalloc = regalloc
    # no conditional-branch condition may be left pending between operations
    self.guard_success_cc = c.cond_none
    regalloc.compute_hint_frame_locations(operations)
    regalloc.walk_operations(inputargs, operations)
    assert self.guard_success_cc == c.cond_none
    if 1: # we_are_translated() or self.cpu.dont_keepalive_stuff:
        self._regalloc = None   # else keep it around for debugging
    frame_depth = regalloc.get_final_frame_depth()
    jump_target_descr = regalloc.jump_target_descr
    if jump_target_descr is not None:
        # this trace ends with a JUMP into another compiled loop: our frame
        # must be at least as deep as the target loop's frame
        tgt_depth = jump_target_descr._ppc_clt.frame_info.jfi_frame_depth
        target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
        frame_depth = max(frame_depth, target_frame_depth)
    return frame_depth
@rgc.no_release_gil
def assemble_bridge(self, faildescr, inputargs, operations,
                    original_loop_token, log, logger):
    """Assemble 'operations' as a bridge hanging off the guard 'faildescr'
    of an already-compiled loop, then patch that guard to jump to the new
    code.  Returns an AsmInfo describing the generated machine code.
    """
    if not we_are_translated():
        # Arguments should be unique
        assert len(set(inputargs)) == len(inputargs)

    self.setup(original_loop_token)
    descr_number = compute_unique_id(faildescr)
    if log:
        operations = self._inject_debugging_code(faildescr, operations,
                                                 'b', descr_number)

    # recover the failure argument locations recorded on the guard descr
    arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
    regalloc = Regalloc(assembler=self)
    allgcrefs = []
    operations = regalloc.prepare_bridge(inputargs, arglocs,
                                         operations,
                                         allgcrefs,
                                         self.current_clt.frame_info)
    # the gc table must be reserved before any code is emitted
    self.reserve_gcref_table(allgcrefs)
    startpos = self.mc.get_relative_pos()

    # emit a prologue that grows the jitframe if the bridge needs a
    # deeper frame than the original loop
    self._check_frame_depth(self.mc, regalloc.get_gcmap())

    frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
    codeendpos = self.mc.get_relative_pos()
    self.write_pending_failure_recoveries()
    fullsize = self.mc.get_relative_pos()
    #
    self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
    rawstart = self.materialize_loop(original_loop_token)
    debug_bridge(descr_number, rawstart, codeendpos)
    self.patch_gcref_table(original_loop_token, rawstart)
    self.patch_pending_failure_recoveries(rawstart)
    # patch the jump from original guard
    self.patch_jump_for_descr(faildescr, rawstart)
    ops_offset = self.mc.ops_offset
    frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                      frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
    if logger:
        log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
        log.write(inputargs, operations, ops_offset)
        # log that the already written bridge is stitched to a descr!
        logger.log_patch_guard(descr_number, rawstart)

        # legacy
        if logger.logger_ops:
            logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
                                         faildescr, ops_offset=ops_offset)

    self.fixup_target_tokens(rawstart)
    self.update_frame_depth(frame_depth)
    self.teardown()
    return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
  774. def reserve_gcref_table(self, allgcrefs):
  775. # allocate the gc table right now. We write absolute loads in
  776. # each load_from_gc_table instruction for now. XXX improve,
  777. # but it's messy.
  778. self.gc_table_addr = self.datablockwrapper.malloc_aligned(
  779. len(allgcrefs) * WORD, alignment=WORD)
  780. self.setup_gcrefs_list(allgcrefs)
  781. def patch_gcref_table(self, looptoken, rawstart):
  782. rawstart = self.gc_table_addr
  783. tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
  784. self._allgcrefs)
  785. gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
  786. gcreftracers.append(tracer) # keepalive
  787. self.teardown_gcrefs_list()
  788. def teardown(self):
  789. self.pending_guard_tokens = None
  790. self.mc = None
  791. self.current_clt = None
  792. def _find_failure_recovery_bytecode(self, faildescr):
  793. return faildescr._failure_recovery_code_adr
  794. def fixup_target_tokens(self, rawstart):
  795. for targettoken in self.target_tokens_currently_compiling:
  796. targettoken._ll_loop_code += rawstart
  797. self.target_tokens_currently_compiling = None
  798. def target_arglocs(self, looptoken):
  799. return looptoken._ppc_arglocs
  800. def materialize_loop(self, looptoken):
  801. self.datablockwrapper.done()
  802. self.datablockwrapper = None
  803. allblocks = self.get_asmmemmgr_blocks(looptoken)
  804. start = self.mc.materialize(self.cpu, allblocks,
  805. self.cpu.gc_ll_descr.gcrootmap)
  806. return start
  807. def load_gcmap(self, mc, reg, gcmap):
  808. # load the current gcmap into register 'reg'
  809. ptr = rffi.cast(lltype.Signed, gcmap)
  810. mc.load_imm(reg, ptr)
  811. def push_gcmap(self, mc, gcmap, store=True):
  812. # (called from callbuilder.py and ../llsupport/callbuilder.py)
  813. assert store is True
  814. self.load_gcmap(mc, r.SCRATCH, gcmap)
  815. ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
  816. mc.store(r.SCRATCH.value, r.SPP.value, ofs)
  817. def break_long_loop(self):
  818. # If the loop is too long, the guards in it will jump forward
  819. # more than 32 KB. We use an approximate hack to know if we
  820. # should break the loop here with an unconditional "b" that
  821. # jumps over the target code.
  822. jmp_pos = self.mc.currpos()
  823. self.mc.trap()
  824. self.write_pending_failure_recoveries()
  825. currpos = self.mc.currpos()
  826. pmc = OverwritingBuilder(self.mc, jmp_pos, 1)
  827. pmc.b(currpos - jmp_pos)
  828. pmc.overwrite()
def generate_quick_failure(self, guardtok):
    """Emit the quick-failure stub for one guard token at the current
    position in self.mc and return that position.

    The stub loads the failure target into CTR, loads the faildescr and
    the gcmap, then branches to the recovery code.
    """
    startpos = self.mc.currpos()
    faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
    assert target != 0
    self.mc.load_imm(r.r2, target)
    self.mc.mtctr(r.r2.value)       # CTR <- recovery-code address
    self._load_from_gc_table(r.r0, r.r2, faildescrindex)
    self.load_gcmap(self.mc, r.r2, gcmap=guardtok.gcmap) # preserves r0
    self.mc.bctr()
    # we need to write at least 6 insns here, for patch_jump_for_descr()
    while self.mc.currpos() < startpos + 6 * 4:
        self.mc.trap()
    return startpos
  842. def write_pending_failure_recoveries(self):
  843. # for each pending guard, generate the code of the recovery stub
  844. # at the end of self.mc.
  845. for i in range(self.pending_guard_tokens_recovered,
  846. len(self.pending_guard_tokens)):
  847. tok = self.pending_guard_tokens[i]
  848. tok.pos_recovery_stub = self.generate_quick_failure(tok)
  849. self.pending_guard_tokens_recovered = len(self.pending_guard_tokens)
def patch_pending_failure_recoveries(self, rawstart):
    """Patch every pending guard so that, on failure, it branches to the
    recovery stub emitted by write_pending_failure_recoveries().

    'rawstart' is the absolute address at which the machine code was
    materialized.
    """
    assert (self.pending_guard_tokens_recovered ==
            len(self.pending_guard_tokens))
    clt = self.current_clt
    for tok in self.pending_guard_tokens:
        addr = rawstart + tok.pos_jump_offset
        #
        # XXX see patch_jump_for_descr()
        tok.faildescr.adr_jump_offset = rawstart + tok.pos_recovery_stub
        #
        relative_target = tok.pos_recovery_stub - tok.pos_jump_offset
        #
        if not tok.guard_not_invalidated():
            # overwrite the guard's placeholder instruction with a real
            # conditional branch to the recovery stub
            mc = PPCBuilder()
            mc.b_cond_offset(relative_target, tok.fcond)
            mc.copy_to_raw_memory(addr)
        else:
            # GUARD_NOT_INVALIDATED, record an entry in
            # clt.invalidate_positions of the form:
            #     (addr-in-the-code-of-the-not-yet-written-jump-target,
            #      relative-target-to-use)
            relpos = tok.pos_jump_offset
            clt.invalidate_positions.append((rawstart + relpos,
                                             relative_target))
  874. def patch_jump_for_descr(self, faildescr, adr_new_target):
  875. # 'faildescr.adr_jump_offset' is the address of an instruction that is a
  876. # conditional jump. We must patch this conditional jump to go
  877. # to 'adr_new_target'. If the target is too far away, we can't
  878. # patch it inplace, and instead we patch the quick failure code
  879. # (which should be at least 6 instructions, so enough).
  880. # --- XXX for now we always use the second solution ---
  881. mc = PPCBuilder()
  882. mc.b_abs(adr_new_target)
  883. mc.copy_to_raw_memory(faildescr.adr_jump_offset)
  884. assert faildescr.adr_jump_offset != 0
  885. faildescr.adr_jump_offset = 0 # means "patched"
  886. def get_asmmemmgr_blocks(self, looptoken):
  887. clt = looptoken.compiled_loop_token
  888. if clt.asmmemmgr_blocks is None:
  889. clt.asmmemmgr_blocks = []
  890. return clt.asmmemmgr_blocks
def regalloc_mov(self, prev_loc, loc):
    """Move a value from location 'prev_loc' to location 'loc'.

    A location may be an immediate (int or float), a core or fp
    register, or a jitframe stack slot (addressed off SPP).  Each
    supported source/destination combination is handled below; any other
    combination aborts with an assertion.  Stack<->stack and
    immediate->stack moves go through the SCRATCH register.
    """
    if prev_loc.is_imm():
        value = prev_loc.getint()
        # move immediate value to register
        if loc.is_reg():
            self.mc.load_imm(loc, value)
            return
        # move immediate value to memory
        elif loc.is_stack():
            with scratch_reg(self.mc):
                offset = loc.value
                self.mc.load_imm(r.SCRATCH, value)
                self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
            return
        assert 0, "not supported location"
    elif prev_loc.is_stack():
        offset = prev_loc.value
        # move from memory to register
        if loc.is_reg():
            reg = loc.value
            self.mc.load(reg, r.SPP.value, offset)
            return
        # move in memory
        elif loc.is_stack():
            target_offset = loc.value
            with scratch_reg(self.mc):
                self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
                self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
            return
        # move from memory to fp register
        elif loc.is_fp_reg():
            assert prev_loc.type == FLOAT, 'source not float location'
            reg = loc.value
            self.mc.lfd(reg, r.SPP.value, offset)
            return
        assert 0, "not supported location"
    elif prev_loc.is_reg():
        reg = prev_loc.value
        # move to another register
        if loc.is_reg():
            other_reg = loc.value
            self.mc.mr(other_reg, reg)
            return
        # move to memory
        elif loc.is_stack():
            offset = loc.value
            self.mc.store(reg, r.SPP.value, offset)
            return
        assert 0, "not supported location"
    elif prev_loc.is_imm_float():
        value = prev_loc.getint()
        # move immediate value to fp register
        if loc.is_fp_reg():
            with scratch_reg(self.mc):
                self.mc.load_imm(r.SCRATCH, value)
                self.mc.lfdx(loc.value, 0, r.SCRATCH.value)
            return
        # move immediate value to memory
        elif loc.is_stack():
            with scratch_reg(self.mc):
                offset = loc.value
                self.mc.load_imm(r.SCRATCH, value)
                self.mc.lfdx(r.FP_SCRATCH.value, 0, r.SCRATCH.value)
                self.mc.stfd(r.FP_SCRATCH.value, r.SPP.value, offset)
            return
        assert 0, "not supported location"
    elif prev_loc.is_fp_reg():
        reg = prev_loc.value
        # move to another fp register
        if loc.is_fp_reg():
            other_reg = loc.value
            self.mc.fmr(other_reg, reg)
            return
        # move from fp register to memory
        elif loc.is_stack():
            assert loc.type == FLOAT, "target not float location"
            offset = loc.value
            self.mc.stfd(reg, r.SPP.value, offset)
            return
        assert 0, "not supported location"
    assert 0, "not supported location"

# alias used by the generic llsupport code
mov_loc_loc = regalloc_mov
  973. def regalloc_push(self, loc, already_pushed):
  974. """Pushes the value stored in loc to the stack
  975. Can trash the current value of SCRATCH when pushing a stack
  976. loc"""
  977. assert IS_PPC_64, 'needs to updated for ppc 32'
  978. index = WORD * (~already_pushed)
  979. if loc.type == FLOAT:
  980. if not loc.is_fp_reg():
  981. self.regalloc_mov(loc, r.FP_SCRATCH)
  982. loc = r.FP_SCRATCH
  983. self.mc.stfd(loc.value, r.SP.value, index)
  984. else:
  985. if not loc.is_core_reg():
  986. self.regalloc_mov(loc, r.SCRATCH)
  987. loc = r.SCRATCH
  988. self.mc.std(loc.value, r.SP.value, index)
  989. def regalloc_pop(self, loc, already_pushed):
  990. """Pops the value on top of the stack to loc. Can trash the current
  991. value of SCRATCH when popping to a stack loc"""
  992. assert IS_PPC_64, 'needs to updated for ppc 32'
  993. index = WORD * (~already_pushed)
  994. if loc.type == FLOAT:
  995. if loc.is_fp_reg():
  996. self.mc.lfd(loc.value, r.SP.value, index)
  997. else:
  998. self.mc.lfd(r.FP_SCRATCH.value, r.SP.value, index)
  999. self.regalloc_mov(r.FP_SCRATCH, loc)
  1000. else:
  1001. if loc.is_core_reg():
  1002. self.mc.ld(loc.value, r.SP.value, index)
  1003. else:
  1004. self.mc.ld(r.SCRATCH.value, r.SP.value, index)
  1005. self.regalloc_mov(r.SCRATCH, loc)
  1006. def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
  1007. assert size & (WORD-1) == 0 # must be correctly aligned
  1008. # We load into RES the address stored at nursery_free_adr. We
  1009. # calculate the new value for nursery_free_adr and store it in
  1010. # RSZ. Then we load the address stored in nursery_top_adr
  1011. # into SCRATCH. In the rare case where the value in RSZ is
  1012. # (unsigned) bigger than the one in SCRATCH we call
  1013. # malloc_slowpath. In the common case where malloc_slowpath
  1014. # is not called, we must still write RSZ back into
  1015. # nursery_free_adr (r2); so we do it always, even if we called
  1016. # malloc_slowpath.
  1017. diff = nursery_top_adr - nursery_free_adr
  1018. assert _check_imm_arg(diff)
  1019. mc = self.mc
  1020. mc.load_imm(r.r2, nursery_free_adr)
  1021. mc.load(r.RES.value, r.r2.value, 0) # load nursery_free
  1022. mc.load(r.SCRATCH.value, r.r2.value, diff) # load nursery_top
  1023. if _check_imm_arg(size):
  1024. mc.addi(r.RSZ.value, r.RES.value, size)
  1025. else:
  1026. mc.load_imm(r.RSZ, size)
  1027. mc.add(r.RSZ.value, r.RES.value, r.RSZ.value)
  1028. mc.cmp_op(0, r.RSZ.value, r.SCRATCH.value, signed=False)
  1029. fast_jmp_pos = mc.currpos()
  1030. mc.trap() # conditional jump, patched later
  1031. # new value of nursery_free_adr in RSZ and the adr of the new object
  1032. # in RES.
  1033. self.load_gcmap(mc, r.r2, gcmap)
  1034. # We are jumping to malloc_slowpath without a call through a function
  1035. # descriptor, because it is an internal call and "call" would trash

Large files files are truncated, but you can click here to view the full file