
/rpython/jit/backend/arm/assembler.py

https://bitbucket.org/pypy/pypy/
Python | 1615 lines | 1355 code | 109 blank | 151 comment | 78 complexity | 2adf32c2a79d4533455dd987b81c2255 MD5
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0


from __future__ import with_statement
import os
from rpython.jit.backend.arm import conditions as c, registers as r
from rpython.jit.backend.arm import shift
from rpython.jit.backend.arm.arch import (WORD, DOUBLE_WORD,
    JITFRAME_FIXED_SIZE)
from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
from rpython.jit.backend.arm.locations import imm, StackLocation, get_fp_offset
from rpython.jit.backend.arm.helper.regalloc import VMEM_imm_size
from rpython.jit.backend.arm.opassembler import ResOpAssembler
from rpython.jit.backend.arm.regalloc import (Regalloc,
    CoreRegisterManager, check_imm_arg, VFPRegisterManager,
    operations as regalloc_operations)
from rpython.jit.backend.llsupport import jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.model import CompiledLoopToken
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT, INT, VOID
from rpython.jit.metainterp.resoperation import rop
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib.jit import AsmInfo
from rpython.rlib.objectmodel import we_are_translated, specialize, compute_unique_id
from rpython.rlib.rarithmetic import r_uint
from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.jit.backend.arm import callbuilder
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rlib.rjitlog import rjitlog as jl


class AssemblerARM(ResOpAssembler):

    debug = False
    DEBUG_FRAME_DEPTH = False

    def __init__(self, cpu, translate_support_code=False):
        ResOpAssembler.__init__(self, cpu, translate_support_code)
        self.setup_failure_recovery()
        self.mc = None
        self.pending_guards = None
        self._exit_code_addr = 0
        self.current_clt = None
        self.malloc_slowpath = 0
        self.wb_slowpath = [0, 0, 0, 0, 0]
        self._regalloc = None
        self.datablockwrapper = None
        self.propagate_exception_path = 0
        self.stack_check_slowpath = 0
        self._debug = False
        self.loop_run_counters = []
        self.gcrootmap_retaddr_forced = 0

    def setup_once(self):
        BaseAssembler.setup_once(self)

    def setup(self, looptoken):
        BaseAssembler.setup(self, looptoken)
        assert self.memcpy_addr != 0, 'setup_once() not called?'
        if we_are_translated():
            self.debug = False
        self.current_clt = looptoken.compiled_loop_token
        self.mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self.pending_guards = []
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.mc.datablockwrapper = self.datablockwrapper
        self.target_tokens_currently_compiling = {}
        self.frame_depth_to_patch = []

    def teardown(self):
        self.current_clt = None
        self._regalloc = None
        self.mc = None
        self.pending_guards = None

    def setup_failure_recovery(self):
        self.failure_recovery_code = [0, 0, 0, 0]

    def _build_propagate_exception_path(self):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._store_and_reset_exception(mc, r.r0)
        ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
        # make sure ofs fits into a register
        assert check_imm_arg(ofs)
        self.store_reg(mc, r.r0, r.fp, ofs)
        propagate_exception_descr = rffi.cast(lltype.Signed,
            cast_instance_to_gcref(self.cpu.propagate_exception_descr))
        # put propagate_exception_descr into frame
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        # make sure ofs fits into a register
        assert check_imm_arg(ofs)
        mc.gen_load_int(r.r0.value, propagate_exception_descr)
        self.store_reg(mc, r.r0, r.fp, ofs)
        mc.MOV_rr(r.r0.value, r.fp.value)
        self.gen_func_epilog(mc)
        rawstart = mc.materialize(self.cpu, [])
        self.propagate_exception_path = rawstart

    def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None,
                                   on_frame=False):
        """ Reset the exception. If excvalloc is None, then store the
        exception value on the frame in jf_guard_exc.
        """
        assert excvalloc is not r.ip
        assert exctploc is not r.ip
        tmpreg = r.lr
        mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        if excvalloc is not None:   # store
            assert excvalloc.is_core_reg()
            self.load_reg(mc, excvalloc, r.ip)
        if on_frame:
            # store exc_value in JITFRAME
            ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            assert check_imm_arg(ofs)
            #
            self.load_reg(mc, r.ip, r.ip, helper=tmpreg)
            #
            self.store_reg(mc, r.ip, r.fp, ofs, helper=tmpreg)
        if exctploc is not None:
            # store pos_exception in exctploc
            assert exctploc.is_core_reg()
            mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
            self.load_reg(mc, exctploc, r.ip, helper=tmpreg)
        if on_frame or exctploc is not None:
            mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        # reset exception
        mc.gen_load_int(tmpreg.value, 0)
        self.store_reg(mc, tmpreg, r.ip, 0)
        mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
        self.store_reg(mc, tmpreg, r.ip, 0)
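
    # NOTE (editor, illustrative -- not part of the original file):
    # throughout these exception helpers, ip and lr are the two scratch
    # registers: ip holds the address being loaded from or stored to, and
    # lr (tmpreg) absorbs large offsets and the zero used to reset the
    # thread-local exception slots.  The asserts above enforce that the
    # caller never passes ip as a destination, since it gets clobbered.
    # A minimal sketch of the reset sequence generated at the end:
    #
    #     gen_load_int(lr, 0)                 # lr = 0
    #     str lr, [ip]                        # *pos_exc_value = 0
    #     gen_load_int(ip, pos_exception())
    #     str lr, [ip]                        # *pos_exception = 0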

    def _restore_exception(self, mc, excvalloc, exctploc):
        assert excvalloc is not r.ip
        assert exctploc is not r.ip
        tmpreg = r.lr  # use lr as a second temporary reg
        mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        if excvalloc is not None:
            assert excvalloc.is_core_reg()
            self.store_reg(mc, excvalloc, r.ip)
        else:
            assert exctploc is not r.fp
            # load exc_value from JITFRAME and put it in pos_exc_value
            ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            self.load_reg(mc, tmpreg, r.fp, ofs)
            self.store_reg(mc, tmpreg, r.ip)
            # reset exc_value in the JITFRAME
            mc.gen_load_int(tmpreg.value, 0)
            self.store_reg(mc, tmpreg, r.fp, ofs)
        # restore pos_exception from exctploc register
        mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
        self.store_reg(mc, exctploc, r.ip)

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  retaddr of caller    |   <-- aligned to a multiple of 16
        #    |  saved argument regs  |
        #    |  my own retaddr       |   <-- sp
        #    +-----------------------+
        #
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        # save argument registers and return address
        mc.PUSH([reg.value for reg in r.argument_regs] + [r.ip.value, r.lr.value])
        # stack is aligned here
        # Pass current stack pointer as argument to the call
        mc.MOV_rr(r.r0.value, r.sp.value)
        #
        mc.BL(slowpathaddr)
        # check for an exception
        mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
        mc.LDR_ri(r.r0.value, r.r0.value)
        mc.TST_rr(r.r0.value, r.r0.value)
        #
        # restore registers and return
        # We check for c.EQ here, meaning all bits zero in this case
        mc.POP([reg.value for reg in r.argument_regs] + [r.ip.value, r.pc.value], cond=c.EQ)
        # restore sp
        mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
        mc.B(self.propagate_exception_path)
        #
        rawstart = mc.materialize(self.cpu, [])
        self.stack_check_slowpath = rawstart
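
    # NOTE (editor, illustrative -- not part of the original file):
    # the conditional POP above doubles as the return: when TST sets the
    # Z flag (no pending exception), popping into pc resumes the caller
    # directly.  Only on the exception path does execution fall through,
    # where sp is bumped past the saved registers by hand before
    # branching to propagate_exception_path; in pseudo-assembly:
    #
    #     popeq {args..., ip, pc}                 # return if no exception
    #     add   sp, sp, #(len(args) + 2) * WORD   # discard saved regs
    #     b     propagate_exception_path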

    def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all vfp registers.  It takes a single argument which is in r0.
        # It must keep stack alignment accordingly.
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        exc0 = exc1 = None
        mc.PUSH([r.ip.value, r.lr.value])  # push two words to keep alignment
        if not for_frame:
            self._push_all_regs_to_jitframe(mc, [], withfloats, callee_only=True)
        else:
            # NOTE: don't save registers on the jitframe here!  It might
            # override already-saved values that will be restored
            # later...
            #
            # we're possibly called from the slowpath of malloc
            # save the caller saved registers
            # assuming we do not collect here
            exc0, exc1 = r.r4, r.r5
            mc.PUSH([gpr.value for gpr in r.caller_resp] + [exc0.value, exc1.value])
            mc.VPUSH([vfpr.value for vfpr in r.caller_vfp_resp])
            self._store_and_reset_exception(mc, exc0, exc1)
        mc.BL(func)
        #
        if not for_frame:
            self._pop_all_regs_from_jitframe(mc, [], withfloats, callee_only=True)
        else:
            self._restore_exception(mc, exc0, exc1)
            mc.VPOP([vfpr.value for vfpr in r.caller_vfp_resp])
            assert exc0 is not None
            assert exc1 is not None
            mc.POP([gpr.value for gpr in r.caller_resp] +
                   [exc0.value, exc1.value])
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            mc.LDRB_ri(r.ip.value, r.r0.value,
                       imm=descr.jit_wb_if_flag_byteofs)
            mc.TST_ri(r.ip.value, imm=0x80)
        #
        mc.POP([r.ip.value, r.pc.value])
        #
        rawstart = mc.materialize(self.cpu, [])
        if for_frame:
            self.wb_slowpath[4] = rawstart
        else:
            self.wb_slowpath[withcards + 2 * withfloats] = rawstart
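
    # NOTE (editor, illustrative -- not part of the original file):
    # wb_slowpath holds five entry points, indexed by the build
    # parameters exactly as in the last two lines above:
    #
    #     index = withcards + 2 * withfloats   # 0, 1, 2, 3
    #     index = 4                            # the for_frame variant
    #
    # so e.g. wb_slowpath[3] is the card-marking barrier that also
    # saves and restores the vfp registers.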

    def _build_cond_call_slowpath(self, supports_floats, callee_only):
        """ This builds a general call slowpath, for whatever call happens to
        come.
        """
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats, callee_only)
        ## args are in their respective positions
        mc.PUSH([r.ip.value, r.lr.value])
        mc.BLX(r.r4.value)
        self._reload_frame_if_necessary(mc)
        self._pop_all_regs_from_jitframe(mc, [], supports_floats,
                                         callee_only)
        # return
        mc.POP([r.ip.value, r.pc.value])
        return mc.materialize(self.cpu, [])

    def _build_malloc_slowpath(self, kind):
        """ While arriving on slowpath, we have a gcpattern on stack 0.
        The arguments are passed in r0 and r1, as follows:

        kind == 'fixed': nursery_head in r0 and the size in r1 - r0.

        kind == 'str/unicode': length of the string to allocate in r0.

        kind == 'var': length to allocate in r1, tid in r0,
                       and itemsize on the stack.

        This function must preserve all registers apart from r0 and r1.
        """
        assert kind in ['fixed', 'str', 'unicode', 'var']
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        self._push_all_regs_to_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
        #
        if kind == 'fixed':
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        elif kind == 'str':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
        elif kind == 'unicode':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
        else:
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
        if kind == 'fixed':
            # stack layout: [gcmap]
            # At this point we know that the values we need to compute the size
            # are stored in r0 and r1.
            mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)  # compute the size we want
            if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
                mc.MOV_rr(r.r1.value, r.fp.value)
        elif kind == 'str' or kind == 'unicode':
            # stack layout: [gcmap]
            mc.MOV_rr(r.r0.value, r.r1.value)
        else:  # var
            # stack layout: [gcmap][itemsize]...
            # tid is in r0
            # length is in r1
            mc.MOV_rr(r.r2.value, r.r1.value)
            mc.MOV_rr(r.r1.value, r.r0.value)
            mc.POP([r.r0.value])  # load itemsize
        # store the gc pattern
        mc.POP([r.r4.value])
        ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        self.store_reg(mc, r.r4, r.fp, ofs)
        #
        # We need to push two registers here because we are going to make a
        # call and therefore the stack needs to be 8-byte aligned
        mc.PUSH([r.ip.value, r.lr.value])
        #
        mc.BL(addr)
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.
        mc.CMP_ri(r.r0.value, 0)
        mc.B(self.propagate_exception_path, c=c.EQ)
        #
        self._reload_frame_if_necessary(mc)
        self._pop_all_regs_from_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.gen_load_int(r.r1.value, nursery_free_adr)
        mc.LDR_ri(r.r1.value, r.r1.value)
        # clear the gc pattern
        mc.gen_load_int(r.ip.value, 0)
        self.store_reg(mc, r.ip, r.fp, ofs)
        # return
        mc.POP([r.ip.value, r.pc.value])
        #
        rawstart = mc.materialize(self.cpu, [])
        return rawstart
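
    # NOTE (editor, illustrative -- not part of the original file):
    # reading the 'fixed' case as a sketch of the calling convention:
    # the JIT's inlined fast path leaves nursery_head in r0 and the
    # requested end pointer in r1, so the single SUB above turns them
    # into the size argument expected by the C helper:
    #
    #     sub r0, r1, r0        # r0 = requested_end - nursery_head
    #
    # On return, r0 holds the allocated object (or 0 on failure, which
    # takes the branch to propagate_exception_path) and r1 is reloaded
    # with the fresh value of nursery_free.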

    def _reload_frame_if_necessary(self, mc):
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            mc.gen_load_int(r.ip.value, rst)
            self.load_reg(mc, r.ip, r.ip)
            self.load_reg(mc, r.fp, r.ip, ofs=-WORD)
        wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
        if gcrootmap and wbdescr:
            # frame never uses card marking, so we enforce this is not
            # an array
            self._write_barrier_fastpath(mc, wbdescr, [r.fp], array=False,
                                         is_frame=True)

    def propagate_memoryerror_if_reg_is_null(self, reg_loc):
        # see ../x86/assembler.py:genop_discard_check_memory_error()
        self.mc.CMP_ri(reg_loc.value, 0)
        self.mc.B(self.propagate_exception_path, c=c.EQ)

    def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
                                   callee_only=False):
        # Push general purpose registers
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if callee_only:
            regs = CoreRegisterManager.save_around_call_regs
        else:
            regs = CoreRegisterManager.all_regs
        # XXX add special case if ignored_regs are a block at the start of regs
        if not ignored_regs:  # we want to push a contiguous block of regs
            assert check_imm_arg(base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
            mc.STM(r.ip.value, [reg.value for reg in regs])
        else:
            for reg in ignored_regs:
                assert not reg.is_vfp_reg()  # sanity check
            # we can have holes in the list of regs
            for i, gpr in enumerate(regs):
                if gpr in ignored_regs:
                    continue
                self.store_reg(mc, gpr, r.fp, base_ofs + i * WORD)
        if withfloats:
            # Push VFP regs
            regs = VFPRegisterManager.all_regs
            ofs = len(CoreRegisterManager.all_regs) * WORD
            assert check_imm_arg(ofs + base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs + base_ofs)
            mc.VSTM(r.ip.value, [vfpr.value for vfpr in regs])

    def _pop_all_regs_from_jitframe(self, mc, ignored_regs, withfloats,
                                    callee_only=False):
        # Pop general purpose registers
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if callee_only:
            regs = CoreRegisterManager.save_around_call_regs
        else:
            regs = CoreRegisterManager.all_regs
        # XXX add special case if ignored_regs are a block at the start of regs
        if not ignored_regs:  # we want to pop a contiguous block of regs
            assert check_imm_arg(base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
            mc.LDM(r.ip.value, [reg.value for reg in regs])
        else:
            for reg in ignored_regs:
                assert not reg.is_vfp_reg()  # sanity check
            # we can have holes in the list of regs
            for i, gpr in enumerate(regs):
                if gpr in ignored_regs:
                    continue
                ofs = i * WORD + base_ofs
                self.load_reg(mc, gpr, r.fp, ofs)
        if withfloats:
            # Pop VFP regs
            regs = VFPRegisterManager.all_regs
            ofs = len(CoreRegisterManager.all_regs) * WORD
            assert check_imm_arg(ofs + base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs + base_ofs)
            mc.VLDM(r.ip.value, [vfpr.value for vfpr in regs])
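
    # NOTE (editor, illustrative -- not part of the original file):
    # both helpers assume the same jitframe layout: the core registers
    # live at base_ofs as one contiguous block (hence the single STM/LDM
    # when there are no holes), and the vfp registers follow right after
    # them:
    #
    #     base_ofs + 0*WORD .. base_ofs + (ncore-1)*WORD   core regs
    #     base_ofs + ncore*WORD onwards                    vfp regs
    #
    # where ncore = len(CoreRegisterManager.all_regs).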

    def _build_failure_recovery(self, exc, withfloats=False):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._push_all_regs_to_jitframe(mc, [], withfloats)
        if exc:
            # We might have an exception pending.  Load it into r4
            # (this is a register saved across calls)
            mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
            mc.LDR_ri(r.r4.value, r.r5.value)
            # clear the exc flags
            mc.gen_load_int(r.r6.value, 0)
            mc.STR_ri(r.r6.value, r.r5.value)  # pos_exc_value is still in r5
            mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
            mc.STR_ri(r.r6.value, r.r5.value)
            # save r4 into 'jf_guard_exc'
            offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            assert check_imm_arg(abs(offset))
            mc.STR_ri(r.r4.value, r.fp.value, imm=offset)
        # now we return from the complete frame, which starts from
        # _call_header_with_stack_check().  The LEA in _call_footer below
        # throws away most of the frame, including all the PUSHes that we
        # did just above.
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        assert check_imm_arg(abs(ofs))
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        assert check_imm_arg(abs(ofs2))
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        # store the gcmap
        mc.POP([r.ip.value])
        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs2)
        # store the descr
        mc.POP([r.ip.value])
        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
        # set return value
        assert check_imm_arg(base_ofs)
        mc.MOV_rr(r.r0.value, r.fp.value)
        #
        self.gen_func_epilog(mc)
        rawstart = mc.materialize(self.cpu, [])
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart
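
    # NOTE (editor, illustrative -- not part of the original file):
    # like wb_slowpath, failure_recovery_code is indexed by the build
    # flags, exc + 2 * withfloats, giving four variants (0..3).  The
    # stub expects generate_quick_failure() below to have pushed the
    # faildescr and then the gcmap, so the two POPs above retrieve them
    # in reverse order: gcmap into jf_gcmap, then descr into jf_descr,
    # before returning the jitframe itself in r0.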

    def generate_quick_failure(self, guardtok):
        startpos = self.mc.currpos()
        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
        self.load_from_gc_table(r.ip.value, faildescrindex)
        self.regalloc_push(r.ip)
        self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
        self.mc.BL(target)
        return startpos

    def gen_func_epilog(self, mc=None, cond=c.AL):
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if mc is None:
            mc = self.mc
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_footer_shadowstack(gcrootmap, mc)
        if self.cpu.supports_floats:
            mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                    cond=cond)
        # pop all callee saved registers.  This pops 'pc' last.
        # It also pops the threadlocal_addr back into 'r1', but it
        # is not needed any more and will be discarded.
        mc.POP([reg.value for reg in r.callee_restored_registers] +
               [r.r1.value], cond=cond)
        mc.BKPT()

    def gen_func_prolog(self):
        stack_size = WORD  # alignment
        stack_size += len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD
        # push all callee saved registers including lr; and push r1 as
        # well, which contains the threadlocal_addr argument.  Note that
        # we're pushing a total of 10 words, which keeps the stack aligned.
        self.mc.PUSH([reg.value for reg in r.callee_saved_registers] +
                     [r.r1.value])
        self.saved_threadlocal_addr = 0   # at offset 0 from location 'sp'
        if self.cpu.supports_floats:
            self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
            self.saved_threadlocal_addr += (
                len(r.callee_saved_vfp_registers) * 2 * WORD)
        assert stack_size % 8 == 0  # ensure we keep alignment
        # set fp to point to the JITFRAME
        self.mc.MOV_rr(r.fp.value, r.r0.value)
        #
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_shadowstack_header(gcrootmap)

    def gen_shadowstack_header(self, gcrootmap):
        # lr = shadow stack top addr
        # ip = *lr
        rst = gcrootmap.get_root_stack_top_addr()
        self.mc.gen_load_int(r.lr.value, rst)
        self.load_reg(self.mc, r.ip, r.lr)
        # *ip = r.fp
        self.store_reg(self.mc, r.fp, r.ip)
        #
        self.mc.ADD_ri(r.ip.value, r.ip.value, WORD)
        # *lr = ip + WORD
        self.store_reg(self.mc, r.ip, r.lr)

    def gen_footer_shadowstack(self, gcrootmap, mc):
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(r.ip.value, rst)
        self.load_reg(mc, r.r4, r.ip)
        mc.SUB_ri(r.r4.value, r.r4.value, WORD)
        self.store_reg(mc, r.r4, r.ip)

    def _dump(self, ops, type='loop'):
        debug_start('jit-backend-ops')
        debug_print(type)
        for op in ops:
            debug_print(op.repr())
        debug_stop('jit-backend-ops')

    def _call_header(self):
        # there is the gc table before this point
        self.gen_func_prolog()

    def _call_header_with_stack_check(self):
        self._call_header()
        if self.stack_check_slowpath == 0:
            pass                # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            # load stack end
            self.mc.gen_load_int(r.ip.value, endaddr)           # load ip, [end]
            self.mc.LDR_ri(r.ip.value, r.ip.value)              # LDR ip, ip
            # load stack length
            self.mc.gen_load_int(r.lr.value, lengthaddr)        # load lr, length
            self.mc.LDR_ri(r.lr.value, r.lr.value)              # ldr lr, *length
            # calculate ofs
            self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value)  # SUB ip, current
            # if ofs
            self.mc.CMP_rr(r.ip.value, r.lr.value)              # CMP ip, lr
            self.mc.BL(self.stack_check_slowpath, c=c.HI)       # call if ip > lr
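
    # NOTE (editor, illustrative -- not part of the original file):
    # the check computes how far the current sp sits below the recorded
    # stack end, and calls the slowpath only when that distance exceeds
    # the allowed length; in pseudo-C:
    #
    #     if (*stack_end - sp > *stack_length)   # unsigned compare, HI
    #         stack_check_slowpath(sp);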

    # cpu interface
    def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
                      operations, looptoken, log):
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        #self.codemap_builder.enter_portal_frame(jd_id, unique_id,
        #                                        self.mc.get_relative_pos())

        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
        clt.frame_info.clear()  # for now

        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = Regalloc(assembler=self)
        allgcrefs = []
        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
                                           allgcrefs)
        self.reserve_gcref_table(allgcrefs)
        functionpos = self.mc.get_relative_pos()

        self._call_header_with_stack_check()
        self._check_frame_depth_debug(self.mc)

        loop_head = self.mc.get_relative_pos()
        looptoken._ll_loop_code = loop_head
        #
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
        self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()

        self.write_pending_failure_recoveries()
        full_size = self.mc.get_relative_pos()

        rawstart = self.materialize_loop(looptoken)
        looptoken._ll_function_addr = rawstart + functionpos

        self.patch_gcref_table(looptoken, rawstart)
        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        if log and not we_are_translated():
            self.mc._dump_trace(rawstart, 'loop.asm')

        ops_offset = self.mc.ops_offset

        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset=ops_offset)

            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_loop(inputargs, operations, 0,
                                           "rewritten", name=loopname,
                                           ops_offset=ops_offset)

        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
            looptoken.number, loopname,
            r_uint(rawstart + loop_head),
            r_uint(rawstart + size_excluding_failure_stuff),
            r_uint(rawstart + functionpos)))
        debug_print("       gc table: 0x%x" % r_uint(rawstart))
        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + loop_head))
        debug_print("       failures: 0x%x" % r_uint(rawstart +
                                                     size_excluding_failure_stuff))
        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, rawstart + loop_head,
                       size_excluding_failure_stuff - loop_head)
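
    # NOTE (editor, illustrative -- not part of the original file):
    # the relative positions recorded above describe the layout of one
    # materialized loop, matching the debug prints:
    #
    #     rawstart                  gc table (reserve_gcref_table)
    #     rawstart + functionpos    prologue / stack check ("bootstrap")
    #     rawstart + loop_head      start of the compiled resops
    #     rawstart + size_excluding_failure_stuff
    #                               guard failure recovery stubs
    #     rawstart + full_size      end of the machine code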

    def _assemble(self, regalloc, inputargs, operations):
        self.guard_success_cc = c.cond_none
        regalloc.compute_hint_frame_locations(operations)
        self._walk_operations(inputargs, operations, regalloc)
        assert self.guard_success_cc == c.cond_none
        frame_depth = regalloc.get_final_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth
            target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
            frame_depth = max(frame_depth, target_frame_depth)
        return frame_depth

    def assemble_bridge(self, logger, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(original_loop_token)
        #self.codemap.inherit_code_from_position(faildescr.adr_jump_offset)
        descr_number = compute_unique_id(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        assert isinstance(faildescr, AbstractFailDescr)

        arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)

        regalloc = Regalloc(assembler=self)
        allgcrefs = []
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             allgcrefs,
                                             self.current_clt.frame_info)
        self.reserve_gcref_table(allgcrefs)
        startpos = self.mc.get_relative_pos()

        self._check_frame_depth(self.mc, regalloc.get_gcmap())

        bridgestartpos = self.mc.get_relative_pos()
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)

        codeendpos = self.mc.get_relative_pos()

        self.write_pending_failure_recoveries()
        fullsize = self.mc.get_relative_pos()

        rawstart = self.materialize_loop(original_loop_token)

        self.patch_gcref_table(original_loop_token, rawstart)
        self.process_pending_guards(rawstart)

        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
                    (r_uint(descr_number), r_uint(rawstart + startpos),
                     r_uint(rawstart + codeendpos)))
        debug_print("       gc table: 0x%x" % r_uint(rawstart))
        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
        debug_stop("jit-backend-addr")

        # patch the jump from original guard
        self.patch_trace(faildescr, original_loop_token,
                         rawstart + startpos, regalloc)

        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                rawstart)
        if not we_are_translated():
            if log:
                self.mc._dump_trace(rawstart, 'bridge.asm')

        ops_offset = self.mc.ops_offset

        frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                          frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        self.fixup_target_tokens(rawstart)
        self.update_frame_depth(frame_depth)

        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset)
            # log that the already written bridge is stitched to a descr!
            logger.log_patch_guard(descr_number, rawstart)

            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
                                             faildescr, ops_offset=ops_offset)

        self.teardown()

        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def reserve_gcref_table(self, allgcrefs):
        gcref_table_size = len(allgcrefs) * WORD
        # align to a multiple of 16 and reserve space at the beginning
        # of the machine code for the gc table.  This lets us write
        # machine code with relative addressing (see load_from_gc_table())
        gcref_table_size = (gcref_table_size + 15) & ~15
        mc = self.mc
        assert mc.get_relative_pos() == 0
        for i in range(gcref_table_size):
            mc.writechar('\x00')

        self.setup_gcrefs_list(allgcrefs)

    def patch_gcref_table(self, looptoken, rawstart):
        # the gc table is at the start of the machine code.  Fill it now
        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
                                                        self._allgcrefs)
        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
        gcreftracers.append(tracer)    # keepalive
        self.teardown_gcrefs_list()

    def load_from_gc_table(self, regnum, index):
        """emits either:
               LDR Rt, [PC, #offset]    if -4095 <= offset
           or:
               gen_load_int(Rt, offset)
               LDR Rt, [PC, Rt]         for larger offsets
        """
        mc = self.mc
        address_in_buffer = index * WORD   # at the start of the buffer
        offset = address_in_buffer - (mc.get_relative_pos() + 8)   # negative
        if offset >= -4095:
            mc.LDR_ri(regnum, r.pc.value, offset)
        else:
            # The offset we're loading is negative: right now,
            # gen_load_int() will always use exactly
            # get_max_size_of_gen_load_int() instructions.  No point
            # in optimizing in case we get less.  Just in case though,
            # we check and pad with nops.
            extra_bytes = mc.get_max_size_of_gen_load_int() * 4
            offset -= extra_bytes
            start = mc.get_relative_pos()
            mc.gen_load_int(regnum, offset)
            missing = start + extra_bytes - mc.get_relative_pos()
            while missing > 0:
                mc.NOP()
                missing = start + extra_bytes - mc.get_relative_pos()
            assert missing == 0
            mc.LDR_rr(regnum, r.pc.value, regnum)
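
    # NOTE (editor, illustrative -- not part of the original file):
    # the "+ 8" in the offset computation accounts for the classic ARM
    # pipeline rule: an instruction that reads pc observes its own
    # address plus 8.  So for a gc-table entry at index i:
    #
    #     offset = i * WORD - (current_pos + 8)   # always negative here
    #
    # and LDR Rt, [pc, #offset] lands exactly on the entry reserved at
    # the start of the buffer by reserve_gcref_table().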

    def new_stack_loc(self, i, tp):
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        return StackLocation(i, get_fp_offset(base_ofs, i), tp)

    def check_frame_before_jump(self, target_token):
        if target_token in self.target_tokens_currently_compiling:
            return
        if target_token._arm_clt is self.current_clt:
            return
        # We can have a frame coming from god knows where that's
        # passed to a jump to another loop.  Make sure it has the
        # correct depth
        expected_size = target_token._arm_clt.frame_info.jfi_frame_depth
        self._check_frame_depth(self.mc, self._regalloc.get_gcmap(),
                                expected_size=expected_size)

    def _patch_frame_depth(self, adr, allocated_depth):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        mc.gen_load_int(r.lr.value, allocated_depth)
        mc.copy_to_raw_memory(adr)

    def _check_frame_depth(self, mc, gcmap, expected_size=-1):
        """ check if the frame is of enough depth to follow this bridge.
        Otherwise reallocate the frame in a helper.
        There are other potential solutions
        to that, but this one does not sound too bad.
        """
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
        stack_check_cmp_ofs = mc.currpos()
        if expected_size == -1:
            for _ in range(mc.get_max_size_of_gen_load_int()):
                mc.NOP()
        else:
            mc.gen_load_int(r.lr.value, expected_size)
        mc.CMP_rr(r.ip.value, r.lr.value)
        jg_location = mc.currpos()
        mc.BKPT()
        # the size value is still stored in lr
        mc.PUSH([r.lr.value])
        self.push_gcmap(mc, gcmap, push=True)
        self.mc.BL(self._frame_realloc_slowpath)
        # patch jg_location above
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(mc, jg_location, WORD)
        pmc.B_offs(currpos, c.GE)
        self.frame_depth_to_patch.append(stack_check_cmp_ofs)
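
    # NOTE (editor, illustrative -- not part of the original file):
    # when expected_size is unknown (-1), the code emits a sled of
    # exactly get_max_size_of_gen_load_int() NOPs at stack_check_cmp_ofs.
    # Once the final frame depth is known, patch_stack_checks() calls
    # _patch_frame_depth(), which overwrites that sled in place with
    # gen_load_int(lr, framedepth), so the CMP that follows compares
    # against the real value.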

    def _check_frame_depth_debug(self, mc):
        """ double check the depth size. It prints the error (and potentially
        segfaults later)
        """
        if not self.DEBUG_FRAME_DEPTH:
            return
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
        stack_check_cmp_ofs = mc.currpos()
        for _ in range(mc.get_max_size_of_gen_load_int()):
            mc.NOP()
        mc.CMP_rr(r.ip.value, r.lr.value)
        jg_location = mc.currpos()
        mc.BKPT()
        mc.MOV_rr(r.r0.value, r.fp.value)
        mc.MOV_ri(r.r1.value, r.lr.value)
        self.mc.BL(self.cpu.realloc_frame_crash)
        # patch the JG above
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(mc, jg_location, WORD)
        pmc.B_offs(currpos, c.GE)
        self.frame_depth_to_patch.append(stack_check_cmp_ofs)

    def build_frame_realloc_slowpath(self):
        # this code should do the following steps
        # a) store all registers in the jitframe
        # b) fish for the arguments passed by the caller
        # c) store the gcmap in the jitframe
        # d) call realloc_frame
        # e) set the fp to point to the new jitframe
        # f) store the address of the new jitframe in the shadowstack
        # g) set the gcmap field to 0 in the new jitframe
        # h) restore registers and return
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats)
        # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
        # and the expected_size pushed in _check_stack_frame
        # pop the values passed on the stack, gcmap -> r0, expected_size -> r1
        mc.POP([r.r0.value, r.r1.value])
        # store return address and keep the stack aligned
        mc.PUSH([r.ip.value, r.lr.value])
        # store the current gcmap(r0) in the jitframe
        gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        assert check_imm_arg(abs(gcmap_ofs))
        mc.STR_ri(r.r0.value, r.fp.value, imm=gcmap_ofs)
        # set first arg, which is the old jitframe address
        mc.MOV_rr(r.r0.value, r.fp.value)
        # store a possibly present exception
        # we use a callee saved reg here as a tmp for the exc.
        self._store_and_reset_exception(mc, None, r.r4, on_frame=True)
        # call realloc_frame, it takes two arguments
        # arg0: the old jitframe
        # arg1: the new size
        #
        mc.BL(self.cpu.realloc_frame)
        # set fp to the new jitframe returned from the previous call
        mc.MOV_rr(r.fp.value, r.r0.value)
        # restore a possibly present exception
        self._restore_exception(mc, None, r.r4)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._load_shadowstack_top(mc, r.r5, gcrootmap)
            # store the new jitframe addr in the shadowstack
            mc.STR_ri(r.r0.value, r.r5.value, imm=-WORD)
        # reset the jf_gcmap field in the jitframe
        mc.gen_load_int(r.ip.value, 0)
        mc.STR_ri(r.ip.value, r.fp.value, imm=gcmap_ofs)
        # restore registers
        self._pop_all_regs_from_jitframe(mc, [], self.cpu.supports_floats)
        mc.POP([r.ip.value, r.pc.value])  # return
        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])

    def _load_shadowstack_top(self, mc, reg, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(reg.value, rst)
        self.load_reg(mc, reg, reg)
        return rst

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._ll_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def _patch_stackadjust(self, adr, allocated_depth):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        mc.gen_load_int(r.lr.value, allocated_depth)
        mc.copy_to_raw_memory(adr)

    def patch_stack_checks(self, framedepth, rawstart):
        for ofs in self.frame_depth_to_patch:
            self._patch_frame_depth(ofs + rawstart, framedepth)

    def target_arglocs(self, loop_token):
        return loop_token._arm_arglocs

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()      # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        size = self.mc.get_relative_pos()
        res = self.mc.materialize(self.cpu, allblocks,
                                  self.cpu.gc_ll_descr.gcrootmap)
        #self.cpu.codemap.register_codemap(
        #    self.codemap.get_final_bytecode(res, size))
        return res

    def update_frame_depth(self, frame_depth):
        baseofs = self.cpu.get_baseofs_of_frame_field()
        self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)

    def write_pending_failure_recoveries(self):
        for tok in self.pending_guards:
            # generate the exit stub and the encoded representation
            tok.pos_recovery_stub = self.generate_quick_failure(tok)

    def process_pending_guards(self, block_start):
        clt = self.current_clt
        for tok in self.pending_guards:
            descr = tok.faildescr
            assert isinstance(descr, AbstractFailDescr)
            failure_recovery_pos = block_start + tok.pos_recovery_stub
            descr.adr_jump_offset = failure_recovery_pos
            relative_offset = tok.pos_recovery_stub - tok.offset
            guard_pos = block_start + tok.offset
            if not tok.guard_not_invalidated():
                # patch the guard jump to the stub:
                # overwrite the generated NOP with a B_offs to the pos of the
                # stub
                mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
                mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
                mc.copy_to_raw_memory(guard_pos)
            else:
                clt.invalidate_positions.append((guard_pos, relative_offset))

    def _walk_operations(self, inputargs, operations, regalloc):
        fcond = c.AL
        self._regalloc = regalloc
        regalloc.operations = operations
        while regalloc.position() < len(operations) - 1:
            regalloc.next_instruction()
            i = regalloc.position()
            op = operations[i]
            self.mc.mark_op(op)
            opnum = op.getopnum()
            if rop.has_no_side_effect(opnum) and op not in regalloc.longevity:
                regalloc.possibly_free_vars_for_op(op)
            elif not we_are_translated() and op.getopnum() == rop.FORCE_SPILL:
                regalloc.prepare_force_spill(op, fcond)
            else:
                arglocs = regalloc_operations[opnum](regalloc, op, fcond)
                if arglocs is not None:
                    fcond = asm_operations[opnum](self, op, arglocs,
                                                  regalloc, fcond)
                    assert fcond is not None
            if rop.is_guard(opnum):
                regalloc.possibly_free_vars(op.getfailargs())
            if op.type != 'v':
                regalloc.possibly_free_var(op)
            regalloc.possibly_free_vars_for_op(op)
            regalloc.free_temp_vars()
            regalloc._check_invariants()
        if not we_are_translated():
            self.mc.BKPT()
        self.mc.mark_op(None)  # end of the loop
        regalloc.operations = None

    def regalloc_emit_extra(self, op, arglocs, fcond, regalloc):
        # for calls to a function with a specifically-supported OS_xxx
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_extra_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond

    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
        b = InstrBuilder(self.cpu.cpuinfo.arch_version)
        patch_addr = faildescr.adr_jump_offset
        assert patch_addr != 0
        b.B(bridge_addr)
        b.copy_to_raw_memory(patch_addr)
        faildescr.adr_jump_offset = 0

    # regalloc support
    def load(self, loc, value):
        """load an immediate value into a register"""
        assert (loc.is_core_reg() and value.is_imm()
                or loc.is_vfp_reg() and value.is_imm_float())
        if value.is_imm():
            self.mc.gen_load_int(loc.value, value.getint())
        elif value.is_imm_float():
            self.mc.gen_load_int(r.ip.value, value.getint())
            self.mc.VLDR(loc.value, r.ip.value)

    def load_reg(self, mc, target, base, ofs=0, cond=c.AL, helper=r.ip):
        if target.is_vfp_reg():
            return self._load_vfp_reg(mc, target, base, ofs, cond, helper)
        elif target.is_core_reg():
            return self._load_core_reg(mc, target, base, ofs, cond, helper)

    def _load_vfp_reg(self, mc, target, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs, VMEM_imm_size):
            mc.VLDR(target.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.ADD_rr(helper.value, base.value, helper.value, cond=cond)
            mc.VLDR(target.value, helper.value, cond=cond)

    def _load_core_reg(self, mc, target, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(abs(ofs)):
            mc.LDR_ri(target.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.LDR_rr(target.value, base.value, helper.value, cond=cond)

    def store_reg(self, mc, source, base, ofs=0, cond=c.AL, helper=r.ip):
        if source.is_vfp_reg():
            return self._store_vfp_reg(mc, source, base, ofs, cond, helper)
        else:
            return self._store_core_reg(mc, source, base, ofs, cond, helper)

    def _store_vfp_reg(self, mc, source, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs, VMEM_imm_size):
            mc.VSTR(source.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.ADD_rr(helper.value, base.value, helper.value, cond=cond)
            mc.VSTR(source.value, helper.value, cond=cond)

    def _store_core_reg(self, mc, source, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs):
            mc.STR_ri(source.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.STR_rr(source.value, base.value, helper.value, cond=cond)

    def get_tmp_reg(self, forbidden_regs=None):
        if forbidden_regs is None:
            return r.ip, False
        for x in [r.ip, r.lr]:
            if x not in forbidden_regs:
                return x, False
        # pick some reg, that we need to save
        for x in r.all_regs:
            if x not in forbidden_regs:
                return x, True
        assert 0
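
    # NOTE (editor, illustrative -- not part of the original file):
    # get_tmp_reg() returns a pair (reg, must_save): ip and lr are free
    # scratch registers on this backend, so must_save is False for them;
    # only when both are forbidden does it hand out an allocatable
    # register that the caller has to PUSH/POP around its use, roughly
    # as _mov_reg_to_loc() below does:
    #
    #     temp, save = self.get_tmp_reg([prev_loc, loc])
    #     if save: self.mc.PUSH([temp.value])
    #     ...  # use temp as the helper register
    #     if save: self.mc.POP([temp.value])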

    def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.type == FLOAT:
            raise AssertionError("invalid target for move from imm value")
        if loc.is_core_reg():
            new_loc = loc
        elif loc.is_stack() or loc.is_raw_sp():
            new_loc = r.lr
        else:
            raise AssertionError("invalid target for move from imm value")
        self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
        if loc.is_stack():
            self.regalloc_mov(new_loc, loc)
        elif loc.is_raw_sp():
            self.store_reg(self.mc, new_loc, r.sp, loc.value, cond=cond, helper=r.ip)

    def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_imm():
            raise AssertionError("mov reg to imm doesn't make sense")
        if loc.is_core_reg():
            self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack() and loc.type != FLOAT:
            # spill a core register
            temp, save = self.get_tmp_reg([prev_loc, loc])
            offset = loc.value
            is_imm = check_imm_arg(offset, size=0xFFF)
            if not is_imm and save:
                self.mc.PUSH([temp.value], cond=cond)
            self.store_reg(self.mc, prev_loc, r.fp, offset, helper=temp, cond=cond)
            if not is_imm and save:
                self.mc.POP([temp.value], cond=cond)
        elif loc.is_raw_sp() and loc.type != FLOAT:
            temp, save = self.get_tmp_reg([prev_loc])
            assert not save
            self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond, helper=temp)
        else:
            assert 0, 'unsupported case'

    def _mov_stack_to_loc(self, prev_loc, loc,

Large files are truncated; this listing ends mid-file.