
/rpython/jit/backend/arm/assembler.py

https://bitbucket.org/pypy/pypy/
from __future__ import with_statement
import os
from rpython.jit.backend.arm import conditions as c, registers as r
from rpython.jit.backend.arm import shift
from rpython.jit.backend.arm.arch import (WORD, DOUBLE_WORD,
                                          JITFRAME_FIXED_SIZE)
from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder
from rpython.jit.backend.arm.locations import imm, StackLocation, get_fp_offset
from rpython.jit.backend.arm.helper.regalloc import VMEM_imm_size
from rpython.jit.backend.arm.opassembler import ResOpAssembler
from rpython.jit.backend.arm.regalloc import (Regalloc,
    CoreRegisterManager, check_imm_arg, VFPRegisterManager,
    operations as regalloc_operations)
from rpython.jit.backend.llsupport import jitframe, rewrite
from rpython.jit.backend.llsupport.assembler import BaseAssembler
from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.model import CompiledLoopToken
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import AbstractFailDescr, FLOAT, INT, VOID
from rpython.jit.metainterp.resoperation import rop
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib.jit import AsmInfo
from rpython.rlib.objectmodel import we_are_translated, specialize, compute_unique_id
from rpython.rlib.rarithmetic import r_uint
from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.jit.backend.arm import callbuilder
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rlib.rjitlog import rjitlog as jl


class AssemblerARM(ResOpAssembler):

    debug = False
    DEBUG_FRAME_DEPTH = False

    def __init__(self, cpu, translate_support_code=False):
        ResOpAssembler.__init__(self, cpu, translate_support_code)
        self.setup_failure_recovery()
        self.mc = None
        self.pending_guards = None
        self._exit_code_addr = 0
        self.current_clt = None
        self.malloc_slowpath = 0
        self.wb_slowpath = [0, 0, 0, 0, 0]
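        # five write-barrier slow paths: slots 0..3 are indexed by
        # 'withcards + 2 * withfloats', slot 4 is the special for_frame
        # variant (see _build_wb_slowpath() below)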
        self._regalloc = None
        self.datablockwrapper = None
        self.propagate_exception_path = 0
        self.stack_check_slowpath = 0
        self._debug = False
        self.loop_run_counters = []
        self.gcrootmap_retaddr_forced = 0

    def setup_once(self):
        BaseAssembler.setup_once(self)

    def setup(self, looptoken):
        BaseAssembler.setup(self, looptoken)
        assert self.memcpy_addr != 0, 'setup_once() not called?'
        if we_are_translated():
            self.debug = False
        self.current_clt = looptoken.compiled_loop_token
        self.mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self.pending_guards = []
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.mc.datablockwrapper = self.datablockwrapper
        self.target_tokens_currently_compiling = {}
        self.frame_depth_to_patch = []

    def teardown(self):
        self.current_clt = None
        self._regalloc = None
        self.mc = None
        self.pending_guards = None

    def setup_failure_recovery(self):
        self.failure_recovery_code = [0, 0, 0, 0]
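        # four stubs, indexed by 'exc + 2 * withfloats'; they are
        # generated and stored here by _build_failure_recovery() below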

    def _build_propagate_exception_path(self):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._store_and_reset_exception(mc, r.r0)
        ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
        # make sure ofs fits into an instruction immediate
        assert check_imm_arg(ofs)
        self.store_reg(mc, r.r0, r.fp, ofs)
        propagate_exception_descr = rffi.cast(lltype.Signed,
                  cast_instance_to_gcref(self.cpu.propagate_exception_descr))
        # put propagate_exception_descr into frame
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        # make sure ofs fits into an instruction immediate
        assert check_imm_arg(ofs)
        mc.gen_load_int(r.r0.value, propagate_exception_descr)
        self.store_reg(mc, r.r0, r.fp, ofs)
        mc.MOV_rr(r.r0.value, r.fp.value)
        self.gen_func_epilog(mc)
        rawstart = mc.materialize(self.cpu, [])
        self.propagate_exception_path = rawstart

    def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None,
                                   on_frame=False):
        """ Reset the exception.  If excvalloc is None, then store it on the
        frame in jf_guard_exc
        """
        assert excvalloc is not r.ip
        assert exctploc is not r.ip
        tmpreg = r.lr
        mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        if excvalloc is not None:   # store
            assert excvalloc.is_core_reg()
            self.load_reg(mc, excvalloc, r.ip)
        if on_frame:
            # store exc_value in JITFRAME
            ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            assert check_imm_arg(ofs)
            #
            self.load_reg(mc, r.ip, r.ip, helper=tmpreg)
            #
            self.store_reg(mc, r.ip, r.fp, ofs, helper=tmpreg)
        if exctploc is not None:
            # store pos_exception in exctploc
            assert exctploc.is_core_reg()
            mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
            self.load_reg(mc, exctploc, r.ip, helper=tmpreg)
        if on_frame or exctploc is not None:
            mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        # reset exception
        mc.gen_load_int(tmpreg.value, 0)
        self.store_reg(mc, tmpreg, r.ip, 0)
        mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
        self.store_reg(mc, tmpreg, r.ip, 0)

    def _restore_exception(self, mc, excvalloc, exctploc):
        assert excvalloc is not r.ip
        assert exctploc is not r.ip
        tmpreg = r.lr   # use lr as a second temporary reg
        mc.gen_load_int(r.ip.value, self.cpu.pos_exc_value())
        if excvalloc is not None:
            assert excvalloc.is_core_reg()
            self.store_reg(mc, excvalloc, r.ip)
        else:
            assert exctploc is not r.fp
            # load exc_value from JITFRAME and put it in pos_exc_value
            ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            self.load_reg(mc, tmpreg, r.fp, ofs)
            self.store_reg(mc, tmpreg, r.ip)
            # reset exc_value in the JITFRAME
            mc.gen_load_int(tmpreg.value, 0)
            self.store_reg(mc, tmpreg, r.fp, ofs)
        # restore pos_exception from exctploc register
        mc.gen_load_int(r.ip.value, self.cpu.pos_exception())
        self.store_reg(mc, exctploc, r.ip)

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  retaddr of caller    |   <-- aligned to a multiple of 16
        #    |  saved argument regs  |
        #    |  my own retaddr       |   <-- sp
        #    +-----------------------+
        #
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        # save argument registers and return address
        mc.PUSH([reg.value for reg in r.argument_regs] + [r.ip.value, r.lr.value])
        # stack is aligned here
        # Pass current stack pointer as argument to the call
        mc.MOV_rr(r.r0.value, r.sp.value)
        #
        mc.BL(slowpathaddr)
        # check for an exception
        mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
        mc.LDR_ri(r.r0.value, r.r0.value)
        mc.TST_rr(r.r0.value, r.r0.value)
        #
        # restore registers and return
        # We check for c.EQ here, meaning all bits zero in this case
        mc.POP([reg.value for reg in r.argument_regs] + [r.ip.value, r.pc.value], cond=c.EQ)
        # restore sp
        mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 2) * WORD)
        mc.B(self.propagate_exception_path)
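        # (if the flags were not EQ, the conditional POP above did nothing:
        # the saved registers are discarded by adjusting sp and we tail-jump
        # into the propagate-exception stub instead of returning)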
        #
        rawstart = mc.materialize(self.cpu, [])
        self.stack_check_slowpath = rawstart

    def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all vfp registers.  It takes a single argument which is in r0.
        # It must keep stack alignment accordingly.
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        exc0 = exc1 = None
        mc.PUSH([r.ip.value, r.lr.value])   # push two words to keep alignment
        if not for_frame:
            self._push_all_regs_to_jitframe(mc, [], withfloats, callee_only=True)
        else:
            # NOTE: don't save registers on the jitframe here!  It might
            # override already-saved values that will be restored
            # later...
            #
            # we're possibly called from the slowpath of malloc
            # save the caller saved registers
            # assuming we do not collect here
            exc0, exc1 = r.r4, r.r5
            mc.PUSH([gpr.value for gpr in r.caller_resp] + [exc0.value, exc1.value])
            mc.VPUSH([vfpr.value for vfpr in r.caller_vfp_resp])
            self._store_and_reset_exception(mc, exc0, exc1)
        mc.BL(func)
        #
        if not for_frame:
            self._pop_all_regs_from_jitframe(mc, [], withfloats, callee_only=True)
        else:
            self._restore_exception(mc, exc0, exc1)
            mc.VPOP([vfpr.value for vfpr in r.caller_vfp_resp])
            assert exc0 is not None
            assert exc1 is not None
            mc.POP([gpr.value for gpr in r.caller_resp] +
                   [exc0.value, exc1.value])
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            mc.LDRB_ri(r.ip.value, r.r0.value,
                       imm=descr.jit_wb_if_flag_byteofs)
            mc.TST_ri(r.ip.value, imm=0x80)
        #
        mc.POP([r.ip.value, r.pc.value])
        #
        rawstart = mc.materialize(self.cpu, [])
        if for_frame:
            self.wb_slowpath[4] = rawstart
        else:
            self.wb_slowpath[withcards + 2 * withfloats] = rawstart

    def _build_cond_call_slowpath(self, supports_floats, callee_only):
        """ This builds a general call slowpath, for whatever call happens to
        come.
        """
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats, callee_only)
        ## args are in their respective positions
        mc.PUSH([r.ip.value, r.lr.value])
        mc.BLX(r.r4.value)
        self._reload_frame_if_necessary(mc)
        self._pop_all_regs_from_jitframe(mc, [], supports_floats,
                                         callee_only)
        # return
        mc.POP([r.ip.value, r.pc.value])
        return mc.materialize(self.cpu, [])

    def _build_malloc_slowpath(self, kind):
        """ While arriving on the slowpath, we have a gcmap at stack
        position 0.  The arguments are passed in r0 and r1, as follows:

        kind == 'fixed': nursery_head in r0 and the size in r1 - r0.

        kind == 'str/unicode': length of the string to allocate in r0.

        kind == 'var': length to allocate in r1, tid in r0,
        and itemsize on the stack.

        This function must preserve all registers apart from r0 and r1.
        """
        assert kind in ['fixed', 'str', 'unicode', 'var']
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        #
        self._push_all_regs_to_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
        #
        if kind == 'fixed':
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        elif kind == 'str':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
        elif kind == 'unicode':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
        else:
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
        if kind == 'fixed':
            # stack layout: [gcmap]
            # At this point we know that the values we need to compute the size
            # are stored in r0 and r1.
            mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)   # compute the size we want
            if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
                mc.MOV_rr(r.r1.value, r.fp.value)
        elif kind == 'str' or kind == 'unicode':
            # stack layout: [gcmap]
            mc.MOV_rr(r.r0.value, r.r1.value)
        else:  # var
            # stack layout: [gcmap][itemsize]...
            # tid is in r0
            # length is in r1
            mc.MOV_rr(r.r2.value, r.r1.value)
            mc.MOV_rr(r.r1.value, r.r0.value)
            mc.POP([r.r0.value])   # load itemsize
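            # after this shuffle the slow-path call sees r0=itemsize,
            # r1=tid, r2=length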
        # store the gc pattern
        mc.POP([r.r4.value])
        ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        self.store_reg(mc, r.r4, r.fp, ofs)
        #
        # We need to push two registers here because we are going to make
        # a call and therefore the stack needs to be 8-byte aligned
        mc.PUSH([r.ip.value, r.lr.value])
        #
        mc.BL(addr)
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.
        mc.CMP_ri(r.r0.value, 0)
        mc.B(self.propagate_exception_path, c=c.EQ)
        #
        self._reload_frame_if_necessary(mc)
        self._pop_all_regs_from_jitframe(mc, [r.r0, r.r1], self.cpu.supports_floats)
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.gen_load_int(r.r1.value, nursery_free_adr)
        mc.LDR_ri(r.r1.value, r.r1.value)
        # clear the gc pattern
        mc.gen_load_int(r.ip.value, 0)
        self.store_reg(mc, r.ip, r.fp, ofs)
        # return
        mc.POP([r.ip.value, r.pc.value])
        #
        rawstart = mc.materialize(self.cpu, [])
        return rawstart

    def _reload_frame_if_necessary(self, mc):
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            mc.gen_load_int(r.ip.value, rst)
            self.load_reg(mc, r.ip, r.ip)
            self.load_reg(mc, r.fp, r.ip, ofs=-WORD)
        wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
        if gcrootmap and wbdescr:
            # frame never uses card marking, so we enforce this is not
            # an array
            self._write_barrier_fastpath(mc, wbdescr, [r.fp], array=False,
                                         is_frame=True)

    def propagate_memoryerror_if_reg_is_null(self, reg_loc):
        # see ../x86/assembler.py:genop_discard_check_memory_error()
        self.mc.CMP_ri(reg_loc.value, 0)
        self.mc.B(self.propagate_exception_path, c=c.EQ)

    def _push_all_regs_to_jitframe(self, mc, ignored_regs, withfloats,
                                   callee_only=False):
        # Push general purpose registers
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if callee_only:
            regs = CoreRegisterManager.save_around_call_regs
        else:
            regs = CoreRegisterManager.all_regs
        # XXX add special case if ignored_regs are a block at the start of regs
        if not ignored_regs:    # we want to push a contiguous block of regs
            assert check_imm_arg(base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
            mc.STM(r.ip.value, [reg.value for reg in regs])
        else:
            for reg in ignored_regs:
                assert not reg.is_vfp_reg()   # sanity check
            # we can have holes in the list of regs
            for i, gpr in enumerate(regs):
                if gpr in ignored_regs:
                    continue
                self.store_reg(mc, gpr, r.fp, base_ofs + i * WORD)
        if withfloats:
            # Push VFP regs
            regs = VFPRegisterManager.all_regs
            ofs = len(CoreRegisterManager.all_regs) * WORD
            assert check_imm_arg(ofs+base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs+base_ofs)
            mc.VSTM(r.ip.value, [vfpr.value for vfpr in regs])

    def _pop_all_regs_from_jitframe(self, mc, ignored_regs, withfloats,
                                    callee_only=False):
        # Pop general purpose registers
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if callee_only:
            regs = CoreRegisterManager.save_around_call_regs
        else:
            regs = CoreRegisterManager.all_regs
        # XXX add special case if ignored_regs are a block at the start of regs
        if not ignored_regs:    # we want to pop a contiguous block of regs
            assert check_imm_arg(base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, base_ofs)
            mc.LDM(r.ip.value, [reg.value for reg in regs])
        else:
            for reg in ignored_regs:
                assert not reg.is_vfp_reg()   # sanity check
            # we can have holes in the list of regs
            for i, gpr in enumerate(regs):
                if gpr in ignored_regs:
                    continue
                ofs = i * WORD + base_ofs
                self.load_reg(mc, gpr, r.fp, ofs)
        if withfloats:
            # Pop VFP regs
            regs = VFPRegisterManager.all_regs
            ofs = len(CoreRegisterManager.all_regs) * WORD
            assert check_imm_arg(ofs+base_ofs)
            mc.ADD_ri(r.ip.value, r.fp.value, imm=ofs+base_ofs)
            mc.VLDM(r.ip.value, [vfpr.value for vfpr in regs])

    def _build_failure_recovery(self, exc, withfloats=False):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._push_all_regs_to_jitframe(mc, [], withfloats)

        if exc:
            # We might have an exception pending.  Load it into r4
            # (this is a register saved across calls)
            mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
            mc.LDR_ri(r.r4.value, r.r5.value)
            # clear the exc flags
            mc.gen_load_int(r.r6.value, 0)
            mc.STR_ri(r.r6.value, r.r5.value)   # pos_exc_value is still in r5
            mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
            mc.STR_ri(r.r6.value, r.r5.value)
            # save r4 into 'jf_guard_exc'
            offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
            assert check_imm_arg(abs(offset))
            mc.STR_ri(r.r4.value, r.fp.value, imm=offset)

        # now we return from the complete frame, which starts from
        # _call_header_with_stack_check().  The LEA in _call_footer below
        # throws away most of the frame, including all the PUSHes that we
        # did just above.
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        assert check_imm_arg(abs(ofs))
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        assert check_imm_arg(abs(ofs2))
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        # store the gcmap
        mc.POP([r.ip.value])
        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs2)
        # store the descr
        mc.POP([r.ip.value])
        mc.STR_ri(r.ip.value, r.fp.value, imm=ofs)
        # set return value
        assert check_imm_arg(base_ofs)
        mc.MOV_rr(r.r0.value, r.fp.value)
        #
        self.gen_func_epilog(mc)
        rawstart = mc.materialize(self.cpu, [])
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart

    def generate_quick_failure(self, guardtok):
        startpos = self.mc.currpos()
        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
        self.load_from_gc_table(r.ip.value, faildescrindex)
        self.regalloc_push(r.ip)
        self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
        self.mc.BL(target)
        return startpos

    def gen_func_epilog(self, mc=None, cond=c.AL):
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if mc is None:
            mc = self.mc
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_footer_shadowstack(gcrootmap, mc)
        if self.cpu.supports_floats:
            mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                    cond=cond)
        # pop all callee saved registers.  This pops 'pc' last.
        # It also pops the threadlocal_addr back into 'r1', but it
        # is not needed any more and will be discarded.
        mc.POP([reg.value for reg in r.callee_restored_registers] +
               [r.r1.value], cond=cond)
        mc.BKPT()

    def gen_func_prolog(self):
        stack_size = WORD   # alignment
        stack_size += len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD
        # push all callee saved registers including lr; and push r1 as
        # well, which contains the threadlocal_addr argument.  Note that
        # we're pushing a total of 10 words, which keeps the stack aligned.
        self.mc.PUSH([reg.value for reg in r.callee_saved_registers] +
                     [r.r1.value])
        self.saved_threadlocal_addr = 0   # at offset 0 from location 'sp'
        if self.cpu.supports_floats:
            self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
            self.saved_threadlocal_addr += (
                len(r.callee_saved_vfp_registers) * 2 * WORD)
        assert stack_size % 8 == 0   # ensure we keep alignment
        # set fp to point to the JITFRAME
        self.mc.MOV_rr(r.fp.value, r.r0.value)
        #
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_shadowstack_header(gcrootmap)

    def gen_shadowstack_header(self, gcrootmap):
        # lr = shadow stack top addr
        # ip = *lr
        rst = gcrootmap.get_root_stack_top_addr()
        self.mc.gen_load_int(r.lr.value, rst)
        self.load_reg(self.mc, r.ip, r.lr)
        # *ip = r.fp
        self.store_reg(self.mc, r.fp, r.ip)
        #
        self.mc.ADD_ri(r.ip.value, r.ip.value, WORD)
        # *lr = ip + WORD
        self.store_reg(self.mc, r.ip, r.lr)

    def gen_footer_shadowstack(self, gcrootmap, mc):
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(r.ip.value, rst)
        self.load_reg(mc, r.r4, r.ip)
        mc.SUB_ri(r.r4.value, r.r4.value, WORD)
        self.store_reg(mc, r.r4, r.ip)

    def _dump(self, ops, type='loop'):
        debug_start('jit-backend-ops')
        debug_print(type)
        for op in ops:
            debug_print(op.repr())
        debug_stop('jit-backend-ops')

    def _call_header(self):
        # there is the gc table before this point
        self.gen_func_prolog()

    def _call_header_with_stack_check(self):
        self._call_header()
        if self.stack_check_slowpath == 0:
            pass        # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
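            # the code below computes (stack end - sp), the amount of stack
            # used so far, and enters the slow path when it exceeds the
            # allowed length (unsigned comparison)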
            # load stack end
            self.mc.gen_load_int(r.ip.value, endaddr)           # load ip, [end]
            self.mc.LDR_ri(r.ip.value, r.ip.value)              # LDR ip, ip
            # load stack length
            self.mc.gen_load_int(r.lr.value, lengthaddr)        # load lr, [length]
            self.mc.LDR_ri(r.lr.value, r.lr.value)              # ldr lr, *length
            # calculate ofs
            self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value)  # SUB ip, current
            # compare ofs against the allowed length
            self.mc.CMP_rr(r.ip.value, r.lr.value)              # CMP ip, lr
            self.mc.BL(self.stack_check_slowpath, c=c.HI)       # call if ip > lr

    # cpu interface
    def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
                      operations, looptoken, log):
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(looptoken)
        #self.codemap_builder.enter_portal_frame(jd_id, unique_id,
        #                                        self.mc.get_relative_pos())
        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
        clt.frame_info.clear()   # for now

        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        regalloc = Regalloc(assembler=self)
        allgcrefs = []
        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
                                           allgcrefs)
        self.reserve_gcref_table(allgcrefs)
        functionpos = self.mc.get_relative_pos()

        self._call_header_with_stack_check()
        self._check_frame_depth_debug(self.mc)

        loop_head = self.mc.get_relative_pos()
        looptoken._ll_loop_code = loop_head
        #
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
        self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()

        self.write_pending_failure_recoveries()

        full_size = self.mc.get_relative_pos()
        rawstart = self.materialize_loop(looptoken)
        looptoken._ll_function_addr = rawstart + functionpos

        self.patch_gcref_table(looptoken, rawstart)
        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        if log and not we_are_translated():
            self.mc._dump_trace(rawstart, 'loop.asm')

        ops_offset = self.mc.ops_offset

        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset=ops_offset)

            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_loop(inputargs, operations, 0,
                                           "rewritten", name=loopname,
                                           ops_offset=ops_offset)

        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
            looptoken.number, loopname,
            r_uint(rawstart + loop_head),
            r_uint(rawstart + size_excluding_failure_stuff),
            r_uint(rawstart + functionpos)))
        debug_print("       gc table: 0x%x" % r_uint(rawstart))
        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + loop_head))
        debug_print("       failures: 0x%x" % r_uint(rawstart +
                                                     size_excluding_failure_stuff))
        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, rawstart + loop_head,
                       size_excluding_failure_stuff - loop_head)

    def _assemble(self, regalloc, inputargs, operations):
        self.guard_success_cc = c.cond_none
        regalloc.compute_hint_frame_locations(operations)
        self._walk_operations(inputargs, operations, regalloc)
        assert self.guard_success_cc == c.cond_none
        frame_depth = regalloc.get_final_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth
            target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE
            frame_depth = max(frame_depth, target_frame_depth)
        return frame_depth

    def assemble_bridge(self, logger, faildescr, inputargs, operations,
                        original_loop_token, log):
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        self.setup(original_loop_token)
        #self.codemap.inherit_code_from_position(faildescr.adr_jump_offset)
        descr_number = compute_unique_id(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        assert isinstance(faildescr, AbstractFailDescr)

        arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)

        regalloc = Regalloc(assembler=self)
        allgcrefs = []
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             allgcrefs,
                                             self.current_clt.frame_info)
        self.reserve_gcref_table(allgcrefs)
        startpos = self.mc.get_relative_pos()

        self._check_frame_depth(self.mc, regalloc.get_gcmap())

        bridgestartpos = self.mc.get_relative_pos()
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)

        codeendpos = self.mc.get_relative_pos()

        self.write_pending_failure_recoveries()

        fullsize = self.mc.get_relative_pos()
        rawstart = self.materialize_loop(original_loop_token)

        self.patch_gcref_table(original_loop_token, rawstart)
        self.process_pending_guards(rawstart)

        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
                    (r_uint(descr_number), r_uint(rawstart + startpos),
                     r_uint(rawstart + codeendpos)))
        debug_print("       gc table: 0x%x" % r_uint(rawstart))
        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
        debug_stop("jit-backend-addr")

        # patch the jump from original guard
        self.patch_trace(faildescr, original_loop_token,
                         rawstart + startpos, regalloc)

        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                rawstart)
        if not we_are_translated():
            if log:
                self.mc._dump_trace(rawstart, 'bridge.asm')

        ops_offset = self.mc.ops_offset

        frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                          frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        self.fixup_target_tokens(rawstart)
        self.update_frame_depth(frame_depth)

        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset)
            # log that the already written bridge is stitched to a descr!
            logger.log_patch_guard(descr_number, rawstart)

            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
                                             faildescr, ops_offset=ops_offset)

        self.teardown()

        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def reserve_gcref_table(self, allgcrefs):
        gcref_table_size = len(allgcrefs) * WORD
        # align to a multiple of 16 and reserve space at the beginning
        # of the machine code for the gc table.  This lets us write
        # machine code with relative addressing (see load_from_gc_table())
        gcref_table_size = (gcref_table_size + 15) & ~15
        mc = self.mc
        assert mc.get_relative_pos() == 0
        for i in range(gcref_table_size):
            mc.writechar('\x00')
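        # these zero bytes are overwritten with the actual gcref values by
        # patch_gcref_table(), once the final address of the code is known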
        self.setup_gcrefs_list(allgcrefs)

    def patch_gcref_table(self, looptoken, rawstart):
        # the gc table is at the start of the machine code.  Fill it now
        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
                                                        self._allgcrefs)
        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
        gcreftracers.append(tracer)   # keepalive
        self.teardown_gcrefs_list()

    def load_from_gc_table(self, regnum, index):
        """emits either:
               LDR Rt, [PC, #offset]    if -4095 <= offset
        or:
               gen_load_int(Rt, offset)
               LDR Rt, [PC, Rt]         for larger offsets
        """
        mc = self.mc
        address_in_buffer = index * WORD   # at the start of the buffer
        offset = address_in_buffer - (mc.get_relative_pos() + 8)   # negative
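        # (the '+ 8' accounts for the ARM pipeline: reading PC from an
        # instruction yields that instruction's own address plus 8)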
        if offset >= -4095:
            mc.LDR_ri(regnum, r.pc.value, offset)
        else:
            # The offset we're loading is negative: right now,
            # gen_load_int() will always use exactly
            # get_max_size_of_gen_load_int() instructions.  No point
            # in optimizing in case we get less.  Just in case though,
            # we check and pad with nops.
            extra_bytes = mc.get_max_size_of_gen_load_int() * 4
            offset -= extra_bytes
            start = mc.get_relative_pos()
            mc.gen_load_int(regnum, offset)
            missing = start + extra_bytes - mc.get_relative_pos()
            while missing > 0:
                mc.NOP()
                missing = start + extra_bytes - mc.get_relative_pos()
            assert missing == 0
            mc.LDR_rr(regnum, r.pc.value, regnum)

    def new_stack_loc(self, i, tp):
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        return StackLocation(i, get_fp_offset(base_ofs, i), tp)

    def check_frame_before_jump(self, target_token):
        if target_token in self.target_tokens_currently_compiling:
            return
        if target_token._arm_clt is self.current_clt:
            return
        # We can have a frame coming from god knows where that's
        # passed to a jump to another loop.  Make sure it has the
        # correct depth
        expected_size = target_token._arm_clt.frame_info.jfi_frame_depth
        self._check_frame_depth(self.mc, self._regalloc.get_gcmap(),
                                expected_size=expected_size)

    def _patch_frame_depth(self, adr, allocated_depth):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        mc.gen_load_int(r.lr.value, allocated_depth)
        mc.copy_to_raw_memory(adr)

    def _check_frame_depth(self, mc, gcmap, expected_size=-1):
        """ check if the frame is of enough depth to follow this bridge.
        Otherwise reallocate the frame in a helper.  There are other
        potential solutions to that, but this one does not sound too bad.
        """
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
        stack_check_cmp_ofs = mc.currpos()
        if expected_size == -1:
            for _ in range(mc.get_max_size_of_gen_load_int()):
                mc.NOP()
        else:
            mc.gen_load_int(r.lr.value, expected_size)
        mc.CMP_rr(r.ip.value, r.lr.value)

        jg_location = mc.currpos()
        mc.BKPT()

        # the size value is still stored in lr
        mc.PUSH([r.lr.value])

        self.push_gcmap(mc, gcmap, push=True)

        self.mc.BL(self._frame_realloc_slowpath)

        # patch jg_location above
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(mc, jg_location, WORD)
        pmc.B_offs(currpos, c.GE)

        self.frame_depth_to_patch.append(stack_check_cmp_ofs)

    def _check_frame_depth_debug(self, mc):
        """ double check the depth size.  It prints the error (and potentially
        segfaults later)
        """
        if not self.DEBUG_FRAME_DEPTH:
            return
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs)
        stack_check_cmp_ofs = mc.currpos()
        for _ in range(mc.get_max_size_of_gen_load_int()):
            mc.NOP()
        mc.CMP_rr(r.ip.value, r.lr.value)

        jg_location = mc.currpos()
        mc.BKPT()

        mc.MOV_rr(r.r0.value, r.fp.value)
        mc.MOV_ri(r.r1.value, r.lr.value)
        self.mc.BL(self.cpu.realloc_frame_crash)

        # patch the JG above
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(mc, jg_location, WORD)
        pmc.B_offs(currpos, c.GE)

        self.frame_depth_to_patch.append(stack_check_cmp_ofs)

    def build_frame_realloc_slowpath(self):
        # this code should do the following steps
        # a) store all registers in the jitframe
        # b) fish for the arguments passed by the caller
        # c) store the gcmap in the jitframe
        # d) call realloc_frame
        # e) set the fp to point to the new jitframe
        # f) store the address of the new jitframe in the shadowstack
        # g) set the gcmap field to 0 in the new jitframe
        # h) restore registers and return
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        self._push_all_regs_to_jitframe(mc, [], self.cpu.supports_floats)
        # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
        # and the expected_size pushed in _check_stack_frame
        # pop the values passed on the stack, gcmap -> r0, expected_size -> r1
        mc.POP([r.r0.value, r.r1.value])
        # store return address and keep the stack aligned
        mc.PUSH([r.ip.value, r.lr.value])

        # store the current gcmap(r0) in the jitframe
        gcmap_ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        assert check_imm_arg(abs(gcmap_ofs))
        mc.STR_ri(r.r0.value, r.fp.value, imm=gcmap_ofs)

        # set first arg, which is the old jitframe address
        mc.MOV_rr(r.r0.value, r.fp.value)

        # store a possibly present exception
        # we use a callee saved reg here as a tmp for the exc.
        self._store_and_reset_exception(mc, None, r.r4, on_frame=True)

        # call realloc_frame, it takes two arguments
        # arg0: the old jitframe
        # arg1: the new size
        #
        mc.BL(self.cpu.realloc_frame)

        # set fp to the new jitframe returned from the previous call
        mc.MOV_rr(r.fp.value, r.r0.value)

        # restore a possibly present exception
        self._restore_exception(mc, None, r.r4)

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self._load_shadowstack_top(mc, r.r5, gcrootmap)
            # store the new jitframe addr in the shadowstack
            mc.STR_ri(r.r0.value, r.r5.value, imm=-WORD)

        # reset the jf_gcmap field in the jitframe
        mc.gen_load_int(r.ip.value, 0)
        mc.STR_ri(r.ip.value, r.fp.value, imm=gcmap_ofs)

        # restore registers
        self._pop_all_regs_from_jitframe(mc, [], self.cpu.supports_floats)
        mc.POP([r.ip.value, r.pc.value])   # return
        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])

    def _load_shadowstack_top(self, mc, reg, gcrootmap):
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(reg.value, rst)
        self.load_reg(mc, reg, reg)
        return rst

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._ll_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def _patch_stackadjust(self, adr, allocated_depth):
        mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
        mc.gen_load_int(r.lr.value, allocated_depth)
        mc.copy_to_raw_memory(adr)

    def patch_stack_checks(self, framedepth, rawstart):
        for ofs in self.frame_depth_to_patch:
            self._patch_frame_depth(ofs + rawstart, framedepth)

    def target_arglocs(self, loop_token):
        return loop_token._arm_arglocs

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()   # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        size = self.mc.get_relative_pos()
        res = self.mc.materialize(self.cpu, allblocks,
                                  self.cpu.gc_ll_descr.gcrootmap)
        #self.cpu.codemap.register_codemap(
        #    self.codemap.get_final_bytecode(res, size))
        return res

    def update_frame_depth(self, frame_depth):
        baseofs = self.cpu.get_baseofs_of_frame_field()
        self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)

    def write_pending_failure_recoveries(self):
        for tok in self.pending_guards:
            # generate the exit stub and the encoded representation
            tok.pos_recovery_stub = self.generate_quick_failure(tok)

    def process_pending_guards(self, block_start):
        clt = self.current_clt
        for tok in self.pending_guards:
            descr = tok.faildescr
            assert isinstance(descr, AbstractFailDescr)
            failure_recovery_pos = block_start + tok.pos_recovery_stub
            descr.adr_jump_offset = failure_recovery_pos
            relative_offset = tok.pos_recovery_stub - tok.offset
            guard_pos = block_start + tok.offset
            if not tok.guard_not_invalidated():
                # patch the guard jump to the stub: overwrite the generated
                # NOP with a B_offs to the position of the stub
                mc = InstrBuilder(self.cpu.cpuinfo.arch_version)
                mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
                mc.copy_to_raw_memory(guard_pos)
            else:
                clt.invalidate_positions.append((guard_pos, relative_offset))

    def _walk_operations(self, inputargs, operations, regalloc):
        fcond = c.AL
        self._regalloc = regalloc
        regalloc.operations = operations
        while regalloc.position() < len(operations) - 1:
            regalloc.next_instruction()
            i = regalloc.position()
            op = operations[i]
            self.mc.mark_op(op)
            opnum = op.getopnum()
            if rop.has_no_side_effect(opnum) and op not in regalloc.longevity:
                regalloc.possibly_free_vars_for_op(op)
            elif not we_are_translated() and op.getopnum() == rop.FORCE_SPILL:
                regalloc.prepare_force_spill(op, fcond)
            else:
                arglocs = regalloc_operations[opnum](regalloc, op, fcond)
                if arglocs is not None:
                    fcond = asm_operations[opnum](self, op, arglocs,
                                                  regalloc, fcond)
                    assert fcond is not None
            if rop.is_guard(opnum):
                regalloc.possibly_free_vars(op.getfailargs())
            if op.type != 'v':
                regalloc.possibly_free_var(op)
            regalloc.possibly_free_vars_for_op(op)
            regalloc.free_temp_vars()
            regalloc._check_invariants()
        if not we_are_translated():
            self.mc.BKPT()
        self.mc.mark_op(None)   # end of the loop
        regalloc.operations = None

    def regalloc_emit_extra(self, op, arglocs, fcond, regalloc):
        # for calls to a function with a specifically-supported OS_xxx
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_extra_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond

    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
        b = InstrBuilder(self.cpu.cpuinfo.arch_version)
        patch_addr = faildescr.adr_jump_offset
        assert patch_addr != 0
        b.B(bridge_addr)
        b.copy_to_raw_memory(patch_addr)
        faildescr.adr_jump_offset = 0

    # regalloc support
    def load(self, loc, value):
        """load an immediate value into a register"""
        assert (loc.is_core_reg() and value.is_imm()
                or loc.is_vfp_reg() and value.is_imm_float())
        if value.is_imm():
            self.mc.gen_load_int(loc.value, value.getint())
        elif value.is_imm_float():
            self.mc.gen_load_int(r.ip.value, value.getint())
            self.mc.VLDR(loc.value, r.ip.value)

    def load_reg(self, mc, target, base, ofs=0, cond=c.AL, helper=r.ip):
        if target.is_vfp_reg():
            return self._load_vfp_reg(mc, target, base, ofs, cond, helper)
        elif target.is_core_reg():
            return self._load_core_reg(mc, target, base, ofs, cond, helper)

    def _load_vfp_reg(self, mc, target, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs, VMEM_imm_size):
            mc.VLDR(target.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.ADD_rr(helper.value, base.value, helper.value, cond=cond)
            mc.VLDR(target.value, helper.value, cond=cond)

    def _load_core_reg(self, mc, target, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(abs(ofs)):
            mc.LDR_ri(target.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.LDR_rr(target.value, base.value, helper.value, cond=cond)

    def store_reg(self, mc, source, base, ofs=0, cond=c.AL, helper=r.ip):
        if source.is_vfp_reg():
            return self._store_vfp_reg(mc, source, base, ofs, cond, helper)
        else:
            return self._store_core_reg(mc, source, base, ofs, cond, helper)

    def _store_vfp_reg(self, mc, source, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs, VMEM_imm_size):
            mc.VSTR(source.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.ADD_rr(helper.value, base.value, helper.value, cond=cond)
            mc.VSTR(source.value, helper.value, cond=cond)

    def _store_core_reg(self, mc, source, base, ofs, cond=c.AL, helper=r.ip):
        if check_imm_arg(ofs):
            mc.STR_ri(source.value, base.value, imm=ofs, cond=cond)
        else:
            mc.gen_load_int(helper.value, ofs, cond=cond)
            mc.STR_rr(source.value, base.value, helper.value, cond=cond)

    def get_tmp_reg(self, forbidden_regs=None):
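        # returns (reg, must_save): a scratch register not in
        # forbidden_regs, and a flag telling the caller whether it must
        # PUSH/POP that register around its use (True when neither ip
        # nor lr was free)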
        if forbidden_regs is None:
            return r.ip, False
        for x in [r.ip, r.lr]:
            if x not in forbidden_regs:
                return x, False
        # pick some reg that we need to save
        for x in r.all_regs:
            if x not in forbidden_regs:
                return x, True
        assert 0

    def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.type == FLOAT:
            raise AssertionError("invalid target for move from imm value")
        if loc.is_core_reg():
            new_loc = loc
        elif loc.is_stack() or loc.is_raw_sp():
            new_loc = r.lr
        else:
            raise AssertionError("invalid target for move from imm value")
        self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
        if loc.is_stack():
            self.regalloc_mov(new_loc, loc)
        elif loc.is_raw_sp():
            self.store_reg(self.mc, new_loc, r.sp, loc.value, cond=cond, helper=r.ip)

    def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_imm():
            raise AssertionError("mov reg to imm doesn't make sense")
        if loc.is_core_reg():
            self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack() and loc.type != FLOAT:
            # spill a core register
            temp, save = self.get_tmp_reg([prev_loc, loc])
            offset = loc.value
            is_imm = check_imm_arg(offset, size=0xFFF)
            if not is_imm and save:
                self.mc.PUSH([temp.value], cond=cond)
            self.store_reg(self.mc, prev_loc, r.fp, offset, helper=temp, cond=cond)
            if not is_imm and save:
                self.mc.POP([temp.value], cond=cond)
        elif loc.is_raw_sp() and loc.type != FLOAT:
            temp, save = self.get_tmp_reg([prev_loc])
            assert not save
            self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond, helper=temp)
        else:
            assert 0, 'unsupported case'

    def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
        helper = None
        offset = prev_loc.value
        tmp = None
        if loc.is_core_reg():
            assert prev_loc.type != FLOAT, 'trying to load from an \
                incompatible location into a core register'
            # unspill a core register
            is_imm = check_imm_arg(offset, size=0xFFF)
            helper, save = self.get_tmp_reg([loc])
            save_helper = not is_imm and save
        elif loc.is_vfp_reg():
            assert prev_loc.type == FLOAT, 'trying to load from an \
                incompatible location into a float register'
            # load spilled value into vfp reg
            is_imm = check_imm_arg(offset)
            helper, save = self.get_tmp_reg()
            save_helper = not is_imm and save
        elif loc.is_raw_sp():
            assert (loc.type == prev_loc.type == FLOAT
                    or (loc.type != FLOAT and prev_loc.type != FLOAT))
            tmp = loc
            if loc.is_float():
                loc = r.vfp_ip
            else:
                loc, save_helper = self.get_tmp_reg()
                assert not save_helper
            helper, save_helper = self.get_tmp_reg([loc])
            assert not save_helper
        else:
            assert 0, 'unsupported case'

        if save_helper:
            self.mc.PUSH([helper.value], cond=cond)
        self.load_reg(self.mc, loc, r.fp, offset, cond=cond, helper=helper)
        if save_helper:
            self.mc.POP([helper.value], cond=cond)

        if tmp and tmp.is_raw_sp():
            self.store_reg(self.mc, loc, r.sp, tmp.value, cond=cond, helper=helper)

    def _mov_imm_float_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_vfp_reg():
            helper, save_helper = self.get_tmp_reg([loc])
            if save_helper:
                self.mc.PUSH([helper.value], cond=cond)
            self.mc.gen_load_int(helper.value, prev_loc.getint(), cond=cond)
            self.load_reg(self.mc, loc, helper, 0, cond=cond)
            if save_helper:
                self.mc.POP([helper.value], cond=cond)
        elif loc.is_stack() and loc.type == FLOAT:
            self.regalloc_mov(prev_loc, r.vfp_ip, cond)
            self.regalloc_mov(r.vfp_ip, loc, cond)
        elif loc.is_raw_sp() and loc.type == FLOAT:
            self.regalloc_mov(prev_loc, r.vfp_ip, cond)
            self.regalloc_mov(r.vfp_ip, loc, cond)
        else:
            assert 0, 'unsupported case'

    def _mov_vfp_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_vfp_reg():
            self.mc.VMOV_cc(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack():
            assert loc.type == FLOAT, 'trying to store to an \
                incompatible location from a float register'
            # spill vfp register
            offset = loc.value
            is_imm = check_imm_arg(offset)
            self.store_reg(self.mc, prev_loc, r.fp, offset, cond=cond, helper=r.ip)
        elif loc.is_raw_sp():
            assert loc.type == FLOAT, 'trying to store to an \
                incompatible location from a float register'
            self.store_reg(self.mc, prev_loc, r.sp, loc.value, cond=cond)
        else:
            assert 0, 'unsupported case'

    def _mov_raw_sp_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_core_reg():
            # load a value from 'SP + n'
            assert prev_loc.value <= 0xFFF   # not too far
            self.load_reg(self.mc, loc, r.sp, prev_loc.value, cond=cond)
        else:
            assert 0, 'unsupported case'

    def regalloc_mov(self, prev_loc, loc, cond=c.AL):
        """Moves a value from a previous location to some other location"""
        if prev_loc.is_imm():
            return self._mov_imm_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_core_reg():
            self._mov_reg_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_stack():
            self._mov_stack_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_imm_float():
            self._mov_imm_float_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_vfp_reg():
            self._mov_vfp_reg_to_loc(prev_loc, loc, cond)
        elif prev_loc.is_raw_sp():
            self._mov_raw_sp_to_loc(prev_loc, loc, cond)
        else:
            assert 0, 'unsupported case'

    mov_loc_loc = regalloc_mov

    def mov_from_vfp_loc(self, vfp_loc, reg1, reg2, cond=c.AL):
        """Moves floating point values either as an immediate, in a vfp
        register or at a stack location to a pair of core registers"""
        assert reg1.value + 1 == reg2.value
        if vfp_loc.is_vfp_reg():
            self.mc.VMOV_rc(reg1.value, reg2.value, vfp_loc.value, cond=cond)
        elif vfp_loc.is_imm_float():
            helper, save_helper = self.get_tmp_reg([reg1, reg2])
            if save_helper:
                self.mc.PUSH([helper.value], cond=cond)
            self.mc.gen_load_int(helper.value, vfp_loc.getint(), cond=cond)
            # we need to load one word to loc and one to loc+1 which are
            # two 32-bit core registers
            self.mc.LDR_ri(reg1.value, helper.value, cond=cond)
            self.mc.LDR_ri(reg2.value, helper.value, imm=WORD, cond=cond)
            if save_helper:
                self.mc.POP([helper.value], cond=cond)
        elif vfp_loc.is_stack() and vfp_loc.type == FLOAT:
            # load spilled vfp value into two core registers
            offset = vfp_loc.value
            if not check_imm_arg(offset, size=0xFFF):
                helper, save_helper = self.get_tmp_reg([reg1, reg2])
                if save_helper:
                    self.mc.PUSH([helper.value], cond=cond)
                self.mc.gen_load_int(helper.value, offset, cond=cond)
                self.mc.LDR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
                self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
                self.mc.LDR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
                if save_helper:
                    self.mc.POP([helper.value], cond=cond)
            else:
                self.mc.LDR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
                self.mc.LDR_ri(reg2.value, r.fp.value,
                               imm=offset + WORD, cond=cond)
        else:
            assert 0, 'unsupported case'

    def mov_to_vfp_loc(self, reg1, reg2, vfp_loc, cond=c.AL):
        """Moves a floating point value from two consecutive core registers
        to a vfp location, either a vfp register or a stack location"""
        assert reg1.value + 1 == reg2.value
        if vfp_loc.is_vfp_reg():
            self.mc.VMOV_cr(vfp_loc.value, reg1.value, reg2.value, cond=cond)
        elif vfp_loc.is_stack():
            # move from two core registers to a float stack location
            offset = vfp_loc.value
            if not check_imm_arg(offset + WORD, size=0xFFF):
                helper, save_helper = self.get_tmp_reg([reg1, reg2])
                if save_helper:
                    self.mc.PUSH([helper.value], cond=cond)
                self.mc.gen_load_int(helper.value, offset, cond=cond)
                self.mc.STR_rr(reg1.value, r.fp.value, helper.value, cond=cond)
                self.mc.ADD_ri(helper.value, helper.value, imm=WORD, cond=cond)
                self.mc.STR_rr(reg2.value, r.fp.value, helper.value, cond=cond)
                if save_helper:
                    self.mc.POP([helper.value], cond=cond)
            else:
                self.mc.STR_ri(reg1.value, r.fp.value, imm=offset, cond=cond)
                self.mc.STR_ri(reg2.value, r.fp.value,
                               imm=offset + WORD, cond=cond)
        else:
            assert 0, 'unsupported case'

    def regalloc_push(self, loc, cond=c.AL):
        """Pushes the value stored in loc to the stack.  Can trash the
        current value of the IP register when pushing a stack loc"""
        if loc.is_stack():
            if loc.type != FLOAT:
                scratch_reg = r.ip
            else:
                scratch_reg = r.vfp_ip
            self.regalloc_mov(loc, scratch_reg, cond)
            self.regalloc_push(scratch_reg, cond)
        elif loc.is_core_reg():
            self.mc.PUSH([loc.value], cond=cond)
        elif loc.is_vfp_reg():
            self.mc.VPUSH([loc.value], cond=cond)
        elif loc.is_imm():
            self.regalloc_mov(loc, r.ip)
            self.mc.PUSH([r.ip.value], cond=cond)
        elif loc.is_imm_float():
            self.regalloc_mov(loc, r.vfp_ip)
            self.mc.VPUSH([r.vfp_ip.value], cond=cond)
        else:
            raise AssertionError('Trying to push an invalid location')

    def regalloc_pop(self, loc, cond=c.AL):
        """Pops the value on top of the stack to loc.  Can trash the
        current value of the IP register when popping to a stack loc"""
        if loc.is_stack():
            if loc.type != FLOAT:
                scratch_reg = r.ip
            else:
                scratch_reg = r.vfp_ip
            self.regalloc_pop(scratch_reg)
            self.regalloc_mov(scratch_reg, loc)
        elif loc.is_core_reg():
            self.mc.POP([loc.value], cond=cond)
        elif loc.is_vfp_reg():
            self.mc.VPOP([loc.value], cond=cond)
        else:
            raise AssertionError('Trying to pop to an invalid location')

    def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
        assert size & (WORD-1) == 0     # must be correctly aligned
        self.mc.gen_load_int(r.r0.value, nursery_free_adr)
        self.mc.LDR_ri(r.r0.value, r.r0.value)

        if check_imm_arg(size):
            self.mc.ADD_ri(r.r1.value, r.r0.value, size)
        else:
            self.mc.gen_load_int(r.r1.value, size)
            self.mc.ADD_rr(r.r1.value, r.r0.value, r.r1.value)

        self.mc.gen_load_int(r.ip.value, nursery_top_adr)
        self.mc.LDR_ri(r.ip.value, r.ip.value)

        self.mc.CMP_rr(r.r1.value, r.ip.value)

        # We load into r0 the address stored at nursery_free_adr.  We
        # calculate the new value for nursery_free_adr and store it in r1.
        # Then we load the address stored in nursery_top_adr into IP.  If
        # the value in r1 is (unsigned) bigger than the one in ip we
        # conditionally call malloc_slowpath, which returns the new value
        # of nursery_free_adr in r1 and the address of the new object in r0.
        self.push_gcmap(self.mc, gcmap, push=True, cond=c.HI)
        self.mc.BL(self.malloc_slowpath, c=c.HI)

        self.mc.gen_load_int(r.ip.value, nursery_free_adr)
        self.mc.STR_ri(r.r1.value, r.ip.value)
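        # schematically, the fast path emitted above is (for an
        # immediate-encodable size):
        #     r0 = *nursery_free_adr        ; current nursery head
        #     r1 = r0 + size                ; new nursery head
        #     ip = *nursery_top_adr
        #     CMP r1, ip
        #     (if HI) push gcmap; BL malloc_slowpath
        #     *nursery_free_adr = r1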
  1245. def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
  1246. sizeloc, gcmap):
  1247. if sizeloc is r.r0:
  1248. self.mc.MOV_rr(r.r1.value, r.r0.value)
  1249. sizeloc = r.r1
        self.mc.gen_load_int(r.r0.value, nursery_free_adr)
        self.mc.LDR_ri(r.r0.value, r.r0.value)
        #
        self.mc.ADD_rr(r.r1.value, r.r0.value, sizeloc.value)
        #
        self.mc.gen_load_int(r.ip.value, nursery_top_adr)
        self.mc.LDR_ri(r.ip.value, r.ip.value)
        self.mc.CMP_rr(r.r1.value, r.ip.value)
        #
        self.push_gcmap(self.mc, gcmap, push=True, cond=c.HI)
        self.mc.BL(self.malloc_slowpath, c=c.HI)
        self.mc.gen_load_int(r.ip.value, nursery_free_adr)
        self.mc.STR_ri(r.r1.value, r.ip.value)

    def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
                            lengthloc, itemsize, maxlength, gcmap,
                            arraydescr):
        from rpython.jit.backend.llsupport.descr import ArrayDescr
        assert isinstance(arraydescr, ArrayDescr)
        # lengthloc is the length of the array, which we must not modify!
        assert lengthloc is not r.r0 and lengthloc is not r.r1
        if lengthloc.is_core_reg():
            varsizeloc = lengthloc
        else:
            assert lengthloc.is_stack()
            self.regalloc_mov(lengthloc, r.r1)
            varsizeloc = r.r1
        #
        if check_imm_arg(maxlength):
            self.mc.CMP_ri(varsizeloc.value, maxlength)
        else:
            self.mc.gen_load_int(r.ip.value, maxlength)
            self.mc.CMP_rr(varsizeloc.value, r.ip.value)
        jmp_adr0 = self.mc.currpos()  # jump to (large)
        self.mc.BKPT()
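        # The BKPT is a one-instruction placeholder: once the target of the
        # forward jump is known, it is overwritten with a real branch via
        # OverwritingBuilder (see the pmc.B_offs calls below).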
        #
        self.mc.gen_load_int(r.r0.value, nursery_free_adr)
        self.mc.LDR_ri(r.r0.value, r.r0.value)
        if valid_addressing_size(itemsize):
            shiftsize = get_scale(itemsize)
        else:
            shiftsize = self._mul_const_scaled(self.mc, r.lr, varsizeloc,
                                               itemsize)
            varsizeloc = r.lr
        # now varsizeloc is a register != r0.  The size of the variable
        # part of the array is (varsizeloc << shiftsize).
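        # For example (illustrative): itemsize == 12 makes _mul_const_scaled
        # emit a multiply-by-3 into r.lr and return shiftsize == 2, so
        # (varsizeloc << 2) == length * 12.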
        assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
        constsize = arraydescr.basesize + self.gc_size_of_header
        force_realignment = (itemsize % WORD) != 0
        if force_realignment:
            constsize += WORD - 1
        self.mc.gen_load_int(r.ip.value, constsize)
        # constsize + (varsizeloc << shiftsize)
        self.mc.ADD_rr(r.r1.value, r.ip.value, varsizeloc.value,
                       imm=shiftsize, shifttype=shift.LSL)
        self.mc.ADD_rr(r.r1.value, r.r1.value, r.r0.value)
        if force_realignment:
            self.mc.MVN_ri(r.ip.value, imm=(WORD - 1))
            self.mc.AND_rr(r.r1.value, r.r1.value, r.ip.value)
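        # MVN loads ~(WORD - 1) into ip, so the AND rounds r1 down to a
        # multiple of WORD; together with the WORD - 1 added to constsize
        # above this effectively rounds the total size up.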
        # now r1 contains the current nursery_free plus the total size in
        # bytes, rounded up to a multiple of WORD (i.e. the candidate new
        # value of nursery_free)
        #
        self.mc.gen_load_int(r.ip.value, nursery_top_adr)
        self.mc.LDR_ri(r.ip.value, r.ip.value)
        self.mc.CMP_rr(r.r1.value, r.ip.value)
        jmp_adr1 = self.mc.currpos()  # jump to (after-call)
        self.mc.BKPT()
        #
        # (large)
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, jmp_adr0, WORD)
        pmc.B_offs(currpos, c.GT)
        #
        # save the gcmap
        self.push_gcmap(self.mc, gcmap, push=True)
        #
        if kind == rewrite.FLAG_ARRAY:
            self.mc.gen_load_int(r.r0.value, arraydescr.tid)
            self.regalloc_mov(lengthloc, r.r1)
            self.regalloc_push(imm(itemsize))
            addr = self.malloc_slowpath_varsize
        else:
            if kind == rewrite.FLAG_STR:
                addr = self.malloc_slowpath_str
            else:
                assert kind == rewrite.FLAG_UNICODE
                addr = self.malloc_slowpath_unicode
            self.regalloc_mov(lengthloc, r.r1)
        self.mc.BL(addr)
        #
        jmp_location = self.mc.currpos()  # jump to (done)
        self.mc.BKPT()
        # (after-call)
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, jmp_adr1, WORD)
        pmc.B_offs(currpos, c.LS)
        #
        # write down the tid, but only on the fast path: if we went through
        # the CALL above, the slow path already set the tid and we branch
        # over these stores
        self.mc.gen_load_int(r.ip.value, arraydescr.tid)
        self.mc.STR_ri(r.ip.value, r.r0.value)
        # likewise, updating nursery_free is not needed if we did the CALL
        self.mc.gen_load_int(r.ip.value, nursery_free_adr)
        self.mc.STR_ri(r.r1.value, r.ip.value)
        # (done)
        # skip instructions after call
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, jmp_location, WORD)
        pmc.B_offs(currpos)

    def push_gcmap(self, mc, gcmap, push=False, store=False, cond=c.AL):
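        # Two modes: push=True pushes the gcmap pointer onto the machine
        # stack (where, as far as I can tell, the slow-path stubs retrieve
        # it from), while store=True writes it into the jf_gcmap field of
        # the jitframe.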
        ptr = rffi.cast(lltype.Signed, gcmap)
        if push:
            mc.gen_load_int(r.ip.value, ptr, cond=cond)
            mc.PUSH([r.ip.value], cond=cond)
        else:
            assert store
            ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
            mc.gen_load_int(r.ip.value, ptr, cond=cond)
            mc.STR_ri(r.ip.value, r.fp.value, imm=ofs, cond=cond)

    def pop_gcmap(self, mc):
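        # "Popping" the gcmap simply clears the jf_gcmap field of the
        # jitframe by storing a NULL into it.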
        ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
        assert check_imm_arg(ofs)
        mc.gen_load_int(r.ip.value, 0)
        self.store_reg(mc, r.ip, r.fp, ofs)

    def _mul_const_scaled(self, mc, targetreg, sourcereg, itemsize):
        """Produce one operation to do roughly
               targetreg = sourcereg * itemsize
        except that the result may still need to be shifted left by
        0, 1, 2 or 3 bits; that remaining shift is returned.
        """
        if (itemsize & 7) == 0:
            shiftsize = 3
        elif (itemsize & 3) == 0:
            shiftsize = 2
        elif (itemsize & 1) == 0:
            shiftsize = 1
        else:
            shiftsize = 0
        itemsize >>= shiftsize
        #
        if valid_addressing_size(itemsize - 1):
            mc.ADD_rr(targetreg.value, sourcereg.value, sourcereg.value,
                      imm=get_scale(itemsize - 1), shifttype=shift.LSL)
        elif valid_addressing_size(itemsize):
            mc.LSL_ri(targetreg.value, sourcereg.value,
                      get_scale(itemsize))
        else:
            mc.gen_load_int(targetreg.value, itemsize)
            mc.MUL(targetreg.value, sourcereg.value, targetreg.value)
        #
        return shiftsize

    def simple_call(self, fnloc, arglocs, result_loc=r.r0):
        if result_loc is None:
            result_type = VOID
            result_size = 0
        elif result_loc.is_vfp_reg():
            result_type = FLOAT
            result_size = DOUBLE_WORD
        else:
            result_type = INT
            result_size = WORD
        cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs,
                                         result_loc, result_type,
                                         result_size)
        cb.emit()

    def simple_call_no_collect(self, fnloc, arglocs):
        cb = callbuilder.get_callbuilder(self.cpu, self, fnloc, arglocs)
        cb.emit_no_collect()


def not_implemented(msg):
    msg = '[ARM/asm] %s\n' % msg
    if we_are_translated():
        llop.debug_print(lltype.Void, msg)
    raise NotImplementedError(msg)


def notimplemented_op(self, op, arglocs, regalloc, fcond):
    print "[ARM/asm] %s not implemented" % op.getopname()
    raise NotImplementedError(op)
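
# Build the dispatch tables: every emit_op_XXX method of ResOpAssembler
# becomes the handler for the resoperation rop.XXX, and every emit_opx_XXX
# method handles the oopspec call variant EffectInfo.OS_XXX.  Slots left
# untouched fall back to notimplemented_op above.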
asm_operations = [notimplemented_op] * (rop._LAST + 1)
asm_extra_operations = {}

for name, value in ResOpAssembler.__dict__.iteritems():
    if name.startswith('emit_opx_'):
        opname = name[len('emit_opx_'):]
        num = getattr(EffectInfo, 'OS_' + opname.upper())
        asm_extra_operations[num] = value
    elif name.startswith('emit_op_'):
        opname = name[len('emit_op_'):]
        num = getattr(rop, opname.upper())
        asm_operations[num] = value


class BridgeAlreadyCompiled(Exception):
    pass