
/rpython/jit/backend/arm/assembler.py

https://bitbucket.org/kkris/pypy


from __future__ import with_statement
import os
from rpython.jit.backend.llsupport import jitframe
from rpython.jit.backend.arm.helper.assembler import saved_registers
from rpython.jit.backend.arm import conditions as c
from rpython.jit.backend.arm import registers as r
from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \
    N_REGISTERS_SAVED_BY_MALLOC
from rpython.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder
from rpython.jit.backend.arm.locations import get_fp_offset
from rpython.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager,
    CoreRegisterManager, check_imm_arg,
    operations as regalloc_operations,
    operations_with_guard as regalloc_operations_with_guard)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.jit.backend.model import CompiledLoopToken
from rpython.jit.codewriter import longlong
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.jit.metainterp.history import BoxInt, ConstInt
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.rlib import rgc
from rpython.rlib.objectmodel import we_are_translated, specialize
from rpython.rtyper.annlowlevel import llhelper
from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.jit.backend.arm.opassembler import ResOpAssembler
from rpython.rlib.debug import (debug_print, debug_start, debug_stop,
                                have_debug_prints, fatalerror)
from rpython.rlib.jit import AsmInfo
from rpython.rlib.objectmodel import compute_unique_id

# XXX Move to llsupport
from rpython.jit.backend.x86.support import memcpy_fn

DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
                              ('type', lltype.Char),  # 'b'ridge, 'l'abel or
                                                      # 'e'ntry point
                              ('number', lltype.Signed))
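
# One of these raw structs is allocated per loop entry, bridge and label
# (see _register_counter below); the 'i' field is incremented by operations
# injected into the trace itself when debug counters are enabled.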


class AssemblerARM(ResOpAssembler):

    STACK_FIXED_AREA = -1

    debug = True

    def __init__(self, cpu, translate_support_code=False):
        self.cpu = cpu
        self.setup_failure_recovery()
        self.mc = None
        self.memcpy_addr = 0
        self.pending_guards = None
        self._exit_code_addr = 0
        self.current_clt = None
        self.malloc_slowpath = 0
        self.wb_slowpath = [0, 0, 0, 0]
        self._regalloc = None
        self.datablockwrapper = None
        self.propagate_exception_path = 0
        self.stack_check_slowpath = 0
        self._compute_stack_size()
        self._debug = False
        self.loop_run_counters = []
        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
        self.force_token_to_dead_frame = {}    # XXX temporary hack

    def set_debug(self, v):
        r = self._debug
        self._debug = v
        return r

    def _compute_stack_size(self):
        self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD
        self.STACK_FIXED_AREA += WORD  # FORCE_TOKEN
        self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD
        if self.cpu.supports_floats:
            self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers)
                                      * DOUBLE_WORD)
        if self.STACK_FIXED_AREA % 8 != 0:
            self.STACK_FIXED_AREA += WORD  # Stack alignment
        assert self.STACK_FIXED_AREA % 8 == 0
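        # (8-byte alignment is required by the ARM AAPCS at call boundaries,
        # and the callee-saved VFP registers are saved as double words.)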

    def setup(self, looptoken, operations):
        self.current_clt = looptoken.compiled_loop_token
        operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu,
                        operations, self.current_clt.allgcrefs)
        assert self.memcpy_addr != 0, 'setup_once() not called?'
        self.mc = ARMv7Builder()
        self.pending_guards = []
        assert self.datablockwrapper is None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}
        return operations

    def teardown(self):
        self.current_clt = None
        self._regalloc = None
        self.mc = None
        self.pending_guards = None
        assert self.datablockwrapper is None

    def setup_once(self):
        # Addresses of functions called by new_xxx operations
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        self._build_failure_recovery(exc=True, withfloats=False)
        self._build_failure_recovery(exc=False, withfloats=False)
        if self.cpu.supports_floats:
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
            self._build_failure_recovery(exc=True, withfloats=True)
            self._build_failure_recovery(exc=False, withfloats=True)
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            self._build_malloc_slowpath()
        self._build_stack_check_slowpath()
        if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack:
            self._build_release_gil(gc_ll_descr.gcrootmap)
        self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            debug_start('jit-backend-counts')
            self.set_debug(have_debug_prints())
            debug_stop('jit-backend-counts')

    def finish_once(self):
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                elif struct.type == 'b':
                    prefix = 'bridge ' + str(struct.number)
                else:
                    prefix = 'entry ' + str(struct.number)
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    # XXX: merge with x86
    def _register_counter(self, tp, number, token):
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def _append_debugging_code(self, operations, tp, number, token):
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        box = BoxInt()
        box2 = BoxInt()
        ops = [ResOperation(rop.GETFIELD_RAW, [c_adr],
                            box, descr=self.debug_counter_descr),
               ResOperation(rop.INT_ADD, [box, ConstInt(1)], box2),
               ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                            None, descr=self.debug_counter_descr)]
        operations.extend(ops)
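        # Together these three operations perform 'counter.i += 1' at the
        # trace level; _inject_debugging_code places such a sequence at the
        # entry of the loop and after every LABEL, so the counter is bumped
        # on each pass through the corresponding piece of code.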

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        if self._debug:
            # before doing anything, let's increase a counter
            s = 0
            for op in operations:
                s += op.getopnum()
            looptoken._arm_debug_checksum = s

            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    @staticmethod
    def _release_gil_shadowstack():
        before = rffi.aroundstate.before
        if before:
            before()

    @staticmethod
    def _reacquire_gil_shadowstack():
        after = rffi.aroundstate.after
        if after:
            after()

    _NOARG_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))

    def _build_release_gil(self, gcrootmap):
        assert gcrootmap.is_shadow_stack
        releasegil_func = llhelper(self._NOARG_FUNC,
                                   self._release_gil_shadowstack)
        reacqgil_func = llhelper(self._NOARG_FUNC,
                                 self._reacquire_gil_shadowstack)
        self.releasegil_addr = rffi.cast(lltype.Signed, releasegil_func)
        self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func)
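
    # Note: only the helper addresses are computed here; the code that
    # actually calls them before and after a GIL-releasing call is emitted
    # elsewhere, as part of the CALL_RELEASE_GIL handling.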

    def _build_propagate_exception_path(self):
        if self.cpu.propagate_exception_v < 0:
            return      # not supported (for tests, or non-translated)
        #
        mc = ARMv7Builder()
        #
        # Call the helper, which will return a dead frame object with
        # the correct exception set, or MemoryError by default
        # XXX make sure we return the correct value here
        addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception())
        mc.BL(addr)
        self.gen_func_epilog(mc=mc)
        self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, [])

    def _build_stack_check_slowpath(self):
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or self.cpu.propagate_exception_v < 0:
            return      # no stack check (for tests, or non-translated)
        #
        # make a "function" that is called immediately at the start of
        # an assembler function.  In particular, the stack looks like:
        #
        #    |  retaddr of caller    |   <-- aligned to a multiple of 16
        #    |  saved argument regs  |
        #    |  my own retaddr       |   <-- sp
        #    +-----------------------+
        #
        mc = ARMv7Builder()
        # save argument registers and return address
        mc.PUSH([reg.value for reg in r.argument_regs] + [r.lr.value])
        # stack is aligned here
        # Pass current stack pointer as argument to the call
        mc.MOV_rr(r.r0.value, r.sp.value)
        #
        mc.BL(slowpathaddr)

        # check for an exception
        mc.gen_load_int(r.r0.value, self.cpu.pos_exception())
        mc.LDR_ri(r.r0.value, r.r0.value)
        mc.TST_rr(r.r0.value, r.r0.value)
        # restore registers and return
        # We check for c.EQ here, meaning all bits zero in this case
        mc.POP([reg.value for reg in r.argument_regs] + [r.pc.value], cond=c.EQ)
        #
        # Call the helper, which will return a dead frame object with
        # the correct exception set, or MemoryError by default
        addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception())
        mc.BL(addr)
        #
        # footer -- note the ADD, which skips the return address of this
        # function, and will instead return to the caller's caller.  Note
        # also that we completely ignore the saved arguments, because we
        # are interrupting the function.
        mc.ADD_ri(r.sp.value, r.sp.value, (len(r.argument_regs) + 1) * WORD)
        mc.POP([r.pc.value])
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.stack_check_slowpath = rawstart

    def _build_wb_slowpath(self, withcards, withfloats=False):
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all vfp registers.  It takes a single argument which is in r0.
        # It must keep stack alignment accordingly.
        mc = ARMv7Builder()
        #
        if withfloats:
            floats = r.caller_vfp_resp
        else:
            floats = []
        with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
            mc.BL(func)
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            mc.LDRB_ri(r.ip.value, r.r0.value,
                       imm=descr.jit_wb_if_flag_byteofs)
            mc.TST_ri(r.ip.value, imm=0x80)
        #
        mc.MOV_rr(r.pc.value, r.lr.value)
        #
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
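        # The four slots of self.wb_slowpath are indexed by
        # 'withcards + 2 * withfloats': one helper per combination of a
        # plain/card-marking barrier with core-only/core-plus-vfp saving.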

    def setup_failure_recovery(self):

        #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
        def failure_recovery_func(mem_loc, frame_pointer, stack_pointer):
            """mem_loc is a structure in memory describing where the values
            for the failargs are stored.  frame_pointer is the address of
            the frame pointer of the frame to be decoded."""
            vfp_registers = rffi.cast(rffi.LONGP, stack_pointer)
            registers = rffi.ptradd(vfp_registers, 2 * len(r.all_vfp_regs))
            registers = rffi.cast(rffi.LONGP, registers)
            bytecode = rffi.cast(rffi.UCHARP, mem_loc)
            return self.grab_frame_values(self.cpu, bytecode, frame_pointer,
                                          registers, vfp_registers)

        self.failure_recovery_code = [0, 0, 0, 0]
        self.failure_recovery_func = failure_recovery_func

    _FAILURE_RECOVERY_FUNC = lltype.Ptr(lltype.FuncType([rffi.LONGP] * 3,
                                                        llmemory.GCREF))

    @staticmethod
    #@rgc.no_collect -- XXX still true, but hacked gc_set_extra_threshold
    def grab_frame_values(cpu, bytecode, frame_pointer,
                          registers, vfp_registers):
        # no malloc allowed here!!  xxx apart from one, hacking a lot
        force_index = rffi.cast(lltype.Signed, frame_pointer)
        num = 0
        deadframe = lltype.nullptr(jitframe.DEADFRAME)
        # step 1: lots of mess just to count the final value of 'num'
        bytecode1 = bytecode
        while 1:
            code = rffi.cast(lltype.Signed, bytecode1[0])
            bytecode1 = rffi.ptradd(bytecode1, 1)
            if code >= AssemblerARM.CODE_FROMSTACK:
                while code > 0x7F:
                    code = rffi.cast(lltype.Signed, bytecode1[0])
                    bytecode1 = rffi.ptradd(bytecode1, 1)
            else:
                kind = code & 3
                if kind == AssemblerARM.DESCR_SPECIAL:
                    if code == AssemblerARM.CODE_HOLE:
                        num += 1
                        continue
                    if code == AssemblerARM.CODE_INPUTARG:
                        continue
                    if code == AssemblerARM.CODE_FORCED:
                        # resuming from a GUARD_NOT_FORCED
                        token = force_index
                        deadframe = (
                            cpu.assembler.force_token_to_dead_frame.pop(token))
                        deadframe = lltype.cast_opaque_ptr(
                            jitframe.DEADFRAMEPTR, deadframe)
                        continue
                    assert code == AssemblerARM.CODE_STOP
                    break
            num += 1

        # allocate the deadframe
        if not deadframe:
            # Remove the "reserve" at the end of the nursery.  This means
            # that it is guaranteed that the following malloc() works
            # without requiring a collect(), but it needs to be re-added
            # as soon as possible.
            cpu.gc_clear_extra_threshold()
            assert num <= cpu.get_failargs_limit()
            try:
                deadframe = lltype.malloc(jitframe.DEADFRAME, num)
            except MemoryError:
                fatalerror("memory usage error in grab_frame_values")

        # fill it
        code_inputarg = False
        num = 0
        value_hi = 0
        while 1:
            # decode the next instruction from the bytecode
            code = rffi.cast(lltype.Signed, bytecode[0])
            bytecode = rffi.ptradd(bytecode, 1)
            if code >= AssemblerARM.CODE_FROMSTACK:
                if code > 0x7F:
                    shift = 7
                    code &= 0x7F
                    while True:
                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
                        bytecode = rffi.ptradd(bytecode, 1)
                        code |= (nextcode & 0x7F) << shift
                        shift += 7
                        if nextcode <= 0x7F:
                            break
                # load the value from the stack
                kind = code & 3
                code = (code - AssemblerARM.CODE_FROMSTACK) >> 2
                if code_inputarg:
                    code = ~code
                    code_inputarg = False
                stackloc = force_index - get_fp_offset(int(code))
                value = rffi.cast(rffi.LONGP, stackloc)[0]
                if kind == AssemblerARM.DESCR_FLOAT:
                    assert WORD == 4
                    value_hi = value
                    value = rffi.cast(rffi.LONGP, stackloc - WORD)[0]
            else:
                kind = code & 3
                if kind == AssemblerARM.DESCR_SPECIAL:
                    if code == AssemblerARM.CODE_HOLE:
                        num += 1
                        continue
                    if code == AssemblerARM.CODE_INPUTARG:
                        code_inputarg = True
                        continue
                    if code == AssemblerARM.CODE_FORCED:
                        continue
                    assert code == AssemblerARM.CODE_STOP
                    break
                # 'code' identifies a register: load its value
                code >>= 2
                if kind == AssemblerARM.DESCR_FLOAT:
                    if WORD == 4:
                        value = vfp_registers[2 * code]
                        value_hi = vfp_registers[2 * code + 1]
                    else:
                        value = registers[code]
                else:
                    value = registers[code]
            # store the loaded value into fail_boxes_<type>
            if kind == AssemblerARM.DESCR_INT:
                deadframe.jf_values[num].int = value
            elif kind == AssemblerARM.DESCR_REF:
                deadframe.jf_values[num].ref = rffi.cast(llmemory.GCREF,
                                                         value)
            elif kind == AssemblerARM.DESCR_FLOAT:
                assert WORD == 4
                assert not longlong.is_64_bit
                floatvalue = rffi.cast(lltype.SignedLongLong, value_hi)
                floatvalue <<= 32
                floatvalue |= rffi.cast(lltype.SignedLongLong,
                                        rffi.cast(lltype.Unsigned, value))
                deadframe.jf_values[num].float = floatvalue
            else:
                assert 0, "bogus kind"
            num += 1
        #
        assert num == len(deadframe.jf_values)
        if not we_are_translated():
            assert bytecode[4] == 0xCC
        fail_index = rffi.cast(rffi.INTP, bytecode)[0]
        fail_descr = cpu.get_fail_descr_from_number(fail_index)
        deadframe.jf_descr = fail_descr.hide(cpu)
        return lltype.cast_opaque_ptr(llmemory.GCREF, deadframe)
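
    # grab_frame_values walks the bytecode twice: the first pass only counts
    # the failargs so that the DEADFRAME can be allocated with a single
    # malloc() (no other GC activity is allowed while raw registers and
    # stack slots are being read), and the second pass fills it in.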

    def decode_inputargs(self, code):
        descr_to_box_type = [REF, INT, FLOAT]
        bytecode = rffi.cast(rffi.UCHARP, code)
        arglocs = []
        code_inputarg = False
        while 1:
            # decode the next instruction from the bytecode
            code = rffi.cast(lltype.Signed, bytecode[0])
            bytecode = rffi.ptradd(bytecode, 1)
            if code >= self.CODE_FROMSTACK:
                # 'code' identifies a stack location
                if code > 0x7F:
                    shift = 7
                    code &= 0x7F
                    while True:
                        nextcode = rffi.cast(lltype.Signed, bytecode[0])
                        bytecode = rffi.ptradd(bytecode, 1)
                        code |= (nextcode & 0x7F) << shift
                        shift += 7
                        if nextcode <= 0x7F:
                            break
                kind = code & 3
                code = (code - self.CODE_FROMSTACK) >> 2
                if code_inputarg:
                    code = ~code
                    code_inputarg = False
                loc = ARMFrameManager.frame_pos(code, descr_to_box_type[kind])
            elif code == self.CODE_STOP:
                break
            elif code == self.CODE_HOLE:
                continue
            elif code == self.CODE_INPUTARG:
                code_inputarg = True
                continue
            else:
                # 'code' identifies a register
                kind = code & 3
                code >>= 2
                if kind == self.DESCR_FLOAT:
                    loc = r.all_vfp_regs[code]
                else:
                    loc = r.all_regs[code]
            arglocs.append(loc)
        return arglocs[:]

    def _build_malloc_slowpath(self):
        mc = ARMv7Builder()
        if self.cpu.supports_floats:
            vfp_regs = r.all_vfp_regs
        else:
            vfp_regs = []
        # We need to push two registers here because we are going to make a
        # call and therefore the stack needs to be 8-byte aligned
        mc.PUSH([r.ip.value, r.lr.value])
        with saved_registers(mc, [], vfp_regs):
            # At this point we know that the values we need to compute the
            # size are stored in r0 and r1.
            mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
            for reg, ofs in CoreRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
                mc.STR_ri(reg.value, r.fp.value, imm=ofs)
            mc.BL(addr)
            for reg, ofs in CoreRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
                mc.LDR_ri(reg.value, r.fp.value, imm=ofs)

        mc.CMP_ri(r.r0.value, 0)
        mc.B(self.propagate_exception_path, c=c.EQ)
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        mc.gen_load_int(r.r1.value, nursery_free_adr)
        mc.LDR_ri(r.r1.value, r.r1.value)
        # see above
        mc.POP([r.ip.value, r.pc.value])
        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
        self.malloc_slowpath = rawstart

    def propagate_memoryerror_if_r0_is_null(self):
        # see ../x86/assembler.py:propagate_memoryerror_if_eax_is_null
        self.mc.CMP_ri(r.r0.value, 0)
        self.mc.B(self.propagate_exception_path, c=c.EQ)

    def _build_failure_recovery(self, exc, withfloats=False):
        mc = ARMv7Builder()
        failure_recovery = llhelper(self._FAILURE_RECOVERY_FUNC,
                                    self.failure_recovery_func)
        self._insert_checks(mc)
        if withfloats:
            f = r.all_vfp_regs
        else:
            f = []
        with saved_registers(mc, r.all_regs, f):
            if exc:
                # We might have an exception pending.  Load it into r4
                # (this is a register saved across calls)
                mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value())
                mc.LDR_ri(r.r4.value, r.r5.value)
                # clear the exc flags
                mc.gen_load_int(r.r6.value, 0)
                mc.STR_ri(r.r6.value, r.r5.value)
                mc.gen_load_int(r.r5.value, self.cpu.pos_exception())
                mc.STR_ri(r.r6.value, r.r5.value)
            # move the mem block address to r0, to pass as first param
            mc.MOV_rr(r.r0.value, r.lr.value)
            # pass the current frame pointer as second param
            mc.MOV_rr(r.r1.value, r.fp.value)
            # pass the current stack pointer as third param
            mc.MOV_rr(r.r2.value, r.sp.value)
            self._insert_checks(mc)
            mc.BL(rffi.cast(lltype.Signed, failure_recovery))
            if exc:
                # save r4 (the pending exception) into 'jf_guard_exc'
                from rpython.jit.backend.llsupport.descr import unpack_fielddescr
                descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
                offset, size, _ = unpack_fielddescr(descrs.jf_guard_exc)
                mc.STR_rr(r.r4.value, r.r0.value, offset, cond=c.AL)
            mc.MOV_rr(r.ip.value, r.r0.value)
        mc.MOV_rr(r.r0.value, r.ip.value)
        self.gen_func_epilog(mc=mc)
        rawstart = mc.materialize(self.cpu.asmmemmgr, [],
                                  self.cpu.gc_ll_descr.gcrootmap)
        self.failure_recovery_code[exc + 2 * withfloats] = rawstart
        self.mc = None

    DESCR_REF       = 0x00
    DESCR_INT       = 0x01
    DESCR_FLOAT     = 0x02
    DESCR_SPECIAL   = 0x03
    CODE_FROMSTACK  = 64
    CODE_STOP       = 0 | DESCR_SPECIAL
    CODE_HOLE       = 4 | DESCR_SPECIAL
    CODE_INPUTARG   = 8 | DESCR_SPECIAL
    CODE_FORCED     = 12 | DESCR_SPECIAL  # XXX where should this be written?
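
    # Encoding of the failure-recovery bytecode: each failarg is written as
    # the value 'kind + 4 * n', where n is the register number for register
    # locations, and CODE_FROMSTACK // 4 plus the frame position for stack
    # locations.  The value is emitted as a base-128 varint, low 7 bits
    # first, with the high bit set on every byte except the last.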

    def write_failure_recovery_description(self, descr, failargs, locs):
        assert self.mc is not None
        for i in range(len(failargs)):
            arg = failargs[i]
            if arg is not None:
                if arg.type == REF:
                    kind = self.DESCR_REF
                elif arg.type == INT:
                    kind = self.DESCR_INT
                elif arg.type == FLOAT:
                    kind = self.DESCR_FLOAT
                else:
                    raise AssertionError("bogus kind")
                loc = locs[i]
                if loc.is_stack():
                    pos = loc.position
                    if pos < 0:
                        self.mc.writechar(chr(self.CODE_INPUTARG))
                        pos = ~pos
                    n = self.CODE_FROMSTACK // 4 + pos
                else:
                    assert loc.is_reg() or loc.is_vfp_reg()
                    n = loc.value
                n = kind + 4 * n
                while n > 0x7F:
                    self.mc.writechar(chr((n & 0x7F) | 0x80))
                    n >>= 7
            else:
                n = self.CODE_HOLE
            self.mc.writechar(chr(n))
        self.mc.writechar(chr(self.CODE_STOP))
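        # Example: an INT spilled at frame position 40 gives
        # n = CODE_FROMSTACK // 4 + 40 = 56, then n = DESCR_INT + 4 * 56
        # = 225, written as the two bytes 0xE1, 0x01.  The decoders above
        # reassemble 225 and recover kind = 225 & 3 = DESCR_INT and
        # position (225 - CODE_FROMSTACK) >> 2 = 40.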

    def generate_quick_failure(self, guardtok, fcond=c.AL):
        assert isinstance(guardtok.save_exc, bool)
        fail_index = self.cpu.get_fail_descr_number(guardtok.descr)
        startpos = self.mc.currpos()
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and box.type == FLOAT:
                withfloats = True
                break
        exc = guardtok.save_exc
        target = self.failure_recovery_code[exc + 2 * withfloats]
        assert target != 0
        self.mc.BL(target)
        # write tight data that describes the failure recovery
        if guardtok.is_guard_not_forced:
            self.mc.writechar(chr(self.CODE_FORCED))
        self.write_failure_recovery_description(guardtok.descr,
                                    guardtok.failargs, guardtok.faillocs[1:])
        self.mc.write32(fail_index)
        # for testing the decoding, write a final byte 0xCC
        if not we_are_translated():
            self.mc.writechar('\xCC')
            faillocs = [loc for loc in guardtok.faillocs if loc is not None]
            guardtok.descr._arm_debug_faillocs = faillocs
        self.align()
        return startpos

    def align(self):
        while self.mc.currpos() % FUNC_ALIGN != 0:
            self.mc.writechar(chr(0))

    def gen_func_epilog(self, mc=None, cond=c.AL):
        stack_size = self.STACK_FIXED_AREA
        stack_size -= len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD

        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if mc is None:
            mc = self.mc
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_footer_shadowstack(gcrootmap, mc)
        mc.MOV_rr(r.sp.value, r.fp.value, cond=cond)
        mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond)
        if self.cpu.supports_floats:
            mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers],
                    cond=cond)
        mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond)

    def gen_func_prolog(self):
        stack_size = self.STACK_FIXED_AREA
        stack_size -= len(r.callee_saved_registers) * WORD
        if self.cpu.supports_floats:
            stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD

        self.mc.PUSH([reg.value for reg in r.callee_saved_registers])
        if self.cpu.supports_floats:
            self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers])
        # here we modify the stack pointer to leave room for the 9 registers
        # that are going to be saved here around malloc calls and one word to
        # store the force index
        self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size)
        self.mc.MOV_rr(r.fp.value, r.sp.value)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            self.gen_shadowstack_header(gcrootmap)

    def gen_shadowstack_header(self, gcrootmap):
        # we need to put two words into the shadowstack: the MARKER_FRAME
        # and the address of the frame (fp, actually)
        rst = gcrootmap.get_root_stack_top_addr()
        self.mc.gen_load_int(r.ip.value, rst)
        self.mc.LDR_ri(r.r4.value, r.ip.value)         # LDR r4, [rootstacktop]
        #
        MARKER = gcrootmap.MARKER_FRAME
        self.mc.ADD_ri(r.r5.value, r.r4.value,
                       imm=2 * WORD)                   # ADD r5, r4, #(2*WORD)
        self.mc.gen_load_int(r.r6.value, MARKER)
        self.mc.STR_ri(r.r6.value, r.r4.value, WORD)   # STR r6, [r4, #WORD]
        self.mc.STR_ri(r.fp.value, r.r4.value)         # STR fp, [r4]
        #
        self.mc.STR_ri(r.r5.value, r.ip.value)         # STR r5, [rootstacktop]
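
    # Shadow-stack frame layout, with r4 holding the old rootstacktop:
    #     [r4]          fp (the address of the frame)
    #     [r4 + WORD]   MARKER_FRAME
    # rootstacktop is then bumped by 2 * WORD; gen_footer_shadowstack
    # undoes exactly this.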

    def gen_footer_shadowstack(self, gcrootmap, mc):
        rst = gcrootmap.get_root_stack_top_addr()
        mc.gen_load_int(r.ip.value, rst)
        mc.LDR_ri(r.r4.value, r.ip.value)               # LDR r4, [rootstacktop]
        mc.SUB_ri(r.r5.value, r.r4.value, imm=2 * WORD) # SUB r5, r4, #(2*WORD)
        mc.STR_ri(r.r5.value, r.ip.value)

    def _dump(self, ops, type='loop'):
        debug_start('jit-backend-ops')
        debug_print(type)
        for op in ops:
            debug_print(op.repr())
        debug_stop('jit-backend-ops')

    def _call_header(self):
        self.align()
        self.gen_func_prolog()

    def _call_header_with_stack_check(self):
        if self.stack_check_slowpath == 0:
            pass                # no stack check (e.g. not translated)
        else:
            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
            self.mc.PUSH([r.lr.value])
            # load stack end
            self.mc.gen_load_int(r.ip.value, endaddr)           # load ip, [end]
            self.mc.LDR_ri(r.ip.value, r.ip.value)              # LDR ip, [ip]
            # load stack length
            self.mc.gen_load_int(r.lr.value, lengthaddr)        # load lr, [length]
            self.mc.LDR_ri(r.lr.value, r.lr.value)              # LDR lr, [lr]
            # calculate the offset from the current stack pointer
            self.mc.SUB_rr(r.ip.value, r.ip.value, r.sp.value)  # SUB ip, ip, sp
            # compare the offset with the stack length
            self.mc.CMP_rr(r.ip.value, r.lr.value)              # CMP ip, lr
            self.mc.BL(self.stack_check_slowpath, c=c.HI)       # call if ip > lr
            #
            self.mc.POP([r.lr.value])
        self._call_header()

    # cpu interface
    def assemble_loop(self, loopname, inputargs, operations, looptoken, log):
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        clt.allgcrefs = []
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)

        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)

        operations = self.setup(looptoken, operations)
        if log:
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', looptoken.number)

        self._call_header_with_stack_check()
        sp_patch_location = self._prepare_sp_patch_position()

        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
        regalloc.prepare_loop(inputargs, operations)

        loop_head = self.mc.get_relative_pos()
        looptoken._arm_loop_code = loop_head
        #
        clt.frame_depth = -1
        frame_depth = self._assemble(operations, regalloc)
        clt.frame_depth = frame_depth
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self._patch_sp_offset(sp_patch_location, frame_depth)
        self.write_pending_failure_recoveries()

        rawstart = self.materialize_loop(looptoken)
        looptoken._arm_func_addr = rawstart

        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        if log and not we_are_translated():
            self.mc._dump_trace(rawstart,
                    'loop_%s.asm' % self.cpu.total_compiled_loops)

        ops_offset = self.mc.ops_offset
        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address %x to %x (bootstrap %x)" % (
            looptoken.number, loopname,
            rawstart + loop_head,
            rawstart + size_excluding_failure_stuff,
            rawstart))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, rawstart + loop_head,
                       size_excluding_failure_stuff - loop_head)

    def _assemble(self, operations, regalloc):
        regalloc.compute_hint_frame_locations(operations)
        self._walk_operations(operations, regalloc)
        frame_depth = regalloc.frame_manager.get_frame_depth()
        jump_target_descr = regalloc.jump_target_descr
        if jump_target_descr is not None:
            frame_depth = max(frame_depth,
                              jump_target_descr._arm_clt.frame_depth)
        return frame_depth

    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log):
        operations = self.setup(original_loop_token, operations)
        descr_number = self.cpu.get_fail_descr_number(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)

        assert isinstance(faildescr, AbstractFailDescr)
        code = self._find_failure_recovery_bytecode(faildescr)
        frame_depth = faildescr._arm_current_frame_depth
        arglocs = self.decode_inputargs(code)
        if not we_are_translated():
            assert len(inputargs) == len(arglocs)

        regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager())
        regalloc.prepare_bridge(inputargs, arglocs, operations)

        sp_patch_location = self._prepare_sp_patch_position()

        startpos = self.mc.get_relative_pos()

        frame_depth = self._assemble(operations, regalloc)

        codeendpos = self.mc.get_relative_pos()

        self._patch_sp_offset(sp_patch_location, frame_depth)
        self.write_pending_failure_recoveries()

        rawstart = self.materialize_loop(original_loop_token)

        self.process_pending_guards(rawstart)
        self.fixup_target_tokens(rawstart)

        self.patch_trace(faildescr, original_loop_token,
                         rawstart, regalloc)

        if not we_are_translated():
            # for the benefit of tests
            faildescr._arm_bridge_frame_depth = frame_depth
            if log:
                self.mc._dump_trace(rawstart, 'bridge_%d.asm' %
                                    self.cpu.total_compiled_bridges)

        self.current_clt.frame_depth = max(self.current_clt.frame_depth,
                                           frame_depth)
        ops_offset = self.mc.ops_offset
        self.teardown()

        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard %d has address %x to %x" %
                    (descr_number, rawstart, rawstart + codeendpos))
        debug_stop("jit-backend-addr")

        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)

    def _find_failure_recovery_bytecode(self, faildescr):
        guard_stub_addr = faildescr._arm_failure_recovery_block
        if guard_stub_addr == 0:
            # This case should be prevented by the logic in compile.py:
            # look for CNT_BUSY_FLAG, which disables tracing from a guard
            # when another tracing from the same guard is already in progress.
            raise BridgeAlreadyCompiled
        # a guard requires 3 words to encode the jump to the exit code.
        return guard_stub_addr + 3 * WORD

    def fixup_target_tokens(self, rawstart):
        for targettoken in self.target_tokens_currently_compiling:
            targettoken._arm_loop_code += rawstart
        self.target_tokens_currently_compiling = None

    def target_arglocs(self, loop_token):
        return loop_token._arm_arglocs

    def materialize_loop(self, looptoken):
        self.datablockwrapper.done()    # finish using cpu.asmmemmgr
        self.datablockwrapper = None
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                   self.cpu.gc_ll_descr.gcrootmap)

    def write_pending_failure_recoveries(self):
        for tok in self.pending_guards:
            # generate the exit stub and the encoded representation
            tok.pos_recovery_stub = self.generate_quick_failure(tok)
            # store info on the descr
            tok.descr._arm_current_frame_depth = tok.faillocs[0].getint()

    def process_pending_guards(self, block_start):
        clt = self.current_clt
        for tok in self.pending_guards:
            descr = tok.descr
            assert isinstance(descr, AbstractFailDescr)
            failure_recovery_pos = block_start + tok.pos_recovery_stub
            descr._arm_failure_recovery_block = failure_recovery_pos
            relative_offset = tok.pos_recovery_stub - tok.offset
            guard_pos = block_start + tok.offset
            if not tok.is_guard_not_invalidated:
                # patch the guard jump to the stub: overwrite the generated
                # NOP with a B_offs to the position of the stub
                mc = ARMv7Builder()
                mc.B_offs(relative_offset, c.get_opposite_of(tok.fcond))
                mc.copy_to_raw_memory(guard_pos)
            else:
                clt.invalidate_positions.append((guard_pos, relative_offset))

    def get_asmmemmgr_blocks(self, looptoken):
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def _prepare_sp_patch_position(self):
        """Generate NOPs as placeholder to patch the instruction(s) to update
        the sp according to the number of spilled variables"""
        size = (self.mc.size_of_gen_load_int + WORD)
        l = self.mc.currpos()
        for _ in range(size // WORD):
            self.mc.NOP()
        return l
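
    # The NOPs reserved above (size_of_gen_load_int + WORD bytes) cover the
    # worst case of _adjust_sp below: a gen_load_int into ip followed by a
    # single SUB or ADD instruction.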

    def _patch_sp_offset(self, pos, frame_depth):
        cb = OverwritingBuilder(self.mc, pos,
                                OverwritingBuilder.size_of_gen_load_int + WORD)
        n = frame_depth * WORD

        # ensure the sp is 8 byte aligned when patching it
        if n % 8 != 0:
            n += WORD
        assert n % 8 == 0

        self._adjust_sp(n, cb, base_reg=r.fp)

    def _adjust_sp(self, n, cb=None, fcond=c.AL, base_reg=r.sp):
        if cb is None:
            cb = self.mc
        if n < 0:
            n = -n
            rev = True
        else:
            rev = False
        if n <= 0xFF and fcond == c.AL:
            if rev:
                cb.ADD_ri(r.sp.value, base_reg.value, n)
            else:
                cb.SUB_ri(r.sp.value, base_reg.value, n)
        else:
            cb.gen_load_int(r.ip.value, n, cond=fcond)
            if rev:
                cb.ADD_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
            else:
                cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond)
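
    # Offsets up to 0xFF fit in the 8-bit immediate field of a single
    # ADD/SUB instruction; larger offsets are first materialized into ip
    # with gen_load_int and then applied register-to-register.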

    def _walk_operations(self, operations, regalloc):
        fcond = c.AL
        self._regalloc = regalloc
        while regalloc.position() < len(operations) - 1:
            regalloc.next_instruction()
            i = regalloc.position()
            op = operations[i]
            self.mc.mark_op(op)
            opnum = op.getopnum()
            if op.has_no_side_effect() and op.result not in regalloc.longevity:
                regalloc.possibly_free_vars_for_op(op)
            elif self.can_merge_with_next_guard(op, i, operations):
                guard = operations[i + 1]
                assert guard.is_guard()
                arglocs = regalloc_operations_with_guard[opnum](regalloc, op,
                                                                guard, fcond)
                fcond = asm_operations_with_guard[opnum](self, op,
                                            guard, arglocs, regalloc, fcond)
                regalloc.next_instruction()
                regalloc.possibly_free_vars_for_op(guard)
                regalloc.possibly_free_vars(guard.getfailargs())
            elif not we_are_translated() and op.getopnum() == -124:
                # -124 is the opnum of the FORCE_SPILL pseudo-operation,
                # which only appears in tests
                regalloc.prepare_force_spill(op, fcond)
            else:
                arglocs = regalloc_operations[opnum](regalloc, op, fcond)
                if arglocs is not None:
                    fcond = asm_operations[opnum](self, op, arglocs,
                                                  regalloc, fcond)
            if op.is_guard():
                regalloc.possibly_free_vars(op.getfailargs())
            if op.result:
                regalloc.possibly_free_var(op.result)
            regalloc.possibly_free_vars_for_op(op)
            regalloc.free_temp_vars()
            regalloc._check_invariants()
        self.mc.mark_op(None)  # end of the loop

    # from ../x86/regalloc.py
    def can_merge_with_next_guard(self, op, i, operations):
        if (op.getopnum() == rop.CALL_MAY_FORCE or
            op.getopnum() == rop.CALL_ASSEMBLER or
            op.getopnum() == rop.CALL_RELEASE_GIL):
            assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED
            return True
        if not op.is_comparison():
            if op.is_ovf():
                if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and
                    operations[i + 1].getopnum() != rop.GUARD_OVERFLOW):
                    not_implemented("int_xxx_ovf not followed by "
                                    "guard_(no)_overflow")
                return True
            return False
        if (operations[i + 1].getopnum() != rop.GUARD_TRUE and
            operations[i + 1].getopnum() != rop.GUARD_FALSE):
            return False
        if operations[i + 1].getarg(0) is not op.result:
            return False
        if (self._regalloc.longevity[op.result][1] > i + 1 or
            op.result in operations[i + 1].getfailargs()):
            return False
        return True
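
    # Merging a comparison (or an overflow-checking arithmetic operation)
    # with the guard that follows it lets the backend emit one flag-setting
    # instruction plus a conditional branch, instead of first materializing
    # the boolean into a register; this is only safe when the result dies
    # right after the guard, which the longevity check above ensures.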

    def regalloc_emit_llong(self, op, arglocs, fcond, regalloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond

    def regalloc_emit_math(self, op, arglocs, fcond, regalloc):
        effectinfo = op.getdescr().get_extra_info()
        oopspecindex = effectinfo.oopspecindex
        asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond)
        return fcond

    def _insert_checks(self, mc=None):
        if not we_are_translated() and self._debug:
            if mc is None:
                mc = self.mc
            mc.CMP_rr(r.fp.value, r.sp.value)
            mc.MOV_rr(r.pc.value, r.pc.value, cond=c.GE)
            mc.BKPT()

    def _ensure_result_bit_extension(self, resloc, size, signed):
        if size == 4:
            return
        if size == 1:
            if not signed:  # unsigned char
                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
            else:
                self.mc.LSL_ri(resloc.value, resloc.value, 24)
                self.mc.ASR_ri(resloc.value, resloc.value, 24)
        elif size == 2:
            if not signed:
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.LSR_ri(resloc.value, resloc.value, 16)
            else:
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.ASR_ri(resloc.value, resloc.value, 16)
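
    # The shift pairs above implement narrowing: LSL by 24 (or 16) moves
    # the byte's (halfword's) top bit into bit 31, then ASR shifts back and
    # replicates the sign bit through the upper bits, while LSR shifts in
    # zeroes for the unsigned halfword case.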

    def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
        b = ARMv7Builder()
        patch_addr = faildescr._arm_failure_recovery_block
        assert patch_addr != 0
        b.B(bridge_addr)
        b.copy_to_raw_memory(patch_addr)
        faildescr._arm_failure_recovery_block = 0

    # regalloc support
    def load(self, loc, value):
        assert (loc.is_reg() and value.is_imm()
                    or loc.is_vfp_reg() and value.is_imm_float())
        if value.is_imm():
            self.mc.gen_load_int(loc.value, value.getint())
        elif value.is_imm_float():
            self.mc.gen_load_int(r.ip.value, value.getint())
            self.mc.VLDR(loc.value, r.ip.value)

    def _mov_imm_to_loc(self, prev_loc, loc, cond=c.AL):
        if not loc.is_reg() and not (loc.is_stack() and loc.type != FLOAT):
            raise AssertionError("invalid target for move from imm value")
        if loc.is_reg():
            new_loc = loc
        elif loc.is_stack():
            self.mc.PUSH([r.lr.value], cond=cond)
            new_loc = r.lr
        else:
            raise AssertionError("invalid target for move from imm value")
        self.mc.gen_load_int(new_loc.value, prev_loc.value, cond=cond)
        if loc.is_stack():
            self.regalloc_mov(new_loc, loc)
            self.mc.POP([r.lr.value], cond=cond)

    def _mov_reg_to_loc(self, prev_loc, loc, cond=c.AL):
        if loc.is_imm():
            raise AssertionError("mov reg to imm doesn't make sense")
        if loc.is_reg():
            self.mc.MOV_rr(loc.value, prev_loc.value, cond=cond)
        elif loc.is_stack() and loc.type != FLOAT:
            # spill a core register
            if prev_loc is r.ip:
                temp = r.lr
            else:
                temp = r.ip
            offset = loc.value
            if not check_imm_arg(offset, size=0xFFF):
                self.mc.PUSH([temp.value], cond=cond)
                self.mc.gen_load_int(temp.value, -offset, cond=cond)
                self.mc.STR_rr(prev_loc.value, r.fp.value,
                               temp.value, cond=cond)
                self.mc.POP([temp.value], cond=cond)
            else:
                self.mc.STR_ri(prev_loc.value, r.fp.value,
                               imm=-offset, cond=cond)
        else:
            assert 0, 'unsupported case'

    def _mov_stack_to_loc(self, prev_loc, loc, cond=c.AL):
        pushed = False
        if loc.is_reg():
            assert prev_loc.type != FLOAT, 'trying to load from an \
                incompatible location into a core register'
            assert loc is not r.lr, 'lr is not supported as a target \
                when moving from the stack'
            # unspill a core register
            offset = prev_loc.value
            if not check_imm_arg(offset, size=0xFFF):
                self.mc.PUSH([r.lr.value], cond=cond)
                pushed = True
                self.mc.gen_load_int(r.lr.value, -offset, cond=cond)
                self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond)
            else:
                self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond)
            if pushed:
                self.mc.POP([r.lr.value], cond=cond)
        elif loc.is_vfp_reg():
            assert prev_loc.type == FLOAT, 'trying to load from an \
                incompatible location into a float register'
            # load spilled value into vfp reg
            offset = prev_loc.value
            self.mc.PUSH([r.ip.value], cond=cond)
            pushed = True
            if not check_imm_arg(offset):
                self.mc.gen_load_int(r.ip.value, offset, cond=cond)
                self.mc.SUB_rr(r.ip.value, r.fp.value, r.ip.value, cond=cond)
            else:
                self.mc.SUB_ri(r.ip.value, r.fp.value, offset, cond=cond)
            self.mc.VLDR(loc.value, r.ip.value, cond=cond)
            if pushed:
                self.m

(listing truncated here by the file viewer; see the repository above for the full file)