PageRenderTime 54ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/backend/arm/callbuilder.py

https://bitbucket.org/pypy/pypy/
Python | 506 lines | 371 code | 47 blank | 88 comment | 100 complexity | b796d903463ac0610d0ee298e3d8108e MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.rlib.clibffi import FFI_DEFAULT_ABI
  2. from rpython.rlib.objectmodel import we_are_translated
  3. from rpython.jit.metainterp.history import INT, FLOAT, REF
  4. from rpython.jit.backend.arm.arch import WORD
  5. from rpython.jit.backend.arm import registers as r
  6. from rpython.jit.backend.arm import conditions as c
  7. from rpython.jit.backend.arm.locations import RawSPStackLocation
  8. from rpython.jit.backend.arm.jump import remap_frame_layout
  9. from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
  10. from rpython.jit.backend.arm.helper.assembler import count_reg_args
  11. from rpython.jit.backend.arm.helper.assembler import saved_registers
  12. from rpython.jit.backend.arm.helper.regalloc import check_imm_arg
  13. from rpython.jit.backend.arm.codebuilder import OverwritingBuilder
  14. from rpython.jit.backend.llsupport import llerrno
  15. from rpython.rtyper.lltypesystem import rffi
class ARMCallbuilder(AbstractCallBuilder):
    """ARM-specific call emitter, shared by the soft- and hard-float ABIs.

    Provides the pieces of a call sequence common to both ABIs: gcmap
    push/pop, the raw BL/BLX, outgoing-argument stack management, the
    call_release_gil fast path (GIL release/reacquire around the call),
    and save/restore of the real 'errno'.  Subclasses implement the
    ABI-specific argument and result placement.
    """
    def __init__(self, assembler, fnloc, arglocs,
                 resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
        # 'ressigned' is accepted for interface compatibility but not
        # forwarded here; NOTE(review): the 'self.ressign' read by
        # load_result() is presumably set up by AbstractCallBuilder —
        # confirm in llsupport/callbuilder.py.
        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                     resloc, restype, ressize)
        # number of bytes currently reserved on the machine stack for
        # outgoing arguments; undone by restore_stack_pointer()
        self.current_sp = 0

    def push_gcmap(self):
        """Store the gcmap describing live GC registers before the call."""
        assert not self.is_call_release_gil
        # we push *now* the gcmap, describing the status of GC registers
        # after the rearrangements done just above, ignoring the return
        # value eax, if necessary
        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
        gcmap = self.asm._regalloc.get_gcmap([r.r0], noregs=noregs)
        self.asm.push_gcmap(self.mc, gcmap, store=True)

    def pop_gcmap(self):
        """Drop the gcmap pushed above, reloading the frame if a GC moved it."""
        self.asm._reload_frame_if_necessary(self.mc)
        self.asm.pop_gcmap(self.mc)

    def emit_raw_call(self):
        """Emit the actual call instruction (BL for an immediate target,
        BLX through a core register otherwise)."""
        # the actual call
        if self.fnloc.is_imm():
            self.mc.BL(self.fnloc.value)
            return
        # --self.fnloc.is_stack() is always remapped to r4 here
        assert self.fnloc.is_core_reg()
        self.mc.BLX(self.fnloc.value)

    def restore_stack_pointer(self):
        """Pop the stack space reserved by _push_stack_args(), if any."""
        # readjust the sp in case we passed some args on the stack
        assert self.current_sp % 8 == 0  # sanity check
        if self.current_sp != 0:
            self._adjust_sp(self.current_sp)
        self.current_sp = 0

    def _push_stack_args(self, stack_args, on_stack):
        """Reserve 'on_stack' bytes below sp and copy 'stack_args' there.

        'stack_args' may contain None entries, which are alignment holes
        of one word each; 'on_stack' must keep the sp 8-byte aligned.
        """
        assert on_stack % 8 == 0
        if on_stack == 0:
            return
        self._adjust_sp(-on_stack)
        self.current_sp = on_stack
        ofs = 0
        for i, arg in enumerate(stack_args):
            if arg is not None:
                sp_loc = RawSPStackLocation(ofs, arg.type)
                self.asm.regalloc_mov(arg, sp_loc)
                ofs += sp_loc.width
            else:  # alignment word
                ofs += WORD

    def _adjust_sp(self, n):
        # adjust the current stack pointer by n bytes: n > 0 releases
        # stack space (ADD), n < 0 reserves it (SUB); large offsets that
        # do not fit an immediate go through the ip scratch register
        if n > 0:
            if check_imm_arg(n):
                self.mc.ADD_ri(r.sp.value, r.sp.value, n)
            else:
                self.mc.gen_load_int(r.ip.value, n)
                self.mc.ADD_rr(r.sp.value, r.sp.value, r.ip.value)
        elif n < 0:
            n = abs(n)
            if check_imm_arg(n):
                self.mc.SUB_ri(r.sp.value, r.sp.value, n)
            else:
                self.mc.gen_load_int(r.ip.value, n)
                self.mc.SUB_rr(r.sp.value, r.sp.value, r.ip.value)

    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
        """Release the fastgil just before the call (call_release_gil only).

        Leaves r5 == &root_stack_top, r6 == fastgil and r7 == the saved
        shadowstack top, which move_real_result_and_call_reacqgil_addr()
        relies on after the call returns.
        """
        assert self.is_call_release_gil
        assert not self.asm._is_asmgcc()
        # Save this thread's shadowstack pointer into r7, for later comparison
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.gen_load_int(r.r5.value, rst)
            self.mc.LDR_ri(r.r7.value, r.r5.value)
        # change 'rpy_fastgil' to 0 (it should be non-zero right now)
        if self.asm.cpu.cpuinfo.arch_version >= 7:
            self.mc.DMB()   # barrier: order prior stores before the release
        self.mc.gen_load_int(r.r6.value, fastgil)
        self.mc.MOV_ri(r.ip.value, 0)
        self.mc.STR_ri(r.ip.value, r.r6.value)
        if not we_are_translated():                    # for testing: we should not access
            self.mc.ADD_ri(r.fp.value, r.fp.value, 1)  # fp any more

    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        """Try the fast path to reacquire the GIL after the call; fall
        back to calling reacqgil_addr (saving the call result) otherwise."""
        # try to reacquire the lock.  The registers r5 to r7 are still
        # valid from before the call:
        #     r5 == &root_stack_top
        #     r6 == fastgil
        #     r7 == previous value of root_stack_top
        self.mc.LDREX(r.r3.value, r.r6.value)          # load the lock value
        self.mc.MOV_ri(r.ip.value, 1)
        self.mc.CMP_ri(r.r3.value, 0)                  # is the lock free?
        self.mc.STREX(r.r3.value, r.ip.value, r.r6.value, c=c.EQ)
                                                       # try to claim the lock
        self.mc.CMP_ri(r.r3.value, 0, cond=c.EQ)       # did this succeed?
        if self.asm.cpu.cpuinfo.arch_version >= 7:
            self.mc.DMB()
        # the success of the lock acquisition is defined by
        # 'EQ is true', or equivalently by 'r3 == 0'.
        #
        if self.asm.cpu.gc_ll_descr.gcrootmap:
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'r7'), and if not, we fall back to 'reacqgil_addr'.
            self.mc.LDR_ri(r.ip.value, r.r5.value, cond=c.EQ)
            self.mc.CMP_rr(r.ip.value, r.r7.value, cond=c.EQ)
            b1_location = self.mc.currpos()
            self.mc.BKPT()                       # placeholder, patched to BEQ below
            # there are two cases here: either EQ was false from
            # the beginning, or EQ was true at first but the CMP
            # made it false.  In the second case we need to
            # release the fastgil here.  We know which case it is
            # by checking again r3.
            self.mc.CMP_ri(r.r3.value, 0)
            self.mc.STR_ri(r.r3.value, r.r6.value, cond=c.EQ)
        else:
            b1_location = self.mc.currpos()
            self.mc.BKPT()                       # placeholder, patched to BEQ below
        #
        # save the result we just got
        gpr_to_save, vfp_to_save = self.get_result_locs()
        with saved_registers(self.mc, gpr_to_save, vfp_to_save):
            self.mc.BL(self.asm.reacqgil_addr)
        # replace b1_location with B(here, c.EQ)
        pmc = OverwritingBuilder(self.mc, b1_location, WORD)
        pmc.B_offs(self.mc.currpos(), c.EQ)
        if not we_are_translated():                    # for testing: now we can access
            self.mc.SUB_ri(r.fp.value, r.fp.value, 1)  # fp again

    def get_result_locs(self):
        # ABI-specific: which registers hold the call result
        # (core regs, vfp regs); implemented by subclasses
        raise NotImplementedError

    def _ensure_result_bit_extension(self, resloc, size, signed):
        """Sign- or zero-extend a sub-word integer result in 'resloc'
        to a full 32-bit word, according to 'size' (bytes) and 'signed'."""
        if size == 4:
            return
        if size == 1:
            if not signed:  # unsigned char: mask to the low byte
                self.mc.AND_ri(resloc.value, resloc.value, 0xFF)
            else:           # signed char: shift up, arithmetic-shift down
                self.mc.LSL_ri(resloc.value, resloc.value, 24)
                self.mc.ASR_ri(resloc.value, resloc.value, 24)
        elif size == 2:
            if not signed:  # unsigned short: logical shift clears the top
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.LSR_ri(resloc.value, resloc.value, 16)
            else:           # signed short
                self.mc.LSL_ri(resloc.value, resloc.value, 16)
                self.mc.ASR_ri(resloc.value, resloc.value, 16)

    def write_real_errno(self, save_err):
        """Before the call: copy the thread-local '*_errno' into the real
        'errno' (or zero it), depending on the 'save_err' flags."""
        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.  The r0-r3 registers contain arguments to the
            # future call; the r5-r7 registers contain various stuff.
            # We still have r8-r12.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            # note: self.current_sp compensates for the outgoing-args
            # area already pushed below the saved threadlocal slot
            self.mc.LDR_ri(r.r9.value, r.sp.value,
                           self.asm.saved_threadlocal_addr + self.current_sp)
            self.mc.LDR_ri(r.ip.value, r.r9.value, p_errno)
            self.mc.LDR_ri(r.r9.value, r.r9.value, rpy_errno)
            self.mc.STR_ri(r.r9.value, r.ip.value)
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            self.mc.LDR_ri(r.r9.value, r.sp.value,
                           self.asm.saved_threadlocal_addr + self.current_sp)
            self.mc.LDR_ri(r.ip.value, r.r9.value, p_errno)
            self.mc.MOV_ri(r.r9.value, 0)
            self.mc.STR_ri(r.r9.value, r.ip.value)

    def read_real_errno(self, save_err):
        """After the call: copy the real 'errno' back into the
        thread-local '*_errno', if 'save_err' requests it."""
        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real 'errno' and save a copy of
            # it inside our thread-local '*_errno'.  Registers r8-r12
            # are unused here, and registers r2-r3 never contain anything
            # after the call.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            # no self.current_sp offset here — presumably the sp was
            # already restored at this point; NOTE(review): confirm
            # against the caller's sequencing
            self.mc.LDR_ri(r.r3.value, r.sp.value,
                           self.asm.saved_threadlocal_addr)
            self.mc.LDR_ri(r.ip.value, r.r3.value, p_errno)
            self.mc.LDR_ri(r.ip.value, r.ip.value, 0)
            self.mc.STR_ri(r.ip.value, r.r3.value, rpy_errno)
class SoftFloatCallBuilder(ARMCallbuilder):
    """Call builder for the soft-float ABI: float arguments and results
    travel in core register pairs (r0/r1 etc.), never in VFP registers."""
    # XXX Maybe we could kill this class and unify the remaining two
    # XXX classes, by carefully checking if all methods here are doing
    # XXX the exact same thing as the methods from HardFloatCallBuilder,
    # XXX but simply forcing all BoxFloat arguments to be longlongs
    # XXX (i.e. ignoring 'f' in favour of 'L'), and the same with
    # XXX single-float arguments (ignoring 'S' in favour of 'i');
    # XXX and the same for the return value.

    def get_result_locs(self):
        # floats come back in the r0/r1 pair under this ABI
        if self.resloc is None:
            return [], []
        if self.resloc.is_vfp_reg():
            return [r.r0, r.r1], []
        assert self.resloc.is_core_reg()
        return [r.r0], []

    def load_result(self):
        # ensure the result is wellformed and stored in the correct location
        resloc = self.resloc
        if resloc is None:
            return
        if resloc.is_vfp_reg():
            # move result to the allocated register
            self.asm.mov_to_vfp_loc(r.r0, r.r1, resloc)
        elif resloc.is_core_reg():
            # move result to the allocated register
            if resloc is not r.r0:
                self.asm.mov_loc_loc(r.r0, resloc)
            self._ensure_result_bit_extension(resloc,
                                              self.ressize, self.ressign)

    def _collect_and_push_stack_args(self, arglocs):
        """Push every argument past the ones that fit in r0-r3 onto the
        stack, inserting None alignment words so that each float (which
        occupies two words) starts at an even word offset."""
        n_args = len(arglocs)
        reg_args = count_reg_args(arglocs)
        # all arguments past the 4th go on the stack
        # first we need to prepare the list so it stays aligned
        stack_args = []
        count = 0
        on_stack = 0
        if n_args > reg_args:
            for i in range(reg_args, n_args):
                arg = arglocs[i]
                if arg.type != FLOAT:
                    count += 1
                    on_stack += 1
                else:
                    on_stack += 2
                    if count % 2 != 0:
                        # odd word offset: realign before the float
                        stack_args.append(None)
                        count = 0
                        on_stack += 1
                stack_args.append(arg)
            if count % 2 != 0:
                # keep the total 8-byte aligned
                on_stack += 1
                stack_args.append(None)
        if on_stack > 0:
            self._push_stack_args(stack_args, on_stack*WORD)

    def prepare_arguments(self):
        """Move arguments into r0-r3 (floats in aligned register pairs)
        and push the rest onto the stack."""
        arglocs = self.arglocs
        reg_args = count_reg_args(arglocs)
        self._collect_and_push_stack_args(arglocs)
        # collect variables that need to go in registers and the registers they
        # will be stored in
        num = 0
        count = 0
        non_float_locs = []
        non_float_regs = []
        float_locs = []
        for i in range(reg_args):
            arg = arglocs[i]
            if arg.type == FLOAT and count % 2 != 0:
                # floats must start in an even-numbered register pair
                num += 1
                count = 0
            reg = r.caller_resp[num]
            if arg.type == FLOAT:
                float_locs.append((arg, reg))
            else:
                non_float_locs.append(arg)
                non_float_regs.append(reg)
            if arg.type == FLOAT:
                num += 2      # a float consumes two core registers
            else:
                num += 1
                count += 1
        # Check that the address of the function we want to call is not
        # currently stored in one of the registers used to pass the arguments
        # or on the stack, which we can not access later
        # If this happens to be the case we remap the register to r4 and use r4
        # to call the function
        if not self.fnloc.is_imm():
            non_float_locs.append(self.fnloc)
            non_float_regs.append(r.r4)
            self.fnloc = r.r4
        # remap values stored in core registers
        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
        for loc, reg in float_locs:
            # split the 64-bit value across 'reg' and the next core register
            self.asm.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
class HardFloatCallBuilder(ARMCallbuilder):
    """Call builder for the hard-float (VFP) ABI: doubles go in d0-d7,
    single floats in s0-s15, 'L' longlongs in core register pairs."""

    # next free 64-bit VFP argument register index (into vfp_argument_regs)
    next_arg_vfp = 0
    # next free 32-bit single-float register index (into svfp_argument_regs);
    # the two counters share the same physical registers (sN overlaps dN//2)
    next_arg_svfp = 0

    def get_next_vfp(self, tp):
        """Return the next free VFP register for a 'f' (double) or 'S'
        (single float) argument, or None once the registers run out."""
        assert tp in 'fS'
        if tp == 'f':
            # 64bit double
            i = max(self.next_arg_vfp, (self.next_arg_svfp + 1) >> 1)
            if i >= len(r.vfp_argument_regs):
                self.next_arg_svfp = 1000    # stop that sequence too
                return None
            self.next_arg_vfp = i + 1
            return r.vfp_argument_regs[i]
        else:
            # 32bit float
            i = self.next_arg_svfp
            if not (i & 1):  # if i is even
                i = max(i, self.next_arg_vfp << 1)
            if i >= len(r.svfp_argument_regs):
                return None
            self.next_arg_svfp = i + 1
            return r.svfp_argument_regs[i]

    def prepare_arguments(self):
        """Distribute arguments between core registers, VFP registers and
        the stack, according to the hard-float calling convention."""
        non_float_locs = []
        non_float_regs = []
        float_locs = []
        float_regs = []
        stack_args = []
        singlefloats = None
        # bit 0 / bit 1: a longlong was staged in d8 / d9 and must be
        # moved to r0-r1 / r2-r3 at the very end
        longlong_mask = 0
        arglocs = self.arglocs
        argtypes = self.argtypes
        r_register_count = 0
        on_stack = 0
        for i in range(len(arglocs)):
            argtype = INT
            if i < len(argtypes) and argtypes[i] == 'S':
                argtype = argtypes[i]
            arg = arglocs[i]
            if arg.is_float():
                if i < len(argtypes) and argtypes[i] == 'L':
                    # A longlong argument.  It uses two regular argument
                    # positions, but aligned to an even number.  This is
                    # a bit strange, but it is the case even for registers:
                    # it can be in r0-r1 or in r2-r3 but not in r1-r2.
                    assert arg.is_float()
                    if r_register_count == 0:
                        # will temporarily load the register into d8
                        float_locs.append(arg)
                        float_regs.append(r.d8)
                        longlong_mask |= 1
                        r_register_count = 2
                        continue
                    elif r_register_count <= 2:
                        # will temporarily load the register into d9
                        float_locs.append(arg)
                        float_regs.append(r.d9)
                        longlong_mask |= 2
                        r_register_count = 4
                        continue
                    elif r_register_count == 3:
                        # skip r3 so the pair would start even; but all
                        # core regs are now gone, fall through to stack
                        r_register_count = 4
                else:
                    # A 64-bit float argument.  Goes into the next free v#
                    # register, or if none, to the stack aligned to an
                    # even number of words.
                    argtype = FLOAT
                    reg = self.get_next_vfp(argtype)
                    if reg:
                        float_locs.append(arg)
                        assert reg not in float_regs
                        float_regs.append(reg)
                        continue
                # float or longlong argument that needs to go on the stack
                if on_stack & 1:    # odd: realign
                    stack_args.append(None)
                    on_stack += 1
                stack_args.append(arg)
                on_stack += 2
            elif argtype == 'S':
                # Singlefloat (32-bit) argument.  Goes into the next free
                # v# register, or if none, to the stack in a single word.
                if singlefloats is None:
                    singlefloats = []
                tgt = self.get_next_vfp(argtype)
                if tgt:
                    singlefloats.append((arg, tgt))
                else:  # Singlefloat argument that needs to go on the stack
                       # treated the same as a regular core register argument
                    stack_args.append(arg)
                    on_stack += 1
            else:
                # Regular one-word argument.  Goes into the next register
                # free from the list r0, r1, r2, r3, or to the stack.
                if r_register_count < len(r.argument_regs):
                    reg = r.argument_regs[r_register_count]
                    r_register_count += 1
                    non_float_locs.append(arg)
                    non_float_regs.append(reg)
                else:  # non-float argument that needs to go on the stack
                    stack_args.append(arg)
                    on_stack += 1
        # align the stack
        if on_stack & 1:    # odd: realign
            stack_args.append(None)
            on_stack += 1
        self._push_stack_args(stack_args, on_stack*WORD)
        # Check that the address of the function we want to call is not
        # currently stored in one of the registers used to pass the arguments
        # or on the stack, which we can not access later
        # If this happens to be the case we remap the register to r4 and use r4
        # to call the function
        if not self.fnloc.is_imm():
            non_float_locs.append(self.fnloc)
            non_float_regs.append(r.r4)
            self.fnloc = r.r4
        # remap values stored in vfp registers
        remap_frame_layout(self.asm, float_locs, float_regs, r.vfp_ip)
        if singlefloats:
            for src, dest in singlefloats:
                if src.is_float():
                    assert 0, 'unsupported case'
                if src.is_stack():
                    # use special VLDR for 32bit
                    self.asm.regalloc_mov(src, r.ip)
                    src = r.ip
                if src.is_imm():
                    self.mc.gen_load_int(r.ip.value, src.value)
                    src = r.ip
                if src.is_core_reg():
                    self.mc.VMOV_cs(dest.value, src.value)
        # remap values stored in core registers
        remap_frame_layout(self.asm, non_float_locs, non_float_regs, r.ip)
        # finally move the staged longlongs from d8/d9 into their
        # core register pairs (after the core-register remap above,
        # which would otherwise clobber them)
        if longlong_mask & 1:
            self.mc.FMRRD(r.r0.value, r.r1.value, r.d8.value)
        if longlong_mask & 2:
            self.mc.FMRRD(r.r2.value, r.r3.value, r.d9.value)

    def load_result(self):
        """Move the call result from its ABI location into self.resloc
        and normalize sub-word integer results."""
        resloc = self.resloc
        if self.restype == 'S':
            # single float comes back in s0
            self.mc.VMOV_sc(resloc.value, r.s0.value)
        elif self.restype == 'L':
            # longlong comes back in r0/r1; pack into the vfp reg
            assert resloc.is_vfp_reg()
            self.mc.FMDRR(resloc.value, r.r0.value, r.r1.value)
        # ensure the result is wellformed and stored in the correct location
        if resloc is not None and resloc.is_core_reg():
            self._ensure_result_bit_extension(resloc,
                                              self.ressize, self.ressign)

    def get_result_locs(self):
        if self.resloc is None:
            return [], []
        if self.resloc.is_vfp_reg():
            if self.restype == 'L':      # long long
                return [r.r0, r.r1], []
            else:
                return [], [r.d0]
        assert self.resloc.is_core_reg()
        return [r.r0], []
  452. def get_callbuilder(cpu, assembler, fnloc, arglocs,
  453. resloc=r.r0, restype=INT, ressize=WORD, ressigned=True):
  454. if cpu.cpuinfo.hf_abi:
  455. return HardFloatCallBuilder(assembler, fnloc, arglocs, resloc,
  456. restype, ressize, ressigned)
  457. else:
  458. return SoftFloatCallBuilder(assembler, fnloc, arglocs, resloc,
  459. restype, ressize, ressigned)