PageRenderTime 25ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/backend/ppc/callbuilder.py

https://bitbucket.org/pypy/pypy/
Python | 278 lines | 184 code | 34 blank | 60 comment | 36 complexity | f3557405ecf5b3b25893f1529fe5ea6f MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from rpython.jit.backend.ppc.arch import IS_PPC_64, WORD, PARAM_SAVE_AREA_OFFSET
  2. from rpython.jit.backend.ppc.arch import THREADLOCAL_ADDR_OFFSET
  3. import rpython.jit.backend.ppc.register as r
  4. from rpython.jit.metainterp.history import INT, FLOAT
  5. from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
  6. from rpython.jit.backend.ppc.jump import remap_frame_layout
  7. from rpython.rlib.objectmodel import we_are_translated
  8. from rpython.jit.backend.llsupport import llerrno
  9. from rpython.rtyper.lltypesystem import rffi
  10. def follow_jump(addr):
  11. # xxx implement me
  12. return addr
  13. class CallBuilder(AbstractCallBuilder):
  14. GPR_ARGS = [r.r3, r.r4, r.r5, r.r6, r.r7, r.r8, r.r9, r.r10]
  15. FPR_ARGS = r.MANAGED_FP_REGS
  16. assert FPR_ARGS == [r.f1, r.f2, r.f3, r.f4, r.f5, r.f6, r.f7,
  17. r.f8, r.f9, r.f10, r.f11, r.f12, r.f13]
  18. RSHADOWPTR = r.RCS1
  19. RFASTGILPTR = r.RCS2
  20. RSHADOWOLD = r.RCS3
  21. def __init__(self, assembler, fnloc, arglocs, resloc):
  22. AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
  23. resloc, restype=INT, ressize=None)
  24. def prepare_arguments(self):
  25. assert IS_PPC_64
  26. self.subtracted_to_sp = 0
  27. # Prepare arguments. Note that this follows the convention where
  28. # a prototype is in scope, and doesn't take "..." arguments. If
  29. # you were to call a C function with a "..." argument with cffi,
  30. # it would not go there but instead via libffi. If you pretend
  31. # instead that it takes fixed arguments, then it would arrive here
  32. # but the convention is bogus for floating-point arguments. (And,
  33. # to add to the mess, at least CPython's ctypes cannot be used
  34. # to call a "..." function with floating-point arguments. As I
  35. # guess that it's a problem with libffi, it means PyPy inherits
  36. # the same problem.)
  37. arglocs = self.arglocs
  38. num_args = len(arglocs)
  39. non_float_locs = []
  40. non_float_regs = []
  41. float_locs = []
  42. for i in range(min(num_args, 8)):
  43. if arglocs[i].type != FLOAT:
  44. non_float_locs.append(arglocs[i])
  45. non_float_regs.append(self.GPR_ARGS[i])
  46. else:
  47. float_locs.append(arglocs[i])
  48. # now 'non_float_locs' and 'float_locs' together contain the
  49. # locations of the first 8 arguments
  50. if num_args > 8:
  51. # We need to make a larger PPC stack frame, as shown on the
  52. # picture in arch.py. It needs to be 48 bytes + 8 * num_args.
  53. # The new SP back chain location should point to the top of
  54. # the whole stack frame, i.e. jumping over both the existing
  55. # fixed-sise part and the new variable-sized part.
  56. base = PARAM_SAVE_AREA_OFFSET
  57. varsize = base + 8 * num_args
  58. varsize = (varsize + 15) & ~15 # align
  59. self.mc.load(r.SCRATCH2.value, r.SP.value, 0) # SP back chain
  60. self.mc.store_update(r.SCRATCH2.value, r.SP.value, -varsize)
  61. self.subtracted_to_sp = varsize
  62. # In this variable-sized part, only the arguments from the 8th
  63. # one need to be written, starting at SP + 112
  64. for n in range(8, num_args):
  65. loc = arglocs[n]
  66. if loc.type != FLOAT:
  67. # after the 8th argument, a non-float location is
  68. # always stored in the stack
  69. if loc.is_reg():
  70. src = loc
  71. else:
  72. src = r.r2
  73. self.asm.regalloc_mov(loc, src)
  74. self.mc.std(src.value, r.SP.value, base + 8 * n)
  75. else:
  76. # the first 13 floating-point arguments are all passed
  77. # in the registers f1 to f13, independently on their
  78. # index in the complete list of arguments
  79. if len(float_locs) < len(self.FPR_ARGS):
  80. float_locs.append(loc)
  81. else:
  82. if loc.is_fp_reg():
  83. src = loc
  84. else:
  85. src = r.FP_SCRATCH
  86. self.asm.regalloc_mov(loc, src)
  87. self.mc.stfd(src.value, r.SP.value, base + 8 * n)
  88. # We must also copy fnloc into FNREG
  89. non_float_locs.append(self.fnloc)
  90. non_float_regs.append(self.mc.RAW_CALL_REG)
  91. if float_locs:
  92. assert len(float_locs) <= len(self.FPR_ARGS)
  93. remap_frame_layout(self.asm, float_locs,
  94. self.FPR_ARGS[:len(float_locs)],
  95. r.FP_SCRATCH)
  96. remap_frame_layout(self.asm, non_float_locs, non_float_regs,
  97. r.SCRATCH)
  98. def push_gcmap(self):
  99. # we push *now* the gcmap, describing the status of GC registers
  100. # after the rearrangements done just before, ignoring the return
  101. # value r3, if necessary
  102. assert not self.is_call_release_gil
  103. noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
  104. gcmap = self.asm._regalloc.get_gcmap([r.r3], noregs=noregs)
  105. self.asm.push_gcmap(self.mc, gcmap, store=True)
  106. def pop_gcmap(self):
  107. ssreg = None
  108. gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
  109. if gcrootmap:
  110. if gcrootmap.is_shadow_stack and self.is_call_release_gil:
  111. # in this mode, RSHADOWOLD happens to contain the shadowstack
  112. # top at this point, so reuse it instead of loading it again
  113. ssreg = self.RSHADOWOLD
  114. self.asm._reload_frame_if_necessary(self.mc, shadowstack_reg=ssreg)
  115. def emit_raw_call(self):
  116. self.mc.raw_call()
  117. def restore_stack_pointer(self):
  118. if self.subtracted_to_sp != 0:
  119. self.mc.addi(r.SP.value, r.SP.value, self.subtracted_to_sp)
  120. def load_result(self):
  121. assert (self.resloc is None or
  122. self.resloc is r.r3 or
  123. self.resloc is r.f1)
  124. def call_releasegil_addr_and_move_real_arguments(self, fastgil):
  125. assert self.is_call_release_gil
  126. RSHADOWPTR = self.RSHADOWPTR
  127. RFASTGILPTR = self.RFASTGILPTR
  128. RSHADOWOLD = self.RSHADOWOLD
  129. #
  130. # Save this thread's shadowstack pointer into r29, for later comparison
  131. gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
  132. if gcrootmap:
  133. if gcrootmap.is_shadow_stack:
  134. rst = gcrootmap.get_root_stack_top_addr()
  135. self.mc.load_imm(RSHADOWPTR, rst)
  136. self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0)
  137. #
  138. # change 'rpy_fastgil' to 0 (it should be non-zero right now)
  139. self.mc.load_imm(RFASTGILPTR, fastgil)
  140. self.mc.li(r.r0.value, 0)
  141. self.mc.lwsync()
  142. self.mc.std(r.r0.value, RFASTGILPTR.value, 0)
  143. #
  144. if not we_are_translated(): # for testing: we should not access
  145. self.mc.addi(r.SPP.value, r.SPP.value, 1) # r31 any more
  146. def move_real_result_and_call_reacqgil_addr(self, fastgil):
  147. from rpython.jit.backend.ppc.codebuilder import OverwritingBuilder
  148. # try to reacquire the lock. The following registers are still
  149. # valid from before the call:
  150. RSHADOWPTR = self.RSHADOWPTR # r30: &root_stack_top
  151. RFASTGILPTR = self.RFASTGILPTR # r29: &fastgil
  152. RSHADOWOLD = self.RSHADOWOLD # r28: previous val of root_stack_top
  153. # Equivalent of 'r10 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
  154. self.mc.li(r.r9.value, 1)
  155. retry_label = self.mc.currpos()
  156. self.mc.ldarx(r.r10.value, 0, RFASTGILPTR.value) # load the lock value
  157. self.mc.stdcxx(r.r9.value, 0, RFASTGILPTR.value) # try to claim lock
  158. self.mc.bc(6, 2, retry_label - self.mc.currpos()) # retry if failed
  159. self.mc.isync()
  160. self.mc.cmpdi(0, r.r10.value, 0)
  161. b1_location = self.mc.currpos()
  162. self.mc.trap() # boehm: patched with a BEQ: jump if r10 is zero
  163. # shadowstack: patched with BNE instead
  164. if self.asm.cpu.gc_ll_descr.gcrootmap:
  165. # When doing a call_release_gil with shadowstack, there
  166. # is the risk that the 'rpy_fastgil' was free but the
  167. # current shadowstack can be the one of a different
  168. # thread. So here we check if the shadowstack pointer
  169. # is still the same as before we released the GIL (saved
  170. # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
  171. self.mc.load(r.r9.value, RSHADOWPTR.value, 0)
  172. self.mc.cmpdi(0, r.r9.value, RSHADOWOLD.value)
  173. bne_location = b1_location
  174. b1_location = self.mc.currpos()
  175. self.mc.trap()
  176. # revert the rpy_fastgil acquired above, so that the
  177. # general 'reacqgil_addr' below can acquire it again...
  178. # (here, r10 is conveniently zero)
  179. self.mc.std(r.r10.value, RFASTGILPTR.value, 0)
  180. pmc = OverwritingBuilder(self.mc, bne_location, 1)
  181. pmc.bne(self.mc.currpos() - bne_location)
  182. pmc.overwrite()
  183. #
  184. # Yes, we need to call the reacqgil() function.
  185. # save the result we just got
  186. RSAVEDRES = RFASTGILPTR # can reuse this reg here
  187. reg = self.resloc
  188. if reg is not None:
  189. if reg.is_core_reg():
  190. self.mc.mr(RSAVEDRES.value, reg.value)
  191. elif reg.is_fp_reg():
  192. self.mc.stfd(reg.value, r.SP.value,
  193. PARAM_SAVE_AREA_OFFSET + 7 * WORD)
  194. self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
  195. self.mc.raw_call()
  196. if reg is not None:
  197. if reg.is_core_reg():
  198. self.mc.mr(reg.value, RSAVEDRES.value)
  199. elif reg.is_fp_reg():
  200. self.mc.lfd(reg.value, r.SP.value,
  201. PARAM_SAVE_AREA_OFFSET + 7 * WORD)
  202. # replace b1_location with BEQ(here)
  203. pmc = OverwritingBuilder(self.mc, b1_location, 1)
  204. pmc.beq(self.mc.currpos() - b1_location)
  205. pmc.overwrite()
  206. if not we_are_translated(): # for testing: now we can access
  207. self.mc.addi(r.SPP.value, r.SPP.value, -1) # r31 again
  208. def write_real_errno(self, save_err):
  209. if save_err & rffi.RFFI_READSAVED_ERRNO:
  210. # Just before a call, read '*_errno' and write it into the
  211. # real 'errno'. A lot of registers are free here, notably
  212. # r11 and r0.
  213. if save_err & rffi.RFFI_ALT_ERRNO:
  214. rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
  215. else:
  216. rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
  217. p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
  218. self.mc.ld(r.r11.value, r.SP.value,
  219. THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
  220. self.mc.lwz(r.r0.value, r.r11.value, rpy_errno)
  221. self.mc.ld(r.r11.value, r.r11.value, p_errno)
  222. self.mc.stw(r.r0.value, r.r11.value, 0)
  223. elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
  224. # Same, but write zero.
  225. p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
  226. self.mc.ld(r.r11.value, r.SP.value,
  227. THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
  228. self.mc.ld(r.r11.value, r.r11.value, p_errno)
  229. self.mc.li(r.r0.value, 0)
  230. self.mc.stw(r.r0.value, r.r11.value, 0)
  231. def read_real_errno(self, save_err):
  232. if save_err & rffi.RFFI_SAVE_ERRNO:
  233. # Just after a call, read the real 'errno' and save a copy of
  234. # it inside our thread-local '*_errno'. Registers r4-r10
  235. # never contain anything after the call.
  236. if save_err & rffi.RFFI_ALT_ERRNO:
  237. rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
  238. else:
  239. rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
  240. p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
  241. self.mc.ld(r.r9.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
  242. self.mc.ld(r.r10.value, r.r9.value, p_errno)
  243. self.mc.lwz(r.r10.value, r.r10.value, 0)
  244. self.mc.stw(r.r10.value, r.r9.value, rpy_errno)