
/rpython/jit/backend/zarch/callbuilder.py

https://bitbucket.org/pypy/pypy/
from rpython.jit.backend.zarch.arch import WORD
from rpython.jit.backend.zarch.arch import (THREADLOCAL_ADDR_OFFSET,
    STD_FRAME_SIZE_IN_BYTES)
import rpython.jit.backend.zarch.locations as l
import rpython.jit.backend.zarch.registers as r
import rpython.jit.backend.zarch.conditions as c
from rpython.jit.metainterp.history import INT, FLOAT
from rpython.jit.backend.llsupport.callbuilder import AbstractCallBuilder
from rpython.jit.backend.llsupport.jump import remap_frame_layout
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.backend.llsupport import llerrno
from rpython.rtyper.lltypesystem import rffi
from rpython.jit.backend.llsupport.descr import CallDescr
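
# Extra stack space reserved for a call_release_gil: room for the six
# registers r8-r13 that call_releasegil_addr_and_move_real_arguments
# spills with STMG and move_real_result_and_call_reacqgil_addr reloads
# with LMG (see below).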
CALL_RELEASE_GIL_STACK_OFF = 6*WORD


class CallBuilder(AbstractCallBuilder):
    # s390x ELF ABI: integer and pointer arguments are passed in r2-r6,
    # float arguments in f0, f2, f4 and f6
    GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6]
    FPR_ARGS = [r.f0, r.f2, r.f4, r.f6]

    RSHADOWOLD  = r.r8
    RSHADOWPTR  = r.r9
    RFASTGILPTR = r.r10

    def __init__(self, assembler, fnloc, arglocs, resloc, calldescr):
        type = INT
        size = WORD
        self.ressign = True
        if calldescr is not None and isinstance(calldescr, CallDescr):
            type = calldescr.get_result_type()
            size = calldescr.get_result_size()
            self.ressign = calldescr.is_result_signed()

        AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                     resloc, restype=type, ressize=size)

    def prepare_arguments(self):
        self.subtracted_to_sp = 0

        # Prepare arguments.  Note that this follows the convention where
        # a prototype is in scope, and doesn't take "..." arguments.  If
        # you were to call a C function with a "..." argument with cffi,
        # it would not go there but instead via libffi.  If you pretend
        # instead that it takes fixed arguments, then it would arrive here,
        # but the convention is bogus for floating-point arguments.  (And,
        # to add to the mess, at least CPython's ctypes cannot be used
        # to call a "..." function with floating-point arguments.  As I
        # guess that it's a problem with libffi, it means PyPy inherits
        # the same problem.)
        arglocs = self.arglocs
        num_args = len(arglocs)

        max_gpr_in_reg = 5
        max_fpr_in_reg = 4

        non_float_locs = []
        non_float_regs = []
        float_locs = []

        # the IBM zarch manual states:
        # """
        # A function will be passed a frame on the runtime stack by the
        # function which called it, and may allocate a new stack frame.
        # A new stack frame is required if the called function will in
        # turn call further functions (which must be passed the address
        # of the new frame).  This stack grows downwards from high
        # addresses.
        # """

        gpr_regs = 0
        fpr_regs = 0
        stack_params = []
        for i in range(num_args):
            loc = arglocs[i]
            if not arglocs[i].is_float():
                if gpr_regs < max_gpr_in_reg:
                    non_float_locs.append(arglocs[i])
                    non_float_regs.append(self.GPR_ARGS[gpr_regs])
                    gpr_regs += 1
                else:
                    stack_params.append(i)
            else:
                if fpr_regs < max_fpr_in_reg:
                    float_locs.append(arglocs[i])
                    fpr_regs += 1
                else:
                    stack_params.append(i)

        self.subtracted_to_sp += len(stack_params) * WORD
        base = len(stack_params) * WORD
        if self.is_call_release_gil:
            self.subtracted_to_sp += CALL_RELEASE_GIL_STACK_OFF
            base += CALL_RELEASE_GIL_STACK_OFF
        for idx, i in enumerate(stack_params):
            loc = arglocs[i]
            offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx
            if loc.type == FLOAT:
                if loc.is_fp_reg():
                    src = loc
                else:
                    src = r.FP_SCRATCH
                    self.asm.regalloc_mov(loc, src)
                self.mc.STDY(src, l.addr(offset, r.SP))
            else:
                if loc.is_core_reg():
                    src = loc
                else:
                    src = r.SCRATCH
                    self.asm.regalloc_mov(loc, src)
                self.mc.STG(src, l.addr(offset, r.SP))

        # We must also copy fnloc into FNREG
        non_float_locs.append(self.fnloc)
        non_float_regs.append(r.RETURN)

        if float_locs:
            assert len(float_locs) <= len(self.FPR_ARGS)
            remap_frame_layout(self.asm, float_locs,
                               self.FPR_ARGS[:len(float_locs)],
                               r.FP_SCRATCH)

        remap_frame_layout(self.asm, non_float_locs, non_float_regs,
                           r.SCRATCH)
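
    # Worked example (illustrative, not from the original source): a call
    # with seven integer arguments puts the first five into r2..r6 and
    # leaves stack_params == [5, 6].  Without release-gil, base ==
    # 2*WORD == 16, so argument 5 is stored at
    # STD_FRAME_SIZE_IN_BYTES - 16 and argument 6 at
    # STD_FRAME_SIZE_IN_BYTES - 8.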

    def push_gcmap(self):
        # we push *now* the gcmap, describing the status of GC registers
        # after the rearrangements done just before, ignoring the return
        # value r2, if necessary
        assert not self.is_call_release_gil
        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
        gcmap = self.asm._regalloc.get_gcmap([r.r2], noregs=noregs)
        self.asm.push_gcmap(self.mc, gcmap, store=True)

    def pop_gcmap(self):
        ssreg = None
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            if gcrootmap.is_shadow_stack and self.is_call_release_gil:
                # in this mode, RSHADOWOLD happens to contain the shadowstack
                # top at this point, so reuse it instead of loading it again.
                # RSHADOWOLD is moved into the scratch reg just before
                # restoring r8 (see move_real_result_and_call_reacqgil_addr)
                ssreg = r.SCRATCH
        self.asm._reload_frame_if_necessary(self.mc, shadowstack_reg=ssreg)

    def emit_raw_call(self):
        # if extra stack space was reserved (subtracted_to_sp != 0),
        # copy the SP back chain into the new frame and move the stack
        # pointer down before emitting the call itself
        if self.subtracted_to_sp != 0:
            # rewrite the back chain
            self.mc.LG(r.SCRATCH, l.addr(0, r.SP))
            self.mc.STG(r.SCRATCH, l.addr(-self.subtracted_to_sp, r.SP))
            self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
        self.mc.raw_call()
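
    # Frame layout sketch (illustrative): the s390x convention keeps the
    # caller's SP, the "back chain", at offset 0 of the current frame.
    # With spilled arguments the sequence above produces:
    #
    #     before:  SP  -> [back chain | ...caller frame...]
    #     after:   SP' = SP - subtracted_to_sp
    #              SP' -> [back chain | spilled args | ...caller frame...]
    #
    # so the chain of frames stays walkable across the call.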

    def restore_stack_pointer(self):
        if self.subtracted_to_sp != 0:
            self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))

    def load_result(self):
        assert (self.resloc is None or
                self.resloc is r.GPR_RETURN or
                self.resloc is r.FPR_RETURN)
        #
        if self.restype == 'i' and self.ressize != WORD:
            # we must do this ourselves: libffi (s390x impl) will not
            # return a sanely zero/sign-extended 64 bit value.  A fix for
            # this was rejected (Jan. 2016), so it will not be updated
            # any time soon...
            if self.ressign:
                # sign extend!
                if self.ressize == 1: self.mc.LGBR(r.r2, r.r2)
                elif self.ressize == 2: self.mc.LGHR(r.r2, r.r2)
                elif self.ressize == 4: self.mc.LGFR(r.r2, r.r2)
                else:
                    assert 0, "cannot sign extend size %d" % self.ressize
            else:
                # zero extend!
                if self.ressize == 1: self.mc.LLGCR(r.r2, r.r2)
                elif self.ressize == 2: self.mc.LLGHR(r.r2, r.r2)
                elif self.ressize == 4: self.mc.LLGFR(r.r2, r.r2)
                else:
                    assert 0, "cannot zero extend size %d" % self.ressize
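
    # Example (illustrative): for an external function returning a C
    # 'char', ressize == 1 and the callee may leave garbage in the upper
    # 56 bits of r2; LGBR (signed) or LLGCR (unsigned) widens the low
    # byte to a full 64 bit value before the JIT consumes it.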

    def call_releasegil_addr_and_move_real_arguments(self, fastgil):
        assert self.is_call_release_gil
        RSHADOWOLD = self.RSHADOWOLD
        RSHADOWPTR = self.RSHADOWPTR
        RFASTGILPTR = self.RFASTGILPTR
        #
        # save r8-r13 in the slots reserved by CALL_RELEASE_GIL_STACK_OFF
        pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
        self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
        #
        # Save this thread's shadowstack pointer into r8, for later comparison
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            if gcrootmap.is_shadow_stack:
                rst = gcrootmap.get_root_stack_top_addr()
                self.mc.load_imm(RSHADOWPTR, rst)
                self.mc.load(RSHADOWOLD, RSHADOWPTR, 0)
        #
        # change 'rpy_fastgil' to 0 (it should be non-zero right now)
        self.mc.load_imm(RFASTGILPTR, fastgil)
        self.mc.XGR(r.SCRATCH, r.SCRATCH)
        # zarch is sequentially consistent, so no extra fence is needed
        self.mc.STG(r.SCRATCH, l.addr(0, RFASTGILPTR))
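
    # In effect (an illustrative sketch): save root_stack_top into r8 for
    # the later identity check, then do 'rpy_fastgil = 0' to release the
    # GIL; per the comment above, z/Architecture stores are sequentially
    # consistent, so no explicit memory fence is emitted.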

    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.zarch.codebuilder import OverwritingBuilder

        # try to reacquire the lock.  The following registers are still
        # valid from before the call:
        RSHADOWOLD = self.RSHADOWOLD    # r8: previous val of root_stack_top
        RSHADOWPTR = self.RSHADOWPTR    # r9: &root_stack_top
        RFASTGILPTR = self.RFASTGILPTR  # r10: &fastgil

        # Equivalent of 'r13 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
        self.mc.LGHI(r.SCRATCH, l.imm(1))
        self.mc.LG(r.r13, l.addr(0, RFASTGILPTR))
        retry_label = self.mc.currpos()
        self.mc.LGR(r.r14, r.r13)
        self.mc.CSG(r.r13, r.SCRATCH, l.addr(0, RFASTGILPTR))      # try to claim the lock
        self.mc.BRC(c.LT, l.imm(retry_label - self.mc.currpos()))  # retry if failed
        # CSG performs a serialization; zarch is sequentially consistent,
        # so no further fence is needed here

        self.mc.CGHI(r.r14, l.imm0)
        b1_location = self.mc.currpos()
        # boehm: patched with a BEQ: jump if r14 (the old value of
        # rpy_fastgil) is zero, i.e. if we got the lock on the fast path
        # shadowstack: patched with BNE instead
        self.mc.reserve_cond_jump()

        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
            self.mc.load(r.SCRATCH, RSHADOWPTR, 0)
            self.mc.CGR(r.SCRATCH, RSHADOWOLD)
            bne_location = b1_location
            b1_location = self.mc.currpos()
            self.mc.reserve_cond_jump()

            # revert the rpy_fastgil acquired above, so that the
            # general 'reacqgil_addr' below can acquire it again...
            # (here, r14 is conveniently zero)
            self.mc.STG(r.r14, l.addr(0, RFASTGILPTR))

            pmc = OverwritingBuilder(self.mc, bne_location, 1)
            pmc.BRCL(c.NE, l.imm(self.mc.currpos() - bne_location))
            pmc.overwrite()
        #
        # Yes, we need to call the reacqgil() function.
        # save the result we just got
        RSAVEDRES = RFASTGILPTR     # can reuse this reg here
        reg = self.resloc
        PARAM_SAVE_AREA_OFFSET = 0
        if reg is not None:
            # save 1 word below the stack pointer
            if reg.is_core_reg():
                self.mc.LGR(RSAVEDRES, reg)
            elif reg.is_fp_reg():
                self.mc.STD(reg, l.addr(16*WORD, r.SP))
        # r8-r13 live on the stack and must NOT be overwritten;
        # restore_stack_pointer has already added subtracted_to_sp back
        # to SP, so lower SP again around this helper call
        self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP))
        self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr)
        self.mc.raw_call()
        self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP))
        if reg is not None:
            if reg.is_core_reg():
                self.mc.LGR(reg, RSAVEDRES)
            elif reg.is_fp_reg():
                self.mc.LD(reg, l.addr(16*WORD, r.SP))

        # replace b1_location with BEQ(here)
        pmc = OverwritingBuilder(self.mc, b1_location, 1)
        pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location))
        pmc.overwrite()

        if gcrootmap:
            if gcrootmap.is_shadow_stack and self.is_call_release_gil:
                # pop_gcmap expects the old shadowstack top in the scratch
                # reg; move it there before r8 is restored just below
                self.mc.LGR(r.SCRATCH, RSHADOWOLD)
        # restore r8-r13 from the reserved frame slots
        pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
        self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
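
    # The fast/slow paths above, as C-like pseudocode (an illustrative
    # sketch, not part of the original source):
    #
    #     old = __sync_lock_test_and_set(&rpy_fastgil, 1);
    #     if (old == 0) {
    #         if (!shadowstack || root_stack_top == saved_top)
    #             goto done;           /* fast path: GIL reacquired */
    #         rpy_fastgil = 0;         /* foreign shadowstack: undo the */
    #     }                            /* speculative acquire */
    #     reacqgil_addr();             /* slow path: blocking reacquire */
    # done: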

    def write_real_errno(self, save_err):
        # r11 is saved in call_releasegil_addr_and_move_real_arguments,
        # thus can be used freely here!
        if save_err & rffi.RFFI_READSAVED_ERRNO:
            # Just before a call, read '*_errno' and write it into the
            # real 'errno'.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            self.mc.LG(r.r11, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
            self.mc.LGF(r.SCRATCH2, l.addr(rpy_errno, r.r11))
            self.mc.LG(r.r11, l.addr(p_errno, r.r11))
            self.mc.STY(r.SCRATCH2, l.addr(0, r.r11))
        elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
            # Same, but write zero.
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            self.mc.LG(r.r11, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
            self.mc.LG(r.r11, l.addr(p_errno, r.r11))
            self.mc.LGHI(r.SCRATCH, l.imm(0))
            self.mc.STY(r.SCRATCH, l.addr(0, r.r11))

    def read_real_errno(self, save_err):
        if save_err & rffi.RFFI_SAVE_ERRNO:
            # Just after a call, read the real 'errno' and save a copy of
            # it inside our thread-local '*_errno'.  Registers r3-r6
            # never contain anything after the call.
            if save_err & rffi.RFFI_ALT_ERRNO:
                rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
            else:
                rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
            p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
            self.mc.LG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
            self.mc.LG(r.r4, l.addr(p_errno, r.r3))
            self.mc.LGF(r.r4, l.addr(0, r.r4))
            self.mc.STY(r.r4, l.addr(rpy_errno, r.r3))
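
    # Usage note (an illustrative assumption, not from this file): these
    # two hooks are driven by the 'save_err' flags given to
    # rffi.llexternal(), e.g.
    #
    #     c_write = rffi.llexternal('write',
    #                               [rffi.INT, rffi.VOIDP, rffi.SIZE_T],
    #                               rffi.SSIZE_T,
    #                               save_err=rffi.RFFI_SAVE_ERRNO)
    #
    # after which the errno of the call can be read back with
    # rposix.get_saved_errno().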