
/rpython/jit/backend/zarch/opassembler.py

https://bitbucket.org/pypy/pypy/

from rpython.jit.backend.llsupport.jump import remap_frame_layout
from rpython.jit.backend.zarch.arch import (WORD,
        STD_FRAME_SIZE_IN_BYTES)
from rpython.jit.backend.zarch.arch import THREADLOCAL_ADDR_OFFSET
from rpython.jit.backend.zarch.helper.assembler import (gen_emit_cmp_op,
        gen_emit_rr_rp, gen_emit_shift, gen_emit_rr_rh_ri_rp, gen_emit_div_mod)
from rpython.jit.backend.zarch.helper.regalloc import (check_imm,
        check_imm_value)
from rpython.jit.metainterp.history import (ConstInt)
from rpython.jit.backend.zarch.codebuilder import ZARCHGuardToken, InstrBuilder
from rpython.jit.backend.llsupport import symbolic, jitframe
import rpython.jit.backend.zarch.conditions as c
import rpython.jit.backend.zarch.registers as r
import rpython.jit.backend.zarch.locations as l
from rpython.jit.backend.zarch.locations import imm
from rpython.jit.backend.zarch import callbuilder
from rpython.jit.backend.zarch.codebuilder import OverwritingBuilder
from rpython.jit.backend.llsupport.descr import CallDescr
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.metainterp.history import (FLOAT, INT, REF, VOID)
from rpython.jit.metainterp.resoperation import rop
from rpython.rtyper import rclass
from rpython.rtyper.lltypesystem import rstr, rffi, lltype
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rlib.objectmodel import we_are_translated

class IntOpAssembler(object):
    _mixin_ = True

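    # Note (judging by the rr/rh/ri/rp suffixes and the mnemonics): the
    # gen_emit_rr_rh_ri_rp helper picks one of four instruction forms per
    # operand kind: register-register (AGR), 16-bit immediate (AGHI),
    # 32-bit immediate (AGFI), or a memory operand from the pool (AG).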
    emit_int_add = gen_emit_rr_rh_ri_rp('AGR', 'AGHI', 'AGFI', 'AG')
    emit_int_add_ovf = emit_int_add
    emit_nursery_ptr_increment = emit_int_add

    def emit_int_sub(self, op, arglocs, regalloc):
        res, l0, l1 = arglocs
        self.mc.SGRK(res, l0, l1)
    emit_int_sub_ovf = emit_int_sub

    emit_int_mul = gen_emit_rr_rh_ri_rp('MSGR', 'MGHI', 'MSGFI', 'MSG')

    def emit_int_mul_ovf(self, op, arglocs, regalloc):
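        # MLGR computes an unsigned 128-bit product, so the code below
        # branches on the operand signs: if they differ, it multiplies
        # |lq| * |l1| and checks the result against 2**63; if they match,
        # it checks against 2**63 - 1. On overflow it sets the condition
        # code via SPM so that a following guard can test it.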
        lr, lq, l1 = arglocs
        if l1.is_in_pool():
            self.mc.LG(r.SCRATCH, l1)
            l1 = r.SCRATCH
        elif l1.is_imm():
            self.mc.LGFI(r.SCRATCH, l1)
            l1 = r.SCRATCH
        else:
            # we are not allowed to modify l1 if it is not a scratch
            # register, thus copy it here!
            self.mc.LGR(r.SCRATCH, l1)
            l1 = r.SCRATCH
        mc = self.mc

        # check left neg
        jmp_lq_lt_0 = mc.get_relative_pos()
        mc.reserve_cond_jump()                    # CGIJ lq < 0 +-----------+
        jmp_l1_ge_0 = mc.get_relative_pos()       #                         |
        mc.reserve_cond_jump()                    # CGIJ l1 >= 0 -----------|-> (both same sign)
        jmp_lq_pos_l1_neg = mc.get_relative_pos() #                         |
        mc.reserve_cond_jump(short=True)          # BCR any ----------------|-> (xor negative)
        jmp_l1_neg_lq_neg = mc.get_relative_pos() #                         |
        mc.reserve_cond_jump()                    # <-----------------------+
                                                  # CGIJ l1 < 0 -> (both same_sign)

        # (xor negative)
        label_xor_neg = mc.get_relative_pos()
        mc.LPGR(lq, lq)
        mc.LPGR(l1, l1)
        mc.MLGR(lr, l1)
        mc.LGHI(r.SCRATCH, l.imm(-1))
        mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(0), l.imm(0x80 | 0), l.imm(0))
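        # r.SCRATCH now holds 0x8000000000000000 (2**63): the RISBG keeps
        # only bit 0 of the all-ones value and zeroes the remaining bits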
        # is the value greater than 2**63? then an overflow occurred
        jmp_xor_lq_overflow = mc.get_relative_pos()
        mc.reserve_cond_jump() # CLGRJ lq > 0x8000 ... 00 -> (label_overflow)
        jmp_xor_lr_overflow = mc.get_relative_pos()
        mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
        mc.LCGR(lq, lq) # complement the value
        mc.XGR(r.SCRATCH, r.SCRATCH)
        mc.SPM(r.SCRATCH) # 0x80 ... 00 clears the condition code and program mask
        jmp_no_overflow_xor_neg = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)

        # both are positive/negative
        label_both_same_sign = mc.get_relative_pos()
        mc.LPGR(lq, lq)
        mc.LPGR(l1, l1)
        mc.MLGR(lr, l1)
        mc.LGHI(r.SCRATCH, l.imm(-1))
        # 0xff -> shift 0 -> 0xff; setting the MSB at pos 0 to zero -> 0x7f
        mc.RISBG(r.SCRATCH, r.SCRATCH, l.imm(1), l.imm(0x80 | 63), l.imm(0))
        jmp_lq_overflow = mc.get_relative_pos()
        mc.reserve_cond_jump() # CLGRJ lq > 0x7fff ... ff -> (label_overflow)
        jmp_lr_overflow = mc.get_relative_pos()
        mc.reserve_cond_jump() # CLGIJ lr > 0 -> (label_overflow)
        jmp_neither_lqlr_overflow = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True) # BRC any -> (label_end)

        # set overflow!
        label_overflow = mc.get_relative_pos()
        # set bit 34 & 35 -> indicates overflow
        mc.XGR(r.SCRATCH, r.SCRATCH)
        mc.OILH(r.SCRATCH, l.imm(0x3000)) # sets OF
        mc.SPM(r.SCRATCH)

        # no overflow happened
        label_end = mc.get_relative_pos()
        # patch patch patch!!!

        # jmp_lq_lt_0
        pos = jmp_lq_lt_0
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.CGIJ(lq, l.imm(0), c.LT, l.imm(jmp_l1_neg_lq_neg - pos))
        omc.overwrite()
        # jmp_l1_ge_0
        pos = jmp_l1_ge_0
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.CGIJ(l1, l.imm(0), c.GE, l.imm(label_both_same_sign - pos))
        omc.overwrite()
        # jmp_lq_pos_l1_neg
        pos = jmp_lq_pos_l1_neg
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.BRC(c.ANY, l.imm(label_xor_neg - pos))
        omc.overwrite()
        # jmp_l1_neg_lq_neg
        pos = jmp_l1_neg_lq_neg
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.CGIJ(l1, l.imm(0), c.LT, l.imm(label_both_same_sign - pos))
        omc.overwrite()
        # patch jmp_xor_lq_overflow
        pos = jmp_xor_lq_overflow
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - pos))
        omc.overwrite()
        # patch jmp_xor_lr_overflow
        pos = jmp_xor_lr_overflow
        omc = OverwritingBuilder(self.mc, pos, 1)
        omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - pos))
        omc.overwrite()
        # patch jmp_no_overflow_xor_neg
        omc = OverwritingBuilder(self.mc, jmp_no_overflow_xor_neg, 1)
        omc.BRC(c.ANY, l.imm(label_end - jmp_no_overflow_xor_neg))
        omc.overwrite()
        # patch jmp_lq_overflow
        omc = OverwritingBuilder(self.mc, jmp_lq_overflow, 1)
        omc.CLGRJ(lq, r.SCRATCH, c.GT, l.imm(label_overflow - jmp_lq_overflow))
        omc.overwrite()
        # patch jmp_lr_overflow
        omc = OverwritingBuilder(self.mc, jmp_lr_overflow, 1)
        omc.CLGIJ(lr, l.imm(0), c.GT, l.imm(label_overflow - jmp_lr_overflow))
        omc.overwrite()
        # patch jmp_neither_lqlr_overflow
        omc = OverwritingBuilder(self.mc, jmp_neither_lqlr_overflow, 1)
        omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
        omc.overwrite()
    def emit_uint_mul_high(self, op, arglocs, regalloc):
        r0, _, a1 = arglocs
        # _ carries the value, contents of r0 are ignored
        assert not r0.is_imm()
        assert not a1.is_imm()
        if a1.is_core_reg():
            self.mc.MLGR(r0, a1)
        else:
            self.mc.MLG(r0, a1)

    def emit_int_invert(self, op, arglocs, regalloc):
        l0, = arglocs
        assert not l0.is_imm()
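        # ~x == x ^ -1, so XOR against an all-ones register inverts l0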
        self.mc.LGHI(r.SCRATCH, l.imm(-1))
        self.mc.XGR(l0, r.SCRATCH)

    def emit_int_neg(self, op, arglocs, regalloc):
        l0, = arglocs
        self.mc.LCGR(l0, l0)

    def emit_int_signext(self, op, arglocs, regalloc):
        l0, = arglocs
        extend_from = op.getarg(1).getint()
        if extend_from == 1:
            self.mc.LGBR(l0, l0)
        elif extend_from == 2:
            self.mc.LGHR(l0, l0)
        elif extend_from == 4:
            self.mc.LGFR(l0, l0)
        else:
            raise AssertionError(extend_from)

    def emit_int_force_ge_zero(self, op, arglocs, resloc):
        l0, = arglocs
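        # if l0 >= 0, branch over the LGHI below; relative branches are
        # taken relative to the branch instruction's own address, hence
        # the sum of both instruction sizes as the jump offset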
        off = self.mc.CGIJ_byte_count + self.mc.LGHI_byte_count
        self.mc.CGIJ(l0, l.imm(0), c.GE, l.imm(off))
        self.mc.LGHI(l0, l.imm(0))

    def emit_int_is_zero(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.CGHI(l0, l.imm(0))
        self.flush_cc(c.EQ, res)

    def emit_int_is_true(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.CGHI(l0, l.imm(0))
        self.flush_cc(c.NE, res)

    emit_int_and = gen_emit_rr_rp("NGR", "NG")
    emit_int_or = gen_emit_rr_rp("OGR", "OG")
    emit_int_xor = gen_emit_rr_rp("XGR", "XG")

    emit_int_rshift = gen_emit_shift("SRAG")
    emit_int_lshift = gen_emit_shift("SLLG")
    emit_uint_rshift = gen_emit_shift("SRLG")

    emit_int_le = gen_emit_cmp_op(c.LE)
    emit_int_lt = gen_emit_cmp_op(c.LT)
    emit_int_gt = gen_emit_cmp_op(c.GT)
    emit_int_ge = gen_emit_cmp_op(c.GE)
    emit_int_eq = gen_emit_cmp_op(c.EQ)
    emit_int_ne = gen_emit_cmp_op(c.NE)

    emit_ptr_eq = emit_int_eq
    emit_ptr_ne = emit_int_ne

    emit_instance_ptr_eq = emit_ptr_eq
    emit_instance_ptr_ne = emit_ptr_ne

    emit_uint_le = gen_emit_cmp_op(c.LE, signed=False)
    emit_uint_lt = gen_emit_cmp_op(c.LT, signed=False)
    emit_uint_gt = gen_emit_cmp_op(c.GT, signed=False)
    emit_uint_ge = gen_emit_cmp_op(c.GE, signed=False)

class FloatOpAssembler(object):
    _mixin_ = True

    emit_float_add = gen_emit_rr_rp('ADBR', 'ADB')
    emit_float_sub = gen_emit_rr_rp('SDBR', 'SDB')
    emit_float_mul = gen_emit_rr_rp('MDBR', 'MDB')
    emit_float_truediv = gen_emit_rr_rp('DDBR', 'DDB')

    # Support for NaNs: S390X sets the condition code to 0x3 (unordered)
    # whenever any operand is NaN. For float_le and float_ge the
    # unordered bit (bit 3) is not part of the plain condition mask,
    # hence the special FLE/FGE conditions below.
    # e.g. guard_true(nan <= x): jumps 1100 inv => 0011, bit 3 set
    # e.g. guard_false(nan <= x): does not jump 1100, bit 3 not set
    # e.g. guard_true(nan >= nan): jumps 1010 inv => 0101, bit 3 set
    emit_float_lt = gen_emit_cmp_op(c.LT, fp=True)
    emit_float_le = gen_emit_cmp_op(c.FLE, fp=True)
    emit_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
    emit_float_ne = gen_emit_cmp_op(c.NE, fp=True)
    emit_float_gt = gen_emit_cmp_op(c.GT, fp=True)
    emit_float_ge = gen_emit_cmp_op(c.FGE, fp=True)
    def emit_float_neg(self, op, arglocs, regalloc):
        l0, = arglocs
        self.mc.LCDBR(l0, l0)

    def emit_float_abs(self, op, arglocs, regalloc):
        l0, = arglocs
        self.mc.LPDBR(l0, l0)

    def emit_cast_float_to_int(self, op, arglocs, regalloc):
        f0, r0 = arglocs
        self.mc.CGDBR(r0, c.FP_TOWARDS_ZERO, f0)

    def emit_cast_int_to_float(self, op, arglocs, regalloc):
        r0, f0 = arglocs
        self.mc.CDGBR(f0, r0)

    def emit_convert_float_bytes_to_longlong(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.LGDR(res, l0)

    def emit_convert_longlong_bytes_to_float(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.LDGR(res, l0)

class CallOpAssembler(object):
    _mixin_ = True

    def _emit_call(self, op, arglocs, is_call_release_gil=False):
        resloc = arglocs[0]
        func_index = 1 + is_call_release_gil
        adr = arglocs[func_index]
        arglist = arglocs[func_index+1:]
        cb = callbuilder.CallBuilder(self, adr, arglist, resloc, op.getdescr())
        descr = op.getdescr()
        assert isinstance(descr, CallDescr)
        cb.argtypes = descr.get_arg_types()
        cb.restype = descr.get_result_type()
        if is_call_release_gil:
            saveerrloc = arglocs[1]
            assert saveerrloc.is_imm()
            cb.emit_call_release_gil(saveerrloc.value)
        else:
            cb.emit()

    def _genop_call(self, op, arglocs, regalloc):
        oopspecindex = regalloc.get_oopspecindex(op)
        if oopspecindex == EffectInfo.OS_MATH_SQRT:
            return self._emit_math_sqrt(op, arglocs, regalloc)
        if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
            return self._emit_threadlocalref_get(op, arglocs, regalloc)
        self._emit_call(op, arglocs)

    emit_call_i = _genop_call
    emit_call_r = _genop_call
    emit_call_f = _genop_call
    emit_call_n = _genop_call

    def _emit_threadlocalref_get(self, op, arglocs, regalloc):
        [resloc] = arglocs
        offset = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
        calldescr = op.getdescr()
        size = calldescr.get_result_size()
        sign = calldescr.is_result_signed()
        #
        # This loads the stack location THREADLOCAL_OFS into a
        # register, and then reads the word at the given offset.
        # It is only supported if 'translate_support_code' is
        # true; otherwise, the execute_token() was done with a
        # dummy value for the stack location THREADLOCAL_OFS.
        #
        assert self.cpu.translate_support_code
        assert resloc.is_reg()
        assert check_imm_value(offset)
        self.mc.LG(resloc, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
        self._memory_read(resloc, l.addr(offset, resloc), size, sign)

    def _emit_math_sqrt(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.SQDBR(res, l0)

    def _genop_call_may_force(self, op, arglocs, regalloc):
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        self._emit_call(op, arglocs)

    emit_call_may_force_i = _genop_call_may_force
    emit_call_may_force_r = _genop_call_may_force
    emit_call_may_force_f = _genop_call_may_force
    emit_call_may_force_n = _genop_call_may_force

    def _genop_call_release_gil(self, op, arglocs, regalloc):
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        self._emit_call(op, arglocs, is_call_release_gil=True)

    emit_call_release_gil_i = _genop_call_release_gil
    emit_call_release_gil_f = _genop_call_release_gil
    emit_call_release_gil_n = _genop_call_release_gil

    def _store_force_index(self, guard_op):
        assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
                guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
        faildescr = guard_op.getdescr()
        ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
        #
        faildescrindex = self.get_gcref_from_faildescr(faildescr)
        self.load_gcref_into(r.SCRATCH, faildescrindex)
        self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))

    def _find_nearby_operation(self, regalloc, delta):
        return regalloc.operations[regalloc.rm.position + delta]

    _COND_CALL_SAVE_REGS = [r.r11, r.r2, r.r3, r.r4, r.r5]

    def emit_cond_call(self, op, arglocs, regalloc):
        fcond = self.guard_success_cc
        self.guard_success_cc = c.cond_none
        assert fcond.value != c.cond_none.value
        fcond = c.negate(fcond)

        jmp_adr = self.mc.get_relative_pos()
        self.mc.reserve_cond_jump()   # patched later to a relative branch

        # save away r2, r3, r4, r5, r11 into the jitframe
        should_be_saved = [
            reg for reg in self._regalloc.rm.reg_bindings.itervalues()
            if reg in self._COND_CALL_SAVE_REGS]
        self._push_core_regs_to_jitframe(self.mc, should_be_saved)
        self.push_gcmap(self.mc, regalloc.get_gcmap())
        #
        # load the 0-to-4 arguments into these registers, with the address of
        # the function to call into r11
        remap_frame_layout(self, arglocs,
                           [r.r11, r.r2, r.r3, r.r4, r.r5][:len(arglocs)],
                           r.SCRATCH)
        #
        # figure out which variant of cond_call_slowpath to call, and call it
        callee_only = False
        floats = False
        for reg in regalloc.rm.reg_bindings.values():
            if reg not in regalloc.rm.save_around_call_regs:
                break
        else:
            callee_only = True
        if regalloc.fprm.reg_bindings:
            floats = True
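        # four slowpath variants, indexed by floats * 2 + callee_only:
        # whether float registers must be saved, and whether saving the
        # registers that calls may clobber is enough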
        cond_call_adr = self.cond_call_slowpath[floats * 2 + callee_only]
        self.mc.load_imm(r.r14, cond_call_adr)
        self.mc.BASR(r.r14, r.r14)
        # restoring the registers saved above, and doing pop_gcmap(), is left
        # to the cond_call_slowpath helper. We never have any result value.
        relative_target = self.mc.currpos() - jmp_adr
        pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
        pmc.BRCL(fcond, l.imm(relative_target))
        pmc.overwrite()
        # might be overridden again to skip over the following
        # guard_no_exception too
        self.previous_cond_call_jcond = jmp_adr, fcond

class AllocOpAssembler(object):
    _mixin_ = True

    def emit_check_memory_error(self, op, arglocs, regalloc):
        self.propagate_memoryerror_if_reg_is_null(arglocs[0])

    def emit_call_malloc_nursery(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        size_box = op.getarg(0)
        assert isinstance(size_box, ConstInt)
        size = size_box.getint()
        gc_ll_descr = self.cpu.gc_ll_descr
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            size, gcmap)

    def emit_call_malloc_nursery_varsize_frame(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        [sizeloc] = arglocs
        gc_ll_descr = self.cpu.gc_ll_descr
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond_varsize_frame(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            sizeloc, gcmap)

    def emit_call_malloc_nursery_varsize(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        gc_ll_descr = self.cpu.gc_ll_descr
        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
            # for boehm, this function should never be called
            raise Exception("unreachable code")
        [lengthloc] = arglocs
        arraydescr = op.getdescr()
        itemsize = op.getarg(1).getint()
        maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) // itemsize
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond_varsize(
            op.getarg(0).getint(),
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            lengthloc, itemsize, maxlength, gcmap, arraydescr)

    def emit_debug_merge_point(self, op, arglocs, regalloc):
        pass

    emit_jit_debug = emit_debug_merge_point
    emit_keepalive = emit_debug_merge_point

    def emit_enter_portal_frame(self, op, arglocs, regalloc):
        self.enter_portal_frame(op)

    def emit_leave_portal_frame(self, op, arglocs, regalloc):
        self.leave_portal_frame(op)

    def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                                is_frame=False):
        # Write code equivalent to write_barrier() in the GC: it checks
        # a flag in the object at arglocs[0], and if set, it calls a
        # helper piece of assembler. The latter saves registers as needed
        # and calls the function remember_young_pointer() from the GC.
        if we_are_translated():
            cls = self.cpu.gc_ll_descr.has_write_barrier_class()
            assert cls is not None and isinstance(descr, cls)
        #
        card_marking_mask = 0
        mask = descr.jit_wb_if_flag_singlebyte
        if array and descr.jit_wb_cards_set != 0:
            # assumptions the rest of the function depends on:
            assert (descr.jit_wb_cards_set_byteofs ==
                    descr.jit_wb_if_flag_byteofs)
            card_marking_mask = descr.jit_wb_cards_set_singlebyte
        #
        loc_base = arglocs[0]
        assert loc_base.is_reg()
        if is_frame:
            assert loc_base is r.SPP
        assert check_imm_value(descr.jit_wb_if_flag_byteofs)
        mc.LLGC(r.SCRATCH2, l.addr(descr.jit_wb_if_flag_byteofs, loc_base))
        mc.LGR(r.SCRATCH, r.SCRATCH2)
        mc.NILL(r.SCRATCH, l.imm(mask & 0xFF))
        jz_location = mc.get_relative_pos()
        mc.reserve_cond_jump(short=True)  # patched later with 'EQ'

        # for cond_call_gc_wb_array, also add another fast path:
        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
        if card_marking_mask:
            # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
            mc.LGR(r.SCRATCH, r.SCRATCH2)
            mc.NILL(r.SCRATCH, l.imm(card_marking_mask & 0xFF))
            js_location = mc.get_relative_pos()
            mc.reserve_cond_jump()  # patched later with 'NE'
        else:
            js_location = 0

        # Write only a CALL to the helper prepared in advance, passing it as
        # argument the address of the structure we are writing into
        # (the first argument to COND_CALL_GC_WB).
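        # wb_slowpath index: bit 0 set if card marking is used, bit 1 set
        # if float registers must be saved; index 4 is the special variant
        # for writes into the frame object itself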
        helper_num = (card_marking_mask != 0)
        if is_frame:
            helper_num = 4
        elif regalloc.fprm.reg_bindings:
            helper_num += 2
        if self.wb_slowpath[helper_num] == 0:    # tests only
            assert not we_are_translated()
            assert not is_frame
            self.cpu.gc_ll_descr.write_barrier_descr = descr
            self._build_wb_slowpath(card_marking_mask != 0,
                                    bool(regalloc.fprm.reg_bindings))
            assert self.wb_slowpath[helper_num] != 0
        #
        if not is_frame:
            mc.LGR(r.r0, loc_base)    # unusual argument location
        mc.load_imm(r.r14, self.wb_slowpath[helper_num])
        mc.BASR(r.r14, r.r14)

        if card_marking_mask:
            # The helper ends again with a check of the flag in the object.
            # So here, we can simply write again a beq, which will be
            # taken if GCFLAG_CARDS_SET is still not set.
            jns_location = mc.get_relative_pos()
            mc.reserve_cond_jump(short=True)
            #
            # patch the 'NE' above
            currpos = mc.currpos()
            pmc = OverwritingBuilder(mc, js_location, 1)
            pmc.BRCL(c.NE, l.imm(currpos - js_location))
            pmc.overwrite()
            #
            # case GCFLAG_CARDS_SET: emit a few instructions to do
            # directly the card flag setting
            loc_index = arglocs[1]
            if loc_index.is_reg():
                tmp_loc = arglocs[2]
                n = descr.jit_wb_card_page_shift
                assert tmp_loc is not loc_index
                # compute in tmp_loc the byte offset:
                #     tmp_loc = ~(index >> (card_page_shift + 3))
                mc.SRLG(tmp_loc, loc_index, l.addr(n+3))
                # invert the bits of tmp_loc
                # compute in SCRATCH the index of the bit inside the byte:
                #     scratch = (index >> card_page_shift) & 7
                # 0x80 sets the zero flag; it stores 0 into all bits that
                # are not selected
                mc.RISBG(r.SCRATCH, loc_index, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
                mc.LGHI(r.SCRATCH2, l.imm(-1))
                mc.XGR(tmp_loc, r.SCRATCH2)
                # set SCRATCH2 to 1 << r1
                mc.LGHI(r.SCRATCH2, l.imm(1))
                mc.SLLG(r.SCRATCH2, r.SCRATCH2, l.addr(0, r.SCRATCH))
                # set this bit inside the byte of interest
                addr = l.addr(0, loc_base, tmp_loc)
                mc.LLGC(r.SCRATCH, addr)
                mc.OGRK(r.SCRATCH, r.SCRATCH, r.SCRATCH2)
                mc.STCY(r.SCRATCH, addr)
                # done
            else:
                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
                byte_ofs = ~(byte_index >> 3)
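                # the ~ yields a negative offset: the card mark bytes are
                # stored below the object's start address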
                byte_val = 1 << (byte_index & 7)
                assert check_imm_value(byte_ofs, lower_bound=-2**19, upper_bound=2**19-1)
                addr = l.addr(byte_ofs, loc_base)
                mc.LLGC(r.SCRATCH, addr)
                mc.OILL(r.SCRATCH, l.imm(byte_val))
                mc.STCY(r.SCRATCH, addr)
            #
            # patch the beq just above
            currpos = mc.currpos()
            pmc = OverwritingBuilder(mc, jns_location, 1)
            pmc.BRC(c.EQ, l.imm(currpos - jns_location))
            pmc.overwrite()

        # patch the JZ above
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jz_location, 1)
        pmc.BRC(c.EQ, l.imm(currpos - jz_location))
        pmc.overwrite()

    def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc)

    def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc):
        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc,
                                     array=True)

class GuardOpAssembler(object):
    _mixin_ = True

    def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False):
        if is_guard_not_invalidated:
            fcond = c.cond_none
        else:
            fcond = self.guard_success_cc
            self.guard_success_cc = c.cond_none
            assert fcond.value != c.cond_none.value
            fcond = c.negate(fcond)
        token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], fcond)
        token.pos_jump_offset = self.mc.currpos()
        assert token.guard_not_invalidated() == is_guard_not_invalidated
        if not is_guard_not_invalidated:
            self.mc.reserve_guard_branch()    # has to be patched later on
        self.pending_guard_tokens.append(token)

    def build_guard_token(self, op, frame_depth, arglocs, fcond):
        descr = op.getdescr()
        gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE)
        faildescrindex = self.get_gcref_from_faildescr(descr)
        token = ZARCHGuardToken(self.cpu, gcmap, descr, op.getfailargs(),
                                arglocs, op.getopnum(), frame_depth,
                                faildescrindex, fcond)
        #token._pool_offset = self.pool.get_descr_offset(descr)
        return token

    def emit_load_from_gc_table(self, op, arglocs, regalloc):
        resloc, = arglocs
        index = op.getarg(0).getint()
        assert resloc.is_reg()
        self.load_gcref_into(resloc, index)

    def emit_guard_true(self, op, arglocs, regalloc):
        self._emit_guard(op, arglocs)

    def emit_guard_false(self, op, arglocs, regalloc):
        self.guard_success_cc = c.negate(self.guard_success_cc)
        self._emit_guard(op, arglocs)

    def emit_guard_overflow(self, op, arglocs, regalloc):
        self.guard_success_cc = c.OF
        self._emit_guard(op, arglocs)

    def emit_guard_no_overflow(self, op, arglocs, regalloc):
        self.guard_success_cc = c.NO
        self._emit_guard(op, arglocs)

    def emit_guard_value(self, op, arglocs, regalloc):
        l0 = arglocs[0]
        l1 = arglocs[1]
        failargs = arglocs[2:]
        if l0.is_reg():
            if l1.is_imm():
                self.mc.cmp_op(l0, l1, imm=True)
            else:
                self.mc.cmp_op(l0, l1)
        elif l0.is_fp_reg():
            assert l1.is_fp_reg()
            self.mc.cmp_op(l0, l1, fp=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, failargs)

    emit_guard_nonnull = emit_guard_true
    emit_guard_isnull = emit_guard_false

    def emit_guard_class(self, op, arglocs, regalloc):
        self._cmp_guard_class(op, arglocs, regalloc)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])

    def emit_guard_nonnull_class(self, op, arglocs, regalloc):
        self.mc.cmp_op(arglocs[0], l.imm(1), imm=True, signed=False)
        patch_pos = self.mc.currpos()
        self.mc.reserve_cond_jump(short=True)
        self._cmp_guard_class(op, arglocs, regalloc)
        #self.mc.CGRT(r.SCRATCH, r.SCRATCH2, c.NE)
        pmc = OverwritingBuilder(self.mc, patch_pos, 1)
        pmc.BRC(c.LT, l.imm(self.mc.currpos() - patch_pos))
        pmc.overwrite()
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])

    def _cmp_guard_class(self, op, locs, regalloc):
        offset = self.cpu.vtable_offset
        loc_ptr = locs[0]
        loc_classptr = locs[1]
        if offset is not None:
            # could be one instruction shorter, but don't care because
            # it's not this case that is commonly translated
            self.mc.LG(r.SCRATCH, l.addr(offset, loc_ptr))
            self.mc.load_imm(r.SCRATCH2, locs[1].value)
            self.mc.cmp_op(r.SCRATCH, r.SCRATCH2)
        else:
            classptr = loc_classptr.value
            expected_typeid = (self.cpu.gc_ll_descr
                    .get_typeid_from_classptr_if_gcremovetypeptr(classptr))
            self._cmp_guard_gc_type(loc_ptr, expected_typeid)

    def _read_typeid(self, targetreg, loc_ptr):
        # Note that the typeid half-word is at offset 0 on a little-endian
        # machine; it is at offset 2 or 4 on a big-endian machine.
        assert self.cpu.supports_guard_gc_type
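        # s390x is big-endian, so the 32-bit typeid sits in the low half
        # of the first 64-bit word, at byte offset 4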
        self.mc.LGF(targetreg, l.addr(4, loc_ptr))

    def _cmp_guard_gc_type(self, loc_ptr, expected_typeid):
        self._read_typeid(r.SCRATCH2, loc_ptr)
        assert 0 <= expected_typeid <= 0x7fffffff   # 4 bytes are always enough
        # we can handle 4 byte compare immediate
        self.mc.cmp_op(r.SCRATCH2, l.imm(expected_typeid),
                       imm=True, signed=False)

    def emit_guard_gc_type(self, op, arglocs, regalloc):
        self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])

    def emit_guard_is_object(self, op, arglocs, regalloc):
        assert self.cpu.supports_guard_gc_type
        loc_object = arglocs[0]
        # idea: read the typeid, fetch one byte of the field 'infobits' from
        # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
        base_type_info, shift_by, sizeof_ti = (
            self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
        infobits_offset, IS_OBJECT_FLAG = (
            self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
        self._read_typeid(r.SCRATCH2, loc_object)
        self.mc.load_imm(r.SCRATCH, base_type_info + infobits_offset)
        assert shift_by == 0
        self.mc.AGR(r.SCRATCH, r.SCRATCH2)
        self.mc.LLGC(r.SCRATCH2, l.addr(0, r.SCRATCH))  # cannot use r.r0 as index reg
        self.mc.NILL(r.SCRATCH2, l.imm(IS_OBJECT_FLAG & 0xff))
        self.guard_success_cc = c.NE
        self._emit_guard(op, arglocs[1:])

    def emit_guard_subclass(self, op, arglocs, regalloc):
        assert self.cpu.supports_guard_gc_type
        loc_object = arglocs[0]
        loc_check_against_class = arglocs[1]
        offset = self.cpu.vtable_offset
        offset2 = self.cpu.subclassrange_min_offset
        if offset is not None:
            # read this field to get the vtable pointer
            self.mc.LG(r.SCRATCH, l.addr(offset, loc_object))
            # read the vtable's subclassrange_min field
            assert check_imm_value(offset2)
            self.mc.load(r.SCRATCH2, r.SCRATCH, offset2)
        else:
            # read the typeid
            self._read_typeid(r.SCRATCH, loc_object)
            # read the vtable's subclassrange_min field, as a single
            # step with the correct offset
            base_type_info, shift_by, sizeof_ti = (
                self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
            self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2)
            assert shift_by == 0
            # add the index manually; we cannot use r0 in l.addr(...)
            self.mc.AGR(r.SCRATCH, r.SCRATCH2)
            self.mc.load(r.SCRATCH2, r.SCRATCH, 0)
        # get the two bounds to check against
        vtable_ptr = loc_check_against_class.getint()
        vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
        check_min = vtable_ptr.subclassrange_min
        check_max = vtable_ptr.subclassrange_max
        assert check_max > check_min
        check_diff = check_max - check_min - 1
        # right now, a full PyPy uses less than 6000 numbers, so we
        # assert here that they always fit inside 15 bits
        assert 0 <= check_min <= 0x7fff
        assert 0 <= check_diff <= 0xffff
        # check by doing the unsigned comparison (tmp - min) < (max - min)
        self.mc.AGHI(r.SCRATCH2, l.imm(-check_min))
        self.mc.cmp_op(r.SCRATCH2, l.imm(check_diff), imm=True, signed=False)
        # the guard passes if we get a result of "below or equal"
        self.guard_success_cc = c.LE
        self._emit_guard(op, arglocs[2:])

    def emit_guard_not_invalidated(self, op, arglocs, regalloc):
        self._emit_guard(op, arglocs, is_guard_not_invalidated=True)

    def emit_guard_not_forced(self, op, arglocs, regalloc):
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        self.mc.LG(r.SCRATCH, l.addr(ofs, r.SPP))
        self.mc.cmp_op(r.SCRATCH, l.imm(0), imm=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs)

    def emit_guard_not_forced_2(self, op, arglocs, regalloc):
        guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
                                             c.cond_none)
        self._finish_gcmap = guard_token.gcmap
        self._store_force_index(op)
        self.store_info_on_descr(0, guard_token)

    def emit_guard_exception(self, op, arglocs, regalloc):
        loc, resloc = arglocs[:2]
        failargs = arglocs[2:]
        mc = self.mc
        mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
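        # both the exception type and value words are addressed off the
        # single base register loaded above, using their fixed distance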
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert check_imm_value(diff)

        mc.LG(r.SCRATCH2, l.addr(diff, r.SCRATCH))
        mc.cmp_op(r.SCRATCH2, loc)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, failargs)

        if resloc:
            mc.load(resloc, r.SCRATCH, 0)
        mc.LGHI(r.SCRATCH2, l.imm(0))
        mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
        mc.STG(r.SCRATCH2, l.addr(diff, r.SCRATCH))

    def emit_save_exc_class(self, op, arglocs, regalloc):
        [resloc] = arglocs
        diff = self.mc.load_imm_plus(r.SCRATCH, self.cpu.pos_exception())
        self.mc.load(resloc, r.SCRATCH, diff)

    def emit_save_exception(self, op, arglocs, regalloc):
        [resloc] = arglocs
        self._store_and_reset_exception(self.mc, resloc)

    def emit_restore_exception(self, op, arglocs, regalloc):
        self._restore_exception(self.mc, arglocs[1], arglocs[0])

    def emit_guard_no_exception(self, op, arglocs, regalloc):
        self.mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
        self.mc.LG(r.SCRATCH2, l.addr(0, r.SCRATCH))
        self.mc.cmp_op(r.SCRATCH2, l.imm(0), imm=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs)
        # If the previous operation was a COND_CALL, overwrite its conditional
        # jump to jump over this GUARD_NO_EXCEPTION as well, if we can
        if self._find_nearby_operation(regalloc, -1).getopnum() == rop.COND_CALL:
            jmp_adr, fcond = self.previous_cond_call_jcond
            relative_target = self.mc.currpos() - jmp_adr
            pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
            pmc.BRCL(fcond, l.imm(relative_target))
            pmc.overwrite()

class MemoryOpAssembler(object):
    _mixin_ = True

    def _memory_read(self, result_loc, source_loc, size, sign):
        # result_loc and source_loc are locations;
        # size and sign are plain (unwrapped) values
        if size == 8:
            if result_loc.is_fp_reg():
                self.mc.LDY(result_loc, source_loc)
            else:
                self.mc.LG(result_loc, source_loc)
        elif size == 4:
            if sign:
                self.mc.LGF(result_loc, source_loc)
            else:
                self.mc.LLGF(result_loc, source_loc)
        elif size == 2:
            if sign:
                self.mc.LGH(result_loc, source_loc)
            else:
                self.mc.LLGH(result_loc, source_loc)
        elif size == 1:
            if sign:
                self.mc.LGB(result_loc, source_loc)
            else:
                self.mc.LLGC(result_loc, source_loc)
        else:
            assert 0, "size not supported"

    def _memory_store(self, value_loc, addr_loc, size):
        if size.value == 8:
            if value_loc.is_fp_reg():
                self.mc.STDY(value_loc, addr_loc)
            else:
                self.mc.STG(value_loc, addr_loc)
        elif size.value == 4:
            self.mc.STY(value_loc, addr_loc)
        elif size.value == 2:
            self.mc.STHY(value_loc, addr_loc)
        elif size.value == 1:
            self.mc.STCY(value_loc, addr_loc)
        else:
            assert 0, "size not supported"

    def _emit_gc_load(self, op, arglocs, regalloc):
        result_loc, base_loc, index_loc, size_loc, sign_loc = arglocs
        addr_loc = self._load_address(base_loc, index_loc, l.imm0)
        self._memory_read(result_loc, addr_loc, size_loc.value, sign_loc.value)

    emit_gc_load_i = _emit_gc_load
    emit_gc_load_f = _emit_gc_load
    emit_gc_load_r = _emit_gc_load

    def _emit_gc_load_indexed(self, op, arglocs, regalloc):
        result_loc, base_loc, index_loc, offset_loc, size_loc, sign_loc = arglocs
        addr_loc = self._load_address(base_loc, index_loc, offset_loc)
        self._memory_read(result_loc, addr_loc, size_loc.value, sign_loc.value)

    emit_gc_load_indexed_i = _emit_gc_load_indexed
    emit_gc_load_indexed_f = _emit_gc_load_indexed
    emit_gc_load_indexed_r = _emit_gc_load_indexed

    def emit_gc_store(self, op, arglocs, regalloc):
        (base_loc, index_loc, value_loc, size_loc) = arglocs
        addr_loc = self._load_address(base_loc, index_loc, l.imm0)
        self._memory_store(value_loc, addr_loc, size_loc)

    def emit_gc_store_indexed(self, op, arglocs, regalloc):
        (base_loc, index_loc, value_loc, offset_loc, size_loc) = arglocs
        addr_loc = self._load_address(base_loc, index_loc, offset_loc)
        self._memory_store(value_loc, addr_loc, size_loc)

    def _load_address(self, base_loc, index_loc, offset_imm):
        assert offset_imm.is_imm()
        offset = offset_imm.value
        if index_loc.is_imm():
            offset = index_loc.value + offset
            if self._mem_offset_supported(offset):
                addr_loc = l.addr(offset, base_loc)
            else:
                self.mc.load_imm(r.SCRATCH, offset)
                addr_loc = l.addr(0, base_loc, r.SCRATCH)
        else:
            assert self._mem_offset_supported(offset)
            addr_loc = l.addr(offset, base_loc, index_loc)
        return addr_loc

    def _mem_offset_supported(self, value):
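        # long-displacement instructions take a 20-bit signed displacement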
        return -2**19 <= value < 2**19

    def emit_copystrcontent(self, op, arglocs, regalloc):
        self._emit_copycontent(arglocs, is_unicode=False)

    def emit_copyunicodecontent(self, op, arglocs, regalloc):
        self._emit_copycontent(arglocs, is_unicode=True)

    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
        if src_ofs.is_imm():
            value = src_ofs.value << scale
            if check_imm_value(value):
                self.mc.AGHIK(dst, src_ptr, l.imm(value))
            else:
                # it is fine to use r1 here, because it will
                # only hold a value before invoking the memory copy
                self.mc.load_imm(r.SCRATCH, value)
                self.mc.AGRK(dst, src_ptr, r.SCRATCH)
        elif scale == 0:
            self.mc.AGRK(dst, src_ptr, src_ofs)
        else:
            self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale))
            self.mc.AGRK(dst, src_ptr, r.SCRATCH)

    def _emit_copycontent(self, arglocs, is_unicode):
        [src_ptr_loc, dst_ptr_loc,
         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs

        if is_unicode:
            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
                                        self.cpu.translate_support_code)
            if itemsize == 2: scale = 1
            elif itemsize == 4: scale = 2
            else: raise AssertionError
        else:
            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
                                        self.cpu.translate_support_code)
            assert itemsize == 1
            basesize -= 1     # for the extra null character
            scale = 0

        # src and src_len are tmp registers
        src = src_ptr_loc
        src_len = r.odd_reg(src)
        dst = r.r0
        dst_len = r.r1
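        # MVCLE operates on even/odd register pairs: (dst, dst_len) and
        # (src, src_len); src_ptr_loc being an even register is presumably
        # ensured by the register allocator, r.odd_reg gives its partner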
        self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
        self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)

        if length_loc.is_imm():
            length = length_loc.getint()
            self.mc.load_imm(dst_len, length << scale)
        else:
            if scale > 0:
                self.mc.SLLG(dst_len, length_loc, l.addr(scale))
            else:
                self.mc.LGR(dst_len, length_loc)
        # ensure that src_len is as long as dst_len, otherwise
        # padding bytes are written to dst
        self.mc.LGR(src_len, dst_len)

        self.mc.AGHI(src, l.imm(basesize))
        self.mc.AGHI(dst, l.imm(basesize))

        # s390x can copy memory directly with a single hardware
        # instruction (MVCLE)!
        # 0xB8 means we might reference dst later
        self.mc.MVCLE(dst, src, l.addr(0xB8))
        # NOTE: the CPU may stop MVCLE before everything is moved
        # (partial completion), thus it is looped until all bytes
        # are copied!
        self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))

    def emit_zero_array(self, op, arglocs, regalloc):
        base_loc, startindex_loc, length_loc, \
            ofs_loc, itemsize_loc = arglocs

        if ofs_loc.is_imm():
            assert check_imm_value(ofs_loc.value)
            self.mc.AGHI(base_loc, ofs_loc)
        else:
            self.mc.AGR(base_loc, ofs_loc)
        if startindex_loc.is_imm():
            assert check_imm_value(startindex_loc.value)
            self.mc.AGHI(base_loc, startindex_loc)
        else:
            self.mc.AGR(base_loc, startindex_loc)
        assert not length_loc.is_imm()
        # contents of r0 do not matter because r1 is zero, so
        # no copying takes place
        self.mc.XGR(r.r1, r.r1)

        assert base_loc.is_even()
        assert length_loc.value == base_loc.value + 1

        # s390x has memset directly as a hardware instruction!!
        # it needs 5 registers allocated:
        # dst = rX, dst len = rX+1 (ensured by the regalloc)
        # src = r0, src len = r1
        self.mc.MVCLE(base_loc, r.r0, l.addr(0))
        # NOTE: the CPU may stop MVCLE before everything is zeroed
        # (partial completion), thus it is looped until all bytes
        # are written!
        self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))

class ForceOpAssembler(object):
    _mixin_ = True

    def emit_force_token(self, op, arglocs, regalloc):
        res_loc = arglocs[0]
        self.mc.LGR(res_loc, r.SPP)

    def _genop_call_assembler(self, op, arglocs, regalloc):
        if len(arglocs) == 3:
            [result_loc, argloc, vloc] = arglocs
        else:
            [result_loc, argloc] = arglocs
            vloc = imm(0)
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        # 'result_loc' is either r2, f0 or None
        self.call_assembler(op, argloc, vloc, result_loc, r.r2)
        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))

    emit_call_assembler_i = _genop_call_assembler
    emit_call_assembler_r = _genop_call_assembler
    emit_call_assembler_f = _genop_call_assembler
    emit_call_assembler_n = _genop_call_assembler

    imm = staticmethod(imm)   # for call_assembler()

    def _call_assembler_emit_call(self, addr, argloc, _):
        self.regalloc_mov(argloc, r.r2)
        self.mc.LG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))

        cb = callbuilder.CallBuilder(self, addr, [r.r2, r.r3], r.r2, None)
        cb.emit()

    def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
        cb = callbuilder.CallBuilder(self, addr, arglocs, result_loc, None)
        cb.emit()

    def _call_assembler_check_descr(self, value, tmploc):
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        self.mc.LG(r.SCRATCH, l.addr(ofs, r.r2))
        if check_imm_value(value):
            self.mc.cmp_op(r.SCRATCH, l.imm(value), imm=True)
        else:
            self.mc.load_imm(r.SCRATCH2, value)
            self.mc.cmp_op(r.SCRATCH, r.SCRATCH2, imm=False)
        jump_if_eq = self.mc.currpos()
        self.mc.trap()             # patched later
        self.mc.write('\x00' * 4)  # patched later
        return jump_if_eq

    def _call_assembler_patch_je(self, result_loc, je_location):
        jump_to_done = self.mc.currpos()
        self.mc.trap()             # patched later
        self.mc.write('\x00' * 4)  # patched later
        #
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, je_location, 1)
        pmc.BRCL(c.EQ, l.imm(currpos - je_location))
        pmc.overwrite()
        #
        return jump_to_done

    def _call_assembler_load_result(self, op, result_loc):
        if op.type != VOID:
            # load the return value from the dead frame's value index 0
            kind = op.type
            descr = self.cpu.getarraydescr_for_frame(kind)
            ofs = self.cpu.unpack_arraydescr(descr)
            if kind == FLOAT:
                assert result_loc is r.f0
                self.mc.LD(r.f0, l.addr(ofs, r.r2))
            else:
                assert result_loc is r.r2
                self.mc.LG(r.r2, l.addr(ofs, r.r2))

    def _call_assembler_patch_jmp(self, jmp_location):
        currpos = self.mc.currpos()
        pmc = OverwritingBuilder(self.mc, jmp_location, 1)
        pmc.BRCL(c.ANY, l.imm(currpos - jmp_location))
        pmc.overwrite()

    def redirect_call_assembler(self, oldlooptoken, newlooptoken):
        # some minimal sanity checking
        old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
        new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
        assert old_nbargs == new_nbargs
        oldadr = oldlooptoken._ll_function_addr
        target = newlooptoken._ll_function_addr
        # copy frame-info data
        baseofs = self.cpu.get_baseofs_of_frame_field()
        newlooptoken.compiled_loop_token.update_frame_info(
            oldlooptoken.compiled_loop_token, baseofs)
        # we overwrite the instructions at the old _ll_function_addr
        # to start with a JMP to the new _ll_function_addr.
        mc = InstrBuilder()
        mc.load_imm(r.SCRATCH, target)
        mc.BCR(c.ANY, r.SCRATCH)
        mc.copy_to_raw_memory(oldadr)

class MiscOpAssembler(object):
    _mixin_ = True

    def _genop_same_as(self, op, arglocs, regalloc):
        argloc, resloc = arglocs
        if argloc is not resloc:
            self.regalloc_mov(argloc, resloc)

    emit_same_as_i = _genop_same_as
    emit_same_as_r = _genop_same_as
    emit_same_as_f = _genop_same_as
    emit_cast_ptr_to_int = _genop_same_as
    emit_cast_int_to_ptr = _genop_same_as

    def emit_increment_debug_counter(self, op, arglocs, regalloc):
        addr, scratch = arglocs
        self.mc.LG(scratch, l.addr(0, addr))
        self.mc.AGHI(scratch, l.imm(1))
        self.mc.STG(scratch, l.addr(0, addr))

    def emit_debug_merge_point(self, op, arglocs, regalloc):
        pass

    emit_jit_debug = emit_debug_merge_point
    emit_keepalive = emit_debug_merge_point

    def emit_enter_portal_frame(self, op, arglocs, regalloc):
        self.enter_portal_frame(op)

    def emit_leave_portal_frame(self, op, arglocs, regalloc):
        self.leave_portal_frame(op)


class OpAssembler(IntOpAssembler, FloatOpAssembler,
                  GuardOpAssembler, CallOpAssembler,
                  AllocOpAssembler, MemoryOpAssembler,
                  MiscOpAssembler, ForceOpAssembler):
    _mixin_ = True