
/rpython/jit/backend/ppc/opassembler.py

https://bitbucket.org/pypy/pypy/


from rpython.jit.backend.ppc.helper.assembler import gen_emit_cmp_op
from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
import rpython.jit.backend.ppc.condition as c
import rpython.jit.backend.ppc.register as r
from rpython.jit.backend.ppc.locations import imm
from rpython.jit.backend.ppc.locations import imm as make_imm_loc
from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
                                          MAX_REG_PARAMS, MAX_FREG_PARAMS,
                                          PARAM_SAVE_AREA_OFFSET,
                                          THREADLOCAL_ADDR_OFFSET,
                                          IS_BIG_ENDIAN)
from rpython.jit.metainterp.history import (JitCellToken, TargetToken,
                                            AbstractFailDescr, FLOAT, INT, REF,
                                            ConstInt, VOID)
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.backend.ppc.helper.assembler import (Saved_Volatiles)
from rpython.jit.backend.ppc.jump import remap_frame_layout
from rpython.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg,
                                                 PPCBuilder, PPCGuardToken)
from rpython.jit.backend.ppc.regalloc import TempPtr, TempInt
from rpython.jit.backend.llsupport import symbolic, jitframe
from rpython.jit.backend.llsupport.descr import CallDescr
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.rtyper.lltypesystem import rstr, rffi, lltype
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper import rclass
from rpython.jit.metainterp.resoperation import rop
from rpython.jit.codewriter.effectinfo import EffectInfo
from rpython.jit.backend.ppc import callbuilder
from rpython.rlib.rarithmetic import r_uint


class IntOpAssembler(object):
    _mixin_ = True
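
    # The *OpAssembler classes in this file are RPython mixins (note the
    # `_mixin_` flag): they are meant to be combined into the backend's main
    # assembler class.  Every emit_<OPNAME> method receives the resoperation
    # `op`, the locations `arglocs` picked by the register allocator, and
    # the register allocator `regalloc` itself, and appends machine code to
    # self.mc.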
    def emit_int_add(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        assert not l0.is_imm()
        if l1.is_imm():
            self.mc.addi(res.value, l0.value, l1.value)
        else:
            self.mc.add(res.value, l0.value, l1.value)

    emit_nursery_ptr_increment = emit_int_add

    def emit_int_sub(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        assert not l0.is_imm()
        if l1.is_imm():
            self.mc.subi(res.value, l0.value, l1.value)
        else:
            self.mc.sub(res.value, l0.value, l1.value)

    def emit_int_mul(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        assert not l0.is_imm()
        if l1.is_imm():
            self.mc.mulli(res.value, l0.value, l1.value)
        elif IS_PPC_32:
            self.mc.mullw(res.value, l0.value, l1.value)
        else:
            self.mc.mulld(res.value, l0.value, l1.value)

    def emit_uint_mul_high(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        assert not l0.is_imm()
        assert not l1.is_imm()
        self.mc.mulhdu(res.value, l0.value, l1.value)

    def do_emit_int_binary_ovf(self, op, arglocs):
        l0, l1, res = arglocs[0], arglocs[1], arglocs[2]
        self.mc.load_imm(r.SCRATCH, 0)
        self.mc.mtxer(r.SCRATCH.value)
        return (res.value, l0.value, l1.value)
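
    # Overflow-checking arithmetic: do_emit_int_binary_ovf() above clears
    # XER first, so the summary-overflow (SO) bit starts out reset; the
    # emitters below then use the OE=1, Rc=1 form of the operation.  A
    # sketch of the sequence generated for INT_ADD_OVF:
    #
    #     li    r0, 0
    #     mtxer r0                # reset XER[SO, OV]
    #     addo. res, l0, l1       # copies XER[SO] into CR0 on completion
    #
    # A following GUARD_OVERFLOW / GUARD_NO_OVERFLOW tests that bit via
    # guard_success_cc = c.SO / c.NS (see GuardOpAssembler below).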
    def emit_int_add_ovf(self, op, arglocs, regalloc):
        self.mc.addox(*self.do_emit_int_binary_ovf(op, arglocs))

    def emit_int_sub_ovf(self, op, arglocs, regalloc):
        self.mc.subox(*self.do_emit_int_binary_ovf(op, arglocs))

    def emit_int_mul_ovf(self, op, arglocs, regalloc):
        if IS_PPC_32:
            self.mc.mullwox(*self.do_emit_int_binary_ovf(op, arglocs))
        else:
            self.mc.mulldox(*self.do_emit_int_binary_ovf(op, arglocs))

    def emit_int_and(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.and_(res.value, l0.value, l1.value)

    def emit_int_or(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.or_(res.value, l0.value, l1.value)

    def emit_int_xor(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.xor(res.value, l0.value, l1.value)

    def emit_int_lshift(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        if IS_PPC_32:
            self.mc.slw(res.value, l0.value, l1.value)
        else:
            self.mc.sld(res.value, l0.value, l1.value)

    def emit_int_rshift(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        if IS_PPC_32:
            self.mc.sraw(res.value, l0.value, l1.value)
        else:
            self.mc.srad(res.value, l0.value, l1.value)

    def emit_uint_rshift(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        if IS_PPC_32:
            self.mc.srw(res.value, l0.value, l1.value)
        else:
            self.mc.srd(res.value, l0.value, l1.value)
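
    # The comparison operations below are generated by gen_emit_cmp_op():
    # presumably it emits the appropriate signed/unsigned/floating-point
    # compare and records the condition to test in guard_success_cc, so
    # that a following guard can branch on it directly (with the 0/1 value
    # materialized into the result register only when really needed).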
    emit_int_le = gen_emit_cmp_op(c.LE)
    emit_int_lt = gen_emit_cmp_op(c.LT)
    emit_int_gt = gen_emit_cmp_op(c.GT)
    emit_int_ge = gen_emit_cmp_op(c.GE)
    emit_int_eq = gen_emit_cmp_op(c.EQ)
    emit_int_ne = gen_emit_cmp_op(c.NE)

    emit_uint_lt = gen_emit_cmp_op(c.LT, signed=False)
    emit_uint_le = gen_emit_cmp_op(c.LE, signed=False)
    emit_uint_gt = gen_emit_cmp_op(c.GT, signed=False)
    emit_uint_ge = gen_emit_cmp_op(c.GE, signed=False)

    emit_int_is_zero = emit_int_eq   # EQ to 0
    emit_int_is_true = emit_int_ne   # NE to 0

    emit_ptr_eq = emit_int_eq
    emit_ptr_ne = emit_int_ne

    emit_instance_ptr_eq = emit_ptr_eq
    emit_instance_ptr_ne = emit_ptr_ne

    def emit_int_neg(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.neg(res.value, l0.value)

    def emit_int_invert(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.not_(res.value, l0.value)

    def emit_int_signext(self, op, arglocs, regalloc):
        l0, res = arglocs
        extend_from = op.getarg(1).getint()
        if extend_from == 1:
            self.mc.extsb(res.value, l0.value)
        elif extend_from == 2:
            self.mc.extsh(res.value, l0.value)
        elif extend_from == 4:
            self.mc.extsw(res.value, l0.value)
        else:
            raise AssertionError(extend_from)
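
    # int_force_ge_zero computes max(arg, 0) without a branch:
    # scratch = (~arg) >> (WORD*8-1) (arithmetic shift) is all ones when
    # arg >= 0 and all zeroes when arg < 0, so 'arg & scratch' yields
    # either arg itself or 0.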
    def emit_int_force_ge_zero(self, op, arglocs, regalloc):
        arg, res = arglocs
        with scratch_reg(self.mc):
            self.mc.nor(r.SCRATCH.value, arg.value, arg.value)
            if IS_PPC_32:
                self.mc.srawi(r.SCRATCH.value, r.SCRATCH.value, 31)
            else:
                # sradi(scratch, scratch, 63): the shift amount 63 is
                # passed as the two encoding fields (1, 31)
                self.mc.sradi(r.SCRATCH.value, r.SCRATCH.value, 1, 31)
            self.mc.and_(res.value, arg.value, r.SCRATCH.value)


class FloatOpAssembler(object):
    _mixin_ = True

    def emit_float_add(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.fadd(res.value, l0.value, l1.value)

    def emit_float_sub(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.fsub(res.value, l0.value, l1.value)

    def emit_float_mul(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.fmul(res.value, l0.value, l1.value)

    def emit_float_truediv(self, op, arglocs, regalloc):
        l0, l1, res = arglocs
        self.mc.fdiv(res.value, l0.value, l1.value)

    def emit_float_neg(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.fneg(res.value, l0.value)

    def emit_float_abs(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.fabs(res.value, l0.value)

    def _emit_math_sqrt(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.fsqrt(res.value, l0.value)

    def _emit_threadlocalref_get(self, op, arglocs, regalloc):
        [resloc] = arglocs
        offset = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
        calldescr = op.getdescr()
        size = calldescr.get_result_size()
        sign = calldescr.is_result_signed()
        #
        # This loads the stack location THREADLOCAL_OFS into a register,
        # and then reads the word at the given offset.  It is only
        # supported if 'translate_support_code' is true; otherwise,
        # execute_token() was called with a dummy value for the stack
        # location THREADLOCAL_OFS.
        #
        assert self.cpu.translate_support_code
        assert resloc.is_reg()
        assert _check_imm_arg(offset)
        self.mc.ld(resloc.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
        self._load_from_mem(resloc, resloc, imm(offset), imm(size), imm(sign))

    emit_float_le = gen_emit_cmp_op(c.LE, fp=True)
    emit_float_lt = gen_emit_cmp_op(c.LT, fp=True)
    emit_float_gt = gen_emit_cmp_op(c.GT, fp=True)
    emit_float_ge = gen_emit_cmp_op(c.GE, fp=True)
    emit_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
    emit_float_ne = gen_emit_cmp_op(c.NE, fp=True)
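
    # Without a direct GPR<->FPR move instruction, values are transferred
    # between the integer and floating-point register files through memory:
    # the conversions below store into a scratch stack slot (reusing the
    # parameter save area) and reload from it into the other register file.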
    def emit_cast_float_to_int(self, op, arglocs, regalloc):
        l0, temp_loc, res = arglocs
        self.mc.fctidz(temp_loc.value, l0.value)
        self.mc.stfd(temp_loc.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
        self.mc.ld(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)

    def emit_cast_int_to_float(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.std(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
        self.mc.lfd(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
        self.mc.fcfid(res.value, res.value)

    def emit_convert_float_bytes_to_longlong(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.stfd(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
        self.mc.ld(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)

    def emit_convert_longlong_bytes_to_float(self, op, arglocs, regalloc):
        l0, res = arglocs
        self.mc.std(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
        self.mc.lfd(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)


class GuardOpAssembler(object):
    _mixin_ = True
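
    # Guard protocol: _emit_guard() takes the *negation* of the recorded
    # guard_success_cc (i.e. the failure condition), emits a trap()
    # placeholder at the current position and appends a PPCGuardToken to
    # pending_guard_tokens; when the loop is finished, the trap instruction
    # is patched into a conditional branch to the guard's recovery stub.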
    def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False):
        if is_guard_not_invalidated:
            fcond = c.cond_none
        else:
            fcond = self.guard_success_cc
            self.guard_success_cc = c.cond_none
            assert fcond != c.cond_none
            fcond = c.negate(fcond)
        token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], fcond)
        token.pos_jump_offset = self.mc.currpos()
        assert token.guard_not_invalidated() == is_guard_not_invalidated
        if not is_guard_not_invalidated:
            self.mc.trap()     # has to be patched later on
        self.pending_guard_tokens.append(token)

    def build_guard_token(self, op, frame_depth, arglocs, fcond):
        descr = op.getdescr()
        gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE)
        faildescrindex = self.get_gcref_from_faildescr(descr)
        token = PPCGuardToken(self.cpu, gcmap, descr, op.getfailargs(),
                              arglocs, op.getopnum(), frame_depth,
                              faildescrindex, fcond)
        return token

    def emit_guard_true(self, op, arglocs, regalloc):
        self._emit_guard(op, arglocs)

    def emit_guard_false(self, op, arglocs, regalloc):
        self.guard_success_cc = c.negate(self.guard_success_cc)
        self._emit_guard(op, arglocs)

    def emit_guard_overflow(self, op, arglocs, regalloc):
        self.guard_success_cc = c.SO
        self._emit_guard(op, arglocs)

    def emit_guard_no_overflow(self, op, arglocs, regalloc):
        self.guard_success_cc = c.NS
        self._emit_guard(op, arglocs)

    def emit_guard_value(self, op, arglocs, regalloc):
        l0 = arglocs[0]
        l1 = arglocs[1]
        failargs = arglocs[2:]

        if l0.is_reg():
            if l1.is_imm():
                self.mc.cmp_op(0, l0.value, l1.getint(), imm=True)
            else:
                self.mc.cmp_op(0, l0.value, l1.value)
        elif l0.is_fp_reg():
            assert l1.is_fp_reg()
            self.mc.cmp_op(0, l0.value, l1.value, fp=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, failargs)

    emit_guard_nonnull = emit_guard_true
    emit_guard_isnull = emit_guard_false

    def emit_guard_class(self, op, arglocs, regalloc):
        self._cmp_guard_class(op, arglocs, regalloc)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])

    def emit_guard_nonnull_class(self, op, arglocs, regalloc):
        self.mc.cmp_op(0, arglocs[0].value, 1, imm=True, signed=False)
        patch_pos = self.mc.currpos()
        self.mc.trap()
        self._cmp_guard_class(op, arglocs, regalloc)
        pmc = OverwritingBuilder(self.mc, patch_pos, 1)
        pmc.blt(self.mc.currpos() - patch_pos)
        pmc.overwrite()
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])
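
    # _cmp_guard_class() has two strategies: if the object has a vtable
    # field, load it and compare it against the expected class pointer;
    # otherwise (gcremovetypeptr, the common case in a translated PyPy),
    # compare the compressed typeid half-word in the object header instead
    # (see _read_typeid()).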
    def _cmp_guard_class(self, op, locs, regalloc):
        offset = self.cpu.vtable_offset
        if offset is not None:
            # could be one instruction shorter, but we don't care, since
            # this is not the case that is commonly translated
            self.mc.load(r.SCRATCH.value, locs[0].value, offset)
            self.mc.load_imm(r.SCRATCH2, locs[1].value)
            self.mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value)
        else:
            expected_typeid = (self.cpu.gc_ll_descr
                .get_typeid_from_classptr_if_gcremovetypeptr(locs[1].value))
            self._cmp_guard_gc_type(locs[0], expected_typeid)

    def _read_typeid(self, targetreg, loc_ptr):
        # Note that the typeid half-word is at offset 0 on a little-endian
        # machine; it is at offset 2 or 4 on a big-endian machine.
        assert self.cpu.supports_guard_gc_type
        if IS_PPC_32:
            self.mc.lhz(targetreg.value, loc_ptr.value, 2 * IS_BIG_ENDIAN)
        else:
            self.mc.lwz(targetreg.value, loc_ptr.value, 4 * IS_BIG_ENDIAN)
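
    # The compare in _cmp_guard_gc_type() has to handle a typeid of up to
    # 32 bits using only 16-bit immediates: if the expected value does not
    # fit in two bytes, 'subis' first subtracts its high half; the result
    # equals the low half exactly when the full values match, so a single
    # unsigned 16-bit compare finishes the check.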
    def _cmp_guard_gc_type(self, loc_ptr, expected_typeid):
        self._read_typeid(r.SCRATCH2, loc_ptr)
        assert 0 <= expected_typeid <= 0x7fffffff   # 4 bytes are always enough
        if expected_typeid > 0xffff:     # if 2 bytes are not enough
            self.mc.subis(r.SCRATCH2.value, r.SCRATCH2.value,
                          expected_typeid >> 16)
            expected_typeid = expected_typeid & 0xffff
        self.mc.cmp_op(0, r.SCRATCH2.value, expected_typeid,
                       imm=True, signed=False)

    def emit_guard_gc_type(self, op, arglocs, regalloc):
        self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs[2:])

    def emit_guard_is_object(self, op, arglocs, regalloc):
        assert self.cpu.supports_guard_gc_type
        loc_object = arglocs[0]
        # idea: read the typeid, fetch one byte of the field 'infobits' from
        # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
        base_type_info, shift_by, sizeof_ti = (
            self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
        infobits_offset, IS_OBJECT_FLAG = (
            self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
        self._read_typeid(r.SCRATCH2, loc_object)
        self.mc.load_imm(r.SCRATCH, base_type_info + infobits_offset)
        assert shift_by == 0     # on PPC64; fixme for PPC32
        self.mc.lbzx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
        self.mc.andix(r.SCRATCH2.value, r.SCRATCH2.value, IS_OBJECT_FLAG & 0xff)
        self.guard_success_cc = c.NE
        self._emit_guard(op, arglocs[1:])

    def emit_guard_subclass(self, op, arglocs, regalloc):
        assert self.cpu.supports_guard_gc_type
        loc_object = arglocs[0]
        loc_check_against_class = arglocs[1]
        offset = self.cpu.vtable_offset
        offset2 = self.cpu.subclassrange_min_offset
        if offset is not None:
            # read this field to get the vtable pointer
            self.mc.load(r.SCRATCH2.value, loc_object.value, offset)
            # read the vtable's subclassrange_min field
            assert _check_imm_arg(offset2)
            self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2)
        else:
            # read the typeid
            self._read_typeid(r.SCRATCH, loc_object)
            # read the vtable's subclassrange_min field, as a single
            # step with the correct offset
            base_type_info, shift_by, sizeof_ti = (
                self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
            self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2)
            assert shift_by == 0     # on PPC64; fixme for PPC32
            self.mc.ldx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
        # get the two bounds to check against
        vtable_ptr = loc_check_against_class.getint()
        vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
        check_min = vtable_ptr.subclassrange_min
        check_max = vtable_ptr.subclassrange_max
        assert check_max > check_min
        check_diff = check_max - check_min - 1
        # right now, a full PyPy uses less than 6000 numbers, so we assert
        # here that they always fit inside 15 bits
        assert 0 <= check_min <= 0x7fff
        assert 0 <= check_diff <= 0xffff
        # check by doing the unsigned comparison (tmp - min) < (max - min)
        self.mc.subi(r.SCRATCH2.value, r.SCRATCH2.value, check_min)
        self.mc.cmp_op(0, r.SCRATCH2.value, check_diff, imm=True, signed=False)
        # the guard passes if we get a result of "below or equal"
        self.guard_success_cc = c.LE
        self._emit_guard(op, arglocs[2:])

    def emit_guard_not_invalidated(self, op, arglocs, regalloc):
        self._emit_guard(op, arglocs, is_guard_not_invalidated=True)

    def emit_guard_not_forced(self, op, arglocs, regalloc):
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        self.mc.ld(r.SCRATCH.value, r.SPP.value, ofs)
        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs)

    def emit_guard_not_forced_2(self, op, arglocs, regalloc):
        guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
                                             c.cond_none)
        self._finish_gcmap = guard_token.gcmap
        self._store_force_index(op)
        self.store_info_on_descr(0, guard_token)


class MiscOpAssembler(object):
    _mixin_ = True

    def emit_label(self, op, arglocs, regalloc):
        pass

    def emit_increment_debug_counter(self, op, arglocs, regalloc):
        [addr_loc, value_loc] = arglocs
        self.mc.load(value_loc.value, addr_loc.value, 0)
        self.mc.addi(value_loc.value, value_loc.value, 1)   # can't use r0!
        self.mc.store(value_loc.value, addr_loc.value, 0)

    def emit_finish(self, op, arglocs, regalloc):
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        if len(arglocs) > 0:
            [return_val] = arglocs
            if op.getarg(0).type == FLOAT:
                self.mc.stfd(return_val.value, r.SPP.value, base_ofs)
            else:
                self.mc.std(return_val.value, r.SPP.value, base_ofs)
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')

        descr = op.getdescr()
        faildescrindex = self.get_gcref_from_faildescr(descr)
        self._load_from_gc_table(r.r5, r.r5, faildescrindex)

        # gcmap logic here:
        arglist = op.getarglist()
        if arglist and arglist[0].type == REF:
            if self._finish_gcmap:
                # we're returning with a guard_not_forced_2, and
                # additionally we need to say that the result contains
                # a reference too:
                self._finish_gcmap[0] |= r_uint(1)
                gcmap = self._finish_gcmap
            else:
                gcmap = self.gcmap_for_finish
        elif self._finish_gcmap:
            # we're returning with a guard_not_forced_2
            gcmap = self._finish_gcmap
        else:
            gcmap = lltype.nullptr(jitframe.GCMAP)
        self.load_gcmap(self.mc, r.r2, gcmap)

        self.mc.std(r.r5.value, r.SPP.value, ofs)
        self.mc.store(r.r2.value, r.SPP.value, ofs2)

        # exit function
        self._call_footer()

    def emit_jump(self, op, arglocs, regalloc):
        # The backend's logic assumes that the target code is in a piece of
        # assembler that was also called with the same number of arguments,
        # so that the locations [ebp+8..] of the input arguments are valid
        # stack locations both before and after the jump.
        #
        descr = op.getdescr()
        assert isinstance(descr, TargetToken)
        my_nbargs = self.current_clt._debug_nbargs
        target_nbargs = descr._ppc_clt._debug_nbargs
        assert my_nbargs == target_nbargs

        if descr in self.target_tokens_currently_compiling:
            self.mc.b_offset(descr._ll_loop_code)
        else:
            self.mc.b_abs(descr._ll_loop_code)

    def _genop_same_as(self, op, arglocs, regalloc):
        argloc, resloc = arglocs
        if argloc is not resloc:
            self.regalloc_mov(argloc, resloc)

    emit_same_as_i = _genop_same_as
    emit_same_as_r = _genop_same_as
    emit_same_as_f = _genop_same_as
    emit_cast_ptr_to_int = _genop_same_as
    emit_cast_int_to_ptr = _genop_same_as

    def emit_guard_no_exception(self, op, arglocs, regalloc):
        self.mc.load_from_addr(r.SCRATCH2, r.SCRATCH2, self.cpu.pos_exception())
        self.mc.cmp_op(0, r.SCRATCH2.value, 0, imm=True)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, arglocs)
        # If the previous operation was a COND_CALL, overwrite its conditional
        # jump to jump over this GUARD_NO_EXCEPTION as well, if we can
        if self._find_nearby_operation(regalloc, -1).getopnum() == rop.COND_CALL:
            jmp_adr, BI, BO = self.previous_cond_call_jcond
            relative_target = self.mc.currpos() - jmp_adr
            pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
            pmc.bc(BO, BI, relative_target)
            pmc.overwrite()

    def emit_save_exc_class(self, op, arglocs, regalloc):
        [resloc] = arglocs
        diff = self.mc.load_imm_plus(r.r2, self.cpu.pos_exception())
        self.mc.load(resloc.value, r.r2.value, diff)

    def emit_save_exception(self, op, arglocs, regalloc):
        [resloc] = arglocs
        self._store_and_reset_exception(self.mc, resloc)

    def emit_restore_exception(self, op, arglocs, regalloc):
        self._restore_exception(self.mc, arglocs[1], arglocs[0])

    def emit_guard_exception(self, op, arglocs, regalloc):
        loc, resloc = arglocs[:2]
        failargs = arglocs[2:]

        mc = self.mc
        mc.load_imm(r.SCRATCH2, self.cpu.pos_exc_value())
        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
        assert _check_imm_arg(diff)

        mc.load(r.SCRATCH.value, r.SCRATCH2.value, diff)
        mc.cmp_op(0, r.SCRATCH.value, loc.value)
        self.guard_success_cc = c.EQ
        self._emit_guard(op, failargs)

        if resloc:
            mc.load(resloc.value, r.SCRATCH2.value, 0)
        mc.load_imm(r.SCRATCH, 0)
        mc.store(r.SCRATCH.value, r.SCRATCH2.value, 0)
        mc.store(r.SCRATCH.value, r.SCRATCH2.value, diff)

    def _load_from_gc_table(self, rD, rT, index):
        # rT is a temporary, may be equal to rD, must be != r0
        addr = self.gc_table_addr + index * WORD
        self.mc.load_from_addr(rD, rT, addr)

    def emit_load_from_gc_table(self, op, arglocs, regalloc):
        index = op.getarg(0).getint()
        [resloc] = arglocs
        assert resloc.is_reg()
        self._load_from_gc_table(resloc, resloc, index)


class CallOpAssembler(object):
    _mixin_ = True
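
    # _emit_call(): arglocs is [resloc, (saveerrloc,) function address,
    # call arguments...].  The actual ABI work (argument shuffling,
    # register saving, result handling according to the call descr) is
    # delegated to callbuilder.CallBuilder.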
    def _emit_call(self, op, arglocs, is_call_release_gil=False):
        resloc = arglocs[0]
        func_index = 1 + is_call_release_gil
        adr = arglocs[func_index]
        arglist = arglocs[func_index+1:]

        cb = callbuilder.CallBuilder(self, adr, arglist, resloc)

        descr = op.getdescr()
        assert isinstance(descr, CallDescr)
        cb.argtypes = descr.get_arg_types()
        cb.restype = descr.get_result_type()

        if is_call_release_gil:
            saveerrloc = arglocs[1]
            assert saveerrloc.is_imm()
            cb.emit_call_release_gil(saveerrloc.value)
        else:
            effectinfo = descr.get_extra_info()
            if effectinfo is None or effectinfo.check_can_collect():
                cb.emit()
            else:
                cb.emit_no_collect()

    def _genop_call(self, op, arglocs, regalloc):
        oopspecindex = regalloc.get_oopspecindex(op)
        if oopspecindex == EffectInfo.OS_MATH_SQRT:
            return self._emit_math_sqrt(op, arglocs, regalloc)
        if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
            return self._emit_threadlocalref_get(op, arglocs, regalloc)
        self._emit_call(op, arglocs)

    emit_call_i = _genop_call
    emit_call_r = _genop_call
    emit_call_f = _genop_call
    emit_call_n = _genop_call

    def _genop_call_may_force(self, op, arglocs, regalloc):
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        self._emit_call(op, arglocs)

    emit_call_may_force_i = _genop_call_may_force
    emit_call_may_force_r = _genop_call_may_force
    emit_call_may_force_f = _genop_call_may_force
    emit_call_may_force_n = _genop_call_may_force

    def _genop_call_release_gil(self, op, arglocs, regalloc):
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        self._emit_call(op, arglocs, is_call_release_gil=True)

    emit_call_release_gil_i = _genop_call_release_gil
    emit_call_release_gil_f = _genop_call_release_gil
    emit_call_release_gil_n = _genop_call_release_gil
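
    # CALL_MAY_FORCE and CALL_RELEASE_GIL are always followed by a
    # GUARD_NOT_FORCED.  _store_force_index() writes that guard's faildescr
    # into the jf_force_descr field of the jitframe before the call, so
    # that if the frame is forced while the callee runs, the runtime knows
    # at which guard to resume.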
    def _store_force_index(self, guard_op):
        assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
                guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
        faildescr = guard_op.getdescr()
        ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')

        faildescrindex = self.get_gcref_from_faildescr(faildescr)
        self._load_from_gc_table(r.r2, r.r2, faildescrindex)
        self.mc.store(r.r2.value, r.SPP.value, ofs)

    def _find_nearby_operation(self, regalloc, delta):
        return regalloc.operations[regalloc.rm.position + delta]

    _COND_CALL_SAVE_REGS = [r.r3, r.r4, r.r5, r.r6, r.r12]

    def emit_cond_call(self, op, arglocs, regalloc):
        fcond = self.guard_success_cc
        self.guard_success_cc = c.cond_none
        assert fcond != c.cond_none
        fcond = c.negate(fcond)

        jmp_adr = self.mc.get_relative_pos()
        self.mc.trap()        # patched later to a 'bc'

        self.load_gcmap(self.mc, r.r2, regalloc.get_gcmap())

        # save away r3, r4, r5, r6, r12 into the jitframe
        should_be_saved = [
            reg for reg in self._regalloc.rm.reg_bindings.itervalues()
                if reg in self._COND_CALL_SAVE_REGS]
        self._push_core_regs_to_jitframe(self.mc, should_be_saved)
        #
        # load the 0-to-4 arguments into these registers, with the address of
        # the function to call into r12
        remap_frame_layout(self, arglocs,
                           [r.r12, r.r3, r.r4, r.r5, r.r6][:len(arglocs)],
                           r.SCRATCH)
        #
        # figure out which variant of cond_call_slowpath to call, and call it
        callee_only = False
        floats = False
        for reg in regalloc.rm.reg_bindings.values():
            if reg not in regalloc.rm.save_around_call_regs:
                break
        else:
            callee_only = True
        if regalloc.fprm.reg_bindings:
            floats = True
        cond_call_adr = self.cond_call_slowpath[floats * 2 + callee_only]
        self.mc.bl_abs(cond_call_adr)
        # restoring the registers saved above, and doing pop_gcmap(), is left
        # to the cond_call_slowpath helper.  We never have any result value.
        relative_target = self.mc.currpos() - jmp_adr
        pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
        BI, BO = c.encoding[fcond]
        pmc.bc(BO, BI, relative_target)
        pmc.overwrite()
        # might be overridden again to skip over the following
        # guard_no_exception too
        self.previous_cond_call_jcond = jmp_adr, BI, BO


class FieldOpAssembler(object):
    _mixin_ = True
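
    # The _write_to_mem()/_load_from_mem() helpers below dispatch on the
    # access size and on whether the offset is an immediate: the D-form
    # instructions (std, stw, ...) encode a 16-bit immediate displacement,
    # while their X-form variants (stdx, stwx, ...) take the offset in a
    # register.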
    def _write_to_mem(self, value_loc, base_loc, ofs, size_loc):
        assert size_loc.is_imm()
        size = size_loc.value
        if size == 8:
            if value_loc.is_fp_reg():
                if ofs.is_imm():
                    self.mc.stfd(value_loc.value, base_loc.value, ofs.value)
                else:
                    self.mc.stfdx(value_loc.value, base_loc.value, ofs.value)
            else:
                if ofs.is_imm():
                    self.mc.std(value_loc.value, base_loc.value, ofs.value)
                else:
                    self.mc.stdx(value_loc.value, base_loc.value, ofs.value)
        elif size == 4:
            if ofs.is_imm():
                self.mc.stw(value_loc.value, base_loc.value, ofs.value)
            else:
                self.mc.stwx(value_loc.value, base_loc.value, ofs.value)
        elif size == 2:
            if ofs.is_imm():
                self.mc.sth(value_loc.value, base_loc.value, ofs.value)
            else:
                self.mc.sthx(value_loc.value, base_loc.value, ofs.value)
        elif size == 1:
            if ofs.is_imm():
                self.mc.stb(value_loc.value, base_loc.value, ofs.value)
            else:
                self.mc.stbx(value_loc.value, base_loc.value, ofs.value)
        else:
            assert 0, "size not supported"

    def emit_gc_store(self, op, arglocs, regalloc):
        value_loc, base_loc, ofs_loc, size_loc = arglocs
        self._write_to_mem(value_loc, base_loc, ofs_loc, size_loc)

    def _apply_offset(self, index_loc, ofs_loc):
        # If offset != 0 then we have to add it here.  Note that
        # mc.addi() would not be valid with operand r0.
        assert ofs_loc.is_imm()                   # must be an immediate...
        assert _check_imm_arg(ofs_loc.getint())   # ...that fits 16 bits
        assert index_loc is not r.SCRATCH2
        # (simplified version of _apply_scale())
        if ofs_loc.value > 0:
            self.mc.addi(r.SCRATCH2.value, index_loc.value, ofs_loc.value)
            index_loc = r.SCRATCH2
        return index_loc

    def emit_gc_store_indexed(self, op, arglocs, regalloc):
        base_loc, index_loc, value_loc, ofs_loc, size_loc = arglocs
        index_loc = self._apply_offset(index_loc, ofs_loc)
        self._write_to_mem(value_loc, base_loc, index_loc, size_loc)

    def _load_from_mem(self, res, base_loc, ofs, size_loc, sign_loc):
        # res, base_loc, ofs, size and signed are all locations
        assert base_loc is not r.SCRATCH
        assert size_loc.is_imm()
        size = size_loc.value
        assert sign_loc.is_imm()
        sign = sign_loc.value
        if size == 8:
            if res.is_fp_reg():
                if ofs.is_imm():
                    self.mc.lfd(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.lfdx(res.value, base_loc.value, ofs.value)
            else:
                if ofs.is_imm():
                    self.mc.ld(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.ldx(res.value, base_loc.value, ofs.value)
        elif size == 4:
            if IS_PPC_64 and sign:
                if ofs.is_imm():
                    self.mc.lwa(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.lwax(res.value, base_loc.value, ofs.value)
            else:
                if ofs.is_imm():
                    self.mc.lwz(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.lwzx(res.value, base_loc.value, ofs.value)
        elif size == 2:
            if sign:
                if ofs.is_imm():
                    self.mc.lha(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.lhax(res.value, base_loc.value, ofs.value)
            else:
                if ofs.is_imm():
                    self.mc.lhz(res.value, base_loc.value, ofs.value)
                else:
                    self.mc.lhzx(res.value, base_loc.value, ofs.value)
        elif size == 1:
            if ofs.is_imm():
                self.mc.lbz(res.value, base_loc.value, ofs.value)
            else:
                self.mc.lbzx(res.value, base_loc.value, ofs.value)
            if sign:
                self.mc.extsb(res.value, res.value)
        else:
            assert 0, "size not supported"

    def _genop_gc_load(self, op, arglocs, regalloc):
        base_loc, ofs_loc, res_loc, size_loc, sign_loc = arglocs
        self._load_from_mem(res_loc, base_loc, ofs_loc, size_loc, sign_loc)

    emit_gc_load_i = _genop_gc_load
    emit_gc_load_r = _genop_gc_load
    emit_gc_load_f = _genop_gc_load

    def _genop_gc_load_indexed(self, op, arglocs, regalloc):
        base_loc, index_loc, res_loc, ofs_loc, size_loc, sign_loc = arglocs
        index_loc = self._apply_offset(index_loc, ofs_loc)
        self._load_from_mem(res_loc, base_loc, index_loc, size_loc, sign_loc)

    emit_gc_load_indexed_i = _genop_gc_load_indexed
    emit_gc_load_indexed_r = _genop_gc_load_indexed
    emit_gc_load_indexed_f = _genop_gc_load_indexed

    SIZE2SCALE = dict([(1 << _i, _i) for _i in range(32)])
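
    # SIZE2SCALE maps a power-of-two size to the corresponding shift count;
    # _multiply_by_constant() uses it to turn a multiplication by such a
    # constant into a single sldi, falling back to mulli (or load_imm
    # followed by mullw/mulld) for other constants.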
    def _multiply_by_constant(self, loc, multiply_by, scratch_loc):
        # XXX should die together with _apply_scale() but can't because
        # of emit_zero_array() and malloc_cond_varsize() at the moment
        assert loc.is_reg()
        if multiply_by == 1:
            return loc
        try:
            scale = self.SIZE2SCALE[multiply_by]
        except KeyError:
            if _check_imm_arg(multiply_by):
                self.mc.mulli(scratch_loc.value, loc.value, multiply_by)
            else:
                self.mc.load_imm(scratch_loc, multiply_by)
                if IS_PPC_32:
                    self.mc.mullw(scratch_loc.value, loc.value,
                                  scratch_loc.value)
                else:
                    self.mc.mulld(scratch_loc.value, loc.value,
                                  scratch_loc.value)
        else:
            self.mc.sldi(scratch_loc.value, loc.value, scale)
        return scratch_loc

    def _copy_in_scratch2(self, loc):
        if loc.is_imm():
            self.mc.li(r.SCRATCH2.value, loc.value)
        elif loc is not r.SCRATCH2:
            self.mc.mr(r.SCRATCH2.value, loc.value)
        return r.SCRATCH2

    # RPythonic workaround for emit_zero_array()
    def eza_stXux(self, a, b, c, itemsize):
        if itemsize & 1: self.mc.stbux(a, b, c)
        elif itemsize & 2: self.mc.sthux(a, b, c)
        elif (itemsize & 4) or IS_PPC_32: self.mc.stwux(a, b, c)
        else: self.mc.stdux(a, b, c)

    def eza_stXu(self, a, b, c, itemsize):
        if itemsize & 1: self.mc.stbu(a, b, c)
        elif itemsize & 2: self.mc.sthu(a, b, c)
        elif (itemsize & 4) or IS_PPC_32: self.mc.stwu(a, b, c)
        else: self.mc.stdu(a, b, c)
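
    # emit_zero_array() zeroes memory in two passes: a word-at-a-time loop
    # followed by a byte-at-a-time loop for the remainder.  Both passes use
    # the store-with-update forms (via eza_stXu above), which store and
    # advance the address register in one instruction, and keep the
    # iteration count in CTR so each loop can close with a single bdnz
    # (decrement CTR, branch if nonzero).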
    def emit_zero_array(self, op, arglocs, regalloc):
        base_loc, startindex_loc, length_loc, ofs_loc = arglocs

        stepsize = 8
        shift_by = 3
        if IS_PPC_32:
            stepsize = 4
            shift_by = 2

        if length_loc.is_imm():
            if length_loc.value <= 0:
                return     # nothing to do

        if startindex_loc.is_imm():
            self.mc.load_imm(r.SCRATCH2, startindex_loc.value)
            startindex_loc = r.SCRATCH2
        if ofs_loc.is_imm():
            self.mc.addi(r.SCRATCH2.value, startindex_loc.value, ofs_loc.value)
        else:
            self.mc.add(r.SCRATCH2.value, startindex_loc.value, ofs_loc.value)
        ofs_loc = r.SCRATCH2
        assert base_loc.is_core_reg()
        self.mc.add(ofs_loc.value, ofs_loc.value, base_loc.value)
        # ofs_loc is now the real address pointing to the first
        # byte to be zeroed

        prev_length_loc = length_loc
        if length_loc.is_imm():
            self.mc.load_imm(r.SCRATCH, length_loc.value)
            length_loc = r.SCRATCH

        self.mc.cmp_op(0, length_loc.value, stepsize, imm=True)
        jlt_location = self.mc.currpos()
        self.mc.trap()

        self.mc.sradi(r.SCRATCH.value, length_loc.value, 0, shift_by)
        self.mc.mtctr(r.SCRATCH.value)   # store the word count in CTR
        self.mc.li(r.SCRATCH.value, 0)

        # NOTE the following assumes that both startindex and length are
        # given in bytes: loop 1) zeroes 4/8 bytes at a time, and loop 2)
        # zeroes every remaining byte
        #
        # pre-decrement, compensating for the store-with-update in the
        # first iteration of loop 1)
        self.mc.subi(ofs_loc.value, ofs_loc.value, stepsize)

        # 1) zero out `stepsize` bytes at a time over the chunk starting
        # at startindex and ending at startindex + length (both in bytes)
        loop_location = self.mc.currpos()
        self.eza_stXu(r.SCRATCH.value, ofs_loc.value, stepsize, stepsize)
        self.mc.bdnz(loop_location - self.mc.currpos())

        self.mc.addi(ofs_loc.value, ofs_loc.value, stepsize)

        pmc = OverwritingBuilder(self.mc, jlt_location, 1)
        pmc.blt(self.mc.currpos() - jlt_location)    # jump if length < WORD
        pmc.overwrite()

        # 2) there might be some bytes left to be written, e.g. if
        # length_loc == 3 bytes but stepsize == 4:
        # move the count of remaining bytes to the count register
        length_loc = prev_length_loc
        if length_loc.is_imm():
            self.mc.load_imm(r.SCRATCH, length_loc.value & (stepsize-1))
        else:
            self.mc.andix(r.SCRATCH.value, length_loc.value, (stepsize-1) & 0xff)
        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
        jle_location = self.mc.currpos()
        self.mc.trap()

        self.mc.mtctr(r.SCRATCH.value)
        self.mc.li(r.SCRATCH.value, 0)

        self.mc.subi(ofs_loc.value, ofs_loc.value, 1)

        loop_location = self.mc.currpos()
        self.eza_stXu(r.SCRATCH.value, ofs_loc.value, 1, 1)
        self.mc.bdnz(loop_location - self.mc.currpos())

        pmc = OverwritingBuilder(self.mc, jle_location, 1)
        pmc.ble(self.mc.currpos() - jle_location)    # !GT
        pmc.overwrite()


class StrOpAssembler(object):
    _mixin_ = True

    def emit_copystrcontent(self, op, arglocs, regalloc):
        self._emit_copycontent(arglocs, is_unicode=False)

    def emit_copyunicodecontent(self, op, arglocs, regalloc):
        self._emit_copycontent(arglocs, is_unicode=True)

    def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
        if src_ofs.is_imm():
            value = src_ofs.value << scale
            if value < 32768:
                self.mc.addi(dst.value, src_ptr.value, value)
            else:
                self.mc.load_imm(dst, value)
                self.mc.add(dst.value, src_ptr.value, dst.value)
        elif scale == 0:
            self.mc.add(dst.value, src_ptr.value, src_ofs.value)
        else:
            self.mc.sldi(dst.value, src_ofs.value, scale)
            self.mc.add(dst.value, src_ptr.value, dst.value)
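
    # _emit_copycontent() computes the raw source and destination addresses
    # (array base plus scaled offset plus basesize) and then simply calls
    # memcpy() with the standard calling convention: r3 = destination,
    # r4 = source, r5 = length in bytes.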
    def _emit_copycontent(self, arglocs, is_unicode):
        [src_ptr_loc, dst_ptr_loc,
         src_ofs_loc, dst_ofs_loc, length_loc] = arglocs

        if is_unicode:
            basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
                                        self.cpu.translate_support_code)
            if itemsize == 2: scale = 1
            elif itemsize == 4: scale = 2
            else: raise AssertionError
        else:
            basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
                                        self.cpu.translate_support_code)
            assert itemsize == 1
            basesize -= 1     # for the extra null character
            scale = 0

        self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
        self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)

        if length_loc.is_imm():
            length = length_loc.getint()
            self.mc.load_imm(r.r5, length << scale)
        else:
            if scale > 0:
                self.mc.sldi(r.r5.value, length_loc.value, scale)
            elif length_loc is not r.r5:
                self.mc.mr(r.r5.value, length_loc.value)

        self.mc.mr(r.r4.value, r.r0.value)
        self.mc.addi(r.r4.value, r.r4.value, basesize)
        self.mc.addi(r.r3.value, r.r2.value, basesize)

        self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
        self.mc.raw_call()


class UnicodeOpAssembler(object):
    _mixin_ = True
    # empty!


class AllocOpAssembler(object):
    _mixin_ = True

    def emit_check_memory_error(self, op, arglocs, regalloc):
        self.propagate_memoryerror_if_reg_is_null(arglocs[0])

    def emit_call_malloc_nursery(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        size_box = op.getarg(0)
        assert isinstance(size_box, ConstInt)
        size = size_box.getint()
        gc_ll_descr = self.cpu.gc_ll_descr
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            size, gcmap)

    def emit_call_malloc_nursery_varsize_frame(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        [sizeloc] = arglocs
        gc_ll_descr = self.cpu.gc_ll_descr
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond_varsize_frame(
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            sizeloc, gcmap)

    def emit_call_malloc_nursery_varsize(self, op, arglocs, regalloc):
        # registers r.RES and r.RSZ are allocated for this call
        gc_ll_descr = self.cpu.gc_ll_descr
        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
            raise Exception("unreachable code")
            # for boehm, this function should never be called
        [lengthloc] = arglocs
        arraydescr = op.getdescr()
        itemsize = op.getarg(1).getint()
        maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
        gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
        self.malloc_cond_varsize(
            op.getarg(0).getint(),
            gc_ll_descr.get_nursery_free_addr(),
            gc_ll_descr.get_nursery_top_addr(),
            lengthloc, itemsize, maxlength, gcmap, arraydescr)

    def emit_debug_merge_point(self, op, arglocs, regalloc):
        pass

    emit_jit_debug = emit_debug_merge_point
    emit_keepalive = emit_debug_merge_point

    def emit_enter_portal_frame(self, op, arglocs, regalloc):
        self.enter_portal_frame(op)

    def emit_leave_portal_frame(self, op, arglocs, regalloc):
        self.leave_portal_frame(op)
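
    # Sketch of the fastpath emitted by _write_barrier_fastpath() below:
    #
    #     lbz    scratch2, flag_byte(obj)
    #     andi.  scratch, scratch2, JIT_WB_IF_FLAG
    #     beq    done                   # flag not set: nothing to do
    #     andi.  scratch, scratch2, GCFLAG_CARDS_SET     (array case only)
    #     bne    set_card                                (array case only)
    #     call   wb_slowpath            # saves regs, calls the GC helper
    #     beq    done                                    (array case only)
    # set_card:                                          (array case only)
    #     compute and set one bit in the object's card area
    # done: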
    def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                                is_frame=False):
        # Write code equivalent to write_barrier() in the GC: it checks
        # a flag in the object at arglocs[0], and if set, it calls a
        # helper piece of assembler.  The latter saves registers as needed
        # and calls the function remember_young_pointer() from the GC.
        if we_are_translated():
            cls = self.cpu.gc_ll_descr.has_write_barrier_class()
            assert cls is not None and isinstance(descr, cls)
        #
        card_marking_mask = 0
        mask = descr.jit_wb_if_flag_singlebyte
        if array and descr.jit_wb_cards_set != 0:
            # assumptions the rest of the function depends on:
            assert (descr.jit_wb_cards_set_byteofs ==
                    descr.jit_wb_if_flag_byteofs)
            card_marking_mask = descr.jit_wb_cards_set_singlebyte
        #
        loc_base = arglocs[0]
        assert loc_base.is_reg()
        if is_frame:
            assert loc_base is r.SPP
        assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
        mc.lbz(r.SCRATCH2.value, loc_base.value, descr.jit_wb_if_flag_byteofs)
        mc.andix(r.SCRATCH.value, r.SCRATCH2.value, mask & 0xFF)

        jz_location = mc.get_relative_pos()
        mc.trap()        # patched later with 'beq'

        # for cond_call_gc_wb_array, also add another fast path:
        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
        if card_marking_mask:
            # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
            mc.andix(r.SCRATCH.value, r.SCRATCH2.value,
                     card_marking_mask & 0xFF)
            js_location = mc.get_relative_pos()
            mc.trap()    # patched later with 'bne'
        else:
            js_location = 0

        # Write only a CALL to the helper prepared in advance, passing it as
        # argument the address of the structure we are writing into
        # (the first argument to COND_CALL_GC_WB).
        helper_num = (card_marking_mask != 0)
        if is_frame:
            helper_num = 4
        elif regalloc.fprm.reg_bindings:
            helper_num += 2
        if self.wb_slowpath[helper_num] == 0:    # tests only
            assert not we_are_translated()
            assert not is_frame
            self.cpu.gc_ll_descr.write_barrier_descr = descr
            self._build_wb_slowpath(card_marking_mask != 0,
                                    bool(regalloc.fprm.reg_bindings))
            assert self.wb_slowpath[helper_num] != 0
        #
        if not is_frame:
            mc.mr(r.r0.value, loc_base.value)    # unusual argument location
        mc.load_imm(r.SCRATCH2, self.wb_slowpath[helper_num])
        mc.mtctr(r.SCRATCH2.value)
        mc.bctrl()

        if card_marking_mask:
            # The helper ends again with a check of the flag in the object.
            # So here, we can simply write again a 'beq', which will be
            # taken if GCFLAG_CARDS_SET is still not set.
            jns_location = mc.get_relative_pos()
            mc.trap()
            #
            # patch the 'bne' above
            currpos = mc.currpos()
            pmc = OverwritingBuilder(mc, js_location, 1)
            pmc.bne(currpos - js_location)
            pmc.overwrite()
            #
            # case GCFLAG_CARDS_SET: emit a few instructions to do
            # directly the card flag setting
            loc_index = arglocs[1]
            if loc_index.is_reg():
                tmp_loc = arglocs[2]
                n = descr.jit_wb_card_page_shift

                # compute in tmp_loc the byte offset:
                #     ~(index >> (card_page_shift + 3))   ('~' is 'not_' below)
                mc.srli_op(tmp_loc.value, loc_index.value, n + 3)

                # compute in r2 the index of the bit inside the byte:
                #     (index >> card_page_shift) & 7
                mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
                mc.li(r.SCRATCH.value, 1)
                mc.not_(tmp_loc.value, tmp_loc.value)

                # set r2 to 1 << r2
                mc.sl_op(r.SCRATCH2.value, r.SCRATCH.value, r.SCRATCH2.value)

                # set this bit inside the byte of interest
                mc.lbzx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
                mc.or_(r.SCRATCH.value, r.SCRATCH.value, r.SCRATCH2.value)
                mc.stbx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
                # done
            else:
                byte_index = loc_index.value >> descr.jit_wb_card_page_shift
                byte_ofs = ~(byte_index >> 3)
                byte_val = 1 << (byte_index & 7)
                assert _check_imm_arg(byte_ofs)

                mc.lbz(r.SCRATCH.value, loc_base.value, byte_ofs)
                mc.ori(r.SCRATCH.value, r.SCRATCH.value, byte_val)
                mc.stb(r.SCRATCH.value, loc_base.value, byte_ofs)
            #
            # patch the 'beq' just above
            currpos = mc.currpos()
            pmc = OverwritingBuilder(mc, jns_location, 1)
            pmc.beq(currpos - jns_location)
            pmc.overwrite()

        # patch the 'beq' above (the one at jz_location)
        currpos = mc.currpos()
        pmc = OverwritingBuilder(mc, jz_location, 1)
        pmc.beq(currpos - jz_location)
        pmc.overwrite()

    def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc)

    def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc):
        self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc,
                                     array=True)


class ForceOpAssembler(object):
    _mixin_ = True

    def emit_force_token(self, op, arglocs, regalloc):
        res_loc = arglocs[0]
        self.mc.mr(res_loc.value, r.SPP.value)

    def _genop_call_assembler(self, op, arglocs, regalloc):
        if len(arglocs) == 3:
            [result_loc, argloc, vloc] = arglocs
        else:
            [result_loc, argloc] = arglocs
            vloc = imm(0)
        self._store_force_index(self._find_nearby_operation(regalloc, +1))
        # 'result_loc' is either r3 or f1, or None
        self.call_assembler(op, argloc, vloc, result_loc, r.r3)

    emit_call_assembler_i = _genop_call_assembler
    emit_call_assembler_r = _genop_call_assembler
    emit_call_assembler_f = _genop_call_assembler
    emit_call_assembler_n = _genop_call_assembler

(File truncated here; the full file is available in the repository.)