/rpython/jit/backend/ppc/opassembler.py
Python | 1334 lines | 1012 code | 203 blank | 119 comment | 176 complexity | 0530d995e849b8cccb0e0e17733f3aa6 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
- from rpython.jit.backend.ppc.helper.assembler import gen_emit_cmp_op
- from rpython.jit.backend.ppc.helper.regalloc import _check_imm_arg
- import rpython.jit.backend.ppc.condition as c
- import rpython.jit.backend.ppc.register as r
- from rpython.jit.backend.ppc.locations import imm
- from rpython.jit.backend.ppc.locations import imm as make_imm_loc
- from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD,
- MAX_REG_PARAMS, MAX_FREG_PARAMS,
- PARAM_SAVE_AREA_OFFSET,
- THREADLOCAL_ADDR_OFFSET,
- IS_BIG_ENDIAN)
- from rpython.jit.metainterp.history import (JitCellToken, TargetToken,
- AbstractFailDescr, FLOAT, INT, REF,
- ConstInt, VOID)
- from rpython.rlib.objectmodel import we_are_translated
- from rpython.jit.backend.ppc.helper.assembler import (Saved_Volatiles)
- from rpython.jit.backend.ppc.jump import remap_frame_layout
- from rpython.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg,
- PPCBuilder, PPCGuardToken)
- from rpython.jit.backend.ppc.regalloc import TempPtr, TempInt
- from rpython.jit.backend.llsupport import symbolic, jitframe
- from rpython.jit.backend.llsupport.descr import CallDescr
- from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
- from rpython.rtyper.lltypesystem import rstr, rffi, lltype
- from rpython.rtyper.annlowlevel import cast_instance_to_gcref
- from rpython.rtyper import rclass
- from rpython.jit.metainterp.resoperation import rop
- from rpython.jit.codewriter.effectinfo import EffectInfo
- from rpython.jit.backend.ppc import callbuilder
- from rpython.rlib.rarithmetic import r_uint
- class IntOpAssembler(object):
-
- _mixin_ = True
- def emit_int_add(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- assert not l0.is_imm()
- if l1.is_imm():
- self.mc.addi(res.value, l0.value, l1.value)
- else:
- self.mc.add(res.value, l0.value, l1.value)
- emit_nursery_ptr_increment = emit_int_add
- def emit_int_sub(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- assert not l0.is_imm()
- if l1.is_imm():
- self.mc.subi(res.value, l0.value, l1.value)
- else:
- self.mc.sub(res.value, l0.value, l1.value)
-
- def emit_int_mul(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- assert not l0.is_imm()
- if l1.is_imm():
- self.mc.mulli(res.value, l0.value, l1.value)
- elif IS_PPC_32:
- self.mc.mullw(res.value, l0.value, l1.value)
- else:
- self.mc.mulld(res.value, l0.value, l1.value)
- def emit_uint_mul_high(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- assert not l0.is_imm()
- assert not l1.is_imm()
- self.mc.mulhdu(res.value, l0.value, l1.value)
- def do_emit_int_binary_ovf(self, op, arglocs):
- l0, l1, res = arglocs[0], arglocs[1], arglocs[2]
- self.mc.load_imm(r.SCRATCH, 0)
- self.mc.mtxer(r.SCRATCH.value)
- return (res.value, l0.value, l1.value)
- def emit_int_add_ovf(self, op, arglocs, regalloc):
- self.mc.addox(*self.do_emit_int_binary_ovf(op, arglocs))
- def emit_int_sub_ovf(self, op, arglocs, regalloc):
- self.mc.subox(*self.do_emit_int_binary_ovf(op, arglocs))
- def emit_int_mul_ovf(self, op, arglocs, regalloc):
- if IS_PPC_32:
- self.mc.mullwox(*self.do_emit_int_binary_ovf(op, arglocs))
- else:
- self.mc.mulldox(*self.do_emit_int_binary_ovf(op, arglocs))
- def emit_int_and(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.and_(res.value, l0.value, l1.value)
- def emit_int_or(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.or_(res.value, l0.value, l1.value)
- def emit_int_xor(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.xor(res.value, l0.value, l1.value)
-
- def emit_int_lshift(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- if IS_PPC_32:
- self.mc.slw(res.value, l0.value, l1.value)
- else:
- self.mc.sld(res.value, l0.value, l1.value)
- def emit_int_rshift(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- if IS_PPC_32:
- self.mc.sraw(res.value, l0.value, l1.value)
- else:
- self.mc.srad(res.value, l0.value, l1.value)
- def emit_uint_rshift(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- if IS_PPC_32:
- self.mc.srw(res.value, l0.value, l1.value)
- else:
- self.mc.srd(res.value, l0.value, l1.value)
- emit_int_le = gen_emit_cmp_op(c.LE)
- emit_int_lt = gen_emit_cmp_op(c.LT)
- emit_int_gt = gen_emit_cmp_op(c.GT)
- emit_int_ge = gen_emit_cmp_op(c.GE)
- emit_int_eq = gen_emit_cmp_op(c.EQ)
- emit_int_ne = gen_emit_cmp_op(c.NE)
- emit_uint_lt = gen_emit_cmp_op(c.LT, signed=False)
- emit_uint_le = gen_emit_cmp_op(c.LE, signed=False)
- emit_uint_gt = gen_emit_cmp_op(c.GT, signed=False)
- emit_uint_ge = gen_emit_cmp_op(c.GE, signed=False)
- emit_int_is_zero = emit_int_eq # EQ to 0
- emit_int_is_true = emit_int_ne # NE to 0
- emit_ptr_eq = emit_int_eq
- emit_ptr_ne = emit_int_ne
- emit_instance_ptr_eq = emit_ptr_eq
- emit_instance_ptr_ne = emit_ptr_ne
- def emit_int_neg(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.neg(res.value, l0.value)
- def emit_int_invert(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.not_(res.value, l0.value)
- def emit_int_signext(self, op, arglocs, regalloc):
- l0, res = arglocs
- extend_from = op.getarg(1).getint()
- if extend_from == 1:
- self.mc.extsb(res.value, l0.value)
- elif extend_from == 2:
- self.mc.extsh(res.value, l0.value)
- elif extend_from == 4:
- self.mc.extsw(res.value, l0.value)
- else:
- raise AssertionError(extend_from)
- def emit_int_force_ge_zero(self, op, arglocs, regalloc):
- arg, res = arglocs
- with scratch_reg(self.mc):
- self.mc.nor(r.SCRATCH.value, arg.value, arg.value)
- if IS_PPC_32:
- self.mc.srawi(r.SCRATCH.value, r.SCRATCH.value, 31)
- else:
- # sradi (scratch, scratch, 63)
- self.mc.sradi(r.SCRATCH.value, r.SCRATCH.value, 1, 31)
- self.mc.and_(res.value, arg.value, r.SCRATCH.value)
- class FloatOpAssembler(object):
- _mixin_ = True
- def emit_float_add(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.fadd(res.value, l0.value, l1.value)
- def emit_float_sub(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.fsub(res.value, l0.value, l1.value)
- def emit_float_mul(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.fmul(res.value, l0.value, l1.value)
- def emit_float_truediv(self, op, arglocs, regalloc):
- l0, l1, res = arglocs
- self.mc.fdiv(res.value, l0.value, l1.value)
- def emit_float_neg(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.fneg(res.value, l0.value)
- def emit_float_abs(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.fabs(res.value, l0.value)
- def _emit_math_sqrt(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.fsqrt(res.value, l0.value)
- def _emit_threadlocalref_get(self, op, arglocs, regalloc):
- [resloc] = arglocs
- offset = op.getarg(1).getint() # getarg(0) == 'threadlocalref_get'
- calldescr = op.getdescr()
- size = calldescr.get_result_size()
- sign = calldescr.is_result_signed()
- #
- # This loads the stack location THREADLOCAL_OFS into a
- # register, and then read the word at the given offset.
- # It is only supported if 'translate_support_code' is
- # true; otherwise, the execute_token() was done with a
- # dummy value for the stack location THREADLOCAL_OFS
- #
- assert self.cpu.translate_support_code
- assert resloc.is_reg()
- assert _check_imm_arg(offset)
- self.mc.ld(resloc.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
- self._load_from_mem(resloc, resloc, imm(offset), imm(size), imm(sign))
- emit_float_le = gen_emit_cmp_op(c.LE, fp=True)
- emit_float_lt = gen_emit_cmp_op(c.LT, fp=True)
- emit_float_gt = gen_emit_cmp_op(c.GT, fp=True)
- emit_float_ge = gen_emit_cmp_op(c.GE, fp=True)
- emit_float_eq = gen_emit_cmp_op(c.EQ, fp=True)
- emit_float_ne = gen_emit_cmp_op(c.NE, fp=True)
- def emit_cast_float_to_int(self, op, arglocs, regalloc):
- l0, temp_loc, res = arglocs
- self.mc.fctidz(temp_loc.value, l0.value)
- self.mc.stfd(temp_loc.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- self.mc.ld(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- def emit_cast_int_to_float(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.std(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- self.mc.lfd(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- self.mc.fcfid(res.value, res.value)
- def emit_convert_float_bytes_to_longlong(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.stfd(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- self.mc.ld(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- def emit_convert_longlong_bytes_to_float(self, op, arglocs, regalloc):
- l0, res = arglocs
- self.mc.std(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- self.mc.lfd(res.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
- class GuardOpAssembler(object):
- _mixin_ = True
- def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False):
- if is_guard_not_invalidated:
- fcond = c.cond_none
- else:
- fcond = self.guard_success_cc
- self.guard_success_cc = c.cond_none
- assert fcond != c.cond_none
- fcond = c.negate(fcond)
- token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], fcond)
- token.pos_jump_offset = self.mc.currpos()
- assert token.guard_not_invalidated() == is_guard_not_invalidated
- if not is_guard_not_invalidated:
- self.mc.trap() # has to be patched later on
- self.pending_guard_tokens.append(token)
- def build_guard_token(self, op, frame_depth, arglocs, fcond):
- descr = op.getdescr()
- gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE)
- faildescrindex = self.get_gcref_from_faildescr(descr)
- token = PPCGuardToken(self.cpu, gcmap, descr, op.getfailargs(),
- arglocs, op.getopnum(), frame_depth,
- faildescrindex,
- fcond)
- return token
- def emit_guard_true(self, op, arglocs, regalloc):
- self._emit_guard(op, arglocs)
- def emit_guard_false(self, op, arglocs, regalloc):
- self.guard_success_cc = c.negate(self.guard_success_cc)
- self._emit_guard(op, arglocs)
- def emit_guard_overflow(self, op, arglocs, regalloc):
- self.guard_success_cc = c.SO
- self._emit_guard(op, arglocs)
- def emit_guard_no_overflow(self, op, arglocs, regalloc):
- self.guard_success_cc = c.NS
- self._emit_guard(op, arglocs)
- def emit_guard_value(self, op, arglocs, regalloc):
- l0 = arglocs[0]
- l1 = arglocs[1]
- failargs = arglocs[2:]
- if l0.is_reg():
- if l1.is_imm():
- self.mc.cmp_op(0, l0.value, l1.getint(), imm=True)
- else:
- self.mc.cmp_op(0, l0.value, l1.value)
- elif l0.is_fp_reg():
- assert l1.is_fp_reg()
- self.mc.cmp_op(0, l0.value, l1.value, fp=True)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, failargs)
- emit_guard_nonnull = emit_guard_true
- emit_guard_isnull = emit_guard_false
- def emit_guard_class(self, op, arglocs, regalloc):
- self._cmp_guard_class(op, arglocs, regalloc)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs[2:])
- def emit_guard_nonnull_class(self, op, arglocs, regalloc):
- self.mc.cmp_op(0, arglocs[0].value, 1, imm=True, signed=False)
- patch_pos = self.mc.currpos()
- self.mc.trap()
- self._cmp_guard_class(op, arglocs, regalloc)
- pmc = OverwritingBuilder(self.mc, patch_pos, 1)
- pmc.blt(self.mc.currpos() - patch_pos)
- pmc.overwrite()
- self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs[2:])
- def _cmp_guard_class(self, op, locs, regalloc):
- offset = self.cpu.vtable_offset
- if offset is not None:
- # could be one instruction shorter, but don't care because
- # it's not this case that is commonly translated
- self.mc.load(r.SCRATCH.value, locs[0].value, offset)
- self.mc.load_imm(r.SCRATCH2, locs[1].value)
- self.mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value)
- else:
- expected_typeid = (self.cpu.gc_ll_descr
- .get_typeid_from_classptr_if_gcremovetypeptr(locs[1].value))
- self._cmp_guard_gc_type(locs[0], expected_typeid)
- def _read_typeid(self, targetreg, loc_ptr):
- # Note that the typeid half-word is at offset 0 on a little-endian
- # machine; it is at offset 2 or 4 on a big-endian machine.
- assert self.cpu.supports_guard_gc_type
- if IS_PPC_32:
- self.mc.lhz(targetreg.value, loc_ptr.value, 2 * IS_BIG_ENDIAN)
- else:
- self.mc.lwz(targetreg.value, loc_ptr.value, 4 * IS_BIG_ENDIAN)
- def _cmp_guard_gc_type(self, loc_ptr, expected_typeid):
- self._read_typeid(r.SCRATCH2, loc_ptr)
- assert 0 <= expected_typeid <= 0x7fffffff # 4 bytes are always enough
- if expected_typeid > 0xffff: # if 2 bytes are not enough
- self.mc.subis(r.SCRATCH2.value, r.SCRATCH2.value,
- expected_typeid >> 16)
- expected_typeid = expected_typeid & 0xffff
- self.mc.cmp_op(0, r.SCRATCH2.value, expected_typeid,
- imm=True, signed=False)
- def emit_guard_gc_type(self, op, arglocs, regalloc):
- self._cmp_guard_gc_type(arglocs[0], arglocs[1].value)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs[2:])
- def emit_guard_is_object(self, op, arglocs, regalloc):
- assert self.cpu.supports_guard_gc_type
- loc_object = arglocs[0]
- # idea: read the typeid, fetch one byte of the field 'infobits' from
- # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'.
- base_type_info, shift_by, sizeof_ti = (
- self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
- infobits_offset, IS_OBJECT_FLAG = (
- self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object())
- self._read_typeid(r.SCRATCH2, loc_object)
- self.mc.load_imm(r.SCRATCH, base_type_info + infobits_offset)
- assert shift_by == 0 # on PPC64; fixme for PPC32
- self.mc.lbzx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
- self.mc.andix(r.SCRATCH2.value, r.SCRATCH2.value, IS_OBJECT_FLAG & 0xff)
- self.guard_success_cc = c.NE
- self._emit_guard(op, arglocs[1:])
- def emit_guard_subclass(self, op, arglocs, regalloc):
- assert self.cpu.supports_guard_gc_type
- loc_object = arglocs[0]
- loc_check_against_class = arglocs[1]
- offset = self.cpu.vtable_offset
- offset2 = self.cpu.subclassrange_min_offset
- if offset is not None:
- # read this field to get the vtable pointer
- self.mc.load(r.SCRATCH2.value, loc_object.value, offset)
- # read the vtable's subclassrange_min field
- assert _check_imm_arg(offset2)
- self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2)
- else:
- # read the typeid
- self._read_typeid(r.SCRATCH, loc_object)
- # read the vtable's subclassrange_min field, as a single
- # step with the correct offset
- base_type_info, shift_by, sizeof_ti = (
- self.cpu.gc_ll_descr.get_translated_info_for_typeinfo())
- self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2)
- assert shift_by == 0 # on PPC64; fixme for PPC32
- self.mc.ldx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value)
- # get the two bounds to check against
- vtable_ptr = loc_check_against_class.getint()
- vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr)
- check_min = vtable_ptr.subclassrange_min
- check_max = vtable_ptr.subclassrange_max
- assert check_max > check_min
- check_diff = check_max - check_min - 1
- # right now, a full PyPy uses less than 6000 numbers,
- # so we'll assert here that it always fit inside 15 bits
- assert 0 <= check_min <= 0x7fff
- assert 0 <= check_diff <= 0xffff
- # check by doing the unsigned comparison (tmp - min) < (max - min)
- self.mc.subi(r.SCRATCH2.value, r.SCRATCH2.value, check_min)
- self.mc.cmp_op(0, r.SCRATCH2.value, check_diff, imm=True, signed=False)
- # the guard passes if we get a result of "below or equal"
- self.guard_success_cc = c.LE
- self._emit_guard(op, arglocs[2:])
- def emit_guard_not_invalidated(self, op, arglocs, regalloc):
- self._emit_guard(op, arglocs, is_guard_not_invalidated=True)
- def emit_guard_not_forced(self, op, arglocs, regalloc):
- ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- self.mc.ld(r.SCRATCH.value, r.SPP.value, ofs)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs)
- def emit_guard_not_forced_2(self, op, arglocs, regalloc):
- guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:],
- c.cond_none)
- self._finish_gcmap = guard_token.gcmap
- self._store_force_index(op)
- self.store_info_on_descr(0, guard_token)
- class MiscOpAssembler(object):
- _mixin_ = True
- def emit_label(self, op, arglocs, regalloc):
- pass
- def emit_increment_debug_counter(self, op, arglocs, regalloc):
- [addr_loc, value_loc] = arglocs
- self.mc.load(value_loc.value, addr_loc.value, 0)
- self.mc.addi(value_loc.value, value_loc.value, 1) # can't use r0!
- self.mc.store(value_loc.value, addr_loc.value, 0)
- def emit_finish(self, op, arglocs, regalloc):
- base_ofs = self.cpu.get_baseofs_of_frame_field()
- if len(arglocs) > 0:
- [return_val] = arglocs
- if op.getarg(0).type == FLOAT:
- self.mc.stfd(return_val.value, r.SPP.value, base_ofs)
- else:
- self.mc.std(return_val.value, r.SPP.value, base_ofs)
- ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap')
- descr = op.getdescr()
- faildescrindex = self.get_gcref_from_faildescr(descr)
- self._load_from_gc_table(r.r5, r.r5, faildescrindex)
- # gcmap logic here:
- arglist = op.getarglist()
- if arglist and arglist[0].type == REF:
- if self._finish_gcmap:
- # we're returning with a guard_not_forced_2, and
- # additionally we need to say that the result contains
- # a reference too:
- self._finish_gcmap[0] |= r_uint(1)
- gcmap = self._finish_gcmap
- else:
- gcmap = self.gcmap_for_finish
- elif self._finish_gcmap:
- # we're returning with a guard_not_forced_2
- gcmap = self._finish_gcmap
- else:
- gcmap = lltype.nullptr(jitframe.GCMAP)
- self.load_gcmap(self.mc, r.r2, gcmap)
- self.mc.std(r.r5.value, r.SPP.value, ofs)
- self.mc.store(r.r2.value, r.SPP.value, ofs2)
- # exit function
- self._call_footer()
- def emit_jump(self, op, arglocs, regalloc):
- # The backend's logic assumes that the target code is in a piece of
- # assembler that was also called with the same number of arguments,
- # so that the locations [ebp+8..] of the input arguments are valid
- # stack locations both before and after the jump.
- #
- descr = op.getdescr()
- assert isinstance(descr, TargetToken)
- my_nbargs = self.current_clt._debug_nbargs
- target_nbargs = descr._ppc_clt._debug_nbargs
- assert my_nbargs == target_nbargs
- if descr in self.target_tokens_currently_compiling:
- self.mc.b_offset(descr._ll_loop_code)
- else:
- self.mc.b_abs(descr._ll_loop_code)
- def _genop_same_as(self, op, arglocs, regalloc):
- argloc, resloc = arglocs
- if argloc is not resloc:
- self.regalloc_mov(argloc, resloc)
- emit_same_as_i = _genop_same_as
- emit_same_as_r = _genop_same_as
- emit_same_as_f = _genop_same_as
- emit_cast_ptr_to_int = _genop_same_as
- emit_cast_int_to_ptr = _genop_same_as
- def emit_guard_no_exception(self, op, arglocs, regalloc):
- self.mc.load_from_addr(r.SCRATCH2, r.SCRATCH2, self.cpu.pos_exception())
- self.mc.cmp_op(0, r.SCRATCH2.value, 0, imm=True)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, arglocs)
- # If the previous operation was a COND_CALL, overwrite its conditional
- # jump to jump over this GUARD_NO_EXCEPTION as well, if we can
- if self._find_nearby_operation(regalloc,-1).getopnum() == rop.COND_CALL:
- jmp_adr, BI, BO = self.previous_cond_call_jcond
- relative_target = self.mc.currpos() - jmp_adr
- pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
- pmc.bc(BO, BI, relative_target)
- pmc.overwrite()
- def emit_save_exc_class(self, op, arglocs, regalloc):
- [resloc] = arglocs
- diff = self.mc.load_imm_plus(r.r2, self.cpu.pos_exception())
- self.mc.load(resloc.value, r.r2.value, diff)
- def emit_save_exception(self, op, arglocs, regalloc):
- [resloc] = arglocs
- self._store_and_reset_exception(self.mc, resloc)
- def emit_restore_exception(self, op, arglocs, regalloc):
- self._restore_exception(self.mc, arglocs[1], arglocs[0])
- def emit_guard_exception(self, op, arglocs, regalloc):
- loc, resloc = arglocs[:2]
- failargs = arglocs[2:]
- mc = self.mc
- mc.load_imm(r.SCRATCH2, self.cpu.pos_exc_value())
- diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
- assert _check_imm_arg(diff)
- mc.load(r.SCRATCH.value, r.SCRATCH2.value, diff)
- mc.cmp_op(0, r.SCRATCH.value, loc.value)
- self.guard_success_cc = c.EQ
- self._emit_guard(op, failargs)
- if resloc:
- mc.load(resloc.value, r.SCRATCH2.value, 0)
- mc.load_imm(r.SCRATCH, 0)
- mc.store(r.SCRATCH.value, r.SCRATCH2.value, 0)
- mc.store(r.SCRATCH.value, r.SCRATCH2.value, diff)
- def _load_from_gc_table(self, rD, rT, index):
- # rT is a temporary, may be equal to rD, must be != r0
- addr = self.gc_table_addr + index * WORD
- self.mc.load_from_addr(rD, rT, addr)
- def emit_load_from_gc_table(self, op, arglocs, regalloc):
- index = op.getarg(0).getint()
- [resloc] = arglocs
- assert resloc.is_reg()
- self._load_from_gc_table(resloc, resloc, index)
- class CallOpAssembler(object):
- _mixin_ = True
- def _emit_call(self, op, arglocs, is_call_release_gil=False):
- resloc = arglocs[0]
- func_index = 1 + is_call_release_gil
- adr = arglocs[func_index]
- arglist = arglocs[func_index+1:]
- cb = callbuilder.CallBuilder(self, adr, arglist, resloc)
- descr = op.getdescr()
- assert isinstance(descr, CallDescr)
- cb.argtypes = descr.get_arg_types()
- cb.restype = descr.get_result_type()
- if is_call_release_gil:
- saveerrloc = arglocs[1]
- assert saveerrloc.is_imm()
- cb.emit_call_release_gil(saveerrloc.value)
- else:
- effectinfo = descr.get_extra_info()
- if effectinfo is None or effectinfo.check_can_collect():
- cb.emit()
- else:
- cb.emit_no_collect()
- def _genop_call(self, op, arglocs, regalloc):
- oopspecindex = regalloc.get_oopspecindex(op)
- if oopspecindex == EffectInfo.OS_MATH_SQRT:
- return self._emit_math_sqrt(op, arglocs, regalloc)
- if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET:
- return self._emit_threadlocalref_get(op, arglocs, regalloc)
- self._emit_call(op, arglocs)
- emit_call_i = _genop_call
- emit_call_r = _genop_call
- emit_call_f = _genop_call
- emit_call_n = _genop_call
- def _genop_call_may_force(self, op, arglocs, regalloc):
- self._store_force_index(self._find_nearby_operation(regalloc, +1))
- self._emit_call(op, arglocs)
- emit_call_may_force_i = _genop_call_may_force
- emit_call_may_force_r = _genop_call_may_force
- emit_call_may_force_f = _genop_call_may_force
- emit_call_may_force_n = _genop_call_may_force
- def _genop_call_release_gil(self, op, arglocs, regalloc):
- self._store_force_index(self._find_nearby_operation(regalloc, +1))
- self._emit_call(op, arglocs, is_call_release_gil=True)
- emit_call_release_gil_i = _genop_call_release_gil
- emit_call_release_gil_f = _genop_call_release_gil
- emit_call_release_gil_n = _genop_call_release_gil
- def _store_force_index(self, guard_op):
- assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
- guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
- faildescr = guard_op.getdescr()
- ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
- faildescrindex = self.get_gcref_from_faildescr(faildescr)
- self._load_from_gc_table(r.r2, r.r2, faildescrindex)
- self.mc.store(r.r2.value, r.SPP.value, ofs)
- def _find_nearby_operation(self, regalloc, delta):
- return regalloc.operations[regalloc.rm.position + delta]
- _COND_CALL_SAVE_REGS = [r.r3, r.r4, r.r5, r.r6, r.r12]
- def emit_cond_call(self, op, arglocs, regalloc):
- fcond = self.guard_success_cc
- self.guard_success_cc = c.cond_none
- assert fcond != c.cond_none
- fcond = c.negate(fcond)
- jmp_adr = self.mc.get_relative_pos()
- self.mc.trap() # patched later to a 'bc'
- self.load_gcmap(self.mc, r.r2, regalloc.get_gcmap())
- # save away r3, r4, r5, r6, r12 into the jitframe
- should_be_saved = [
- reg for reg in self._regalloc.rm.reg_bindings.itervalues()
- if reg in self._COND_CALL_SAVE_REGS]
- self._push_core_regs_to_jitframe(self.mc, should_be_saved)
- #
- # load the 0-to-4 arguments into these registers, with the address of
- # the function to call into r12
- remap_frame_layout(self, arglocs,
- [r.r12, r.r3, r.r4, r.r5, r.r6][:len(arglocs)],
- r.SCRATCH)
- #
- # figure out which variant of cond_call_slowpath to call, and call it
- callee_only = False
- floats = False
- for reg in regalloc.rm.reg_bindings.values():
- if reg not in regalloc.rm.save_around_call_regs:
- break
- else:
- callee_only = True
- if regalloc.fprm.reg_bindings:
- floats = True
- cond_call_adr = self.cond_call_slowpath[floats * 2 + callee_only]
- self.mc.bl_abs(cond_call_adr)
- # restoring the registers saved above, and doing pop_gcmap(), is left
- # to the cond_call_slowpath helper. We never have any result value.
- relative_target = self.mc.currpos() - jmp_adr
- pmc = OverwritingBuilder(self.mc, jmp_adr, 1)
- BI, BO = c.encoding[fcond]
- pmc.bc(BO, BI, relative_target)
- pmc.overwrite()
- # might be overridden again to skip over the following
- # guard_no_exception too
- self.previous_cond_call_jcond = jmp_adr, BI, BO
- class FieldOpAssembler(object):
- _mixin_ = True
- def _write_to_mem(self, value_loc, base_loc, ofs, size_loc):
- assert size_loc.is_imm()
- size = size_loc.value
- if size == 8:
- if value_loc.is_fp_reg():
- if ofs.is_imm():
- self.mc.stfd(value_loc.value, base_loc.value, ofs.value)
- else:
- self.mc.stfdx(value_loc.value, base_loc.value, ofs.value)
- else:
- if ofs.is_imm():
- self.mc.std(value_loc.value, base_loc.value, ofs.value)
- else:
- self.mc.stdx(value_loc.value, base_loc.value, ofs.value)
- elif size == 4:
- if ofs.is_imm():
- self.mc.stw(value_loc.value, base_loc.value, ofs.value)
- else:
- self.mc.stwx(value_loc.value, base_loc.value, ofs.value)
- elif size == 2:
- if ofs.is_imm():
- self.mc.sth(value_loc.value, base_loc.value, ofs.value)
- else:
- self.mc.sthx(value_loc.value, base_loc.value, ofs.value)
- elif size == 1:
- if ofs.is_imm():
- self.mc.stb(value_loc.value, base_loc.value, ofs.value)
- else:
- self.mc.stbx(value_loc.value, base_loc.value, ofs.value)
- else:
- assert 0, "size not supported"
- def emit_gc_store(self, op, arglocs, regalloc):
- value_loc, base_loc, ofs_loc, size_loc = arglocs
- self._write_to_mem(value_loc, base_loc, ofs_loc, size_loc)
- def _apply_offset(self, index_loc, ofs_loc):
- # If offset != 0 then we have to add it here. Note that
- # mc.addi() would not be valid with operand r0.
- assert ofs_loc.is_imm() # must be an immediate...
- assert _check_imm_arg(ofs_loc.getint()) # ...that fits 16 bits
- assert index_loc is not r.SCRATCH2
- # (simplified version of _apply_scale())
- if ofs_loc.value > 0:
- self.mc.addi(r.SCRATCH2.value, index_loc.value, ofs_loc.value)
- index_loc = r.SCRATCH2
- return index_loc
- def emit_gc_store_indexed(self, op, arglocs, regalloc):
- base_loc, index_loc, value_loc, ofs_loc, size_loc = arglocs
- index_loc = self._apply_offset(index_loc, ofs_loc)
- self._write_to_mem(value_loc, base_loc, index_loc, size_loc)
- def _load_from_mem(self, res, base_loc, ofs, size_loc, sign_loc):
- # res, base_loc, ofs, size and signed are all locations
- assert base_loc is not r.SCRATCH
- assert size_loc.is_imm()
- size = size_loc.value
- assert sign_loc.is_imm()
- sign = sign_loc.value
- if size == 8:
- if res.is_fp_reg():
- if ofs.is_imm():
- self.mc.lfd(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lfdx(res.value, base_loc.value, ofs.value)
- else:
- if ofs.is_imm():
- self.mc.ld(res.value, base_loc.value, ofs.value)
- else:
- self.mc.ldx(res.value, base_loc.value, ofs.value)
- elif size == 4:
- if IS_PPC_64 and sign:
- if ofs.is_imm():
- self.mc.lwa(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lwax(res.value, base_loc.value, ofs.value)
- else:
- if ofs.is_imm():
- self.mc.lwz(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lwzx(res.value, base_loc.value, ofs.value)
- elif size == 2:
- if sign:
- if ofs.is_imm():
- self.mc.lha(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lhax(res.value, base_loc.value, ofs.value)
- else:
- if ofs.is_imm():
- self.mc.lhz(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lhzx(res.value, base_loc.value, ofs.value)
- elif size == 1:
- if ofs.is_imm():
- self.mc.lbz(res.value, base_loc.value, ofs.value)
- else:
- self.mc.lbzx(res.value, base_loc.value, ofs.value)
- if sign:
- self.mc.extsb(res.value, res.value)
- else:
- assert 0, "size not supported"
- def _genop_gc_load(self, op, arglocs, regalloc):
- base_loc, ofs_loc, res_loc, size_loc, sign_loc = arglocs
- self._load_from_mem(res_loc, base_loc, ofs_loc, size_loc, sign_loc)
- emit_gc_load_i = _genop_gc_load
- emit_gc_load_r = _genop_gc_load
- emit_gc_load_f = _genop_gc_load
- def _genop_gc_load_indexed(self, op, arglocs, regalloc):
- base_loc, index_loc, res_loc, ofs_loc, size_loc, sign_loc = arglocs
- index_loc = self._apply_offset(index_loc, ofs_loc)
- self._load_from_mem(res_loc, base_loc, index_loc, size_loc, sign_loc)
- emit_gc_load_indexed_i = _genop_gc_load_indexed
- emit_gc_load_indexed_r = _genop_gc_load_indexed
- emit_gc_load_indexed_f = _genop_gc_load_indexed
- SIZE2SCALE = dict([(1<<_i, _i) for _i in range(32)])
- def _multiply_by_constant(self, loc, multiply_by, scratch_loc):
- # XXX should die together with _apply_scale() but can't because
- # of emit_zero_array() and malloc_cond_varsize() at the moment
- assert loc.is_reg()
- if multiply_by == 1:
- return loc
- try:
- scale = self.SIZE2SCALE[multiply_by]
- except KeyError:
- if _check_imm_arg(multiply_by):
- self.mc.mulli(scratch_loc.value, loc.value, multiply_by)
- else:
- self.mc.load_imm(scratch_loc, multiply_by)
- if IS_PPC_32:
- self.mc.mullw(scratch_loc.value, loc.value,
- scratch_loc.value)
- else:
- self.mc.mulld(scratch_loc.value, loc.value,
- scratch_loc.value)
- else:
- self.mc.sldi(scratch_loc.value, loc.value, scale)
- return scratch_loc
- def _copy_in_scratch2(self, loc):
- if loc.is_imm():
- self.mc.li(r.SCRATCH2.value, loc.value)
- elif loc is not r.SCRATCH2:
- self.mc.mr(r.SCRATCH2.value, loc.value)
- return r.SCRATCH2
- # RPythonic workaround for emit_zero_array()
- def eza_stXux(self, a, b, c, itemsize):
- if itemsize & 1: self.mc.stbux(a, b, c)
- elif itemsize & 2: self.mc.sthux(a, b, c)
- elif (itemsize & 4) or IS_PPC_32: self.mc.stwux(a, b, c)
- else: self.mc.stdux(a, b, c)
- def eza_stXu(self, a, b, c, itemsize):
- if itemsize & 1: self.mc.stbu(a, b, c)
- elif itemsize & 2: self.mc.sthu(a, b, c)
- elif (itemsize & 4) or IS_PPC_32: self.mc.stwu(a, b, c)
- else: self.mc.stdu(a, b, c)
- def emit_zero_array(self, op, arglocs, regalloc):
- base_loc, startindex_loc, length_loc, ofs_loc = arglocs
- stepsize = 8
- shift_by = 3
- if IS_PPC_32:
- stepsize = 4
- shift_by = 2
- if length_loc.is_imm():
- if length_loc.value <= 0:
- return # nothing to do
- if startindex_loc.is_imm():
- self.mc.load_imm(r.SCRATCH2, startindex_loc.value)
- startindex_loc = r.SCRATCH2
- if ofs_loc.is_imm():
- self.mc.addi(r.SCRATCH2.value, startindex_loc.value, ofs_loc.value)
- else:
- self.mc.add(r.SCRATCH2.value, startindex_loc.value, ofs_loc.value)
- ofs_loc = r.SCRATCH2
- assert base_loc.is_core_reg()
- self.mc.add(ofs_loc.value, ofs_loc.value, base_loc.value)
- # ofs_loc is now the real address pointing to the first
- # byte to be zeroed
- prev_length_loc = length_loc
- if length_loc.is_imm():
- self.mc.load_imm(r.SCRATCH, length_loc.value)
- length_loc = r.SCRATCH
- self.mc.cmp_op(0, length_loc.value, stepsize, imm=True)
- jlt_location = self.mc.currpos()
- self.mc.trap()
- self.mc.sradi(r.SCRATCH.value, length_loc.value, 0, shift_by)
- self.mc.mtctr(r.SCRATCH.value) # store the length in count register
- self.mc.li(r.SCRATCH.value, 0)
- # NOTE the following assumes that bytes have been passed to both startindex
- # and length. Thus we zero 4/8 bytes in a loop in 1) and every remaining
- # byte is zeroed in another loop in 2)
- self.mc.subi(ofs_loc.value, ofs_loc.value, stepsize)
- # first store of case 1)
- # 1) The next loop copies WORDS into the memory chunk starting at startindex
- # ending at startindex + length. These are bytes
- loop_location = self.mc.currpos()
- self.eza_stXu(r.SCRATCH.value, ofs_loc.value, stepsize, stepsize)
- self.mc.bdnz(loop_location - self.mc.currpos())
- self.mc.addi(ofs_loc.value, ofs_loc.value, stepsize)
- pmc = OverwritingBuilder(self.mc, jlt_location, 1)
- pmc.blt(self.mc.currpos() - jlt_location) # jump if length < WORD
- pmc.overwrite()
- # 2) There might be some bytes left to be written.
- # following scenario: length_loc == 3 bytes, stepsize == 4!
- # need to write the last bytes.
- # move the last bytes to the count register
- length_loc = prev_length_loc
- if length_loc.is_imm():
- self.mc.load_imm(r.SCRATCH, length_loc.value & (stepsize-1))
- else:
- self.mc.andix(r.SCRATCH.value, length_loc.value, (stepsize-1) & 0xff)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- jle_location = self.mc.currpos()
- self.mc.trap()
- self.mc.mtctr(r.SCRATCH.value)
- self.mc.li(r.SCRATCH.value, 0)
- self.mc.subi(ofs_loc.value, ofs_loc.value, 1)
- loop_location = self.mc.currpos()
- self.eza_stXu(r.SCRATCH.value, ofs_loc.value, 1, 1)
- self.mc.bdnz(loop_location - self.mc.currpos())
- pmc = OverwritingBuilder(self.mc, jle_location, 1)
- pmc.ble(self.mc.currpos() - jle_location) # !GT
- pmc.overwrite()
- class StrOpAssembler(object):
- _mixin_ = True
- def emit_copystrcontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=False)
- def emit_copyunicodecontent(self, op, arglocs, regalloc):
- self._emit_copycontent(arglocs, is_unicode=True)
- def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale):
- if src_ofs.is_imm():
- value = src_ofs.value << scale
- if value < 32768:
- self.mc.addi(dst.value, src_ptr.value, value)
- else:
- self.mc.load_imm(dst, value)
- self.mc.add(dst.value, src_ptr.value, dst.value)
- elif scale == 0:
- self.mc.add(dst.value, src_ptr.value, src_ofs.value)
- else:
- self.mc.sldi(dst.value, src_ofs.value, scale)
- self.mc.add(dst.value, src_ptr.value, dst.value)
- def _emit_copycontent(self, arglocs, is_unicode):
- [src_ptr_loc, dst_ptr_loc,
- src_ofs_loc, dst_ofs_loc, length_loc] = arglocs
- if is_unicode:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
- self.cpu.translate_support_code)
- if itemsize == 2: scale = 1
- elif itemsize == 4: scale = 2
- else: raise AssertionError
- else:
- basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
- self.cpu.translate_support_code)
- assert itemsize == 1
- basesize -= 1 # for the extra null character
- scale = 0
- self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale)
- self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale)
- if length_loc.is_imm():
- length = length_loc.getint()
- self.mc.load_imm(r.r5, length << scale)
- else:
- if scale > 0:
- self.mc.sldi(r.r5.value, length_loc.value, scale)
- elif length_loc is not r.r5:
- self.mc.mr(r.r5.value, length_loc.value)
- self.mc.mr(r.r4.value, r.r0.value)
- self.mc.addi(r.r4.value, r.r4.value, basesize)
- self.mc.addi(r.r3.value, r.r2.value, basesize)
- self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr)
- self.mc.raw_call()
- class UnicodeOpAssembler(object):
- _mixin_ = True
- # empty!
- class AllocOpAssembler(object):
- _mixin_ = True
- def emit_check_memory_error(self, op, arglocs, regalloc):
- self.propagate_memoryerror_if_reg_is_null(arglocs[0])
- def emit_call_malloc_nursery(self, op, arglocs, regalloc):
- # registers r.RES and r.RSZ are allocated for this call
- size_box = op.getarg(0)
- assert isinstance(size_box, ConstInt)
- size = size_box.getint()
- gc_ll_descr = self.cpu.gc_ll_descr
- gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
- self.malloc_cond(
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- size, gcmap)
- def emit_call_malloc_nursery_varsize_frame(self, op, arglocs, regalloc):
- # registers r.RES and r.RSZ are allocated for this call
- [sizeloc] = arglocs
- gc_ll_descr = self.cpu.gc_ll_descr
- gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
- self.malloc_cond_varsize_frame(
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- sizeloc, gcmap)
- def emit_call_malloc_nursery_varsize(self, op, arglocs, regalloc):
- # registers r.RES and r.RSZ are allocated for this call
- gc_ll_descr = self.cpu.gc_ll_descr
- if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
- raise Exception("unreachable code")
- # for boehm, this function should never be called
- [lengthloc] = arglocs
- arraydescr = op.getdescr()
- itemsize = op.getarg(1).getint()
- maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
- gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
- self.malloc_cond_varsize(
- op.getarg(0).getint(),
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- lengthloc, itemsize, maxlength, gcmap, arraydescr)
- def emit_debug_merge_point(self, op, arglocs, regalloc):
- pass
- emit_jit_debug = emit_debug_merge_point
- emit_keepalive = emit_debug_merge_point
- def emit_enter_portal_frame(self, op, arglocs, regalloc):
- self.enter_portal_frame(op)
- def emit_leave_portal_frame(self, op, arglocs, regalloc):
- self.leave_portal_frame(op)
- def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
- is_frame=False):
- # Write code equivalent to write_barrier() in the GC: it checks
- # a flag in the object at arglocs[0], and if set, it calls a
- # helper piece of assembler. The latter saves registers as needed
- # and call the function remember_young_pointer() from the GC.
- if we_are_translated():
- cls = self.cpu.gc_ll_descr.has_write_barrier_class()
- assert cls is not None and isinstance(descr, cls)
- #
- card_marking_mask = 0
- mask = descr.jit_wb_if_flag_singlebyte
- if array and descr.jit_wb_cards_set != 0:
- # assumptions the rest of the function depends on:
- assert (descr.jit_wb_cards_set_byteofs ==
- descr.jit_wb_if_flag_byteofs)
- card_marking_mask = descr.jit_wb_cards_set_singlebyte
- #
- loc_base = arglocs[0]
- assert loc_base.is_reg()
- if is_frame:
- assert loc_base is r.SPP
- assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
- mc.lbz(r.SCRATCH2.value, loc_base.value, descr.jit_wb_if_flag_byteofs)
- mc.andix(r.SCRATCH.value, r.SCRATCH2.value, mask & 0xFF)
- jz_location = mc.get_relative_pos()
- mc.trap() # patched later with 'beq'
- # for cond_call_gc_wb_array, also add another fast path:
- # if GCFLAG_CARDS_SET, then we can just set one bit and be done
- if card_marking_mask:
- # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already
- mc.andix(r.SCRATCH.value, r.SCRATCH2.value,
- card_marking_mask & 0xFF)
- js_location = mc.get_relative_pos()
- mc.trap() # patched later with 'bne'
- else:
- js_location = 0
- # Write only a CALL to the helper prepared in advance, passing it as
- # argument the address of the structure we are writing into
- # (the first argument to COND_CALL_GC_WB).
- helper_num = (card_marking_mask != 0)
- if is_frame:
- helper_num = 4
- elif regalloc.fprm.reg_bindings:
- helper_num += 2
- if self.wb_slowpath[helper_num] == 0: # tests only
- assert not we_are_translated()
- assert not is_frame
- self.cpu.gc_ll_descr.write_barrier_descr = descr
- self._build_wb_slowpath(card_marking_mask != 0,
- bool(regalloc.fprm.reg_bindings))
- assert self.wb_slowpath[helper_num] != 0
- #
- if not is_frame:
- mc.mr(r.r0.value, loc_base.value) # unusual argument location
- mc.load_imm(r.SCRATCH2, self.wb_slowpath[helper_num])
- mc.mtctr(r.SCRATCH2.value)
- mc.bctrl()
- if card_marking_mask:
- # The helper ends again with a check of the flag in the object.
- # So here, we can simply write again a beq, which will be
- # taken if GCFLAG_CARDS_SET is still not set.
- jns_location = mc.get_relative_pos()
- mc.trap()
- #
- # patch the 'bne' above
- currpos = mc.currpos()
- pmc = OverwritingBuilder(mc, js_location, 1)
- pmc.bne(currpos - js_location)
- pmc.overwrite()
- #
- # case GCFLAG_CARDS_SET: emit a few instructions to do
- # directly the card flag setting
- loc_index = arglocs[1]
- if loc_index.is_reg():
- tmp_loc = arglocs[2]
- n = descr.jit_wb_card_page_shift
- # compute in tmp_loc the byte offset:
- # ~(index >> (card_page_shift + 3)) ('~' is 'not_' below)
- mc.srli_op(tmp_loc.value, loc_index.value, n + 3)
- # compute in r2 the index of the bit inside the byte:
- # (index >> card_page_shift) & 7
- mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
- mc.li(r.SCRATCH.value, 1)
- mc.not_(tmp_loc.value, tmp_loc.value)
- # set r2 to 1 << r2
- mc.sl_op(r.SCRATCH2.value, r.SCRATCH.value, r.SCRATCH2.value)
- # set this bit inside the byte of interest
- mc.lbzx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
- mc.or_(r.SCRATCH.value, r.SCRATCH.value, r.SCRATCH2.value)
- mc.stbx(r.SCRATCH.value, loc_base.value, tmp_loc.value)
- # done
- else:
- byte_index = loc_index.value >> descr.jit_wb_card_page_shift
- byte_ofs = ~(byte_index >> 3)
- byte_val = 1 << (byte_index & 7)
- assert _check_imm_arg(byte_ofs)
- mc.lbz(r.SCRATCH.value, loc_base.value, byte_ofs)
- mc.ori(r.SCRATCH.value, r.SCRATCH.value, byte_val)
- mc.stb(r.SCRATCH.value, loc_base.value, byte_ofs)
- #
- # patch the beq just above
- currpos = mc.currpos()
- pmc = OverwritingBuilder(mc, jns_location, 1)
- pmc.beq(currpos - jns_location)
- pmc.overwrite()
- # patch the JZ above
- currpos = mc.currpos()
- pmc = OverwritingBuilder(mc, jz_location, 1)
- pmc.beq(currpos - jz_location)
- pmc.overwrite()
- def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
- self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc)
- def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc):
- self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc,
- array=True)
- class ForceOpAssembler(object):
- _mixin_ = True
-
- def emit_force_token(self, op, arglocs, regalloc):
- res_loc = arglocs[0]
- self.mc.mr(res_loc.value, r.SPP.value)
- def _genop_call_assembler(self, op, arglocs, regalloc):
- if len(arglocs) == 3:
- [result_loc, argloc, vloc] = arglocs
- else:
- [result_loc, argloc] = arglocs
- vloc = imm(0)
- self._store_force_index(self._find_nearby_operation(regalloc, +1))
- # 'result_loc' is either r3 or f1, or None
- self.call_assembler(op, argloc, vloc, result_loc, r.r3)
- emit_call_assembler_i = _genop_call_assembler
- emit_call_assembler_r = _genop_call_assembler
- emit_call_assembler_f = _genop_call_assembler
- emit_call_assembler_n = _genop_call_assembler
- imm = staticmethod(imm) # for call_assembler()
- def _call_assembler_emit_call(self, addr, argloc, _):
- self.regalloc_mov(argloc, r.r3)
- self.mc.ld(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET)
- cb = callbuilder.CallBuilder(self, addr, [r.r3, r.r4], r.r3)
- cb.emit()
- def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
- cb = callbuilder.CallBuilder(self, addr, arglocs, result_loc)
- cb.emit()
- def _call_assembler_check_descr(self, value, tmploc):
- ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
- self.mc.ld(r.r5.value, r.r3.value, ofs)
- if _check_imm_arg(value):
- self.mc.cmp_op(0, r.r5.value, value, imm=True)
- else:
- self.mc.load_imm(r.r4, value)
- self.mc.cmp_op(0, r.r5.value, r.r4.value, imm=False)
- jump_if_eq = self.mc.currpos()
- self.mc.trap() # patched later
- return jump_if_eq
- def _call_assembler_patch_je(self, result_loc, je_location):
- jump_to_done = self.mc.currpos()
- self.mc.trap() # patched later
- #
- currpos = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, je_location, 1)
- pmc.beq(currpos - je_location)
- pmc.overwrite()
- #
- return jump_to_done
- def _call_assembler_load_result(self, op, result_loc):
- if op.type != VOID:
- # load the return value from the dead frame's value index 0
- kind = op.type
- descr = self.cpu.getarraydescr_for_frame(kind)
- ofs = self.cpu.unpack_arraydescr(descr)
- if kind == FLOAT:
- assert result_loc is r.f1
- self.mc.lfd(r.f1.value, r.r3.value, ofs)
- else:
- assert result_loc is r.r3
- self.mc.ld(r.r3.value, r.r3.value, ofs)
- def _call_assembler_patch_jmp(self, jmp_location):
- currpos = self.mc.currpos()
- pmc = OverwritingBuilder(self.mc, jmp_location, 1)
- pmc.b(currpos - jmp_location)
- pmc.overwrite()
- def redirect_call_assembler(self, oldlooptoken, newlooptoken):
- # some minimal sanity checking
- old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
- new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
- assert old_nbargs == new_nbargs
- oldadr = oldlooptoken._ll_function_addr
- target = newlooptoken._ll_function_addr
- # copy frame-info data
- baseofs = self.cpu.get_baseofs_of_frame_field()
- newlooptoken.compiled_loop_token.update_frame_info(
- oldlooptoken.compiled_loop_token, baseofs)
- if IS_PPC_64 and IS_BIG_ENDIAN:
- # PPC64 big-endian trampolines are data so overwrite the code
- # address in the function descriptor at the old address.
- # Copy the whole 3-word trampoline, even though the other
- # words are always zero so far. That's not enough in all
- # cases: if the "target" trampoline is itself redirected
- # later, then the "old" trampoline won't be updated; so
- # we still need the jump below to be safe.
- odata = rffi.cast(rffi.CArrayPtr(lltype.Signed), oldadr)
- tdata = rffi.cast(rffi.CArrayPtr(lltype.Signed), target)
- odata[0] = tdata[0]
- odata[1] = tdata[1]
- odata[2] = tdata[2]
- oldadr += 3 * WORD
- target += 3 * WORD
- # we overwrite the instructions at the old _ll_function_addr
- # to start with a JMP to the new _ll_function_addr.
- mc = PPCBuilder()
- mc.b_abs(target)
- mc.copy_to_raw_memory(oldadr)
- class OpAssembler(IntOpAssembler, GuardOpAssembler,
- MiscOpAssembler, FieldOpAssembler,
- StrOpAssembler, CallOpAssembler,
- UnicodeOpAssembler, ForceOpAssembler,
- AllocOpAssembler, FloatOpAssembler):
- _mixin_ = True
- def nop(self):
- self.mc.ori(0, 0, 0)