/rpython/jit/backend/llsupport/assembler.py
Python | 506 lines | 385 code | 52 blank | 69 comment | 82 complexity | 3b08f96a04c622787ab623a9009e7ea8 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
- from rpython.jit.backend.llsupport import jitframe
- from rpython.jit.backend.llsupport.memcpy import memcpy_fn, memset_fn
- from rpython.jit.backend.llsupport.symbolic import WORD
- from rpython.jit.backend.llsupport.codemap import CodemapBuilder
- from rpython.jit.metainterp.history import (INT, REF, FLOAT, JitCellToken,
- ConstInt, AbstractFailDescr, VECTOR)
- from rpython.jit.metainterp.resoperation import ResOperation, rop
- from rpython.rlib import rgc
- from rpython.rlib.debug import (debug_start, debug_stop, have_debug_prints_for,
- debug_print)
- from rpython.rlib.rarithmetic import r_uint
- from rpython.rlib.objectmodel import specialize, compute_unique_id
- from rpython.rtyper.annlowlevel import cast_instance_to_gcref, llhelper
- from rpython.rtyper.lltypesystem import rffi, lltype
- from rpython.rlib.rjitlog import rjitlog as jl
- DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER',
- # 'b'ridge, 'l'abel or # 'e'ntry point
- ('i', lltype.Signed), # first field, at offset 0
- ('type', lltype.Char),
- ('number', lltype.Signed)
- )
- class GuardToken(object):
- def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
- guard_opnum, frame_depth, faildescrindex):
- assert isinstance(faildescr, AbstractFailDescr)
- self.cpu = cpu
- self.faildescr = faildescr
- self.faildescrindex = faildescrindex
- self.failargs = failargs
- self.fail_locs = fail_locs
- self.gcmap = self.compute_gcmap(gcmap, failargs,
- fail_locs, frame_depth)
- self.guard_opnum = guard_opnum
- def guard_not_invalidated(self):
- return self.guard_opnum == rop.GUARD_NOT_INVALIDATED
- def must_save_exception(self):
- guard_opnum = self.guard_opnum
- return (guard_opnum == rop.GUARD_EXCEPTION or
- guard_opnum == rop.GUARD_NO_EXCEPTION or
- guard_opnum == rop.GUARD_NOT_FORCED)
- def compute_gcmap(self, gcmap, failargs, fail_locs, frame_depth):
- # note that regalloc has a very similar compute, but
- # one that does iteration over all bindings, so slightly different,
- # eh
- input_i = 0
- for i in range(len(failargs)):
- arg = failargs[i]
- if arg is None:
- continue
- loc = fail_locs[input_i]
- input_i += 1
- if arg.type == REF:
- loc = fail_locs[i]
- if loc.is_core_reg():
- val = self.cpu.all_reg_indexes[loc.value]
- else:
- val = loc.get_position() + self.cpu.JITFRAME_FIXED_SIZE
- gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8))
- return gcmap
- class BaseAssembler(object):
- """ Base class for Assembler generator in real backends
- """
- def __init__(self, cpu, translate_support_code=False):
- self.cpu = cpu
- self.memcpy_addr = 0
- self.memset_addr = 0
- self.rtyper = cpu.rtyper
- # do not rely on this attribute if you test for jitlog
- self._debug = False
- self.loop_run_counters = []
- def stitch_bridge(self, faildescr, target):
- raise NotImplementedError
- def setup_once(self):
- # the address of the function called by 'new'
- gc_ll_descr = self.cpu.gc_ll_descr
- gc_ll_descr.initialize()
- if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
- self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
- else:
- self.gc_minimal_size_in_nursery = 0
- if hasattr(gc_ll_descr, 'gcheaderbuilder'):
- self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
- else:
- self.gc_size_of_header = WORD # for tests
- self.memcpy_addr = rffi.cast(lltype.Signed, memcpy_fn)
- self.memset_addr = rffi.cast(lltype.Signed, memset_fn)
- self._build_failure_recovery(False, withfloats=False)
- self._build_failure_recovery(True, withfloats=False)
- self._build_wb_slowpath(False)
- self._build_wb_slowpath(True)
- self._build_wb_slowpath(False, for_frame=True)
- # only one of those
- self.build_frame_realloc_slowpath()
- if self.cpu.supports_floats:
- self._build_failure_recovery(False, withfloats=True)
- self._build_failure_recovery(True, withfloats=True)
- self._build_wb_slowpath(False, withfloats=True)
- self._build_wb_slowpath(True, withfloats=True)
- self._build_propagate_exception_path()
- if gc_ll_descr.get_malloc_slowpath_addr is not None:
- # generate few slowpaths for various cases
- self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
- self.malloc_slowpath_varsize = self._build_malloc_slowpath(
- kind='var')
- if hasattr(gc_ll_descr, 'malloc_str'):
- self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
- else:
- self.malloc_slowpath_str = None
- if hasattr(gc_ll_descr, 'malloc_unicode'):
- self.malloc_slowpath_unicode = self._build_malloc_slowpath(
- kind='unicode')
- else:
- self.malloc_slowpath_unicode = None
- lst = [0, 0, 0, 0]
- lst[0] = self._build_cond_call_slowpath(False, False)
- lst[1] = self._build_cond_call_slowpath(False, True)
- if self.cpu.supports_floats:
- lst[2] = self._build_cond_call_slowpath(True, False)
- lst[3] = self._build_cond_call_slowpath(True, True)
- self.cond_call_slowpath = lst
- self._build_stack_check_slowpath()
- self._build_release_gil(gc_ll_descr.gcrootmap)
- # do not rely on the attribute _debug for jitlog
- if not self._debug:
- # if self._debug is already set it means that someone called
- # set_debug by hand before initializing the assembler. Leave it
- # as it is
- should_debug = have_debug_prints_for('jit-backend-counts')
- self.set_debug(should_debug)
- # when finishing, we only have one value at [0], the rest dies
- self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1,
- flavor='raw',
- track_allocation=False)
- self.gcmap_for_finish[0] = r_uint(1)
- def setup(self, looptoken):
- if self.cpu.HAS_CODEMAP:
- self.codemap_builder = CodemapBuilder()
- self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
- def setup_gcrefs_list(self, allgcrefs):
- self._allgcrefs = allgcrefs
- self._allgcrefs_faildescr_next = 0
- def teardown_gcrefs_list(self):
- self._allgcrefs = None
- def get_gcref_from_faildescr(self, descr):
- """This assumes that it is called in order for all faildescrs."""
- search = cast_instance_to_gcref(descr)
- while not _safe_eq(
- self._allgcrefs[self._allgcrefs_faildescr_next], search):
- self._allgcrefs_faildescr_next += 1
- assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
- return self._allgcrefs_faildescr_next
- def get_asmmemmgr_blocks(self, looptoken):
- clt = looptoken.compiled_loop_token
- if clt.asmmemmgr_blocks is None:
- clt.asmmemmgr_blocks = []
- return clt.asmmemmgr_blocks
- def get_asmmemmgr_gcreftracers(self, looptoken):
- clt = looptoken.compiled_loop_token
- if clt.asmmemmgr_gcreftracers is None:
- clt.asmmemmgr_gcreftracers = []
- return clt.asmmemmgr_gcreftracers
- def set_debug(self, v):
- r = self._debug
- self._debug = v
- return r
- def rebuild_faillocs_from_descr(self, descr, inputargs):
- locs = []
- GPR_REGS = len(self.cpu.gen_regs)
- XMM_REGS = len(self.cpu.float_regs)
- input_i = 0
- if self.cpu.IS_64_BIT:
- coeff = 1
- else:
- coeff = 2
- for pos in descr.rd_locs:
- pos = rffi.cast(lltype.Signed, pos)
- if pos == 0xFFFF:
- continue
- elif pos < GPR_REGS:
- locs.append(self.cpu.gen_regs[pos])
- elif pos < GPR_REGS + XMM_REGS * coeff:
- pos = (pos - GPR_REGS) // coeff
- locs.append(self.cpu.float_regs[pos])
- else:
- i = pos - self.cpu.JITFRAME_FIXED_SIZE
- assert i >= 0
- tp = inputargs[input_i].type
- locs.append(self.new_stack_loc(i, tp))
- input_i += 1
- return locs
- _previous_rd_locs = []
- def store_info_on_descr(self, startspos, guardtok):
- withfloats = False
- for box in guardtok.failargs:
- if box is not None and \
- (box.type == FLOAT or box.type == VECTOR):
- withfloats = True
- break
- exc = guardtok.must_save_exception()
- target = self.failure_recovery_code[exc + 2 * withfloats]
- faildescrindex = guardtok.faildescrindex
- base_ofs = self.cpu.get_baseofs_of_frame_field()
- #
- # in practice, about 2/3rd of 'positions' lists that we build are
- # exactly the same as the previous one, so share the lists to
- # conserve memory
- if len(self._previous_rd_locs) == len(guardtok.fail_locs):
- positions = self._previous_rd_locs # tentatively
- shared = True
- else:
- positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs)
- shared = False
- #
- for i, loc in enumerate(guardtok.fail_locs):
- if loc is None:
- position = 0xFFFF
- elif loc.is_stack():
- assert (loc.value & (WORD - 1)) == 0, \
- "store_info_on_descr: misaligned"
- position = (loc.value - base_ofs) // WORD
- assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!"
- else:
- assert loc is not self.cpu.frame_reg # for now
- if self.cpu.IS_64_BIT:
- coeff = 1
- else:
- coeff = 2
- if loc.is_float():
- position = len(self.cpu.gen_regs) + loc.value * coeff
- else:
- position = self.cpu.all_reg_indexes[loc.value]
- if shared:
- if (rffi.cast(lltype.Signed, self._previous_rd_locs[i]) ==
- rffi.cast(lltype.Signed, position)):
- continue # still equal
- positions = positions[:]
- shared = False
- positions[i] = rffi.cast(rffi.USHORT, position)
- self._previous_rd_locs = positions
- # write down the positions of locs
- guardtok.faildescr.rd_locs = positions
- return faildescrindex, target
- def enter_portal_frame(self, op):
- if self.cpu.HAS_CODEMAP:
- self.codemap_builder.enter_portal_frame(op.getarg(0).getint(),
- op.getarg(1).getint(),
- self.mc.get_relative_pos())
- def leave_portal_frame(self, op):
- if self.cpu.HAS_CODEMAP:
- self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
- self.mc.get_relative_pos())
- def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
- """
- * argloc: location of the frame argument that we're passing to
- the called assembler (this is the first return value
- of locs_for_call_assembler())
- * vloc: location of the virtualizable (not in a register;
- this is the optional second return value of
- locs_for_call_assembler(), or imm(0) if none returned)
- * result_loc: location of op.result (which is not be
- confused with the next one)
- * tmploc: location where the actual call to the other piece
- of assembler will return its jitframe result
- (which is always a REF), before the helper may be
- called
- """
- descr = op.getdescr()
- assert isinstance(descr, JitCellToken)
- #
- # Write a call to the target assembler
- # we need to allocate the frame, keep in sync with runner's
- # execute_token
- jd = descr.outermost_jitdriver_sd
- self._call_assembler_emit_call(self.imm(descr._ll_function_addr),
- argloc, tmploc)
- if op.type == 'v':
- assert result_loc is None
- value = self.cpu.done_with_this_frame_descr_void
- else:
- kind = op.type
- if kind == INT:
- assert result_loc is tmploc
- value = self.cpu.done_with_this_frame_descr_int
- elif kind == REF:
- assert result_loc is tmploc
- value = self.cpu.done_with_this_frame_descr_ref
- elif kind == FLOAT:
- value = self.cpu.done_with_this_frame_descr_float
- else:
- raise AssertionError(kind)
- gcref = cast_instance_to_gcref(value)
- if gcref:
- rgc._make_sure_does_not_move(gcref) # but should be prebuilt
- value = rffi.cast(lltype.Signed, gcref)
- je_location = self._call_assembler_check_descr(value, tmploc)
- #
- # Path A: use assembler_helper_adr
- assert jd is not None
- asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)
- self._call_assembler_emit_helper_call(self.imm(asm_helper_adr),
- [tmploc, vloc], result_loc)
- jmp_location = self._call_assembler_patch_je(result_loc, je_location)
- # Path B: fast path. Must load the return value
- #
- self._call_assembler_load_result(op, result_loc)
- #
- # Here we join Path A and Path B again
- self._call_assembler_patch_jmp(jmp_location)
- def get_loop_run_counters(self, index):
- return self.loop_run_counters[index]
- @specialize.argtype(1)
- def _inject_debugging_code(self, looptoken, operations, tp, number):
- if self._debug or jl.jitlog_enabled():
- newoperations = []
- self._append_debugging_code(newoperations, tp, number, None)
- for op in operations:
- newoperations.append(op)
- if op.getopnum() == rop.LABEL:
- self._append_debugging_code(newoperations, 'l', number,
- op.getdescr())
- operations = newoperations
- return operations
- def _append_debugging_code(self, operations, tp, number, token):
- counter = self._register_counter(tp, number, token)
- c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
- operations.append(
- ResOperation(rop.INCREMENT_DEBUG_COUNTER, [c_adr]))
- def _register_counter(self, tp, number, token):
- # XXX the numbers here are ALMOST unique, but not quite, use a counter
- # or something
- struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
- track_allocation=False)
- struct.i = 0
- struct.type = tp
- if tp == 'b' or tp == 'e':
- struct.number = number
- else:
- assert token
- struct.number = compute_unique_id(token)
- # YYY very minor leak -- we need the counters to stay alive
- # forever, just because we want to report them at the end
- # of the process
- self.loop_run_counters.append(struct)
- return struct
- def finish_once(self):
- if self._debug:
- # TODO remove the old logging system when jitlog is complete
- debug_start('jit-backend-counts')
- length = len(self.loop_run_counters)
- for i in range(length):
- struct = self.loop_run_counters[i]
- if struct.type == 'l':
- prefix = 'TargetToken(%d)' % struct.number
- else:
- num = struct.number
- if num == -1:
- num = '-1'
- else:
- num = str(r_uint(num))
- if struct.type == 'b':
- prefix = 'bridge %s' % num
- else:
- prefix = 'entry %s' % num
- debug_print(prefix + ':' + str(struct.i))
- debug_stop('jit-backend-counts')
- self.flush_trace_counters()
- def flush_trace_counters(self):
- # this is always called, the jitlog knows if it is enabled
- length = len(self.loop_run_counters)
- for i in range(length):
- struct = self.loop_run_counters[i]
- # only log if it has been executed
- if struct.i > 0:
- jl._log_jit_counter(struct)
- # reset the counter, flush in a later point in time will
- # add up the counters!
- struct.i = 0
- # here would be the point to free some counters
- # see YYY comment above! but first we should run this every once in a while
- # not just when jitlog_disable is called
- @staticmethod
- @rgc.no_collect
- def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
- # Before doing an external call, 'rpy_fastgil' is initialized to
- # be equal to css. This function is called if we find out after
- # the call that it is no longer equal to css. See description
- # in translator/c/src/thread_pthread.c.
- # XXX some duplicated logic here, but note that rgil.acquire()
- # does more than just RPyGilAcquire()
- if old_rpy_fastgil == 0:
- # this case occurs if some other thread stole the GIL but
- # released it again. What occurred here is that we changed
- # 'rpy_fastgil' from 0 to 1, thus successfully reaquiring the
- # GIL.
- pass
- elif old_rpy_fastgil == 1:
- # 'rpy_fastgil' was (and still is) locked by someone else.
- # We need to wait for the regular mutex.
- from rpython.rlib import rgil
- rgil.acquire()
- else:
- # stole the GIL from a different thread that is also
- # currently in an external call from the jit. Attach
- # the 'old_rpy_fastgil' into the chained list.
- from rpython.memory.gctransform import asmgcroot
- oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
- next = asmgcroot.gcrootanchor.next
- oth.next = next
- oth.prev = asmgcroot.gcrootanchor
- asmgcroot.gcrootanchor.next = oth
- next.prev = oth
- # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
- # detach the 'css' from the chained list
- from rpython.memory.gctransform import asmgcroot
- old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
- prev = old.prev
- next = old.next
- prev.next = next
- next.prev = prev
- @staticmethod
- @rgc.no_collect
- def _reacquire_gil_shadowstack():
- # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
- # 'rpy_fastgil' contains only zero or non-zero, and this is only
- # called when the old value stored in 'rpy_fastgil' was non-zero
- # (i.e. still locked, must wait with the regular mutex)
- from rpython.rlib import rgil
- rgil.acquire()
- _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
- _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
- lltype.Void))
- def _build_release_gil(self, gcrootmap):
- if gcrootmap is None or gcrootmap.is_shadow_stack:
- reacqgil_func = llhelper(self._REACQGIL0_FUNC,
- self._reacquire_gil_shadowstack)
- self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
- else:
- reacqgil_func = llhelper(self._REACQGIL2_FUNC,
- self._reacquire_gil_asmgcc)
- self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
- def _is_asmgcc(self):
- gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- return bool(gcrootmap) and not gcrootmap.is_shadow_stack
- def debug_bridge(descr_number, rawstart, codeendpos):
- debug_start("jit-backend-addr")
- debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
- (r_uint(descr_number), r_uint(rawstart),
- r_uint(rawstart + codeendpos)))
- debug_stop("jit-backend-addr")
- def _safe_eq(x, y):
- try:
- return x == y
- except AttributeError: # minor mess
- return False