/rpython/jit/backend/x86/assembler.py
Python | 2678 lines | 2047 code | 236 blank | 395 comment | 353 complexity | c47643153212b61224566887c76ba55e MD5 | raw file
Possible License(s): Apache-2.0, AGPL-3.0, BSD-3-Clause
Large files files are truncated, but you can click here to view the full file
- import sys
- import os
- import py
- from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
- from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler, debug_bridge)
- from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
- from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
- from rpython.jit.metainterp.history import (Const, VOID, ConstInt)
- from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
- from rpython.jit.metainterp.compile import ResumeGuardDescr
- from rpython.rlib.rjitlog import rjitlog as jl
- from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
- from rpython.rtyper.lltypesystem.lloperation import llop
- from rpython.rtyper.annlowlevel import cast_instance_to_gcref
- from rpython.rtyper import rclass
- from rpython.rlib.jit import AsmInfo
- from rpython.jit.backend.model import CompiledLoopToken
- from rpython.jit.backend.x86.regalloc import (RegAlloc, get_ebp_ofs,
- gpr_reg_mgr_cls, xmm_reg_mgr_cls)
- from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size)
- from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64,
- JITFRAME_FIXED_SIZE, IS_X86_32,
- PASS_ON_MY_FRAME, THREADLOCAL_OFS,
- DEFAULT_FRAME_BYTES)
- from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
- xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
- r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
- RegLoc, FrameLoc, ConstFloatLoc, ImmedLoc, AddressLoc, imm,
- imm0, imm1, FloatImmedLoc, RawEbpLoc, RawEspLoc)
- from rpython.rlib.objectmodel import we_are_translated
- from rpython.jit.backend.x86 import rx86, codebuf, callbuilder
- from rpython.jit.backend.x86.vector_ext import VectorAssemblerMixin
- from rpython.jit.backend.x86.callbuilder import follow_jump
- from rpython.jit.metainterp.resoperation import rop
- from rpython.jit.backend.x86 import support
- from rpython.rlib.debug import debug_print, debug_start, debug_stop
- from rpython.rlib import rgc
- from rpython.jit.codewriter.effectinfo import EffectInfo
- from rpython.jit.codewriter import longlong
- from rpython.rlib.rarithmetic import intmask, r_uint
- from rpython.rlib.objectmodel import compute_unique_id
class Assembler386(BaseAssembler, VectorAssemblerMixin):
    # Machine-code assembler for the x86/x86-64 JIT backend.
    # NOTE(review): presumably the RegAlloc in use while a loop/bridge is
    # being compiled, None in between — confirm against the rest of the file.
    _regalloc = None
    _output_loop_log = None
    # An extra temporary register made available to helper code (ecx).
    _second_tmp_reg = ecx
    # When True, emit extra runtime frame-depth checks
    # (see _check_frame_depth_debug).
    DEBUG_FRAME_DEPTH = False
    def __init__(self, cpu, translate_support_code=False):
        """Initialize the assembler: zero out the addresses of all the
        out-of-line helper paths (they are filled in later by the
        _build_*() methods) and clear the per-compilation state."""
        BaseAssembler.__init__(self, cpu, translate_support_code)
        self.verbose = False
        self.loop_run_counters = []
        # Addresses of the float-constant masks, set by
        # _build_float_constants() (0 until then).
        self.float_const_neg_addr = 0
        self.float_const_abs_addr = 0
        self.single_float_const_neg_addr = 0
        self.single_float_const_abs_addr = 0
        self.expand_byte_mask_addr = 0
        # Addresses of the malloc slow paths (0 until built).
        self.malloc_slowpath = 0
        self.malloc_slowpath_varsize = 0
        # Write-barrier helpers, indexed by withcards + 2*withfloats,
        # plus index 4 for the 'for_frame' variant (see _build_wb_slowpath).
        self.wb_slowpath = [0, 0, 0, 0, 0]
        self.setup_failure_recovery()
        self.datablockwrapper = None
        self.stack_check_slowpath = 0
        self.propagate_exception_path = 0
        # Start with the per-loop state cleared.
        self.teardown()
    def setup_once(self):
        """One-time initialization: when the CPU supports floats, check
        that SSE2 is available and build the float-constant data block."""
        BaseAssembler.setup_once(self)
        if self.cpu.supports_floats:
            support.ensure_sse2_floats()
            self._build_float_constants()
    def setup(self, looptoken):
        """Per-compilation initialization: allocate a fresh machine-code
        buffer and data block for 'looptoken', and reset the lists of
        pending guard patches and frame-depth patch offsets."""
        BaseAssembler.setup(self, looptoken)
        assert self.memcpy_addr != 0, "setup_once() not called?"
        self.current_clt = looptoken.compiled_loop_token
        self.pending_guard_tokens = []
        if WORD == 8:
            # 64-bit only: bookkeeping for the MemoryError trampoline.
            self.pending_memoryerror_trampoline_from = []
            self.error_trampoline_64 = 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        #assert self.datablockwrapper is None --- but obscure case
        # possible, e.g. getting MemoryError and continuing
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        self.target_tokens_currently_compiling = {}
        self.frame_depth_to_patch = []
- def teardown(self):
- self.pending_guard_tokens = None
- if WORD == 8:
- self.pending_memoryerror_trampoline_from = None
- self.mc = None
- self.current_clt = None
    def _build_float_constants(self):
        """Allocate one 16-byte-aligned data block holding the bit masks
        used to negate / take the absolute value of doubles and single
        floats (sign-bit masks), plus a zero mask, and record the address
        of each 16-byte slice in the corresponding attribute."""
        # 0x80000000000000008000000000000000
        neg_const = '\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x80'
        # 0x7FFFFFFFFFFFFFFF7FFFFFFFFFFFFFFF
        abs_const = '\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F'
        # 0x7FFFFFFF7FFFFFFF7FFFFFFF7FFFFFFF
        single_abs_const = '\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F\xFF\xFF\xFF\x7F'
        # 0x80000000800000008000000080000000
        single_neg_const = '\x00\x00\x00\x80\x00\x00\x00\x80\x00\x00\x00\x80\x00\x00\x00\x80'
        zero_const = '\x00' * 16
        #
        data = neg_const + abs_const + \
               single_neg_const + single_abs_const + \
               zero_const
        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
        float_constants = datablockwrapper.malloc_aligned(len(data), alignment=16)
        datablockwrapper.done()
        # Copy the raw mask bytes into the freshly allocated block.
        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), float_constants)
        for i in range(len(data)):
            addr[i] = data[i]
        # Record the address of each 16-byte constant, in the same order
        # as 'data' was assembled above.
        self.float_const_neg_addr = float_constants
        self.float_const_abs_addr = float_constants + 16
        self.single_float_const_neg_addr = float_constants + 32
        self.single_float_const_abs_addr = float_constants + 48
        self.expand_byte_mask_addr = float_constants + 64
- def set_extra_stack_depth(self, mc, value):
- if self._is_asmgcc():
- extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
- mc.MOV_bi(extra_ofs, value)
    def build_frame_realloc_slowpath(self):
        """Build the out-of-line helper invoked when a bridge needs a
        deeper jitframe than the current one.  It spills all registers to
        the frame, calls cpu.realloc_frame with the current frame (ebp)
        and the requested depth (taken from the stack), installs the new
        frame in ebp, and restores all registers."""
        mc = codebuf.MachineCodeBlockWrapper()
        self._push_all_regs_to_frame(mc, [], self.cpu.supports_floats)
        # the caller already did push_gcmap(store=True)
        if IS_X86_64:
            # second argument (requested depth) was pushed by the caller
            mc.MOV_rs(esi.value, WORD*2)
            # push first arg
            mc.MOV_rr(edi.value, ebp.value)
            align = callbuilder.align_stack_words(1)
            mc.SUB_ri(esp.value, (align - 1) * WORD)
        else:
            # 32-bit: both arguments go on the stack
            align = callbuilder.align_stack_words(3)
            mc.MOV_rs(eax.value, WORD * 2)
            mc.SUB_ri(esp.value, (align - 1) * WORD)
            mc.MOV_sr(WORD, eax.value)
            mc.MOV_sr(0, ebp.value)
        # align
        self.set_extra_stack_depth(mc, align * WORD)
        # save any pending RPython exception around the call
        self._store_and_reset_exception(mc, None, ebx, ecx)
        mc.CALL(imm(self.cpu.realloc_frame))
        # the reallocated frame is returned in eax; make it current
        mc.MOV_rr(ebp.value, eax.value)
        self._restore_exception(mc, None, ebx, ecx)
        mc.ADD_ri(esp.value, (align - 1) * WORD)
        self.set_extra_stack_depth(mc, 0)
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        if gcrootmap and gcrootmap.is_shadow_stack:
            # point the shadowstack top at the new frame as well
            self._load_shadowstack_top_in_ebx(mc, gcrootmap)
            mc.MOV_mr((ebx.value, -WORD), eax.value)
        self.pop_gcmap(mc)   # cancel the push_gcmap(store=True) in the caller
        self._pop_all_regs_from_frame(mc, [], self.cpu.supports_floats)
        mc.RET()
        self._frame_realloc_slowpath = mc.materialize(self.cpu, [])
    def _build_cond_call_slowpath(self, supports_floats, callee_only):
        """ This builds a general call slowpath, for whatever call happens to
        come.  The address of the function to call is expected in eax;
        the helper's address is returned.
        """
        mc = codebuf.MachineCodeBlockWrapper()
        # copy registers to the frame, with the exception of the
        # 'cond_call_register_arguments' and eax, because these have already
        # been saved by the caller.  Note that this is not symmetrical:
        # these 5 registers are saved by the caller but 4 of them are
        # restored here at the end of this function.
        self._push_all_regs_to_frame(mc, cond_call_register_arguments + [eax],
                                     supports_floats, callee_only)
        # the caller already did push_gcmap(store=True)
        if IS_X86_64:
            mc.SUB(esp, imm(WORD))     # alignment
            self.set_extra_stack_depth(mc, 2 * WORD)
            # the arguments are already in the correct registers
        else:
            # we want space for 4 arguments + call + alignment
            mc.SUB(esp, imm(WORD * 7))
            self.set_extra_stack_depth(mc, 8 * WORD)
            # store the arguments at the correct place in the stack
            for i in range(4):
                mc.MOV_sr(i * WORD, cond_call_register_arguments[i].value)
        # the target function's address was placed in eax by the caller
        mc.CALL(eax)
        self._reload_frame_if_necessary(mc)
        # undo the stack adjustment made above
        if IS_X86_64:
            mc.ADD(esp, imm(WORD))
        else:
            mc.ADD(esp, imm(WORD * 7))
        self.set_extra_stack_depth(mc, 0)
        self.pop_gcmap(mc)   # cancel the push_gcmap(store=True) in the caller
        self._pop_all_regs_from_frame(mc, [eax], supports_floats, callee_only)
        mc.RET()
        return mc.materialize(self.cpu, [])
    def _build_malloc_slowpath(self, kind):
        """ While arriving on slowpath, we have a gcpattern on stack 0.
        The arguments are passed in ecx and edx, as follows:

        kind == 'fixed': nursery_head in ecx and the size in (edx - ecx).

        kind == 'str/unicode': length of the string to allocate in edx.

        kind == 'var': length to allocate in edx, tid in ecx,
                       and itemsize in the stack 1 (position esp+WORD).

        This function must preserve all registers apart from ecx and edx.
        Returns the raw address of the generated helper.
        """
        assert kind in ['fixed', 'str', 'unicode', 'var']
        mc = codebuf.MachineCodeBlockWrapper()
        self._push_all_regs_to_frame(mc, [ecx, edx], self.cpu.supports_floats)
        # the caller already did push_gcmap(store=True)
        #
        # pick the GC function to call depending on 'kind'
        if kind == 'fixed':
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
        elif kind == 'str':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_str')
        elif kind == 'unicode':
            addr = self.cpu.gc_ll_descr.get_malloc_fn_addr('malloc_unicode')
        else:
            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_array_addr()
        mc.SUB_ri(esp.value, 16 - WORD)  # restore 16-byte alignment
        # magically, the above is enough on X86_32 to reserve 3 stack places
        if kind == 'fixed':
            mc.SUB_rr(edx.value, ecx.value)     # compute the size we want
            if IS_X86_32:
                mc.MOV_sr(0, edx.value)         # store the length
                if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
                    mc.MOV_sr(WORD, ebp.value)  # for tests only
            else:
                mc.MOV_rr(edi.value, edx.value) # length argument
                if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
                    mc.MOV_rr(esi.value, ebp.value)  # for tests only
        elif kind == 'str' or kind == 'unicode':
            if IS_X86_32:
                # stack layout: [---][---][---][ret].. with 3 free stack places
                mc.MOV_sr(0, edx.value)          # store the length
            elif IS_X86_64:
                mc.MOV_rr(edi.value, edx.value)  # length argument
        else:
            # kind == 'var'
            if IS_X86_32:
                # stack layout: [---][---][---][ret][gcmap][itemsize]...
                mc.MOV_sr(WORD * 2, edx.value)   # store the length
                mc.MOV_sr(WORD * 1, ecx.value)   # store the tid
                mc.MOV_rs(edx.value, WORD * 5)   # load the itemsize
                mc.MOV_sr(WORD * 0, edx.value)   # store the itemsize
            else:
                # stack layout: [---][ret][gcmap][itemsize]...
                # (already in edx)               # length
                mc.MOV_rr(esi.value, ecx.value)  # tid
                mc.MOV_rs(edi.value, WORD * 3)   # load the itemsize
        self.set_extra_stack_depth(mc, 16)
        mc.CALL(imm(follow_jump(addr)))
        self._reload_frame_if_necessary(mc)
        mc.ADD_ri(esp.value, 16 - WORD)
        self.set_extra_stack_depth(mc, 0)
        #
        # check for a NULL result, meaning the allocation failed
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il(rx86.Conditions['Z'], 0xfffff)  # patched later
        jz_location = mc.get_relative_pos()
        mc.MOV_rr(ecx.value, eax.value)
        #
        nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
        self._pop_all_regs_from_frame(mc, [ecx, edx], self.cpu.supports_floats)
        mc.MOV(edx, heap(nursery_free_adr))   # load this in EDX
        self.pop_gcmap(mc)   # push_gcmap(store=True) done by the caller
        mc.RET()
        #
        # If the slowpath malloc failed, we raise a MemoryError that
        # always interrupts the current loop, as a "good enough"
        # approximation.  We have to adjust the esp a little, to point to
        # the correct "ret" arg
        offset = mc.get_relative_pos() - jz_location
        mc.overwrite32(jz_location-4, offset)
        # From now on this function is basically "merged" with
        # its caller and so contains DEFAULT_FRAME_BYTES bytes
        # plus my own return address, which we'll ignore next
        mc.force_frame_size(DEFAULT_FRAME_BYTES + WORD)
        mc.ADD_ri(esp.value, WORD)
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu, [])
        return rawstart
    def _build_propagate_exception_path(self):
        """Build the helper jumped to when an RPython exception must abort
        the current loop: it moves the pending exception into the
        jitframe's 'jf_guard_exc' field, writes propagate_exception_descr
        into 'jf_descr', and finishes through _call_footer()."""
        self.mc = codebuf.MachineCodeBlockWrapper()
        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
        #
        # read and reset the current exception
        self._store_and_reset_exception(self.mc, eax)
        ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
        self.mc.MOV_br(ofs, eax.value)
        propagate_exception_descr = rffi.cast(lltype.Signed,
                  cast_instance_to_gcref(self.cpu.propagate_exception_descr))
        ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
        self.mc.MOV(RawEbpLoc(ofs), imm(propagate_exception_descr))
        #
        self._call_footer()
        rawstart = self.mc.materialize(self.cpu, [])
        self.propagate_exception_path = rawstart
        self.mc = None
    def _build_stack_check_slowpath(self):
        """Build the helper called near the start of every assembler
        function to run the stack-depth check; if an RPython exception is
        pending afterwards, it branches to propagate_exception_path."""
        _, _, slowpathaddr = self.cpu.insert_stack_check()
        if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
            return      # no stack check (for tests, or non-translated)
        #
        # make a regular function that is called from a point near the start
        # of an assembler function (after it adjusts the stack and saves
        # registers).
        mc = codebuf.MachineCodeBlockWrapper()
        #
        if IS_X86_64:
            mc.MOV_rr(edi.value, esp.value)
            mc.SUB_ri(esp.value, WORD)   # alignment
        #
        if IS_X86_32:
            mc.SUB_ri(esp.value, 2*WORD) # alignment
            mc.PUSH_r(esp.value)
        #
        # esp is now aligned to a multiple of 16 again
        mc.CALL(imm(follow_jump(slowpathaddr)))
        #
        if IS_X86_32:
            mc.ADD_ri(esp.value, 3*WORD)    # alignment
        else:
            mc.ADD_ri(esp.value, WORD)
        #
        # if an exception is now pending, jump past the RET below
        mc.MOV(eax, heap(self.cpu.pos_exception()))
        mc.TEST_rr(eax.value, eax.value)
        mc.J_il8(rx86.Conditions['NZ'], 0)
        jnz_location = mc.get_relative_pos()
        #
        mc.RET()
        #
        # patch the JNZ above
        offset = mc.get_relative_pos() - jnz_location
        assert 0 < offset <= 127
        mc.overwrite(jnz_location-1, chr(offset))
        # From now on this function is basically "merged" with
        # its caller and so contains DEFAULT_FRAME_BYTES bytes
        # plus my own return address, which we'll ignore next
        mc.force_frame_size(DEFAULT_FRAME_BYTES + WORD)
        mc.ADD_ri(esp.value, WORD)
        mc.JMP(imm(self.propagate_exception_path))
        #
        rawstart = mc.materialize(self.cpu, [])
        self.stack_check_slowpath = rawstart
    def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
        """Build one out-of-line write-barrier helper.  The result address
        is stored into self.wb_slowpath[withcards + 2 * withfloats], or
        into self.wb_slowpath[4] for the 'for_frame' variant."""
        descr = self.cpu.gc_ll_descr.write_barrier_descr
        exc0, exc1 = None, None
        if descr is None:
            return
        if not withcards:
            func = descr.get_write_barrier_fn(self.cpu)
        else:
            if descr.jit_wb_cards_set == 0:
                return
            func = descr.get_write_barrier_from_array_fn(self.cpu)
            if func == 0:
                return
        #
        # This builds a helper function called from the slow path of
        # write barriers.  It must save all registers, and optionally
        # all XMM registers.  It takes a single argument just pushed
        # on the stack even on X86_64.  It must restore stack alignment
        # accordingly.
        mc = codebuf.MachineCodeBlockWrapper()
        #
        if not for_frame:
            self._push_all_regs_to_frame(mc, [], withfloats, callee_only=True)
            if IS_X86_32:
                # we have 2 extra words on stack for retval and we pass 1 extra
                # arg, so we need to substract 2 words
                mc.SUB_ri(esp.value, 2 * WORD)
                mc.MOV_rs(eax.value, 3 * WORD)    # 2 + 1
                mc.MOV_sr(0, eax.value)
            else:
                mc.MOV_rs(edi.value, WORD)
        else:
            # NOTE: don't save registers on the jitframe here!
            # It might override already-saved values that will be
            # restored later...
            #
            # This 'for_frame' version is called after a CALL.  It does not
            # need to save many registers: the registers that are anyway
            # destroyed by the call can be ignored (volatiles), and the
            # non-volatile registers won't be changed here.  It only needs
            # to save eax, maybe edx, and xmm0 (possible results of the call)
            # and two more non-volatile registers (used to store the RPython
            # exception that occurred in the CALL, if any).
            assert not withcards
            # we have one word to align
            mc.SUB_ri(esp.value, 7 * WORD)     # align and reserve some space
            mc.MOV_sr(WORD, eax.value)         # save for later
            if self.cpu.supports_floats:
                mc.MOVSD_sx(2 * WORD, xmm0.value)  # 32-bit: also 3 * WORD
            if IS_X86_32:
                mc.MOV_sr(4 * WORD, edx.value)
                mc.MOV_sr(0, ebp.value)
                exc0, exc1 = esi, edi
            else:
                mc.MOV_rr(edi.value, ebp.value)
                exc0, exc1 = ebx, r12
            mc.MOV(RawEspLoc(WORD * 5, REF), exc0)
            mc.MOV(RawEspLoc(WORD * 6, INT), exc1)
            # note that it's safe to store the exception in register,
            # since the call to write barrier can't collect
            # (and this is assumed a bit left and right here, like lack
            # of _reload_frame_if_necessary)
            self._store_and_reset_exception(mc, exc0, exc1)
        mc.CALL(imm(func))
        #
        if withcards:
            # A final TEST8 before the RET, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of the CPU flags!
            if IS_X86_32:
                mc.MOV_rs(eax.value, 3*WORD)
            else:
                mc.MOV_rs(eax.value, WORD)
            mc.TEST8(addr_add_const(eax, descr.jit_wb_if_flag_byteofs),
                     imm(-0x80))
        #
        if not for_frame:
            if IS_X86_32:
                # ADD touches CPU flags
                mc.LEA_rs(esp.value, 2 * WORD)
            self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
            mc.RET16_i(WORD)
            # Note that wb_slowpath[0..3] end with a RET16_i, which must be
            # taken care of in the caller by stack_frame_size_delta(-WORD)
        else:
            # restore the registers saved above, in reverse order
            if IS_X86_32:
                mc.MOV_rs(edx.value, 4 * WORD)
            if self.cpu.supports_floats:
                mc.MOVSD_xs(xmm0.value, 2 * WORD)
            mc.MOV_rs(eax.value, WORD)   # restore
            self._restore_exception(mc, exc0, exc1)
            mc.MOV(exc0, RawEspLoc(WORD * 5, REF))
            mc.MOV(exc1, RawEspLoc(WORD * 6, INT))
            mc.LEA_rs(esp.value, 7 * WORD)
            mc.RET()
        rawstart = mc.materialize(self.cpu, [])
        if for_frame:
            self.wb_slowpath[4] = rawstart
        else:
            self.wb_slowpath[withcards + 2 * withfloats] = rawstart
    @rgc.no_release_gil
    def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs,
                      operations, looptoken, log):
        '''Assemble a complete loop and return an AsmInfo.

        adds the following attributes to looptoken:
            _ll_function_addr   (address of the generated func, as an int)
            _ll_loop_code       (debug: addr of the start of the ResOps)
            _x86_fullsize       (debug: full size including failure)
        '''
        # XXX this function is too longish and contains some code
        # duplication with assemble_bridge().  Also, we should think
        # about not storing on 'self' attributes that will live only
        # for the duration of compiling one loop or a one bridge.
        clt = CompiledLoopToken(self.cpu, looptoken.number)
        looptoken.compiled_loop_token = clt
        clt._debug_nbargs = len(inputargs)
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        self.setup(looptoken)
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.enter_portal_frame(jd_id, unique_id,
                                                    self.mc.get_relative_pos())
        # allocate and attach the frame info for this loop
        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
        clt.frame_info.clear() # for now
        if log:
            number = looptoken.number
            operations = self._inject_debugging_code(looptoken, operations,
                                                     'e', number)
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        #
        allgcrefs = []
        operations = regalloc.prepare_loop(inputargs, operations,
                                           looptoken, allgcrefs)
        self.reserve_gcref_table(allgcrefs)
        functionpos = self.mc.get_relative_pos()
        self._call_header_with_stack_check()
        self._check_frame_depth_debug(self.mc)
        looppos = self.mc.get_relative_pos()
        # emit the actual operations; returns the needed frame depth
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
                                                   operations)
        self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        #
        size_excluding_failure_stuff = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries(regalloc)
        full_size = self.mc.get_relative_pos()
        #
        # move everything to executable memory and patch the addresses
        # that could only be known after materialization
        rawstart = self.materialize_loop(looptoken)
        self.patch_gcref_table(looptoken, rawstart)
        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                rawstart)
        looptoken._ll_loop_code = looppos + rawstart
        debug_start("jit-backend-addr")
        debug_print("Loop %d (%s) has address 0x%x to 0x%x (bootstrap 0x%x)" % (
            looptoken.number, loopname,
            r_uint(rawstart + looppos),
            r_uint(rawstart + size_excluding_failure_stuff),
            r_uint(rawstart + functionpos)))
        debug_print(" gc table: 0x%x" % r_uint(self.gc_table_addr))
        debug_print(" function: 0x%x" % r_uint(rawstart + functionpos))
        debug_print(" resops: 0x%x" % r_uint(rawstart + looppos))
        debug_print(" failures: 0x%x" % r_uint(rawstart +
                                               size_excluding_failure_stuff))
        debug_print(" end: 0x%x" % r_uint(rawstart + full_size))
        debug_stop("jit-backend-addr")
        self.patch_pending_failure_recoveries(rawstart)
        #
        ops_offset = self.mc.ops_offset
        if not we_are_translated():
            # used only by looptoken.dump() -- useful in tests
            looptoken._x86_rawstart = rawstart
            looptoken._x86_fullsize = full_size
            looptoken._x86_ops_offset = ops_offset
        looptoken._ll_function_addr = rawstart + functionpos
        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset=ops_offset)
            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_loop(inputargs, operations, 0,
                                           "rewritten", name=loopname,
                                           ops_offset=ops_offset)
        self.fixup_target_tokens(rawstart)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Loop # %s: %s" % (looptoken.number, loopname)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, full_size)
        return AsmInfo(ops_offset, rawstart + looppos,
                       size_excluding_failure_stuff - looppos, rawstart)
    @rgc.no_release_gil
    def assemble_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log, logger):
        """Assemble a bridge attached to the guard 'faildescr' and patch
        that guard to jump to it.  Returns an AsmInfo."""
        if not we_are_translated():
            # Arguments should be unique
            assert len(set(inputargs)) == len(inputargs)
        self.setup(original_loop_token)
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.inherit_code_from_position(
                faildescr.adr_jump_offset)
        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
        descr_number = compute_unique_id(faildescr)
        if log:
            operations = self._inject_debugging_code(faildescr, operations,
                                                     'b', descr_number)
        # recover the locations of the failargs from the guard's descr
        arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
        regalloc = RegAlloc(self, self.cpu.translate_support_code)
        allgcrefs = []
        operations = regalloc.prepare_bridge(inputargs, arglocs,
                                             operations,
                                             allgcrefs,
                                             self.current_clt.frame_info)
        self.reserve_gcref_table(allgcrefs)
        startpos = self.mc.get_relative_pos()
        # the bridge may need a deeper frame than the loop it attaches to
        self._check_frame_depth(self.mc, regalloc.get_gcmap())
        bridgestartpos = self.mc.get_relative_pos()
        self._update_at_exit(arglocs, inputargs, faildescr, regalloc)
        frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
        codeendpos = self.mc.get_relative_pos()
        self.write_pending_failure_recoveries(regalloc)
        fullsize = self.mc.get_relative_pos()
        #
        rawstart = self.materialize_loop(original_loop_token)
        self.patch_gcref_table(original_loop_token, rawstart)
        self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                rawstart)
        debug_start("jit-backend-addr")
        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
                    (r_uint(descr_number), r_uint(rawstart + startpos),
                     r_uint(rawstart + codeendpos)))
        debug_print(" gc table: 0x%x" % r_uint(self.gc_table_addr))
        debug_print(" jump target: 0x%x" % r_uint(rawstart + startpos))
        debug_print(" resops: 0x%x" % r_uint(rawstart + bridgestartpos))
        debug_print(" failures: 0x%x" % r_uint(rawstart + codeendpos))
        debug_print(" end: 0x%x" % r_uint(rawstart + fullsize))
        debug_stop("jit-backend-addr")
        self.patch_pending_failure_recoveries(rawstart)
        # patch the jump from original guard
        self.patch_jump_for_descr(faildescr, rawstart + startpos)
        ops_offset = self.mc.ops_offset
        frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                          frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
        if logger:
            log = logger.log_trace(jl.MARK_TRACE_ASM, None, self.mc)
            log.write(inputargs, operations, ops_offset)
            # log that the already written bridge is stitched to a descr!
            logger.log_patch_guard(descr_number, rawstart)
            # legacy
            if logger.logger_ops:
                logger.logger_ops.log_bridge(inputargs, operations, "rewritten",
                                             faildescr, ops_offset=ops_offset)
        self.fixup_target_tokens(rawstart)
        self.update_frame_depth(frame_depth)
        self.teardown()
        # oprofile support
        if self.cpu.profile_agent is not None:
            name = "Bridge # %s" % (descr_number,)
            self.cpu.profile_agent.native_code_written(name,
                                                       rawstart, fullsize)
        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos, rawstart+bridgestartpos)
    def stitch_bridge(self, faildescr, target):
        """ Stitching means that one can enter a bridge with a complete different register
        allocation. This needs remapping which is done here for both normal registers
        and accumulation registers.
        Why? Because this only generates a very small chunk of memory, instead of
        duplicating the loop assembler for each faildescr!
        """
        asminfo, bridge_faildescr, version, looptoken = target
        assert isinstance(bridge_faildescr, ResumeGuardDescr)
        assert isinstance(faildescr, ResumeGuardDescr)
        assert asminfo.rawstart != 0
        self.mc = codebuf.MachineCodeBlockWrapper()
        allblocks = self.get_asmmemmgr_blocks(looptoken)
        self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr,
                                                        allblocks)
        # NOTE(review): the result of this malloc_aligned is never used
        # below — confirm whether this allocation is still needed.
        frame_info = self.datablockwrapper.malloc_aligned(
            jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
        # if accumulation is saved at the guard, we need to update it here!
        guard_locs = self.rebuild_faillocs_from_descr(faildescr, version.inputargs)
        bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, version.inputargs)
        guard_accum_info = faildescr.rd_vector_info
        # O(n**2), but usually you only have at most 1 fail argument
        while guard_accum_info:
            bridge_accum_info = bridge_faildescr.rd_vector_info
            while bridge_accum_info:
                if bridge_accum_info.failargs_pos == guard_accum_info.failargs_pos:
                    # the mapping might be wrong!
                    if bridge_accum_info.location is not guard_accum_info.location:
                        self.mov(guard_accum_info.location, bridge_accum_info.location)
                bridge_accum_info = bridge_accum_info.next()
            guard_accum_info = guard_accum_info.next()

        # register mapping is most likely NOT valid, thus remap it in this
        # short piece of assembler
        assert len(guard_locs) == len(bridge_locs)
        for i,gloc in enumerate(guard_locs):
            bloc = bridge_locs[i]
            bstack = bloc.location_code() == 'b'
            gstack = gloc.location_code() == 'b'
            if bstack and gstack:
                # both already live in the frame: nothing to move
                pass
            elif gloc is not bloc:
                self.mov(gloc, bloc)
        # emit a JMP with a 32-bit placeholder target, patched below
        offset = self.mc.get_relative_pos()
        self.mc.JMP_l(0)
        self.mc.writeimm32(0)
        self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
        rawstart = self.materialize_loop(looptoken)
        # update the jump (above) to the real trace
        self._patch_jump_to(rawstart + offset, asminfo.rawstart)
        # update the guard to jump right to this custom piece of assembler
        self.patch_jump_for_descr(faildescr, rawstart)
- def _patch_jump_to(self, adr_jump_offset, adr_new_target):
- assert adr_jump_offset != 0
- offset = adr_new_target - (adr_jump_offset + 5)
- mc = codebuf.MachineCodeBlockWrapper()
- mc.force_frame_size(DEFAULT_FRAME_BYTES)
- if rx86.fits_in_32bits(offset):
- mc.JMP_l(offset)
- else:
- mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
- mc.JMP_r(X86_64_SCRATCH_REG.value)
- mc.copy_to_raw_memory(adr_jump_offset)
    def reserve_gcref_table(self, allgcrefs):
        """Reserve room for the table of GC references used by the code
        being assembled: inline before the machine code on x86-64, in a
        separately allocated data block on x86-32."""
        gcref_table_size = len(allgcrefs) * WORD
        if IS_X86_64:
            # align to a multiple of 16 and reserve space at the beginning
            # of the machine code for the gc table.  This lets us write
            # machine code with relative addressing (%rip - constant).
            gcref_table_size = (gcref_table_size + 15) & ~15
            mc = self.mc
            assert mc.get_relative_pos() == 0
            for i in range(gcref_table_size):
                mc.writechar('\x00')
        elif IS_X86_32:
            # allocate the gc table right now.  This lets us write
            # machine code with absolute 32-bit addressing.
            self.gc_table_addr = self.datablockwrapper.malloc_aligned(
                gcref_table_size, alignment=WORD)
        #
        self.setup_gcrefs_list(allgcrefs)
    def patch_gcref_table(self, looptoken, rawstart):
        """Fill the reserved gcref table with a tracer object and keep it
        alive by attaching it to 'looptoken'."""
        if IS_X86_64:
            # the gc table is at the start of the machine code
            self.gc_table_addr = rawstart
        elif IS_X86_32:
            # the gc table was already allocated by reserve_gcref_table()
            rawstart = self.gc_table_addr
        #
        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
                                                        self._allgcrefs)
        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
        gcreftracers.append(tracer)    # keepalive
        self.teardown_gcrefs_list()
    def write_pending_failure_recoveries(self, regalloc):
        # for each pending guard, generate the code of the recovery stub
        # at the end of self.mc.
        for tok in self.pending_guard_tokens:
            descr = tok.faildescr
            if descr.loop_version():
                # guards carrying a loop version only record their info
                # here; they are patched later by
                # patch_pending_failure_recoveries()
                startpos = self.mc.get_relative_pos()
                self.store_info_on_descr(startpos, tok)
            else:
                tok.pos_recovery_stub = self.generate_quick_failure(tok, regalloc)
        if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
            self.error_trampoline_64 = self.generate_propagate_error_64()
    def patch_pending_failure_recoveries(self, rawstart):
        # after we wrote the assembler to raw memory, set up
        # tok.faildescr.adr_jump_offset to contain the raw address of
        # the 4-byte target field in the JMP/Jcond instruction, and patch
        # the field in question to point (initially) to the recovery stub
        clt = self.current_clt
        for tok in self.pending_guard_tokens:
            addr = rawstart + tok.pos_jump_offset
            tok.faildescr.adr_jump_offset = addr
            descr = tok.faildescr
            if descr.loop_version():
                continue # patch them later
            relative_target = tok.pos_recovery_stub - (tok.pos_jump_offset + 4)
            assert rx86.fits_in_32bits(relative_target)
            #
            if not tok.guard_not_invalidated():
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(relative_target)
                mc.copy_to_raw_memory(addr)
            else:
                # GUARD_NOT_INVALIDATED, record an entry in
                # clt.invalidate_positions of the form:
                #     (addr-in-the-code-of-the-not-yet-written-jump-target,
                #      relative-target-to-use)
                relpos = tok.pos_jump_offset
                clt.invalidate_positions.append((rawstart + relpos,
                                                 relative_target))
                # General idea: Although no code was generated by this
                # guard, the code might be patched with a "JMP rel32" to
                # the guard recovery code.  This recovery code is
                # already generated, and looks like the recovery code
                # for any guard, even if at first it has no jump to it.
                # So we may later write 5 bytes overriding the existing
                # instructions; this works because a CALL instruction
                # would also take at least 5 bytes.  If it could take
                # less, we would run into the issue that overwriting the
                # 5 bytes here might get a few nonsense bytes at the
                # return address of the following CALL.
        if WORD == 8:
            # patch every JZ that targets the 64-bit MemoryError trampoline
            for pos_after_jz in self.pending_memoryerror_trampoline_from:
                assert self.error_trampoline_64 != 0     # only if non-empty
                mc = codebuf.MachineCodeBlockWrapper()
                mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
                mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
- def update_frame_depth(self, frame_depth):
- baseofs = self.cpu.get_baseofs_of_frame_field()
- self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth)
- def patch_stack_checks(self, framedepth, rawstart):
- for ofs in self.frame_depth_to_patch:
- self._patch_frame_depth(ofs + rawstart, framedepth)
    def _check_frame_depth(self, mc, gcmap):
        """ check if the frame is of enough depth to follow this bridge.
        Otherwise reallocate the frame in a helper.
        There are other potential solutions
        to that, but this one does not sound too bad.
        """
        descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
        ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
        # 0xffffff is a placeholder: its offset is recorded in
        # frame_depth_to_patch and overwritten by patch_stack_checks()
        mc.CMP_bi(ofs, 0xffffff)     # force writing 32 bit
        stack_check_cmp_ofs = mc.get_relative_pos() - 4
        mc.J_il8(rx86.Conditions['GE'], 0)
        jg_location = mc.get_relative_pos()
        mc.MOV_si(WORD, 0xffffff)     # force writing 32 bit
        ofs2 = mc.get_relative_pos() - 4
        self.push_gcmap(mc, gcmap, store=True)
        mc.CALL(imm(self._frame_realloc_slowpath))
        # patch the JG above
        offset = mc.get_relative_pos() - jg_location
        assert 0 < offset <= 127
        mc.overwrite(jg_location-1, chr(offset))
        self.frame_depth_to_patch.append(stack_check_cmp_ofs)
        self.frame_depth_to_patch.append(ofs2)
def _check_frame_depth_debug(self, mc):
    """Emit a debug-only double check of the frame depth.  If the frame
    is too shallow it calls realloc_frame_crash, which prints the error
    (and potentially segfaults later).  No-op unless DEBUG_FRAME_DEPTH.
    """
    if not self.DEBUG_FRAME_DEPTH:
        return
    descrs = self.cpu.gc_ll_descr.getframedescrs(self.cpu)
    ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
    # compare the frame's length field with a placeholder imm32,
    # patched with the real depth by patch_stack_checks()
    mc.CMP_bi(ofs, 0xffffff)
    stack_check_cmp_ofs = mc.get_relative_pos() - 4   # offset of the imm32
    mc.J_il8(rx86.Conditions['GE'], 0)   # skip the call if deep enough
    jg_location = mc.get_relative_pos()
    mc.MOV_rr(edi.value, ebp.value)      # arg #1: the jitframe
    mc.MOV_ri(esi.value, 0xffffff)       # arg #2: the expected depth
    ofs2 = mc.get_relative_pos() - 4     # offset of the second imm32
    mc.CALL(imm(self.cpu.realloc_frame_crash))
    # patch the JGE above so that it lands here
    offset = mc.get_relative_pos() - jg_location
    assert 0 < offset <= 127             # must fit in the 8-bit displacement
    mc.overwrite(jg_location-1, chr(offset))
    # both placeholder immediates get the real depth later
    self.frame_depth_to_patch.append(stack_check_cmp_ofs)
    self.frame_depth_to_patch.append(ofs2)
def _patch_frame_depth(self, adr, allocated_depth):
    """Overwrite the 32-bit immediate at raw address 'adr' with the
    finally-allocated frame depth."""
    patch_mc = codebuf.MachineCodeBlockWrapper()
    patch_mc.writeimm32(allocated_depth)
    patch_mc.copy_to_raw_memory(adr)
def materialize_loop(self, looptoken):
    """Copy the generated machine code into executable memory and
    return its raw start address."""
    # finish using cpu.asmmemmgr for the out-of-line data first
    self.datablockwrapper.done()
    self.datablockwrapper = None
    blocks = self.get_asmmemmgr_blocks(looptoken)
    codesize = self.mc.get_relative_pos()
    rawstart = self.mc.materialize(self.cpu, blocks,
                                   self.cpu.gc_ll_descr.gcrootmap)
    if self.cpu.HAS_CODEMAP:
        # register the code range for vmprof-style address lookups
        self.cpu.codemap.register_codemap(
            self.codemap_builder.get_final_bytecode(rawstart, codesize))
    return rawstart
def patch_jump_for_descr(self, faildescr, adr_new_target):
    """Redirect the guard jump of 'faildescr' to 'adr_new_target',
    the freshly compiled bridge, instead of the guard recovery stub."""
    adr_jump_offset = faildescr.adr_jump_offset
    assert adr_jump_offset != 0        # 0 would mean "already patched"
    # 'adr_jump_offset' is the address of the jump's rel32 field;
    # the target is relative to the end of that 4-byte field
    offset = adr_new_target - (adr_jump_offset + 4)
    # If the new target fits within a rel32 of the jump, just patch
    # that. Otherwise, leave the original rel32 to the recovery stub in
    # place, but clobber the recovery stub with a jump to the real
    # target.
    mc = codebuf.MachineCodeBlockWrapper()
    mc.force_frame_size(DEFAULT_FRAME_BYTES)
    if rx86.fits_in_32bits(offset):
        mc.writeimm32(offset)
        mc.copy_to_raw_memory(adr_jump_offset)
    else:
        # only reachable on x86-64, where a target can be out of rel32
        # range: "mov r11, addr; jmp r11" is up to 13 bytes, which fits
        # in there because we always write "mov r11, imm-as-8-bytes;
        # call *r11" in the first place.
        mc.MOV_ri(X86_64_SCRATCH_REG.value, adr_new_target)
        mc.JMP_r(X86_64_SCRATCH_REG.value)
        # follow the existing rel32 to locate the recovery stub itself
        p = rffi.cast(rffi.INTP, adr_jump_offset)
        adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
        mc.copy_to_raw_memory(adr_target)
    faildescr.adr_jump_offset = 0    # means "patched"
def fixup_target_tokens(self, rawstart):
    """Turn the relative loop-code offsets of all target tokens compiled
    in this run into absolute addresses, then drop the list."""
    tokens = self.target_tokens_currently_compiling
    for tok in tokens:
        tok._ll_loop_code = tok._ll_loop_code + rawstart
    self.target_tokens_currently_compiling = None
def _assemble(self, regalloc, inputargs, operations):
    """Run register allocation over 'operations' (which emits the code)
    and return the resulting frame depth."""
    self._regalloc = regalloc
    self.guard_success_cc = rx86.cond_none
    regalloc.compute_hint_frame_locations(operations)
    regalloc.walk_operations(inputargs, operations)
    # every guard must have consumed the condition code it set up
    assert self.guard_success_cc == rx86.cond_none
    if we_are_translated() or self.cpu.dont_keepalive_stuff:
        self._regalloc = None      # else keep it around for debugging
    frame_depth = regalloc.get_final_frame_depth()
    target_descr = regalloc.jump_target_descr
    if target_descr is not None:
        # if we end with a JMP to another loop, our frame must also be
        # large enough for that loop's needs
        target_depth = (target_descr._x86_clt.frame_info.jfi_frame_depth
                        - JITFRAME_FIXED_SIZE)
        if target_depth > frame_depth:
            frame_depth = target_depth
    return frame_depth
def _call_header_vmprof(self):
    """Push an entry for this machine frame on the vmprof thread-local
    stack (vmprof_tl_stack).  The 3-word vmprof_stack_s struct lives
    inside our own machine frame; _call_footer_vmprof() pops it."""
    from rpython.rlib.rvmprof.rvmprof import cintf, VMPROF_JITTED_TAG
    # tloc = address of pypy_threadlocal_s
    if IS_X86_32:
        # Can't use esi here, its old value is not saved yet.
        # But we can use edx and ecx.
        self.mc.MOV_rs(edx.value, THREADLOCAL_OFS)
        tloc = edx
        old = ecx
    else:
        # The thread-local value is already in esi.
        # We should avoid if possible to use ecx or edx because they
        # would be used to pass arguments #3 and #4 (even though, so
        # far, the assembler only receives two arguments).
        tloc = esi
        old = r11
    # eax = address in the stack of a 3-words struct vmprof_stack_s
    self.mc.LEA_rs(eax.value, (FRAME_FIXED_SIZE - 4) * WORD)
    # old = current value of vmprof_tl_stack
    offset = cintf.vmprof_tl_stack.getoffset()
    self.mc.MOV_rm(old.value, (tloc.value, offset))
    # eax->next = old
    self.mc.MOV_mr((eax.value, 0), old.value)
    # eax->value = my esp
    self.mc.MOV_mr((eax.value, WORD), esp.value)
    # eax->kind = VMPROF_JITTED_TAG
    self.mc.MOV_mi((eax.value, WORD * 2), VMPROF_JITTED_TAG)
    # save in vmprof_tl_stack the new eax
    self.mc.MOV_mr((tloc.value, offset), eax.value)
def _call_footer_vmprof(self):
    """Pop our entry off the vmprof thread-local stack: restore
    vmprof_tl_stack to the 'next' pointer saved by _call_header_vmprof()."""
    from rpython.rlib.rvmprof.rvmprof import cintf
    # edx = address of pypy_threadlocal_s
    self.mc.MOV_rs(edx.value, THREADLOCAL_OFS)
    # mask off bit 0 of the saved threadlocal address -- presumably
    # used as a marker flag elsewhere; TODO confirm against the
    # threadlocal handling code
    self.mc.AND_ri(edx.value, ~1)
    # eax = (our local vmprof_tl_stack).next
    self.mc.MOV_rs(eax.value, (FRAME_FIXED_SIZE - 4 + 0) * WORD)
    # save in vmprof_tl_stack the value eax
    offset = cintf.vmprof_tl_stack.getoffset()
    self.mc.MOV_mr((edx.value, offset), eax.value)
def _call_header(self):
    """Emit the machine-frame prologue: allocate FRAME_FIXED_SIZE words
    of stack, save ebp and the callee-saved registers, load the jitframe
    argument into ebp, and set up vmprof and the shadowstack if needed.
    Mirrored by _call_footer()."""
    self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD)
    # save the caller's ebp in our frame
    self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value)
    if IS_X86_64:
        # stash argument #2 (threadlocal address, in esi) in the frame
        self.mc.MOV_sr(THREADLOCAL_OFS, esi.value)
    if self.cpu.translate_support_code:
        self._call_header_vmprof()     # on X86_64, this uses esi
    if IS_X86_64:
        # ebp = the jitframe, passed as argument #1 in edi
        self.mc.MOV_rr(ebp.value, edi.value)
    else:
        # on 32-bit the jitframe argument is on the caller's stack
        self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD)
    # save the callee-saved registers just above the saved ebp
    for i, loc in enumerate(self.cpu.CALLEE_SAVE_REGISTERS):
        self.mc.MOV_sr((PASS_ON_MY_FRAME + i + 1) * WORD, loc.value)
    gcrootmap = self.cpu.gc_ll_descr.gcrootmap
    if gcrootmap and gcrootmap.is_shadow_stack:
        self._call_header_shadowstack(gcrootmap)
def _call_header_with_stack_check(self):
    """Emit _call_header() followed by a machine-stack overflow check:
    if the stack space used so far ([start] - esp) exceeds the allowed
    length, call the stack-check slowpath."""
    self._call_header()
    if self.stack_check_slowpath == 0:
        pass                  # no stack check (e.g. not translated)
    else:
        endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
        self.mc.MOV(eax, heap(endaddr))             # MOV eax, [start]
        self.mc.SUB(eax, esp)                       # SUB eax, current
        self.mc.CMP(eax, heap(lengthaddr))          # CMP eax, [length]
        self.mc.J_il8(rx86.Conditions['BE'], 0)     # JBE .skip
        jb_location = self.mc.get_relative_pos()
        self.mc.CALL(imm(self.stack_check_slowpath))# CALL slowpath
        # patch the JBE above                       # .skip:
        offset = self.mc.get_relative_pos() - jb_location
        assert 0 < offset <= 127    # must fit in the 8-bit displacement
        self.mc.overwrite(jb_location-1, chr(offset))
        #
def _call_footer(self):
    """Emit the machine-frame epilogue: undo the vmprof/shadowstack
    bookkeeping of _call_header(), put the jitframe in eax as the
    return value, restore the callee-saved registers and ebp, pop
    the frame, and return."""
    # the return value is the jitframe
    if self.cpu.translate_support_code:
        self._call_footer_vmprof()
    self.mc.MOV_rr(eax.value, ebp.value)
    gcrootmap = self.cpu.gc_ll_descr.gcrootmap
    if gcrootmap and gcrootmap.is_shadow_stack:
        self._call_footer_shadowstack(gcrootmap)
    # restore the callee-saved registers, in reverse order of saving
    for i in range(len(self.cpu.CALLEE_SAVE_REGISTERS)-1, -1, -1):
        self.mc.MOV_rs(self.cpu.CALLEE_SAVE_REGISTERS[i].value,
                       (i + 1 + PASS_ON_MY_FRAME) * WORD)
    # restore the caller's ebp and pop our frame
    self.mc.MOV_rs(ebp.value, PASS_ON_MY_FRAME * WORD)
    self.mc.ADD_ri(esp.value, FRAME_FIXED_SIZE * WORD)
    self.mc.RET()
def _load_shadowstack_top_in_ebx(self, mc, gcrootmap):
    """Loads the shadowstack top in ebx, and returns an integer
    that gives the address of the stack top.  If this integer doesn't
    fit in 32 bits, it will be loaded in r11 -- and the caller may
    rely on r11 still holding it (see _call_header_shadowstack()).
    """
    rst = gcrootmap.get_root_stack_top_addr()
    if rx86.fits_in_32bits(rst):
        mc.MOV_rj(ebx.value, rst)                   # MOV ebx, [rootstacktop]
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, rst)    # MOV r11, rootstacktop
        mc.MOV_rm(ebx.value, (X86_64_SCRATCH_REG.value, 0))
                                                    # MOV ebx, [r11]
    #
    return rst
def _call_header_shadowstack(self, gcrootmap):
    # Push the jitframe (ebp) on the GC shadowstack and bump the
    # shadowstack top pointer by one word; popped again by
    # _call_footer_shadowstack().
    rst = self._load_shadowstack_top_in_ebx(self.mc, gcrootmap)
    self.mc.MOV_mr((ebx.value, 0), ebp.value)      # MOV [ebx], ebp
    self.mc.ADD_ri(ebx.value, WORD)
    if rx86.fits_in_32bits(rst):
        self.mc.MOV_jr(rst, ebx.value)             # MOV [rootstacktop], ebx
    else:
        # The integer 'rst' doesn't fit in 32 bits, so we know that
        # _load_shadowstack_top_in_ebx() above loaded it in r11.
        # Reuse it.  Be careful not to overwrite r11 in the middle!
        self.mc.MOV_mr((X86_64_SCRATCH_REG.value, 0),
                       ebx.value)                  # MOV [r11], ebx
def _call_footer_shadowstack(self, gcrootmap):
    # Pop the entry pushed by _call_header_shadowstack(): decrement the
    # shadowstack top pointer by one word.
    rst = gcrootmap.get_root_stack_top_addr()
    if rx86.fits_in_32bits(rst):
        self.mc.SUB_ji8(rst, WORD)                 # SUB [rootstacktop], WORD
    else:
        # 64-bit address: go through ebx.  Clobbering ebx is fine here
        # because _call_footer() restores the callee-saved registers
        # just afterwards.
        self.mc.MOV_ri(ebx.value, rst)             # MOV ebx, rootstacktop
        self.mc.SUB_mi8((ebx.value, 0), WORD)      # SUB [ebx], WORD
- def redirect_call_assembler(self, oldlooptoken, newlooptoken):
- # some minimal sanity checking
- old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs
- new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs
- assert old_nbargs == new_nbargs
- # we overwrite the instructions at the old _ll_function_addr
- # to start with a JMP to the new _ll_function_addr.
- # Ideally we should rather patch all existing CALLs, but well.
- oldadr = oldlooptoken._ll_function_addr
- target = newlooptoken._ll_function_addr
- # copy frame-info data
- baseofs = self.cpu.get_baseofs_of_frame_field()
- newlooptoken.compiled_loop_token.update_frame_info(
- oldlooptoken.compiled_loop_token, baseofs)
- mc = codebuf.MachineCodeBlockWrapper()
- mc.JMP(imm(follow_jump(target)))
- if WORD == 4: # keep in sync with prepare_loop()
- assert mc.get_relative_pos() == 5
- else:
- ass…
Large files files are truncated, but you can click here to view the full file