/rpython/memory/gctransform/asmgcroot.py
Python | 866 lines | 648 code | 64 blank | 154 comment | 82 complexity | efaed6df5949b9b2b05391c7e774c360 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
- from rpython.flowspace.model import (Constant, Variable, Block, Link,
- copygraph, SpaceOperation, checkgraph)
- from rpython.rlib.debug import ll_assert
- from rpython.rlib.nonconst import NonConstant
- from rpython.rlib import rgil
- from rpython.rtyper.annlowlevel import llhelper
- from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
- from rpython.rtyper.lltypesystem.lloperation import llop
- from rpython.memory.gctransform.framework import (
- BaseFrameworkGCTransformer, BaseRootWalker)
- from rpython.rtyper.llannotation import SomeAddress
- from rpython.rtyper.rbuiltin import gen_cast
- from rpython.translator.unsimplify import varoftype
- from rpython.translator.tool.cbuild import ExternalCompilationInfo
- import sys
- #
- # This transformer avoids the use of a shadow stack in a completely
- # platform-specific way, by directing genc to insert asm() special
- # instructions in the C source, which are recognized by GCC.
- # The .s file produced by GCC is then parsed by trackgcroot.py.
- #
- IS_64_BITS = sys.maxint > 2147483647
- class AsmGcRootFrameworkGCTransformer(BaseFrameworkGCTransformer):
- _asmgcc_save_restore_arguments = None
- _seen_gctransformer_hint_close_stack = None
- def push_roots(self, hop, keep_current_args=False):
- livevars = self.get_livevars_for_roots(hop, keep_current_args)
- self.num_pushs += len(livevars)
- return livevars
- def pop_roots(self, hop, livevars):
- if not livevars:
- return
- # mark the values as gc roots
- for var in livevars:
- v_adr = gen_cast(hop.llops, llmemory.Address, var)
- v_newaddr = hop.genop("direct_call", [c_asm_gcroot, v_adr],
- resulttype=llmemory.Address)
- hop.genop("gc_reload_possibly_moved", [v_newaddr, var])
- def build_root_walker(self):
- return AsmStackRootWalker(self)
- def mark_call_cannotcollect(self, hop, name):
- hop.genop("direct_call", [c_asm_nocollect, name])
- def gct_direct_call(self, hop):
- fnptr = hop.spaceop.args[0].value
- try:
- close_stack = fnptr._obj._callable._gctransformer_hint_close_stack_
- except AttributeError:
- close_stack = False
- if close_stack:
- self.handle_call_with_close_stack(hop)
- else:
- BaseFrameworkGCTransformer.gct_direct_call(self, hop)
- def handle_call_with_close_stack(self, hop):
- fnptr = hop.spaceop.args[0].value
- if self._seen_gctransformer_hint_close_stack is None:
- self._seen_gctransformer_hint_close_stack = {}
- if fnptr._obj.graph not in self._seen_gctransformer_hint_close_stack:
- self._transform_hint_close_stack(fnptr)
- self._seen_gctransformer_hint_close_stack[fnptr._obj.graph] = True
- #
- livevars = self.push_roots(hop)
- self.default(hop)
- self.pop_roots(hop, livevars)
- def _transform_hint_close_stack(self, fnptr):
- # We cannot easily pass variable amount of arguments of the call
- # across the call to the pypy_asm_stackwalk helper. So we store
- # them away and restore them. More precisely, we need to
- # replace 'graph' with code that saves the arguments, and make
- # a new graph that starts with restoring the arguments.
- if self._asmgcc_save_restore_arguments is None:
- self._asmgcc_save_restore_arguments = {}
- sradict = self._asmgcc_save_restore_arguments
- sra = [] # list of pointers to raw-malloced containers for args
- seen = {}
- FUNC1 = lltype.typeOf(fnptr).TO
- for TYPE in FUNC1.ARGS:
- if isinstance(TYPE, lltype.Ptr):
- TYPE = llmemory.Address
- num = seen.get(TYPE, 0)
- seen[TYPE] = num + 1
- key = (TYPE, num)
- if key not in sradict:
- CONTAINER = lltype.FixedSizeArray(TYPE, 1)
- p = lltype.malloc(CONTAINER, flavor='raw', zero=True,
- immortal=True)
- sradict[key] = Constant(p, lltype.Ptr(CONTAINER))
- sra.append(sradict[key])
- #
- # make a copy of the graph that will reload the values
- graph = fnptr._obj.graph
- graph2 = copygraph(graph)
- #
- # edit the original graph to only store the value of the arguments
- block = Block(graph.startblock.inputargs)
- c_item0 = Constant('item0', lltype.Void)
- assert len(block.inputargs) == len(sra)
- for v_arg, c_p in zip(block.inputargs, sra):
- if isinstance(v_arg.concretetype, lltype.Ptr):
- v_adr = varoftype(llmemory.Address)
- block.operations.append(
- SpaceOperation("cast_ptr_to_adr", [v_arg], v_adr))
- v_arg = v_adr
- v_void = varoftype(lltype.Void)
- block.operations.append(
- SpaceOperation("bare_setfield", [c_p, c_item0, v_arg], v_void))
- #
- # call asm_stackwalk(graph2)
- FUNC2 = lltype.FuncType([], FUNC1.RESULT)
- fnptr2 = lltype.functionptr(FUNC2,
- fnptr._obj._name + '_reload',
- graph=graph2)
- c_fnptr2 = Constant(fnptr2, lltype.Ptr(FUNC2))
- HELPERFUNC = lltype.FuncType([lltype.Ptr(FUNC2),
- ASM_FRAMEDATA_HEAD_PTR], FUNC1.RESULT)
- v_asm_stackwalk = varoftype(lltype.Ptr(HELPERFUNC), "asm_stackwalk")
- block.operations.append(
- SpaceOperation("cast_pointer", [c_asm_stackwalk], v_asm_stackwalk))
- v_result = varoftype(FUNC1.RESULT)
- block.operations.append(
- SpaceOperation("indirect_call", [v_asm_stackwalk, c_fnptr2,
- c_gcrootanchor,
- Constant(None, lltype.Void)],
- v_result))
- block.closeblock(Link([v_result], graph.returnblock))
- graph.startblock = block
- #
- # edit the copy of the graph to reload the values
- block2 = graph2.startblock
- block1 = Block([])
- reloadedvars = []
- for v, c_p in zip(block2.inputargs, sra):
- v = v.copy()
- if isinstance(v.concretetype, lltype.Ptr):
- w = varoftype(llmemory.Address)
- else:
- w = v
- block1.operations.append(SpaceOperation('getfield',
- [c_p, c_item0], w))
- if w is not v:
- block1.operations.append(SpaceOperation('cast_adr_to_ptr',
- [w], v))
- reloadedvars.append(v)
- block1.closeblock(Link(reloadedvars, block2))
- graph2.startblock = block1
- #
- checkgraph(graph)
- checkgraph(graph2)
- class AsmStackRootWalker(BaseRootWalker):
- def __init__(self, gctransformer):
- BaseRootWalker.__init__(self, gctransformer)
- def _asm_callback():
- self.walk_stack_from()
- self._asm_callback = _asm_callback
- self._shape_decompressor = ShapeDecompressor()
- self._with_jit = hasattr(gctransformer.translator, '_jit2gc')
- if self._with_jit:
- jit2gc = gctransformer.translator._jit2gc
- self.frame_tid = jit2gc['frame_tid']
- self.gctransformer = gctransformer
- #
- # unless overridden in need_thread_support():
- self.belongs_to_current_thread = lambda framedata: True
- def need_stacklet_support(self, gctransformer, getfn):
- from rpython.annotator import model as annmodel
- from rpython.rlib import _stacklet_asmgcc
- # stacklet support: BIG HACK for rlib.rstacklet
- _stacklet_asmgcc._asmstackrootwalker = self # as a global! argh
- _stacklet_asmgcc.complete_destrptr(gctransformer)
- #
- def gc_detach_callback_pieces():
- anchor = llmemory.cast_ptr_to_adr(gcrootanchor)
- result = llmemory.NULL
- framedata = anchor.address[1]
- while framedata != anchor:
- next = framedata.address[1]
- if self.belongs_to_current_thread(framedata):
- # detach it
- prev = framedata.address[0]
- prev.address[1] = next
- next.address[0] = prev
- # update the global stack counter
- rffi.stackcounter.stacks_counter -= 1
- # reattach framedata into the singly-linked list 'result'
- framedata.address[0] = rffi.cast(llmemory.Address, -1)
- framedata.address[1] = result
- result = framedata
- framedata = next
- return result
- #
- def gc_reattach_callback_pieces(pieces):
- anchor = llmemory.cast_ptr_to_adr(gcrootanchor)
- while pieces != llmemory.NULL:
- framedata = pieces
- pieces = pieces.address[1]
- # attach 'framedata' into the normal doubly-linked list
- following = anchor.address[1]
- following.address[0] = framedata
- framedata.address[1] = following
- anchor.address[1] = framedata
- framedata.address[0] = anchor
- # update the global stack counter
- rffi.stackcounter.stacks_counter += 1
- #
- s_addr = SomeAddress()
- s_None = annmodel.s_None
- self.gc_detach_callback_pieces_ptr = getfn(gc_detach_callback_pieces,
- [], s_addr)
- self.gc_reattach_callback_pieces_ptr=getfn(gc_reattach_callback_pieces,
- [s_addr], s_None)
- def need_thread_support(self, gctransformer, getfn):
- # Threads supported "out of the box" by the rest of the code.
- # The whole code in this function is only there to support
- # fork()ing in a multithreaded process :-(
- # For this, we need to handle gc_thread_start and gc_thread_die
- # to record the mapping {thread_id: stack_start}, and
- # gc_thread_before_fork and gc_thread_after_fork to get rid of
- # all ASM_FRAMEDATA structures that do no belong to the current
- # thread after a fork().
- from rpython.rlib import rthread
- from rpython.memory.support import AddressDict
- from rpython.memory.support import copy_without_null_values
- from rpython.annotator import model as annmodel
- gcdata = self.gcdata
- def get_aid():
- """Return the thread identifier, cast to an (opaque) address."""
- return llmemory.cast_int_to_adr(rthread.get_ident())
- def thread_start():
- value = llmemory.cast_int_to_adr(llop.stack_current(lltype.Signed))
- gcdata.aid2stack.setitem(get_aid(), value)
- thread_start._always_inline_ = True
- def thread_setup():
- gcdata.aid2stack = AddressDict()
- gcdata.dead_threads_count = 0
- # to also register the main thread's stack
- thread_start()
- thread_setup._always_inline_ = True
- def thread_die():
- gcdata.aid2stack.setitem(get_aid(), llmemory.NULL)
- # from time to time, rehash the dictionary to remove
- # old NULL entries
- gcdata.dead_threads_count += 1
- if (gcdata.dead_threads_count & 511) == 0:
- copy = copy_without_null_values(gcdata.aid2stack)
- gcdata.aid2stack.delete()
- gcdata.aid2stack = copy
- def belongs_to_current_thread(framedata):
- # xxx obscure: the answer is Yes if, as a pointer, framedata
- # lies between the start of the current stack and the top of it.
- stack_start = gcdata.aid2stack.get(get_aid(), llmemory.NULL)
- ll_assert(stack_start != llmemory.NULL,
- "current thread not found in gcdata.aid2stack!")
- stack_stop = llmemory.cast_int_to_adr(
- llop.stack_current(lltype.Signed))
- return (stack_start <= framedata <= stack_stop or
- stack_start >= framedata >= stack_stop)
- self.belongs_to_current_thread = belongs_to_current_thread
- def thread_before_fork():
- # before fork(): collect all ASM_FRAMEDATA structures that do
- # not belong to the current thread, and move them out of the
- # way, i.e. out of the main circular doubly linked list.
- detached_pieces = llmemory.NULL
- anchor = llmemory.cast_ptr_to_adr(gcrootanchor)
- initialframedata = anchor.address[1]
- while initialframedata != anchor: # while we have not looped back
- if not belongs_to_current_thread(initialframedata):
- # Unlink it
- prev = initialframedata.address[0]
- next = initialframedata.address[1]
- prev.address[1] = next
- next.address[0] = prev
- # Link it to the singly linked list 'detached_pieces'
- initialframedata.address[0] = detached_pieces
- detached_pieces = initialframedata
- rffi.stackcounter.stacks_counter -= 1
- # Then proceed to the next piece of stack
- initialframedata = initialframedata.address[1]
- return detached_pieces
- def thread_after_fork(result_of_fork, detached_pieces):
- if result_of_fork == 0:
- # We are in the child process. Assumes that only the
- # current thread survived. All the detached_pieces
- # are pointers in other stacks, so have likely been
- # freed already by the multithreaded library.
- # Nothing more for us to do.
- pass
- else:
- # We are still in the parent process. The fork() may
- # have succeeded or not, but that's irrelevant here.
- # We need to reattach the detached_pieces now, to the
- # circular doubly linked list at 'gcrootanchor'. The
- # order is not important.
- anchor = llmemory.cast_ptr_to_adr(gcrootanchor)
- while detached_pieces != llmemory.NULL:
- reattach = detached_pieces
- detached_pieces = detached_pieces.address[0]
- a_next = anchor.address[1]
- reattach.address[0] = anchor
- reattach.address[1] = a_next
- anchor.address[1] = reattach
- a_next.address[0] = reattach
- rffi.stackcounter.stacks_counter += 1
- self.thread_setup = thread_setup
- self.thread_start_ptr = getfn(thread_start, [], annmodel.s_None,
- inline=True)
- self.thread_die_ptr = getfn(thread_die, [], annmodel.s_None)
- self.thread_before_fork_ptr = getfn(thread_before_fork, [],
- SomeAddress())
- self.thread_after_fork_ptr = getfn(thread_after_fork,
- [annmodel.SomeInteger(),
- SomeAddress()],
- annmodel.s_None)
- #
- # check that the order of the need_*() is correct for us: if we
- # need both threads and stacklets, need_thread_support() must be
- # called first, to initialize self.belongs_to_current_thread.
- assert not hasattr(self, 'gc_detach_callback_pieces_ptr')
- def walk_stack_roots(self, collect_stack_root, is_minor=False):
- gcdata = self.gcdata
- gcdata._gc_collect_stack_root = collect_stack_root
- gcdata._gc_collect_is_minor = is_minor
- pypy_asm_stackwalk(llhelper(ASM_CALLBACK_PTR, self._asm_callback),
- gcrootanchor)
- def walk_stack_from(self):
- curframe = lltype.malloc(WALKFRAME, flavor='raw')
- otherframe = lltype.malloc(WALKFRAME, flavor='raw')
- # Walk over all the pieces of stack. They are in a circular linked
- # list of structures of 7 words, the 2 first words being prev/next.
- # The anchor of this linked list is:
- anchor = llmemory.cast_ptr_to_adr(gcrootanchor)
- initialframedata = anchor.address[1]
- stackscount = 0
- while initialframedata != anchor: # while we have not looped back
- self.walk_frames(curframe, otherframe, initialframedata)
- # Then proceed to the next piece of stack
- initialframedata = initialframedata.address[1]
- stackscount += 1
- #
- # for the JIT: rpy_fastgil may contain an extra framedata
- rpy_fastgil = rgil.gil_fetch_fastgil().signed[0]
- if rpy_fastgil != 1:
- ll_assert(rpy_fastgil != 0, "walk_stack_from doesn't have the GIL")
- initialframedata = rffi.cast(llmemory.Address, rpy_fastgil)
- #
- # very rare issue: initialframedata.address[0] is uninitialized
- # in this case, but "retaddr = callee.frame_address.address[0]"
- # reads it. If it happens to be exactly a valid return address
- # inside the C code, then bad things occur.
- initialframedata.address[0] = llmemory.NULL
- #
- self.walk_frames(curframe, otherframe, initialframedata)
- stackscount += 1
- #
- expected = rffi.stackcounter.stacks_counter
- if NonConstant(0):
- rffi.stackcounter.stacks_counter += 42 # hack to force it
- ll_assert(not (stackscount < expected), "non-closed stacks around")
- ll_assert(not (stackscount > expected), "stacks counter corruption?")
- lltype.free(otherframe, flavor='raw')
- lltype.free(curframe, flavor='raw')
- def walk_frames(self, curframe, otherframe, initialframedata):
- self.fill_initial_frame(curframe, initialframedata)
- # Loop over all the frames in the stack
- while self.walk_to_parent_frame(curframe, otherframe):
- swap = curframe
- curframe = otherframe # caller becomes callee
- otherframe = swap
- def fill_initial_frame(self, curframe, initialframedata):
- # Read the information provided by initialframedata
- initialframedata += 2*sizeofaddr #skip the prev/next words at the start
- reg = 0
- while reg < CALLEE_SAVED_REGS:
- # NB. 'initialframedata' stores the actual values of the
- # registers %ebx etc., and if these values are modified
- # they are reloaded by pypy_asm_stackwalk(). By contrast,
- # 'regs_stored_at' merely points to the actual values
- # from the 'initialframedata'.
- curframe.regs_stored_at[reg] = initialframedata + reg*sizeofaddr
- reg += 1
- curframe.frame_address = initialframedata.address[CALLEE_SAVED_REGS]
- def walk_to_parent_frame(self, callee, caller):
- """Starting from 'callee', walk the next older frame on the stack
- and fill 'caller' accordingly. Also invokes the collect_stack_root()
- callback from the GC code for each GC root found in 'caller'.
- """
- #
- # The gcmap table is a list of entries, two machine words each:
- # void *SafePointAddress;
- # int Shape;
- #
- # A "safe point" is the return address of a call.
- # The "shape" of a safe point is a list of integers
- # that represent "locations". A "location" can be
- # either in the stack or in a register. See
- # getlocation() for the decoding of this integer.
- # The locations stored in a "shape" are as follows:
- #
- # * The "location" of the return address. This is just
- # after the end of the frame of 'callee'; it is the
- # first word of the frame of 'caller' (see picture
- # below).
- #
- # * Four "locations" that specify where the function saves
- # each of the four callee-saved registers (%ebx, %esi,
- # %edi, %ebp).
- #
- # * The number of live GC roots around the call.
- #
- # * For each GC root, an integer that specify where the
- # GC pointer is stored. This is a "location" too.
- #
- # XXX the details are completely specific to X86!!!
- # a picture of the stack may help:
- # ^ ^ ^
- # | ... | to older frames
- # +--------------+
- # | ret addr | <------ caller_frame (addr of retaddr)
- # | ... |
- # | caller frame |
- # | ... |
- # +--------------+
- # | ret addr | <------ callee_frame (addr of retaddr)
- # | ... |
- # | callee frame |
- # | ... | lower addresses
- # +--------------+ v v v
- #
- retaddr = callee.frame_address.address[0]
- #
- # try to locate the caller function based on retaddr.
- # set up self._shape_decompressor.
- #
- ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
- self.locate_caller_based_on_retaddr(retaddr, ebp_in_caller)
- #
- # found! Enumerate the GC roots in the caller frame
- #
- collect_stack_root = self.gcdata._gc_collect_stack_root
- gc = self.gc
- while True:
- location = self._shape_decompressor.next()
- if location == 0:
- break
- addr = self.getlocation(callee, ebp_in_caller, location)
- if gc.points_to_valid_gc_object(addr):
- collect_stack_root(gc, addr)
- #
- # small hack: the JIT reserves THREADLOCAL_OFS's last bit for
- # us. We use it to store an "already traced past this frame"
- # flag.
- if self._with_jit and self.gcdata._gc_collect_is_minor:
- if self.mark_jit_frame_can_stop(callee):
- return False
- #
- # track where the caller_frame saved the registers from its own
- # caller
- #
- reg = CALLEE_SAVED_REGS - 1
- while reg >= 0:
- location = self._shape_decompressor.next()
- addr = self.getlocation(callee, ebp_in_caller, location)
- caller.regs_stored_at[reg] = addr
- reg -= 1
- location = self._shape_decompressor.next()
- caller.frame_address = self.getlocation(callee, ebp_in_caller,
- location)
- # we get a NULL marker to mean "I'm the frame
- # of the entry point, stop walking"
- return caller.frame_address != llmemory.NULL
- def locate_caller_based_on_retaddr(self, retaddr, ebp_in_caller):
- gcmapstart = llop.gc_asmgcroot_static(llmemory.Address, 0)
- gcmapend = llop.gc_asmgcroot_static(llmemory.Address, 1)
- item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
- if item:
- self._shape_decompressor.setpos(item.signed[1])
- return
- if not self._shape_decompressor.sorted:
- # the item may have been not found because the main array was
- # not sorted. Sort it and try again.
- win32_follow_gcmap_jmp(gcmapstart, gcmapend)
- sort_gcmap(gcmapstart, gcmapend)
- self._shape_decompressor.sorted = True
- item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
- if item:
- self._shape_decompressor.setpos(item.signed[1])
- return
- if self._with_jit:
- # item not found. We assume that it's a JIT-generated
- # location -- but we check for consistency that ebp points
- # to a JITFRAME object.
- from rpython.jit.backend.llsupport.jitframe import STACK_DEPTH_OFS
- tid = self.gc.get_possibly_forwarded_type_id(ebp_in_caller)
- if (rffi.cast(lltype.Signed, tid) ==
- rffi.cast(lltype.Signed, self.frame_tid)):
- # fish the depth
- extra_stack_depth = (ebp_in_caller + STACK_DEPTH_OFS).signed[0]
- ll_assert((extra_stack_depth & (rffi.sizeof(lltype.Signed) - 1))
- == 0, "asmgcc: misaligned extra_stack_depth")
- extra_stack_depth //= rffi.sizeof(lltype.Signed)
- self._shape_decompressor.setjitframe(extra_stack_depth)
- return
- llop.debug_fatalerror(lltype.Void, "cannot find gc roots!")
- def getlocation(self, callee, ebp_in_caller, location):
- """Get the location in the 'caller' frame of a variable, based
- on the integer 'location' that describes it. All locations are
- computed based on information saved by the 'callee'.
- """
- ll_assert(location >= 0, "negative location")
- kind = location & LOC_MASK
- offset = location & ~ LOC_MASK
- if IS_64_BITS:
- offset <<= 1
- if kind == LOC_REG: # register
- if location == LOC_NOWHERE:
- return llmemory.NULL
- reg = (location >> 2) - 1
- ll_assert(reg < CALLEE_SAVED_REGS, "bad register location")
- return callee.regs_stored_at[reg]
- elif kind == LOC_ESP_PLUS: # in the caller stack frame at N(%esp)
- esp_in_caller = callee.frame_address + sizeofaddr
- return esp_in_caller + offset
- elif kind == LOC_EBP_PLUS: # in the caller stack frame at N(%ebp)
- return ebp_in_caller + offset
- else: # kind == LOC_EBP_MINUS: at -N(%ebp)
- return ebp_in_caller - offset
- def mark_jit_frame_can_stop(self, callee):
- location = self._shape_decompressor.get_threadlocal_loc()
- if location == LOC_NOWHERE:
- return False
- addr = self.getlocation(callee, llmemory.NULL, location)
- #
- x = addr.signed[0]
- if x & 1:
- return True # this JIT stack frame is already marked!
- else:
- addr.signed[0] = x | 1 # otherwise, mark it but don't stop
- return False
- LOC_REG = 0
- LOC_ESP_PLUS = 1
- LOC_EBP_PLUS = 2
- LOC_EBP_MINUS = 3
- LOC_MASK = 0x03
- LOC_NOWHERE = LOC_REG | 0
- # ____________________________________________________________
- sizeofaddr = llmemory.sizeof(llmemory.Address)
- arrayitemsize = 2 * sizeofaddr
- def binary_search(start, end, addr1):
- """Search for an element in a sorted array.
- The interval from the start address (included) to the end address
- (excluded) is assumed to be a sorted arrays of pairs (addr1, addr2).
- This searches for the item with a given addr1 and returns its
- address. If not found exactly, it tries to return the address
- of the item left of addr1 (i.e. such that result.address[0] < addr1).
- """
- count = (end - start) // arrayitemsize
- while count > 1:
- middleindex = count // 2
- middle = start + middleindex * arrayitemsize
- if addr1 < middle.address[0]:
- count = middleindex
- else:
- start = middle
- count -= middleindex
- return start
- def search_in_gcmap(gcmapstart, gcmapend, retaddr):
- item = binary_search(gcmapstart, gcmapend, retaddr)
- if item.address[0] == retaddr:
- return item # found
- # 'retaddr' not exactly found. Check that 'item' is the start of a
- # compressed range that includes 'retaddr'.
- if retaddr > item.address[0] and item.signed[1] < 0:
- return item # ok
- else:
- return llmemory.NULL # failed
- def search_in_gcmap2(gcmapstart, gcmapend, retaddr):
- # same as 'search_in_gcmap', but without range checking support
- # (item.signed[1] is an address in this case, not a signed at all!)
- item = binary_search(gcmapstart, gcmapend, retaddr)
- if item.address[0] == retaddr:
- return item.address[1] # found
- else:
- return llmemory.NULL # failed
- def sort_gcmap(gcmapstart, gcmapend):
- count = (gcmapend - gcmapstart) // arrayitemsize
- qsort(gcmapstart,
- rffi.cast(rffi.SIZE_T, count),
- rffi.cast(rffi.SIZE_T, arrayitemsize),
- c_compare_gcmap_entries)
- def replace_dead_entries_with_nulls(start, end):
- # replace the dead entries (null value) with a null key.
- count = (end - start) // arrayitemsize - 1
- while count >= 0:
- item = start + count * arrayitemsize
- if item.address[1] == llmemory.NULL:
- item.address[0] = llmemory.NULL
- count -= 1
- if sys.platform == 'win32':
- def win32_follow_gcmap_jmp(start, end):
- # The initial gcmap table contains addresses to a JMP
- # instruction that jumps indirectly to the real code.
- # Replace them with the target addresses.
- assert rffi.SIGNEDP is rffi.LONGP, "win64 support missing"
- while start < end:
- code = rffi.cast(rffi.CCHARP, start.address[0])[0]
- if code == '\xe9': # jmp
- rel32 = rffi.cast(rffi.SIGNEDP, start.address[0]+1)[0]
- target = start.address[0] + (rel32 + 5)
- start.address[0] = target
- start += arrayitemsize
- else:
- def win32_follow_gcmap_jmp(start, end):
- pass
- # ____________________________________________________________
- class ShapeDecompressor:
- _alloc_flavor_ = "raw"
- sorted = False
- def setpos(self, pos):
- if pos < 0:
- pos = ~ pos # can ignore this "range" marker here
- gccallshapes = llop.gc_asmgcroot_static(llmemory.Address, 2)
- self.addr = gccallshapes + pos
- self.jit_index = -1
- def setjitframe(self, extra_stack_depth):
- self.jit_index = 0
- self.extra_stack_depth = extra_stack_depth
- def next(self):
- index = self.jit_index
- if index < 0:
- # case "outside the jit"
- addr = self.addr
- value = 0
- while True:
- b = ord(addr.char[0])
- addr += 1
- value += b
- if b < 0x80:
- break
- value = (value - 0x80) << 7
- self.addr = addr
- return value
- else:
- # case "in the jit"
- from rpython.jit.backend.x86.arch import FRAME_FIXED_SIZE
- from rpython.jit.backend.x86.arch import PASS_ON_MY_FRAME
- self.jit_index = index + 1
- if index == 0:
- # the jitframe is an object in EBP
- return LOC_REG | ((INDEX_OF_EBP + 1) << 2)
- if index == 1:
- return 0
- # the remaining returned values should be:
- # saved %rbp
- # saved %r15 or on 32bit:
- # saved %r14 saved %ebp
- # saved %r13 saved %edi
- # saved %r12 saved %esi
- # saved %rbx saved %ebx
- # return addr return addr
- stack_depth = PASS_ON_MY_FRAME + self.extra_stack_depth
- if IS_64_BITS:
- if index == 2: # rbp
- return LOC_ESP_PLUS | (stack_depth << 2)
- if index == 3: # r15
- return LOC_ESP_PLUS | ((stack_depth + 5) << 2)
- if index == 4: # r14
- return LOC_ESP_PLUS | ((stack_depth + 4) << 2)
- if index == 5: # r13
- return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
- if index == 6: # r12
- return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
- if index == 7: # rbx
- return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
- if index == 8: # return addr
- return (LOC_ESP_PLUS |
- ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
- else:
- if index == 2: # ebp
- return LOC_ESP_PLUS | (stack_depth << 2)
- if index == 3: # edi
- return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
- if index == 4: # esi
- return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
- if index == 5: # ebx
- return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
- if index == 6: # return addr
- return (LOC_ESP_PLUS |
- ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
- llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
- return 0 # annotator fix
- def get_threadlocal_loc(self):
- index = self.jit_index
- if index < 0:
- return LOC_NOWHERE # case "outside the jit"
- else:
- # case "in the jit"
- from rpython.jit.backend.x86.arch import THREADLOCAL_OFS, WORD
- return (LOC_ESP_PLUS |
- ((THREADLOCAL_OFS // WORD + self.extra_stack_depth) << 2))
- # ____________________________________________________________
- #
- # The special pypy_asm_stackwalk(), implemented directly in
- # assembler, fills information about the current stack top in an
- # ASM_FRAMEDATA array and invokes an RPython callback with it.
- # An ASM_FRAMEDATA is an array of 5 values that describe everything
- # we need to know about a stack frame:
- #
- # - the value that %ebx had when the current function started
- # - the value that %esi had when the current function started
- # - the value that %edi had when the current function started
- # - the value that %ebp had when the current function started
- # - frame address (actually the addr of the retaddr of the current function;
- # that's the last word of the frame in memory)
- #
- # On 64 bits, it is an array of 7 values instead of 5:
- #
- # - %rbx, %r12, %r13, %r14, %r15, %rbp; and the frame address
- #
- if IS_64_BITS:
- CALLEE_SAVED_REGS = 6
- INDEX_OF_EBP = 5
- FRAME_PTR = CALLEE_SAVED_REGS
- else:
- CALLEE_SAVED_REGS = 4 # there are 4 callee-saved registers
- INDEX_OF_EBP = 3
- FRAME_PTR = CALLEE_SAVED_REGS # the frame is at index 4 in the array
- JIT_USE_WORDS = 2 + FRAME_PTR + 1
- ASM_CALLBACK_PTR = lltype.Ptr(lltype.FuncType([], lltype.Void))
- # used internally by walk_stack_from()
- WALKFRAME = lltype.Struct('WALKFRAME',
- ('regs_stored_at', # address of where the registers have been saved
- lltype.FixedSizeArray(llmemory.Address, CALLEE_SAVED_REGS)),
- ('frame_address',
- llmemory.Address),
- )
- # We have a circular doubly-linked list of all the ASM_FRAMEDATAs currently
- # alive. The list's starting point is given by 'gcrootanchor', which is not
- # a full ASM_FRAMEDATA but only contains the prev/next pointers:
- ASM_FRAMEDATA_HEAD_PTR = lltype.Ptr(lltype.ForwardReference())
- ASM_FRAMEDATA_HEAD_PTR.TO.become(lltype.Struct('ASM_FRAMEDATA_HEAD',
- ('prev', ASM_FRAMEDATA_HEAD_PTR),
- ('next', ASM_FRAMEDATA_HEAD_PTR)
- ))
- gcrootanchor = lltype.malloc(ASM_FRAMEDATA_HEAD_PTR.TO, immortal=True)
- gcrootanchor.prev = gcrootanchor
- gcrootanchor.next = gcrootanchor
- c_gcrootanchor = Constant(gcrootanchor, ASM_FRAMEDATA_HEAD_PTR)
- eci = ExternalCompilationInfo(compile_extra=['-DPYPY_USE_ASMGCC'],
- post_include_bits=["""
- static int pypy_compare_gcmap_entries(const void *addr1, const void *addr2)
- {
- char *key1 = * (char * const *) addr1;
- char *key2 = * (char * const *) addr2;
- if (key1 < key2)
- return -1;
- else if (key1 == key2)
- return 0;
- else
- return 1;
- }
- """])
- pypy_asm_stackwalk = rffi.llexternal('pypy_asm_stackwalk',
- [ASM_CALLBACK_PTR,
- ASM_FRAMEDATA_HEAD_PTR],
- lltype.Signed,
- sandboxsafe=True,
- _nowrapper=True,
- random_effects_on_gcobjs=True,
- compilation_info=eci)
- c_asm_stackwalk = Constant(pypy_asm_stackwalk,
- lltype.typeOf(pypy_asm_stackwalk))
- pypy_asm_gcroot = rffi.llexternal('pypy_asm_gcroot',
- [llmemory.Address],
- llmemory.Address,
- sandboxsafe=True,
- _nowrapper=True)
- c_asm_gcroot = Constant(pypy_asm_gcroot, lltype.typeOf(pypy_asm_gcroot))
- pypy_asm_nocollect = rffi.llexternal('pypy_asm_gc_nocollect',
- [rffi.CCHARP], lltype.Void,
- sandboxsafe=True,
- _nowrapper=True)
- c_asm_nocollect = Constant(pypy_asm_nocollect, lltype.typeOf(pypy_asm_nocollect))
- QSORT_CALLBACK_PTR = lltype.Ptr(lltype.FuncType([llmemory.Address,
- llmemory.Address], rffi.INT))
- c_compare_gcmap_entries = rffi.llexternal('pypy_compare_gcmap_entries',
- [llmemory.Address, llmemory.Address],
- rffi.INT, compilation_info=eci,
- _nowrapper=True, sandboxsafe=True)
- qsort = rffi.llexternal('qsort',
- [llmemory.Address,
- rffi.SIZE_T,
- rffi.SIZE_T,
- QSORT_CALLBACK_PTR],
- lltype.Void,
- sandboxsafe=True,
- random_effects_on_gcobjs=False, # but has a callback
- _nowrapper=True)