PageRenderTime 95ms CodeModel.GetById 18ms app.highlight 62ms RepoModel.GetById 1ms app.codeStats 1ms

/pypy/translator/c/gcc/trackgcroot.py

https://bitbucket.org/evelyn559/pypy
Python | 2025 lines | 1875 code | 65 blank | 85 comment | 198 complexity | b006643ae5bfd57fe073693b84ac857b MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1#! /usr/bin/env python
   2import autopath
   3import re, sys, os, random
   4
   5from pypy.translator.c.gcc.instruction import Insn, Label, InsnCall, InsnRet
   6from pypy.translator.c.gcc.instruction import InsnFunctionStart, InsnStop
   7from pypy.translator.c.gcc.instruction import InsnSetLocal, InsnCopyLocal
   8from pypy.translator.c.gcc.instruction import InsnPrologue, InsnEpilogue
   9from pypy.translator.c.gcc.instruction import InsnGCROOT, InsnCondJump
  10from pypy.translator.c.gcc.instruction import InsnStackAdjust
  11from pypy.translator.c.gcc.instruction import InsnCannotFollowEsp
  12from pypy.translator.c.gcc.instruction import LocalVar, somenewvalue
  13from pypy.translator.c.gcc.instruction import frameloc_esp, frameloc_ebp
  14from pypy.translator.c.gcc.instruction import LOC_REG, LOC_NOWHERE, LOC_MASK
  15from pypy.translator.c.gcc.instruction import LOC_EBP_PLUS, LOC_EBP_MINUS
  16from pypy.translator.c.gcc.instruction import LOC_ESP_PLUS
  17
  18class FunctionGcRootTracker(object):
    # Number of leading entries of self.lines that parse_instructions()
    # skips before it starts decoding instructions.
    skip = 0
    # Regexp fragment for an optional trailing comment introduced by '#'
    # or ';'.  init_regexp() appends it to the unary-instruction and jump
    # patterns.
    COMMENT = "([#;].*)?"
  21
  22    @classmethod
  23    def init_regexp(cls):
  24        cls.r_label         = re.compile(cls.LABEL+"[:]\s*$")
  25        cls.r_globl         = re.compile(r"\t[.]globl\t"+cls.LABEL+"\s*$")
  26        cls.r_globllabel    = re.compile(cls.LABEL+r"=[.][+]%d\s*$"%cls.OFFSET_LABELS)
  27
  28        cls.r_insn          = re.compile(r"\t([a-z]\w*)\s")
  29        cls.r_unaryinsn     = re.compile(r"\t[a-z]\w*\s+("+cls.OPERAND+")\s*" + cls.COMMENT + "$")
  30        cls.r_binaryinsn    = re.compile(r"\t[a-z]\w*\s+(?P<source>"+cls.OPERAND+"),\s*(?P<target>"+cls.OPERAND+")\s*$")
  31
  32        cls.r_jump          = re.compile(r"\tj\w+\s+"+cls.LABEL+"\s*" + cls.COMMENT + "$")
  33        cls.r_jmp_switch    = re.compile(r"\tjmp\t[*]"+cls.LABEL+"[(]")
  34        cls.r_jmp_source    = re.compile(r"\d*[(](%[\w]+)[,)]")
  35
  36    def __init__(self, funcname, lines, filetag=0):
  37        self.funcname = funcname
  38        self.lines = lines
  39        self.uses_frame_pointer = False
  40        self.r_localvar = self.r_localvarnofp
  41        self.filetag = filetag
  42        # a "stack bottom" function is either pypy_main_function() or a
  43        # callback from C code.  In both cases they are identified by
  44        # the presence of pypy_asm_stack_bottom().
  45        self.is_stack_bottom = False
  46
  47    def computegcmaptable(self, verbose=0):
  48        if self.funcname in ['main', '_main']:
  49            return []     # don't analyze main(), its prologue may contain
  50                          # strange instructions
  51        self.findlabels()
  52        self.parse_instructions()
  53        try:
  54            self.trim_unreachable_instructions()
  55            self.find_noncollecting_calls()
  56            if not self.list_collecting_call_insns():
  57                return []
  58            self.findframesize()
  59            self.fixlocalvars()
  60            self.trackgcroots()
  61            self.extend_calls_with_labels()
  62        finally:
  63            if verbose > 2:
  64                self.dump()
  65        return self.gettable()
  66
  67    def replace_symbols(self, operand):
  68        return operand
  69
  70    def gettable(self):
  71        """Returns a list [(label_after_call, callshape_tuple)]
  72        See format_callshape() for more details about callshape_tuple.
  73        """
  74        table = []
  75        for insn in self.list_collecting_call_insns():
  76            if not hasattr(insn, 'framesize'):
  77                continue     # calls that never end up reaching a RET
  78            if self.is_stack_bottom:
  79                retaddr = LOC_NOWHERE     # end marker for asmgcroot.py
  80            elif self.uses_frame_pointer:
  81                retaddr = frameloc_ebp(self.WORD, self.WORD)
  82            else:
  83                retaddr = frameloc_esp(insn.framesize, self.WORD)
  84            shape = [retaddr]
  85            # the first gcroots are always the ones corresponding to
  86            # the callee-saved registers
  87            for reg in self.CALLEE_SAVE_REGISTERS:
  88                shape.append(LOC_NOWHERE)
  89            gcroots = []
  90            for localvar, tag in insn.gcroots.items():
  91                if isinstance(localvar, LocalVar):
  92                    loc = localvar.getlocation(insn.framesize,
  93                                               self.uses_frame_pointer,
  94                                               self.WORD)
  95                elif localvar in self.REG2LOC:
  96                    loc = self.REG2LOC[localvar]
  97                else:
  98                    assert False, "%s: %s" % (self.funcname,
  99                                              localvar)
 100                assert isinstance(loc, int)
 101                if tag is None:
 102                    gcroots.append(loc)
 103                else:
 104                    regindex = self.CALLEE_SAVE_REGISTERS.index(tag)
 105                    shape[1 + regindex] = loc
 106            if LOC_NOWHERE in shape and not self.is_stack_bottom:
 107                reg = self.CALLEE_SAVE_REGISTERS[shape.index(LOC_NOWHERE) - 1]
 108                raise AssertionError("cannot track where register %s is saved"
 109                                     % (reg,))
 110            gcroots.sort()
 111            shape.extend(gcroots)
 112            table.append((insn.global_label, tuple(shape)))
 113        return table
 114
 115    def findlabels(self):
 116        self.labels = {}      # {name: Label()}
 117        for lineno, line in enumerate(self.lines):
 118            match = self.r_label.match(line)
 119            label = None
 120            if match:
 121                label = match.group(1)
 122            else:
 123                # labels used by: j* NNNf
 124                match = self.r_rel_label.match(line)
 125                if match:
 126                    label = "rel %d" % lineno
 127            if label:
 128                assert label not in self.labels, "duplicate label: %s" % label
 129                self.labels[label] = Label(label, lineno)
 130
 131    def trim_unreachable_instructions(self):
 132        reached = set([self.insns[0]])
 133        prevlen = 0
 134        while len(reached) > prevlen:
 135            prevlen = len(reached)
 136            for insn in self.insns:
 137                if insn not in reached:
 138                    for previnsn in insn.previous_insns:
 139                        if previnsn in reached:
 140                            # this instruction is reachable too
 141                            reached.add(insn)
 142                            break
 143        # now kill all unreachable instructions
 144        i = 0
 145        while i < len(self.insns):
 146            if self.insns[i] in reached:
 147                i += 1
 148            else:
 149                del self.insns[i]
 150
 151    def find_noncollecting_calls(self):
 152        cannot_collect = {}
 153        for line in self.lines:
 154            match = self.r_gcnocollect_marker.search(line)
 155            if match:
 156                name = match.group(1)
 157                cannot_collect[name] = True
 158        #
 159        self.cannot_collect = dict.fromkeys(
 160            [self.function_names_prefix + name for name in cannot_collect])
 161
 162    def append_instruction(self, insn):
 163        # Add the instruction to the list, and link it to the previous one.
 164        previnsn = self.insns[-1]
 165        self.insns.append(insn)
 166        if (isinstance(insn, (InsnSetLocal, InsnCopyLocal)) and
 167            insn.target == self.tested_for_zero):
 168            self.tested_for_zero = None
 169
 170        try:
 171            lst = insn.previous_insns
 172        except AttributeError:
 173            lst = insn.previous_insns = []
 174        if not isinstance(previnsn, InsnStop):
 175            lst.append(previnsn)
 176
    def parse_instructions(self):
        """Turn self.lines into the instruction list self.insns.

        The list starts with an InsnFunctionStart.  Each line from index
        self.skip onwards is then classified, in this order, as: the
        stack-bottom marker, an instruction (dispatched to the matching
        visit_<opname> method), a gcroot marker, one of the two
        "ignore_in_trackgcroot" comment lines, or a label.  Whatever the
        handler returns is appended with append_instruction().

        Requires self.labels to be filled already (see findlabels()).
        """
        self.insns = [InsnFunctionStart(self.CALLEE_SAVE_REGISTERS, self.WORD)]
        self.tested_for_zero = None
        # True while between the ignore_in_trackgcroot and
        # end_ignore_in_trackgcroot comment lines
        ignore_insns = False
        for lineno, line in enumerate(self.lines):
            if lineno < self.skip:
                continue
            # made available on self while a line is being visited;
            # removed again at the end of this method
            self.currentlineno = lineno
            # default: this line contributes no instruction
            insn = []
            match = self.r_insn.match(line)

            if self.r_bottom_marker.match(line):
                self.is_stack_bottom = True
            elif match:
                if not ignore_insns:
                    opname = match.group(1)
                    #
                    # an operation flagged as changing the flags
                    # invalidates the remembered "tested for zero" location
                    try:
                        cf = self.OPS_WITH_PREFIXES_CHANGING_FLAGS[opname]
                    except KeyError:
                        cf = self.find_missing_changing_flags(opname)
                    if cf:
                        self.tested_for_zero = None
                    #
                    # look up visit_<opname>; if it is missing,
                    # find_missing_visit_method() either installs a no-op
                    # visitor on the class or raises
                    try:
                        meth = getattr(self, 'visit_' + opname)
                    except AttributeError:
                        self.find_missing_visit_method(opname)
                        meth = getattr(self, 'visit_' + opname)
                    # drop everything after the last ';' before visiting
                    line = line.rsplit(';', 1)[0]
                    insn = meth(line)
            elif self.r_gcroot_marker.match(line):
                insn = self._visit_gcroot_marker(line)
            elif line == '\t/* ignore_in_trackgcroot */\n':
                ignore_insns = True
            elif line == '\t/* end_ignore_in_trackgcroot */\n':
                ignore_insns = False
            else:
                match = self.r_label.match(line)
                if match:
                    insn = self.labels[match.group(1)]

            # a visitor returns either one instruction or a list of them
            if isinstance(insn, list):
                for i in insn:
                    self.append_instruction(i)
            else:
                self.append_instruction(insn)

        del self.currentlineno
 226
 227    @classmethod
 228    def find_missing_visit_method(cls, opname):
 229        # only for operations that are no-ops as far as we are concerned
 230        prefix = opname
 231        while prefix not in cls.IGNORE_OPS_WITH_PREFIXES:
 232            prefix = prefix[:-1]
 233            if not prefix:
 234                raise UnrecognizedOperation(opname)
 235        setattr(cls, 'visit_' + opname, cls.visit_nop)
 236
 237    @classmethod
 238    def find_missing_changing_flags(cls, opname):
 239        prefix = opname
 240        while prefix and prefix not in cls.OPS_WITH_PREFIXES_CHANGING_FLAGS:
 241            prefix = prefix[:-1]
 242        cf = cls.OPS_WITH_PREFIXES_CHANGING_FLAGS.get(prefix, False)
 243        cls.OPS_WITH_PREFIXES_CHANGING_FLAGS[opname] = cf
 244        return cf
 245
 246    def list_collecting_call_insns(self):
 247        return [insn for insn in self.insns if isinstance(insn, InsnCall)
 248                     if insn.name not in self.cannot_collect]
 249
    def findframesize(self):
        """Attach a 'framesize' attribute to the instructions of self.insns.

        For every InsnRet, InsnEpilogue and InsnGCROOT the instructions are
        walked backwards while accumulating stack adjustments.  As soon as
        the walk reaches instructions that already carry a 'framesize', the
        sizes of all the other instructions visited are derived from it.
        AssertionError is raised when two paths disagree.
        """
        # the 'framesize' attached to an instruction is the number of bytes
        # in the frame at this point.  This doesn't count the return address
        # which is the word immediately following the frame in memory.
        # The 'framesize' is set to an odd value if it is only an estimate
        # (see InsnCannotFollowEsp).

        def walker(insn, size_delta):
            # 'deltas' is the dict bound by the loop below for the current
            # starting instruction; it records the size_delta with which
            # each instruction was reached.
            check = deltas.setdefault(insn, size_delta)
            assert check == size_delta, (
                "inconsistent frame size at instruction %s" % (insn,))
            if isinstance(insn, InsnStackAdjust):
                size_delta -= insn.delta
            if not hasattr(insn, 'framesize'):
                yield size_delta   # continue walking backwards

        for insn in self.insns:
            if isinstance(insn, (InsnRet, InsnEpilogue, InsnGCROOT)):
                deltas = {}
                self.walk_instructions_backwards(walker, insn, 0)
                # candidate frame sizes at 'insn', one for each visited
                # instruction whose framesize is already known
                size_at_insn = []
                for insn1, delta1 in deltas.items():
                    if hasattr(insn1, 'framesize'):
                        size_at_insn.append(insn1.framesize + delta1)
                if not size_at_insn:
                    continue
                # use the first candidate; the loop below asserts that the
                # known frame sizes agree with it
                size_at_insn = size_at_insn[0]
                for insn1, delta1 in deltas.items():
                    size_at_insn1 = size_at_insn - delta1
                    if hasattr(insn1, 'framesize'):
                        assert insn1.framesize == size_at_insn1, (
                            "inconsistent frame size at instruction %s" %
                            (insn1,))
                    else:
                        insn1.framesize = size_at_insn1
 285
    def fixlocalvars(self):
        """Replace stack-relative operand strings by LocalVar objects.

        For every instruction that has a 'framesize', each attribute named
        in its _locals_ is passed through fixvar() and stored back.
        Instructions without a 'framesize' are left untouched.
        """
        def fixvar(localvar):
            # Convert one operand (or, recursively, a list/tuple of them).
            # None and operands that match neither stack pattern are
            # returned as they are.  NOTE: reads 'insn' from the loop below.
            if localvar is None:
                return None
            elif isinstance(localvar, (list, tuple)):
                return [fixvar(var) for var in localvar]

            match = self.r_localvar_esp.match(localvar)
            if match:
                if localvar == self.TOP_OF_STACK_MINUS_WORD:
                                                  # for pushl and popl, by
                    hint = None                   # default ebp addressing is
                else:                             # a bit nicer
                    hint = 'esp'
                ofs_from_esp = int(match.group(1) or '0')
                if self.format == 'msvc':
                    # with the msvc format the pattern has a second offset
                    # group, which is added to the first
                    ofs_from_esp += int(match.group(2) or '0')
                # make the offset relative to the frame instead of to esp
                localvar = ofs_from_esp - insn.framesize
                assert localvar != 0    # that's the return address
                return LocalVar(localvar, hint=hint)
            elif self.uses_frame_pointer:
                match = self.r_localvar_ebp.match(localvar)
                if match:
                    ofs_from_ebp = int(match.group(1) or '0')
                    if self.format == 'msvc':
                        ofs_from_ebp += int(match.group(2) or '0')
                    localvar = ofs_from_ebp - self.WORD
                    assert localvar != 0    # that's the return address
                    return LocalVar(localvar, hint='ebp')
            return localvar

        for insn in self.insns:
            if not hasattr(insn, 'framesize'):
                continue
            for name in insn._locals_:
                localvar = getattr(insn, name)
                setattr(insn, name, fixvar(localvar))
 323
 324    def trackgcroots(self):
 325
 326        def walker(insn, loc):
 327            source = insn.source_of(loc, tag)
 328            if source is somenewvalue:
 329                pass   # done
 330            else:
 331                yield source
 332
 333        for insn in self.insns:
 334            for loc, tag in insn.requestgcroots(self).items():
 335                self.walk_instructions_backwards(walker, insn, loc)
 336
 337    def dump(self):
 338        for insn in self.insns:
 339            size = getattr(insn, 'framesize', '?')
 340            print >> sys.stderr, '%4s  %s' % (size, insn)
 341
 342    def walk_instructions_backwards(self, walker, initial_insn, initial_state):
 343        pending = []
 344        seen = {}
 345        def schedule(insn, state):
 346            for previnsn in insn.previous_insns:
 347                key = previnsn, state
 348                if key not in seen:
 349                    seen[key] = True
 350                    pending.append(key)
 351        schedule(initial_insn, initial_state)
 352        while pending:
 353            insn, state = pending.pop()
 354            for prevstate in walker(insn, state):
 355                schedule(insn, prevstate)
 356
 357    def extend_calls_with_labels(self):
 358        # walk backwards, because inserting the global labels in self.lines
 359        # is going to invalidate the lineno of all the InsnCall objects
 360        # after the current one.
 361        for call in self.list_collecting_call_insns()[::-1]:
 362            if hasattr(call, 'framesize'):
 363                self.create_global_label(call)
 364
 365    def create_global_label(self, call):
 366        # we need a globally-declared label just after the call.
 367        # Reuse one if it is already there (e.g. from a previous run of this
 368        # script); otherwise invent a name and add the label to tracker.lines.
 369        label = None
 370        # this checks for a ".globl NAME" followed by "NAME:"
 371        match = self.r_globl.match(self.lines[call.lineno+1])
 372        if match:
 373            label1 = match.group(1)
 374            match = self.r_globllabel.match(self.lines[call.lineno+2])
 375            if match:
 376                label2 = match.group(1)
 377                if label1 == label2:
 378                    label = label2
 379        if label is None:
 380            k = call.lineno
 381            if self.format == 'msvc':
 382                # Some header files (ws2tcpip.h) define STDCALL functions
 383                funcname = self.funcname.split('@')[0]
 384            else:
 385                funcname = self.funcname
 386            while 1:
 387                label = '__gcmap_%s__%s_%d' % (self.filetag, funcname, k)
 388                if label not in self.labels:
 389                    break
 390                k += 1
 391            self.labels[label] = None
 392            if self.format == 'msvc':
 393                self.lines.insert(call.lineno+1, '%s::\n' % (label,))
 394                self.lines.insert(call.lineno+1, 'PUBLIC\t%s\n' % (label,))
 395            else:
 396                # These global symbols are not directly labels pointing to the
 397                # code location because such global labels in the middle of
 398                # functions confuse gdb.  Instead, we add to the global symbol's
 399                # value a big constant, which is subtracted again when we need
 400                # the original value for gcmaptable.s.  That's a hack.
 401                self.lines.insert(call.lineno+1, '%s=.+%d\n' % (label,
 402                                                                self.OFFSET_LABELS))
 403                self.lines.insert(call.lineno+1, '\t.globl\t%s\n' % (label,))
 404        call.global_label = label
 405
 406    @classmethod
 407    def compress_callshape(cls, shape):
 408        # For a single shape, this turns the list of integers into a list of
 409        # bytes and reverses the order of the entries.  The length is
 410        # encoded by inserting a 0 marker after the gc roots coming from
 411        # shape[N:] and before the N values coming from shape[N-1] to
 412        # shape[0] (for N == 5 on 32-bit or 7 on 64-bit platforms).
 413        # In practice it seems that shapes contain many integers
 414        # whose value is up to a few thousands, which the algorithm below
 415        # compresses down to 2 bytes.  Very small values compress down to a
 416        # single byte.
 417
 418        # Callee-save regs plus ret addr
 419        min_size = len(cls.CALLEE_SAVE_REGISTERS) + 1
 420
 421        assert len(shape) >= min_size
 422        shape = list(shape)
 423        assert 0 not in shape[min_size:]
 424        shape.insert(min_size, 0)
 425        result = []
 426        for loc in shape:
 427            assert loc >= 0
 428            flag = 0
 429            while loc >= 0x80:
 430                result.append(int(loc & 0x7F) | flag)
 431                flag = 0x80
 432                loc >>= 7
 433            result.append(int(loc) | flag)
 434        result.reverse()
 435        return result
 436
 437    @classmethod
 438    def decompress_callshape(cls, bytes):
 439        # For tests.  This logic is copied in asmgcroot.py.
 440        result = []
 441        n = 0
 442        while n < len(bytes):
 443            value = 0
 444            while True:
 445                b = bytes[n]
 446                n += 1
 447                value += b
 448                if b < 0x80:
 449                    break
 450                value = (value - 0x80) << 7
 451            result.append(value)
 452        result.reverse()
 453        assert result[5] == 0
 454        del result[5]
 455        return result
 456    # ____________________________________________________________
 457
    # Function names used as dict keys (the values are unused).
    # NOTE(review): judging by the name these are functions that never
    # return to their caller; the code consulting this table is not part
    # of this section -- confirm there.
    BASE_FUNCTIONS_NOT_RETURNING = {
        'abort': None,
        'pypy_debug_catch_fatal_exception': None,
        'RPyAbort': None,
        'RPyAssertFailed': None,
        }
 464
 465    def _visit_gcroot_marker(self, line):
 466        match = self.r_gcroot_marker.match(line)
 467        loc = match.group(1)
 468        return InsnGCROOT(self.replace_symbols(loc))
 469
 470    def visit_nop(self, line):
 471        return []
 472
    # Mnemonic prefixes of the operations that are treated as no-ops.
    # find_missing_visit_method() installs visit_nop for any opname that
    # equals one of these keys or starts with one of them; the dict values
    # are unused.
    IGNORE_OPS_WITH_PREFIXES = dict.fromkeys([
        'cmp', 'test', 'set', 'sahf', 'lahf', 'cld', 'std',
        'rep', 'movs', 'movhp', 'lods', 'stos', 'scas', 'cwde', 'prefetch',
        # floating-point operations cannot produce GC pointers
        'f',
        'cvt', 'ucomi', 'comi', 'subs', 'subp' , 'adds', 'addp', 'xorp',
        'movap', 'movd', 'movlp', 'sqrtsd', 'movhpd',
        'mins', 'minp', 'maxs', 'maxp', 'unpck', 'pxor', 'por', # sse2
        'shufps', 'shufpd',
        # arithmetic operations should not produce GC pointers
        'inc', 'dec', 'not', 'neg', 'or', 'and', 'sbb', 'adc',
        'shl', 'shr', 'sal', 'sar', 'rol', 'ror', 'mul', 'imul', 'div', 'idiv',
        'bswap', 'bt', 'rdtsc',
        'punpck', 'pshufd', 'pcmp', 'pand', 'psllw', 'pslld', 'psllq',
        'paddq', 'pinsr', 'pmul', 'psrl',
        # sign-extending moves should not produce GC pointers
        'cbtw', 'cwtl', 'cwtd', 'cltd', 'cltq', 'cqto',
        # zero-extending moves should not produce GC pointers
        'movz',
        # locked operations should not move GC pointers, at least so far
        'lock',
        ])
 495
    # Mnemonic prefixes of the operations after which parse_instructions()
    # forgets self.tested_for_zero.  Every listed key maps to True;
    # find_missing_changing_flags() adds further opnames to this dict as a
    # cache, with value True or False.
    # a partial list is hopefully good enough for now; it's all to support
    # only one corner case, tested in elf64/track_zero.s
    OPS_WITH_PREFIXES_CHANGING_FLAGS = dict.fromkeys([
        'cmp', 'test', 'lahf', 'cld', 'std', 'rep',
        'ucomi', 'comi',
        'add', 'sub', 'xor',
        'inc', 'dec', 'not', 'neg', 'or', 'and', 'sbb', 'adc',
        'shl', 'shr', 'sal', 'sar', 'rol', 'ror', 'mul', 'imul', 'div', 'idiv',
        'bt', 'call', 'int',
        'jmp',     # not really changing flags, but we shouldn't assume
                   # anything about the operations on the following lines
        ], True)
 508
    # The 'b' and 'w' suffixed forms of mov/add/sub/xor are handled by
    # visit_nop, i.e. they contribute no instruction.
    # NOTE(review): presumably because byte- and word-sized operations
    # cannot move a whole GC pointer -- confirm.
    visit_movb = visit_nop
    visit_movw = visit_nop
    visit_addb = visit_nop
    visit_addw = visit_nop
    visit_subb = visit_nop
    visit_subw = visit_nop
    visit_xorb = visit_nop
    visit_xorw = visit_nop
 517
 518    def _visit_add(self, line, sign=+1):
 519        match = self.r_binaryinsn.match(line)
 520        source = match.group("source")
 521        target = match.group("target")
 522        if target == self.ESP:
 523            count = self.extract_immediate(source)
 524            if count is None:
 525                # strange instruction - I've seen 'subl %eax, %esp'
 526                return InsnCannotFollowEsp()
 527            return InsnStackAdjust(sign * count)
 528        elif self.r_localvar.match(target):
 529            return InsnSetLocal(target, [source, target])
 530        else:
 531            return []
 532
 533    def _visit_sub(self, line):
 534        return self._visit_add(line, sign=-1)
 535
 536    def unary_insn(self, line):
 537        match = self.r_unaryinsn.match(line)
 538        target = match.group(1)
 539        if self.r_localvar.match(target):
 540            return InsnSetLocal(target)
 541        else:
 542            return []
 543
 544    def binary_insn(self, line):
 545        match = self.r_binaryinsn.match(line)
 546        if not match:
 547            raise UnrecognizedOperation(line)
 548        source = match.group("source")
 549        target = match.group("target")
 550        if self.r_localvar.match(target):
 551            return InsnSetLocal(target, [source])
 552        elif target == self.ESP:
 553            raise UnrecognizedOperation(line)
 554        else:
 555            return []
 556
    # The various cmov* operations
    # For each condition code listed below, register binary_insn in the
    # class namespace under both visit_cmov<cc> and visit_cmov<cc>l.
    # Writing through locals() works here because, inside a class body,
    # it is the namespace the class is built from.  The loop variable
    # 'name' is left behind as a class attribute.
    for name in '''
        e ne g ge l le a ae b be p np s ns o no
        '''.split():
        locals()['visit_cmov' + name] = binary_insn
        locals()['visit_cmov' + name + 'l'] = binary_insn
 563
 564    def _visit_and(self, line):
 565        match = self.r_binaryinsn.match(line)
 566        target = match.group("target")
 567        if target == self.ESP:
 568            # only for  andl $-16, %esp  used to align the stack in main().
 569            # main() should not be seen at all.  But on e.g. MSVC we see
 570            # the instruction somewhere else too...
 571            return InsnCannotFollowEsp()
 572        else:
 573            return self.binary_insn(line)
 574
 575    def _visit_lea(self, line):
 576        match = self.r_binaryinsn.match(line)
 577        target = match.group("target")
 578        if target == self.ESP:
 579            # only for  leal -12(%ebp), %esp  in function epilogues
 580            source = match.group("source")
 581            match = self.r_localvar_ebp.match(source)
 582            if match:
 583                if not self.uses_frame_pointer:
 584                    raise UnrecognizedOperation('epilogue without prologue')
 585                ofs_from_ebp = int(match.group(1) or '0')
 586                assert ofs_from_ebp <= 0
 587                framesize = self.WORD - ofs_from_ebp
 588            else:
 589                match = self.r_localvar_esp.match(source)
 590                # leal 12(%esp), %esp
 591                if match:
 592                    return InsnStackAdjust(int(match.group(1)))
 593
 594                framesize = None    # strange instruction
 595            return InsnEpilogue(framesize)
 596        else:
 597            return self.binary_insn(line)
 598
 599    def insns_for_copy(self, source, target):
 600        source = self.replace_symbols(source)
 601        target = self.replace_symbols(target)
 602        if target == self.ESP:
 603            raise UnrecognizedOperation('%s -> %s' % (source, target))
 604        elif self.r_localvar.match(target):
 605            if self.r_localvar.match(source):
 606                # eg, movl %eax, %ecx: possibly copies a GC root
 607                return [InsnCopyLocal(source, target)]
 608            else:
 609                # eg, movl (%eax), %edi or mov %esp, %edi: load a register
 610                # from "outside".  If it contains a pointer to a GC root,
 611                # it will be announced later with the GCROOT macro.
 612                return [InsnSetLocal(target, [source])]
 613        else:
 614            # eg, movl %ebx, (%edx) or mov %ebp, %esp: does not write into
 615            # a general register
 616            return []
 617
 618    def _visit_mov(self, line):
 619        match = self.r_binaryinsn.match(line)
 620        source = match.group("source")
 621        target = match.group("target")
 622        if source == self.ESP and target == self.EBP:
 623            return self._visit_prologue()
 624        elif source == self.EBP and target == self.ESP:
 625            return self._visit_epilogue()
 626        if source == self.ESP and self.funcname.startswith('VALGRIND_'):
 627            return []     # in VALGRIND_XXX functions, there is a dummy-looking
 628                          # mov %esp, %eax.  Shows up only when compiling with
 629                          # gcc -fno-unit-at-a-time.
 630        return self.insns_for_copy(source, target)
 631
 632    def _visit_push(self, line):
 633        match = self.r_unaryinsn.match(line)
 634        source = match.group(1)
 635        return self.insns_for_copy(source, self.TOP_OF_STACK_MINUS_WORD) + \
 636               [InsnStackAdjust(-self.WORD)]
 637
 638    def _visit_pop(self, target):
 639        return [InsnStackAdjust(+self.WORD)] + \
 640               self.insns_for_copy(self.TOP_OF_STACK_MINUS_WORD, target)
 641
 642    def _visit_prologue(self):
 643        # for the prologue of functions that use %ebp as frame pointer
 644        self.uses_frame_pointer = True
 645        self.r_localvar = self.r_localvarfp
 646        return [InsnPrologue(self.WORD)]
 647
 648    def _visit_epilogue(self):
 649        if not self.uses_frame_pointer:
 650            raise UnrecognizedOperation('epilogue without prologue')
 651        return [InsnEpilogue(self.WORD)]
 652
 653    def visit_leave(self, line):
 654        return self._visit_epilogue() + self._visit_pop(self.EBP)
 655
 656    def visit_ret(self, line):
 657        return InsnRet(self.CALLEE_SAVE_REGISTERS)
 658
    def visit_jmp(self, line):
        """Visit an unconditional jump.

        Returns
        * InsnStop("jump table") for a table-based switch, after every
          label found in the table was registered as a jump target;
        * InsnRet(...) for an indirect jump without a known table, and for
          a jump to a label that is unknown in this function (both are
          taken to be tail-calls);
        * InsnStop("jump") for a jump to a known label, which is
          registered through conditional_jump().
        """
        # labels of candidate jump tables; at most one may remain
        tablelabels = []
        match = self.r_jmp_switch.match(line)
        if match:
            # this is a jmp *Label(%index), used for table-based switches.
            # Assume that the table is just a list of lines looking like
            # .long LABEL or .long 0, ending in a .text or .section .text.hot.
            tablelabels.append(match.group(1))
        elif self.r_unaryinsn_star.match(line):
            # maybe a jmp similar to the above, but stored in a
            # register:
            #     movl L9341(%eax), %eax
            #     jmp *%eax
            operand = self.r_unaryinsn_star.match(line).group(1)
            def walker(insn, locs):
                # Follow the operand backwards.  A path stops at the first
                # source that starts like a label; otherwise the walk goes
                # on with the tuple of sources.
                sources = []
                for loc in locs:
                    for s in insn.all_sources_of(loc):
                        # if the source looks like 8(%eax,%edx,4)
                        # %eax is the real source, %edx is an offset.
                        match = self.r_jmp_source.match(s)
                        if match and not self.r_localvar_esp.match(s):
                            sources.append(match.group(1))
                        else:
                            sources.append(s)
                for source in sources:
                    label_match = re.compile(self.LABEL).match(source)
                    if label_match:
                        tablelabels.append(label_match.group(0))
                        return
                yield tuple(sources)
            # temporary stop instruction, linked to the last parsed
            # instruction only to give the backward walk a starting point
            insn = InsnStop()
            insn.previous_insns = [self.insns[-1]]
            self.walk_instructions_backwards(walker, insn, (operand,))

            # Remove probable tail-calls
            tablelabels = [label for label in tablelabels
                           if label in self.labels]
        assert len(tablelabels) <= 1
        if tablelabels:
            # scan the table, starting on the line after its label
            tablelin = self.labels[tablelabels[0]].lineno + 1
            while not self.r_jmptable_end.match(self.lines[tablelin]):
                # skip empty lines and lines starting with ';'
                if (not self.lines[tablelin].strip()
                    or self.lines[tablelin].startswith(';')):
                    tablelin += 1
                    continue
                match = self.r_jmptable_item.match(self.lines[tablelin])
                if not match:
                    raise NoPatternMatch(repr(self.lines[tablelin]))
                label = match.group(1)
                # an entry of '0' is not a jump target
                if label != '0':
                    self.register_jump_to(label)
                tablelin += 1
            return InsnStop("jump table")
        if self.r_unaryinsn_star.match(line):
            # that looks like an indirect tail-call.
            # tail-calls are equivalent to RET for us
            return InsnRet(self.CALLEE_SAVE_REGISTERS)
        try:
            self.conditional_jump(line)
        except KeyError:
            # label not found: check if it's a tail-call turned into a jump
            match = self.r_unaryinsn.match(line)
            target = match.group(1)
            assert not target.startswith('.')
            # tail-calls are equivalent to RET for us
            return InsnRet(self.CALLEE_SAVE_REGISTERS)
        return InsnStop("jump")
 728    
 729    def register_jump_to(self, label, lastinsn=None):
 730        if lastinsn is None:
 731            lastinsn = self.insns[-1]
 732        if not isinstance(lastinsn, InsnStop):
 733            self.labels[label].previous_insns.append(lastinsn)
 734
 735    def conditional_jump(self, line, je=False, jne=False):
 736        match = self.r_jump.match(line)
 737        if not match:
 738            match = self.r_jump_rel_label.match(line)
 739            if not match:
 740                raise UnrecognizedOperation(line)
 741            # j* NNNf
 742            label = match.group(1)
 743            label += ":"
 744            i = self.currentlineno + 1
 745            while True:
 746                if self.lines[i].startswith(label):
 747                    label = "rel %d" % i
 748                    break
 749                i += 1
 750        else:
 751            label = match.group(1)
 752        prefix = []
 753        lastinsn = None
 754        postfix = []
 755        if self.tested_for_zero is not None:
 756            if je:
 757                # generate pseudo-code...
 758                prefix = [InsnCopyLocal(self.tested_for_zero, '%tmp'),
 759                          InsnSetLocal(self.tested_for_zero)]
 760                postfix = [InsnCopyLocal('%tmp', self.tested_for_zero)]
 761                lastinsn = prefix[-1]
 762            elif jne:
 763                postfix = [InsnSetLocal(self.tested_for_zero)]
 764        self.register_jump_to(label, lastinsn)
 765        return prefix + [InsnCondJump(label)] + postfix
 766
 767    visit_jmpl = visit_jmp
 768    visit_jg = conditional_jump
 769    visit_jge = conditional_jump
 770    visit_jl = conditional_jump
 771    visit_jle = conditional_jump
 772    visit_ja = conditional_jump
 773    visit_jae = conditional_jump
 774    visit_jb = conditional_jump
 775    visit_jbe = conditional_jump
 776    visit_jp = conditional_jump
 777    visit_jnp = conditional_jump
 778    visit_js = conditional_jump
 779    visit_jns = conditional_jump
 780    visit_jo = conditional_jump
 781    visit_jno = conditional_jump
 782    visit_jc = conditional_jump
 783    visit_jnc = conditional_jump
 784
 785    def visit_je(self, line):
 786        return self.conditional_jump(line, je=True)
 787
 788    def visit_jne(self, line):
 789        return self.conditional_jump(line, jne=True)
 790
 791    def _visit_test(self, line):
 792        match = self.r_binaryinsn.match(line)
 793        source = match.group("source")
 794        target = match.group("target")
 795        if source == target:
 796            self.tested_for_zero = source
 797        return []
 798
 799    def _visit_xchg(self, line):
 800        # only support the format used in VALGRIND_DISCARD_TRANSLATIONS
 801        # which is to use a marker no-op "xchgl %ebx, %ebx"
 802        match = self.r_binaryinsn.match(line)
 803        source = match.group("source")
 804        target = match.group("target")
 805        if source == target:
 806            return []
 807        raise UnrecognizedOperation(line)
 808
 809    def visit_call(self, line):
 810        match = self.r_unaryinsn.match(line)
 811
 812        if match is None:
 813            assert self.r_unaryinsn_star.match(line)   # indirect call
 814            return [InsnCall('<indirect>', self.currentlineno),
 815                    InsnSetLocal(self.EAX)]      # the result is there
 816
 817        target = match.group(1)
 818
 819        if self.format in ('msvc',):
 820            # On win32, the address of a foreign function must be
 821            # computed, the optimizer may store it in a register.  We
 822            # could ignore this, except when the function need special
 823            # processing (not returning, __stdcall...)
 824            def find_register(target):
 825                reg = []
 826                def walker(insn, locs):
 827                    sources = []
 828                    for loc in locs:
 829                        for s in insn.all_sources_of(loc):
 830                            sources.append(s)
 831                    for source in sources:
 832                        m = re.match("DWORD PTR " + self.LABEL, source)
 833                        if m:
 834                            reg.append(m.group(1))
 835                    if reg:
 836                        return
 837                    yield tuple(sources)
 838                insn = InsnStop()
 839                insn.previous_insns = [self.insns[-1]]
 840                self.walk_instructions_backwards(walker, insn, (target,))
 841                return reg
 842
 843            if match and self.r_localvarfp.match(target):
 844                sources = find_register(target)
 845                if sources:
 846                    target, = sources
 847
 848        if target in self.FUNCTIONS_NOT_RETURNING:
 849            return [InsnStop(target)]
 850        if self.format == 'mingw32' and target == '__alloca':
 851            # in functions with large stack requirements, windows
 852            # needs a call to _alloca(), to turn reserved pages
 853            # into committed memory.
 854            # With mingw32 gcc at least, %esp is not used before
 855            # this call.  So we don't bother to compute the exact
 856            # stack effect.
 857            return [InsnCannotFollowEsp()]
 858        if target in self.labels:
 859            lineoffset = self.labels[target].lineno - self.currentlineno
 860            if lineoffset >= 0:
 861                assert  lineoffset in (1,2)
 862                return [InsnStackAdjust(-4)]
 863
 864        insns = [InsnCall(target, self.currentlineno),
 865                 InsnSetLocal(self.EAX)]      # the result is there
 866        if self.format in ('mingw32', 'msvc'):
 867            # handle __stdcall calling convention:
 868            # Stack cleanup is performed by the called function,
 869            # Function name is decorated with "@N" where N is the stack size
 870            if '@' in target and not target.startswith('@'):
 871                insns.append(InsnStackAdjust(int(target.rsplit('@', 1)[1])))
 872            # Some (intrinsic?) functions use the "fastcall" calling convention
 873            # XXX without any declaration, how can we guess the stack effect?
 874            if target in ['__alldiv', '__allrem', '__allmul', '__alldvrm',
 875                          '__aulldiv', '__aullrem', '__aullmul', '__aulldvrm']:
 876                insns.append(InsnStackAdjust(16))
 877        return insns
 878
 879    # __________ debugging output __________
 880
 881    @classmethod
 882    def format_location(cls, loc):
 883        # A 'location' is a single number describing where a value is stored
 884        # across a call.  It can be in one of the CALLEE_SAVE_REGISTERS, or
 885        # in the stack frame at an address relative to either %esp or %ebp.
 886        # The last two bits of the location number are used to tell the cases
 887        # apart; see format_location().
 888        assert loc >= 0
 889        kind = loc & LOC_MASK
 890        if kind == LOC_REG:
 891            if loc == LOC_NOWHERE:
 892                return '?'
 893            reg = (loc >> 2) - 1
 894            return '%' + cls.CALLEE_SAVE_REGISTERS[reg].replace("%", "")
 895        else:
 896            offset = loc & ~ LOC_MASK
 897            if cls.WORD == 8:
 898                offset <<= 1
 899            if kind == LOC_EBP_PLUS:
 900                result = '(%' + cls.EBP.replace("%", "") + ')'
 901            elif kind == LOC_EBP_MINUS:
 902                result = '(%' + cls.EBP.replace("%", "") + ')'
 903                offset = -offset
 904            elif kind == LOC_ESP_PLUS:
 905                result = '(%' + cls.ESP.replace("%", "") + ')'
 906            else:
 907                assert 0, kind
 908            if offset != 0:
 909                result = str(offset) + result
 910            return result
 911
 912    @classmethod
 913    def format_callshape(cls, shape):
 914        # A 'call shape' is a tuple of locations in the sense of
 915        # format_location().  They describe where in a function frame
 916        # interesting values are stored, when this function executes a 'call'
 917        # instruction.
 918        #
 919        #   shape[0]    is the location that stores the fn's own return
 920        #               address (not the return address for the currently
 921        #               executing 'call')
 922        #
 923        #   shape[1..N] is where the fn saved its own caller's value of a
 924        #               certain callee save register. (where N is the number
 925        #               of callee save registers.)
 926        #
 927        #   shape[>N]   are GC roots: where the fn has put its local GCPTR
 928        #               vars
 929        #
 930        num_callee_save_regs = len(cls.CALLEE_SAVE_REGISTERS)
 931        assert isinstance(shape, tuple)
 932        # + 1 for the return address
 933        assert len(shape) >= (num_callee_save_regs + 1)
 934        result = [cls.format_location(loc) for loc in shape]
 935        return '{%s | %s | %s}' % (result[0],
 936                                   ', '.join(result[1:(num_callee_save_regs+1)]),
 937                                   ', '.join(result[(num_callee_save_regs+1):]))
 938
 939
 940class FunctionGcRootTracker32(FunctionGcRootTracker):
 941    WORD = 4
 942
 943    visit_mov = FunctionGcRootTracker._visit_mov
 944    visit_movl = FunctionGcRootTracker._visit_mov
 945    visit_pushl = FunctionGcRootTracker._visit_push
 946    visit_leal = FunctionGcRootTracker._visit_lea
 947
 948    visit_addl = FunctionGcRootTracker._visit_add
 949    visit_subl = FunctionGcRootTracker._visit_sub
 950    visit_andl = FunctionGcRootTracker._visit_and
 951    visit_and = FunctionGcRootTracker._visit_and
 952
 953    visit_xchgl = FunctionGcRootTracker._visit_xchg
 954    visit_testl = FunctionGcRootTracker._visit_test
 955
 956    # used in "xor reg, reg" to create a NULL GC ptr
 957    visit_xorl = FunctionGcRootTracker.binary_insn
 958    visit_orl = FunctionGcRootTracker.binary_insn     # unsure about this one
 959
 960    # occasionally used on 32-bits to move floats around
 961    visit_movq = FunctionGcRootTracker.visit_nop
 962
 963    def visit_pushw(self, line):
 964        return [InsnStackAdjust(-2)]   # rare but not impossible
 965
 966    def visit_popl(self, line):
 967        match = self.r_unaryinsn.match(line)
 968        target = match.group(1)
 969        return self._visit_pop(target)
 970
 971class FunctionGcRootTracker64(FunctionGcRootTracker):
 972    WORD = 8
 973
 974    # Regex ignores destination
 975    r_save_xmm_register = re.compile(r"\tmovaps\s+%xmm(\d+)")
 976
 977    def _maybe_32bit_dest(func):
 978        def wrapper(self, line):
 979            # Using a 32-bit reg as a destination in 64-bit mode zero-extends
 980            # to 64-bits, so sometimes gcc uses a 32-bit operation to copy a
 981            # statically known pointer to a register
 982
 983            # %eax -> %rax
 984            new_line = re.sub(r"%e(ax|bx|cx|dx|di|si|bp)$", r"%r\1", line)
 985            # %r10d -> %r10
 986            new_line = re.sub(r"%r(\d+)d$", r"%r\1", new_line)
 987            return func(self, new_line)
 988        return wrapper
 989
 990    visit_addl = FunctionGcRootTracker.visit_nop
 991    visit_subl = FunctionGcRootTracker.visit_nop
 992    visit_leal = FunctionGcRootTracker.visit_nop
 993
 994    visit_cltq = FunctionGcRootTracker.visit_nop
 995
 996    visit_movq = FunctionGcRootTracker._visit_mov
 997    # just a special assembler mnemonic for mov
 998    visit_movabsq = FunctionGcRootTracker._visit_mov
 999    visit_mov = _maybe_32bit_dest(FunctionGcRootTracker._visit_mov)
1000    visit_movl = visit_mov
1001
1002    visit_xorl = _maybe_32bit_dest(FunctionGcRootTracker.binary_insn)
1003    
1004    visit_pushq = FunctionGcRootTracker._visit_push
1005
1006    visit_addq = FunctionGcRootTracker._visit_add
1007    visit_subq = FunctionGcRootTracker._visit_sub
1008
1009    visit_leaq = FunctionGcRootTracker._visit_lea
1010
1011    visit_xorq = FunctionGcRootTracker.binary_insn
1012    visit_xchgq = FunctionGcRootTracker._visit_xchg
1013    visit_testq = FunctionGcRootTracker._visit_test
1014
1015    # FIXME: similar to visit_popl for 32-bit
1016    def visit_popq(self, line):
1017        match = self.r_unaryinsn.match(line)
1018        target = match.group(1)
1019        return self._visit_pop(target)
1020
1021    def visit_jmp(self, line):
1022        # On 64-bit, %al is used when calling varargs functions to specify an
1023        # upper-bound on the number of xmm registers used in the call. gcc
1024        # uses %al to compute an indirect jump that looks like:
1025        #
1026        #     jmp *[some register]
1027        #     movaps %xmm7, [stack location]
1028        #     movaps %xmm6, [stack location]
1029        #     movaps %xmm5, [stack location]
1030        #     movaps %xmm4, [stack location]
1031        #     movaps %xmm3, [stack location]
1032        #     movaps %xmm2, [stack location]
1033        #     movaps %xmm1, [stack location]
1034        #     movaps %xmm0, [stack location]
1035        #
1036        # The jmp is always to somewhere in the block of "movaps"
1037        # instructions, according to how many xmm registers need to be saved
1038        # to the stack. The point of all this is that we can safely ignore
1039        # jmp instructions of that form.
1040        if (self.currentlineno + 8) < len(self.lines) and self.r_unaryinsn_star.match(line):
1041            matches = [self.r_save_xmm_register.match(self.lines[self.currentlineno + 1 + i]) for i in range(8)]
1042            if all(m and int(m.group(1)) == (7 - i) for i, m in enumerate(matches)):
1043                return []
1044
1045        return FunctionGcRootTracker.visit_jmp(self, line)
1046
1047
1048
class ElfFunctionGcRootTracker32(FunctionGcRootTracker32):
    """32-bit tracker for the 'elf' format: registers are written with a
    '%' prefix and functions are delimited by .type / .size lines."""
    format = 'elf'
    function_names_prefix = ''

    # --- registers ---
    ESP     = '%esp'
    EBP     = '%ebp'
    EAX     = '%eax'
    CALLEE_SAVE_REGISTERS = ['%ebx', '%esi', '%edi', '%ebp']
    REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
                   for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))

    # --- building blocks for the regular expressions ---
    OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
    LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
    OFFSET_LABELS   = 2**30
    TOP_OF_STACK_MINUS_WORD = '-4(%esp)'

    # --- function boundaries ---
    r_functionstart = re.compile(r"\t.type\s+" + LABEL + r",\s*[@]function\s*$")
    r_functionend   = re.compile(r"\t.size\s+" + LABEL + r",\s*[.]-" + LABEL + r"\s*$")

    # --- local variables: registers and stack slots ---
    LOCALVAR        = r"%eax|%edx|%ecx|%ebx|%esi|%edi|%ebp|-?\d*[(]%esp[)]"
    LOCALVARFP      = LOCALVAR + r"|-?\d*[(]%ebp[)]"
    r_localvarnofp  = re.compile(LOCALVAR)
    r_localvarfp    = re.compile(LOCALVARFP)
    r_localvar_esp  = re.compile(r"(-?\d*)[(]%esp[)]")
    r_localvar_ebp  = re.compile(r"(-?\d*)[(]%ebp[)]")

    # --- numeric local labels ("1:" / "jne 1f") ---
    r_rel_label      = re.compile(r"(\d+):\s*$")
    r_jump_rel_label = re.compile(r"\tj\w+\s+(\d+)f\s*$")

    # --- indirect operands and jump tables ---
    r_unaryinsn_star= re.compile(r"\t[a-z]\w*\s+[*](" + OPERAND + r")\s*$")
    r_jmptable_item = re.compile(r"\t.long\t" + LABEL + r'(-"[A-Za-z0-9$]+")?\s*$')
    r_jmptable_end  = re.compile(r"\t.text|\t.section\s+.text|\t\.align|" + LABEL)

    # --- markers left in the assembler as comments ---
    r_gcroot_marker = re.compile(r"\t/[*] GCROOT (" + LOCALVARFP + r") [*]/")
    r_gcnocollect_marker = re.compile(r"\t/[*] GC_NOCOLLECT (" + OPERAND + r") [*]/")
    r_bottom_marker = re.compile(r"\t/[*] GC_STACK_BOTTOM [*]/")

    # Used as a set of names; only the keys matter.
    FUNCTIONS_NOT_RETURNING = dict.fromkeys([
        '_exit',
        '__assert_fail',
        '___assert_rtn',
        'L___assert_rtn$stub',
        'L___eprintf$stub',
        ])
    for _name in FunctionGcRootTracker.BASE_FUNCTIONS_NOT_RETURNING:
        FUNCTIONS_NOT_RETURNING[_name] = None

    def __init__(self, lines, filetag=0):
        """`lines` must start with the function's .type line and end with
        its .size line; both must name the same function."""
        opening = self.r_functionstart.match(lines[0])
        funcname = opening.group(1)
        closing = self.r_functionend.match(lines[-1])
        assert funcname == closing.group(1) == closing.group(2)
        super(ElfFunctionGcRootTracker32, self).__init__(
            funcname, lines, filetag)

    def extract_immediate(self, value):
        """Return the integer value of a '$N' operand, or None if `value`
        does not start with '$'."""
        if value.startswith('$'):
            return int(value[1:])
        return None

ElfFunctionGcRootTracker32.init_regexp()
1109
class ElfFunctionGcRootTracker64(FunctionGcRootTracker64):
    """64-bit tracker for the 'elf64' format: registers are written with a
    '%' prefix and functions are delimited by .type / .size lines."""
    format = 'elf64'
    function_names_prefix = ''

    # --- registers ---
    ESP = '%rsp'
    EBP = '%rbp'
    EAX = '%rax'
    CALLEE_SAVE_REGISTERS = ['%rbx', '%r12', '%r13', '%r14', '%r15', '%rbp']
    REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
                   for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))

    # --- building blocks for the regular expressions ---
    OPERAND = r'(?:[-\w$%+.:@"]+(?:[(][\w%,]+[)])?|[(][\w%,]+[)])'
    LABEL   = r'([a-zA-Z_$.][a-zA-Z0-9_$@.]*)'
    OFFSET_LABELS   = 2**30
    TOP_OF_STACK_MINUS_WORD = '-8(%rsp)'

    # --- function boundaries ---
    r_functionstart = re.compile(r"\t.type\s+" + LABEL + r",\s*[@]function\s*$")
    r_functionend   = re.compile(r"\t.size\s+" + LABEL + r",\s*[.]-" + LABEL + r"\s*$")

    # --- local variables: registers and stack slots ---
    LOCALVAR = r"%rax|%rbx|%rcx|%rdx|%rdi|%rsi|%rbp|%r8|%r9|%r10|%r11|%r12|%r13|%r14|%r15|-?\d*[(]%rsp[)]"
    LOCALVARFP = LOCALVAR + r"|-?\d*[(]%rbp[)]"
    r_localvarnofp  = re.compile(LOCALVAR)
    r_localvarfp    = re.compile(LOCALVARFP)
    r_localvar_esp  = re.compile(r"(-?\d*)[(]%rsp[)]")
    r_localvar_ebp  = re.compile(r"(-?\d*)[(]%rbp[)]")

    # --- numeric local labels ("1:" / "jne 1f") ---
    r_rel_label      = re.compile(r"(\d+):\s*$")
    r_jump_rel_label = re.compile(r"\tj\w+\s+(\d+)f\s*$")

    # --- indirect operands and jump tables ---
    r_unaryinsn_star= re.compile(r"\t[a-z]\w*\s+[*](" + OPERAND + r")\s*$")
    r_jmptable_item = re.compile(r"\t.quad\t" + LABEL + r'(-"[A-Za-z0-9$]+")?\s*$')
    r_jmptable_end  = re.compile(r"\t.text|\t.section\s+.text|\t\.align|" + LABEL)

    # --- markers left in the assembler as comments ---
    r_gcroot_marker = re.compile(r"\t/[*] GCROOT (" + LOCALVARFP + r") [*]/")
    r_gcnocollect_marker = re.compile(r"\t/[*] GC_NOCOLLECT (" + OPERAND + r") [*]/")
    r_bottom_marker = re.compile(r"\t/[*] GC_STACK_BOTTOM [*]/")

    # Used as a set of names; only the keys matter.
    FUNCTIONS_NOT_RETURNING = dict.fromkeys([
        '_exit',
        '__assert_fail',
        '___assert_rtn',
        'L___assert_rtn$stub',
        'L___eprintf$stub',
        ])
    for _name in FunctionGcRootTracker.BASE_FUNCTIONS_NOT_RETURNING:
        FUNCTIONS_NOT_RETURNING[_name] = None

    def __init__(self, lines, filetag=0):
        """`lines` must start with the function's .type line and end with
        its .size line; both must name the same function."""
        opening = self.r_functionstart.match(lines[0])
        funcname = opening.group(1)
        closing = self.r_functionend.match(lines[-1])
        assert funcname == closing.group(1) == closing.group(2)
        super(ElfFunctionGcRootTracker64, self).__init__(
            funcname, lines, filetag)

    def extract_immediate(self, value):
        """Return the integer value of a '$N' operand, or None if `value`
        does not start with '$'."""
        if value.startswith('$'):
            return int(value[1:])
        return None

ElfFunctionGcRootTracker64.init_regexp()
1170
class DarwinFunctionGcRootTracker32(ElfFunctionGcRootTracker32):
    """32-bit tracker for the 'darwin' format: same patterns as the ELF
    tracker, but a function starts at a "_name:" label line."""
    format = 'darwin'
    function_names_prefix = '_'

    r_functionstart = re.compile(r"_(\w+):\s*$")
    OFFSET_LABELS   = 0

    def __init__(self, lines, filetag=0):
        # Calls FunctionGcRootTracker32.__init__ directly, skipping the ELF
        # constructor, which would also match r_functionend against the
        # last line.
        opening = self.r_functionstart.match(lines[0])
        funcname = '_%s' % opening.group(1)
        FunctionGcRootTracker32.__init__(self, funcname, lines, filetag)
1182
class DarwinFunctionGcRootTracker64(ElfFunctionGcRootTracker64):
    """64-bit tracker for the 'darwin64' format: same patterns as the ELF
    tracker, but a function starts at a "_name:" label line and jump
    table entries may be either .long or .quad."""
    format = 'darwin64'
    function_names_prefix = '_'

    LABEL = ElfFunctionGcRootTracker64.LABEL
    # accepts .long as well as .quad, and makes the quotes around the
    # subtracted symbol optional
    r_jmptable_item = re.compile(
        r"\t.(?:long|quad)\t" + LABEL + r'(-"?[A-Za-z0-9$]+"?)?\s*$')

    r_functionstart = re.compile(r"_(\w+):\s*$")
    OFFSET_LABELS   = 0

    def __init__(self, lines, filetag=0):
        # Calls FunctionGcRootTracker64.__init__ directly, skipping the ELF
        # constructor, which would also match r_functionend against the
        # last line.
        opening = self.r_functionstart.match(lines[0])
        funcname = '_%s' % opening.group(1)
        FunctionGcRootTracker64.__init__(self, funcname, lines, filetag)
1197
class Mingw32FunctionGcRootTracker(DarwinFunctionGcRootTracker32):
    """Tracker for the 'mingw32' format: behaves like the 32-bit Darwin
    tracker, with its own list of functions that never return."""
    format = 'mingw32'
    function_names_prefix = '_'

    # Used as a set of names; only the keys matter.  The shared base names
    # are added with a leading underscore.
    FUNCTIONS_NOT_RETURNING = dict.fromkeys(['_exit', '__assert'])
    for _name in FunctionGcRootTracker.BASE_FUNCTIONS_NOT_RETURNING:
        FUNCTIONS_NOT_RETURNING['_' + _name] = None
1208
1209class MsvcFunctionGcRootTracker(FunctionGcRootTracker32):
1210    format = 'msvc'
1211    function_names_prefix = '_'
1212
1213    ESP = 'esp'
1214    EBP = 'ebp'
1215    EAX = 'eax'
1216    CALLEE_SAVE_REGISTERS = ['ebx', 'esi', 'edi', 'ebp']
1217    REG2LOC = dict((_reg, LOC_REG | ((_i+1)<<2))
1218                   for _i, _reg in enumerate(CALLEE_SAVE_REGISTERS))
1219    TOP_OF_STACK_MINUS_WORD = 'DWORD PTR [esp-4]'
1220
1221    OPERAND = r'(?:(:?WORD|DWORD|BYTE) PTR |OFFSET )?[_\w?:@$]*(?:[-+0-9]+)?(:?\[[-+*\w0-9]+\])?'
1222    LABEL   = r'([a-zA-Z_$@.][a-zA-Z0-9_$@.]*)'
1223    OFFSET_LABELS = 0
1224
1225    r_segmentstart  = re.compile(r"[_A-Z]+\tSEGMENT$")
1226    r_segmentend    = re.compile(r"[_A-Z]+\tENDS$")
1227    r_functionstart = re.compile(r"; Function compile flags: ")
1228    r_codestart     = re.compile(LABEL+r"\s+PROC\s*(:?;.+)?\n$")
1229    r_functionend   = re.compile(LABEL+r"\s+ENDP\s*$")
1230    r_symboldefine =  re.compile(r"([_A-Za-z0-9$]+) = ([-0-9]+)\s*;.+\n")
1231
1232    LOCAL

Large files are truncated, but you can click here to view the full file