PageRenderTime 225ms CodeModel.GetById 121ms app.highlight 15ms RepoModel.GetById 86ms app.codeStats 0ms

/Misc/jitfuzz.py

http://unladen-swallow.googlecode.com/
Python | 198 lines | 159 code | 8 blank | 31 comment | 4 complexity | a0ecae3371d166c46d81e05432ac281f MD5 | raw file
  1#!/usr/bin/env python
  2
  3"""Fuzz tester for the bytecode -> LLVM IR compiler.
  4
  5The fuzzer implements two strategies for attacking the compiler:
  6  - Generate a random code object.
  7  - Take a known-good code object and change one byte to a random replacement.
  8
  9Neither of these is terribly sophisticated, but combined they were sufficient to
 10expose multiple fatal errors in the compiler. Strategies that were tried, but
 11failed to find further problems:
 12  - Very large code objects with thousands of opcodes.
 13  - Take a known-good code object and shuffle the opcodes without fixing jump
 14    targets.
 15  - Take a known-good code object and shuffle the opcodes, fixing jump targets.
 16  - Take a known-good code object and replace opcodes with other valid opcodes
 17    (jump opcodes replace other jump opcodes, nullary opcodes replace other
 18    nullary opcodes, etc).
 19
 20The code objects produced by these strategies would either be caught by the
 21JIT's bytecode validator or would be compiled successfully. The experience was
 22that the compiler has no trouble with syntactically-correct bytecode, even if
 23the semantics are invalid.
 24
 25The fuzzer has yet to generate bytecode that causes problems for LLVM; all
 26errors so far have been in the bytecode -> LLVM IR frontend.
 27
 28Example:
 29  /unladen/swallow/python jitfuzz.py --random_seed=12345678
 30"""
 31
 32# Python imports
 33import opcode
 34import optparse
 35import random
 36import sys
 37import traceback
 38import types
 39
 40
 41def find_code_objects(*modules):
 42    """Find most code objects in the given modules."""
 43    for module in modules:
 44        for val in module.__dict__.itervalues():
 45            if isinstance(val, types.FunctionType):
 46                yield val.__code__
 47            if isinstance(val, type):
 48                for x in val.__dict__.values():
 49                    if isinstance(x, types.MethodType):
 50                        yield x.__code__
 51
 52
# These are known-good code objects for us to screw with: everything that
# find_code_objects() discovers in the traceback, optparse and random modules.
# The list is built once, when this module is loaded.
CODE_OBJS = list(find_code_objects(traceback, optparse, random))
 55
# The order of this list must match the order of parameters to types.CodeType().
# Each entry is a code-object attribute name without its "co_" prefix;
# clone_code_object() prepends the prefix when it reads the attribute.
CODE_ATTRS = ["argcount", "nlocals", "stacksize", "flags", "code",
              "consts", "names", "varnames", "filename", "name",
              "firstlineno", "lnotab", "freevars", "cellvars"]
 60
 61
 62def stderr(message, *args):
 63    print >>sys.stderr, message % args
 64
 65
 66def init_random_seed(random_seed):
 67    if random_seed == -1:
 68        random_seed = int(random.random() * 1e9)
 69    random.seed(random_seed)
 70    return random_seed
 71
 72
 73def clone_code_object(code_obj, **changes):
 74    """Copy a given code object, possibly changing some attributes.
 75
 76    Example:
 77        clone_code_object(code, code=new_bytecode, flags=new_flags)
 78
 79    Args:
 80        code_obj: baseline code object to clone.
 81        **changes: keys should be names in CODE_ATTRS, values should be the
 82          new value for that attribute name.
 83
 84    Returns:
 85        A new code object.
 86    """
 87    members = []
 88    for attr in CODE_ATTRS:
 89        if attr in changes:
 90            members.append(changes[attr])
 91        else:
 92            full_attr = "co_" + attr
 93            members.append(getattr(code_obj, full_attr))
 94    return types.CodeType(*members)
 95
 96
 97def random_int(lower=0, upper=10):
 98    return random.randint(lower, upper)
 99
100
def random_char(lower=1, upper=255):
    """Return chr() of a random integer N such that lower <= N <= upper."""
    ordinal = random.randint(lower, upper)
    return chr(ordinal)
103
104
def random_string(length=None):
    """Return a string of the given length.

    Args:
        length: number of characters; when None, a length between 0 and 5000
          is drawn with random_int().

    Returns:
        The character "a" repeated length times.
    """
    size = random_int(upper=5000) if length is None else length
    # Not random, but nothing looks at the contents of the strings.
    return "a" * size
110
111
def random_list(func, length=None):
    """Return a list built by calling func() once per element.

    Args:
        func: zero-argument callable producing one element per call.
        length: number of elements; when None, a length between 0 and 500 is
          drawn with random_int().

    Returns:
        A list of length results of func(), in call order.
    """
    if length is None:
        length = random_int(upper=500)
    # range() rather than the Python 2-only xrange(); the lists involved are
    # small enough that materializing the range on Python 2 is harmless.
    return [func() for _ in range(length)]
116
117
def random_object():
    """Return one randomly chosen entry from a fixed pool of sample objects."""
    # The pool is rebuilt on every call, so the list entry is a fresh object
    # each time. Note that the random_list function itself is one of the
    # entries (it is not called here).
    pool = [None, True, 3e8, random_list, "foo", u"bar", (9,), []]
    return random.choice(pool)
121
122
def random_code_object():
    """Build a code object whose fields are filled with random values.

    With probability 0.5 the varnames tuple gets exactly nlocals entries;
    otherwise its length is drawn independently of nlocals.

    Returns:
        A new code object with filename "attack-jit.py" and an empty lnotab.
    """
    # NOTE: the order of the random draws below is significant -- it fixes
    # the sequence of objects produced for a given seed.
    consistent_varnames = random.random() < 0.5

    n_args = random_int()
    n_locals = random_int(upper=100)
    stack_depth = random_int(upper=10000)
    flag_bits = random_int(upper=1024)
    # random_string() returns a run of "a" characters, so only the length of
    # the bytecode string varies here.
    bytecode = random_string()
    consts = tuple(random_list(random_object))
    global_names = tuple(random_list(random_string))
    func_name = random_string()
    first_line = random_int(lower=-1000, upper=1000)
    free_names = tuple(random_list(random_string))
    cell_names = tuple(random_list(random_string))

    # Passing None lets random_list() pick its own length.
    varnames_len = n_locals if consistent_varnames else None
    local_names = tuple(random_list(random_string, varnames_len))

    return types.CodeType(n_args, n_locals, stack_depth, flag_bits, bytecode,
                          consts, global_names, local_names, "attack-jit.py",
                          func_name, first_line, "", free_names, cell_names)
148
149
def permute_code_object(baseline):
    """Return a clone of baseline with one bytecode position overwritten.

    Args:
        baseline: code object whose co_code is used as the starting point.

    Returns:
        A new code object, identical to baseline except that one randomly
        chosen position of its bytecode holds a random_char() value.
    """
    original = baseline.co_code
    # Draw the replacement before the position: this is the order in which
    # the two random calls are evaluated in an item assignment.
    replacement = random_char()
    position = random.randint(0, len(original) - 1)
    mutated = original[:position] + replacement + original[position + 1:]
    return clone_code_object(baseline, code=mutated)
155
156
def generate_code():
    """Yield new code objects forever.

    Each iteration picks, with equal probability, either a fully random code
    object or a one-byte mutation of a random entry of CODE_OBJS.
    """
    while True:
        from_scratch = random.random() < 0.5
        if from_scratch:
            yield random_code_object()
            continue
        yield permute_code_object(random.choice(CODE_OBJS))
164
165
def attack_jit():
    """Feed an endless stream of generated code objects to the JIT.

    Every code object from generate_code() gets co_use_jit set to True and
    co_optimization set to 2. If the latter assignment raises, the traceback
    is printed and the object is counted as rejected; otherwise it is counted
    as valid. A summary is written to stdout whenever the loop index is a
    multiple of 100. The loop never terminates on its own.
    """
    # Track how many code objects are approved by the validator. If too many
    # are being rejected by the validator, we're not stressing LLVM enough.
    valid = 0
    rejected = 0
    report = ("\n"
              "### %d attacks successfully repulsed\n"
              "### Validated: %d; rejected: %d\n"
              "\n")
    for i, code in enumerate(generate_code()):
        code.co_use_jit = True
        try:
            # NOTE(review): presumably this assignment is what triggers
            # validation/compilation of the code object -- confirm.
            code.co_optimization = 2
        except Exception:
            # Catch Exception rather than everything, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit can still end the loop.
            traceback.print_exc()
            rejected += 1
        else:
            valid += 1
        if i % 100 == 0:
            sys.stdout.write(report % (i, valid, rejected))
184
185
def main(argv):
    """Parse the command line, seed the random module and start fuzzing.

    Args:
        argv: argument list handed to optparse as-is; positional leftovers
          are ignored.
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option("-r", "--random_seed", type="int", default=-1,
                             help="Random seed")
    parsed, _leftover = option_parser.parse_args(argv)

    # Report the seed so that a run can be reproduced with --random_seed.
    seed_used = init_random_seed(parsed.random_seed)
    stderr("Using random seed: %s", seed_used)
    attack_jit()
195
196
# Run the fuzzer when executed as a script; the full sys.argv (program name
# included) is handed to main().
if __name__ == "__main__":
    main(sys.argv)