/Misc/jitfuzz.py
Python | 198 lines | 159 code | 8 blank | 31 comment | 4 complexity | a0ecae3371d166c46d81e05432ac281f MD5 | raw file
#!/usr/bin/env python

"""Fuzz tester for the bytecode -> LLVM IR compiler.

The fuzzer implements two strategies for attacking the compiler:
    - Generate a random code object.
    - Take a known-good code object and change one byte to a random replacement.

Neither of these is terribly sophisticated, but combined they were sufficient
to expose multiple fatal errors in the compiler. Strategies that were tried,
but failed to find further problems:
    - Very large code objects with thousands of opcodes.
    - Take a known-good code object and shuffle the opcodes without fixing jump
      targets.
    - Take a known-good code object and shuffle the opcodes, fixing jump targets.
    - Take a known-good code object and replace opcodes with other valid opcodes
      (jump opcodes replace other jump opcodes, nullary opcodes replace other
      nullary opcodes, etc).

The code objects produced by these strategies would either be caught by the
JIT's bytecode validator or would be compiled successfully. The experience was
that the compiler has no trouble with syntactically-correct bytecode, even if
the semantics are invalid.

The fuzzer has yet to generate bytecode that causes problems for LLVM; all
errors so far have been in the bytecode -> LLVM IR frontend.

Example:
    /unladen/swallow/python jitfuzz.py --random_seed=12345678
"""

# Python imports
import opcode
import optparse
import random
import sys
import traceback
import types


def find_code_objects(*modules):
    """Find most code objects in the given modules.

    For each module, yields the code object of every plain function bound in
    the module's namespace, and of every plain function stored directly in the
    __dict__ of each class (instance of `type`) bound in that namespace.
    Inherited methods, nested classes and functions hidden behind wrapper
    objects are not visited.

    Args:
        *modules: module objects to scan.

    Yields:
        Code objects, in namespace iteration order.
    """
    for module in modules:
        # Iterate over a snapshot: this is a generator, so the namespace could
        # otherwise change size between two yields.
        for val in list(module.__dict__.values()):
            if isinstance(val, types.FunctionType):
                yield val.__code__
            elif isinstance(val, type):
                for x in list(val.__dict__.values()):
                    # A class __dict__ holds methods as plain functions; a
                    # method object only exists once the attribute is looked
                    # up on the class or an instance, so testing for
                    # types.MethodType here would never match.
                    if isinstance(x, types.FunctionType):
                        yield x.__code__


# These are known-good code objects for us to screw with.
CODE_OBJS = list(find_code_objects(traceback, optparse, random))

# The order of this list must match the order of parameters to types.CodeType().
CODE_ATTRS = ["argcount", "nlocals", "stacksize", "flags", "code",
              "consts", "names", "varnames", "filename", "name",
              "firstlineno", "lnotab", "freevars", "cellvars"]


def stderr(message, *args):
    """Write `message % args` to sys.stderr, followed by a newline."""
    # An explicit write() reads the same whether print is a statement or a
    # function, and emits the same text as the print-to-stderr statement.
    sys.stderr.write(message % args + "\n")


def init_random_seed(random_seed):
    """Seed the `random` module and return the seed that was used.

    Args:
        random_seed: integer seed; -1 means "pick one at random".

    Returns:
        The seed actually passed to random.seed(), so it can be reported and
        reused to reproduce a run.
    """
    if random_seed == -1:
        random_seed = int(random.random() * 1e9)
    random.seed(random_seed)
    return random_seed


def clone_code_object(code_obj, **changes):
    """Copy a given code object, possibly changing some attributes.

    Example:
        clone_code_object(code, code=new_bytecode, flags=new_flags)

    Args:
        code_obj: baseline code object to clone.
        **changes: keys should be names in CODE_ATTRS, values should be the
            new value for that attribute name. Keys that are not in
            CODE_ATTRS are ignored.

    Returns:
        A new code object.
    """
    members = []
    for attr in CODE_ATTRS:
        if attr in changes:
            members.append(changes[attr])
        else:
            # Attributes on the code object carry a "co_" prefix.
            members.append(getattr(code_obj, "co_" + attr))
    return types.CodeType(*members)


def random_int(lower=0, upper=10):
    """Return a random integer N with lower <= N <= upper."""
    return random.randint(lower, upper)


def random_char(lower=1, upper=255):
    """Return chr() of a random integer in [lower, upper].

    With the default bounds the result is never chr(0).
    """
    return chr(random.randint(lower, upper))


def random_string(length=None):
    """Return a string of `length` characters.

    Args:
        length: desired length; if None, a random length in [0, 5000] is used.
    """
    if length is None:
        length = random_int(upper=5000)
    # Not random, but nothing looks at the contents of the strings.
    return "a" * length


def random_list(func, length=None):
    """Return a list built by calling `func()` once per element.

    Args:
        func: zero-argument callable producing one element.
        length: number of elements; if None, a random length in [0, 500].
    """
    if length is None:
        length = random_int(upper=500)
    return [func() for _ in xrange(length)]


def random_object():
    """Return one object picked at random from a small, fixed mix of types."""
    return random.choice([None, True, 3e8, random_list,
                          "foo", u"bar", (9,), []])


def random_code_object():
    """Build a code object whose fields are all randomly generated.

    Half of the time the number of variable names matches nlocals; the other
    half it is an independent random length.

    Returns:
        A new code object.
    """
    correct = (random.random() < 0.5)

    argcount = random_int()
    nlocals = random_int(upper=100)
    stacksize = random_int(upper=10000)
    flags = random_int(upper=1024)
    codestring = random_string()
    constants = tuple(random_list(random_object))
    names = tuple(random_list(random_string))
    filename = "attack-jit.py"
    name = random_string()
    firstlineno = random_int(lower=-1000, upper=1000)
    lnotab = ""
    freevars = tuple(random_list(random_string))
    cellvars = tuple(random_list(random_string))
    if correct:
        varnames = tuple(random_list(random_string, nlocals))
    else:
        varnames = tuple(random_list(random_string))

    code = types.CodeType(argcount, nlocals, stacksize, flags, codestring,
                          constants, names, varnames, filename, name,
                          firstlineno, lnotab, freevars, cellvars)
    return code


def permute_code_object(baseline):
    """Take a code object and change one byte of the bytecode.

    Args:
        baseline: code object to copy; it is not modified.

    Returns:
        A clone of `baseline` whose bytecode differs in at most one position.
    """
    bytecode = list(baseline.co_code)
    bytecode[random.randint(0, len(bytecode) - 1)] = random_char()
    return clone_code_object(baseline, code="".join(bytecode))


def generate_code():
    """Yield new code objects forever.

    Each object comes, with equal probability, from random_code_object() or
    from permute_code_object() applied to a random entry of CODE_OBJS.
    """
    while True:
        if random.random() < 0.5:
            yield random_code_object()
        else:
            yield permute_code_object(random.choice(CODE_OBJS))


def attack_jit():
    """Throw generated code objects at the JIT forever, printing progress.

    Every generated code object gets co_use_jit and co_optimization assigned.
    If assigning co_optimization raises, the traceback is printed and the
    object is counted as rejected; otherwise it is counted as valid. A summary
    is printed whenever the zero-based attack index is a multiple of 100.

    This function only returns by way of an exception such as
    KeyboardInterrupt.
    """
    # Track how many code objects are approved by the validator. If too many
    # are being rejected by the validator, we're not stressing LLVM enough.
    valid = 0
    rejected = 0
    for i, code in enumerate(generate_code()):
        code.co_use_jit = True
        try:
            code.co_optimization = 2
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and SystemExit
            # must propagate, otherwise Ctrl-C is tallied as a rejection and
            # this endless loop cannot be stopped from the keyboard.
            traceback.print_exc()
            rejected += 1
        else:
            valid += 1
        if i % 100 == 0:
            # Single-argument print calls give the same output whether print
            # is a statement or a function.
            print("")
            print("### %d attacks successfully repulsed" % i)
            print("### Validated: %d; rejected: %d" % (valid, rejected))
            print("")


def main(argv):
    """Parse command-line options, seed the RNG and start fuzzing.

    Args:
        argv: full argument vector, as in sys.argv. Positional arguments
            (including the program name) are parsed but ignored.
    """
    parser = optparse.OptionParser()
    parser.add_option("-r", "--random_seed",
                      help="Random seed", type="int", default=-1)
    options, _ = parser.parse_args(argv)

    rand_seed = init_random_seed(options.random_seed)
    # Report the seed so that a failing run can be reproduced.
    stderr("Using random seed: %s", rand_seed)
    attack_jit()


if __name__ == "__main__":
    main(sys.argv)