/Misc/jitfuzz.py

http://unladen-swallow.googlecode.com/ · Python · 198 lines · 155 code · 6 blank · 37 comment · 5 complexity · a0ecae3371d166c46d81e05432ac281f MD5 · raw file

  1. #!/usr/bin/env python
  2. """Fuzz tester for the bytecode -> LLVM IR compiler.
  3. The fuzzer implements two strategies for attacking the compiler:
  4. - Generate a random code object.
  5. - Take a known-good code object and change one byte to a random replacement.
  6. Neither of these is terribly sophisticated, but combined they were sufficient to
  7. expose multiple fatal errors in the compiler. Strategies that were tried, but
  8. failed to find further problems:
  9. - Very large code objects with thousands of opcodes.
  10. - Take a known-good code object and shuffle the opcodes without fixing jump
  11. targets.
  12. - Take a known-good code object and shuffle the opcodes, fixing jump targets.
  13. - Take a known-good code object and replace opcodes with other valid opcodes
  14. (jump opcodes replace other jump opcodes, nullary opcodes replace other
  15. nullary opcodes, etc).
  16. The code objects produced by these strategies would either be caught by the
  17. JIT's bytecode validator or would be compiled successfully. The experience was
  18. that the compiler has no trouble with syntactically-correct bytecode, even if
  19. the semantics are invalid.
  20. The fuzzer has yet to generate bytecode that causes problems for LLVM; all
  21. errors so far have been in the bytecode -> LLVM IR frontend.
  22. Example:
  23. /unladen/swallow/python jitfuzz.py --random_seed=12345678
  24. """
  25. # Python imports
  26. import opcode
  27. import optparse
  28. import random
  29. import sys
  30. import traceback
  31. import types
  32. def find_code_objects(*modules):
  33. """Find most code objects in the given modules."""
  34. for module in modules:
  35. for val in module.__dict__.itervalues():
  36. if isinstance(val, types.FunctionType):
  37. yield val.__code__
  38. if isinstance(val, type):
  39. for x in val.__dict__.values():
  40. if isinstance(x, types.MethodType):
  41. yield x.__code__
# Known-good code objects for us to screw with, harvested once at import time
# from the traceback, optparse and random modules. generate_code() hands a
# random entry from this list to permute_code_object().
CODE_OBJS = list(find_code_objects(traceback, optparse, random))
  44. # The order of this list must match the order of parameters to types.CodeType().
  45. CODE_ATTRS = ["argcount", "nlocals", "stacksize", "flags", "code",
  46. "consts", "names", "varnames", "filename", "name",
  47. "firstlineno", "lnotab", "freevars", "cellvars"]
  48. def stderr(message, *args):
  49. print >>sys.stderr, message % args
  50. def init_random_seed(random_seed):
  51. if random_seed == -1:
  52. random_seed = int(random.random() * 1e9)
  53. random.seed(random_seed)
  54. return random_seed
  55. def clone_code_object(code_obj, **changes):
  56. """Copy a given code object, possibly changing some attributes.
  57. Example:
  58. clone_code_object(code, code=new_bytecode, flags=new_flags)
  59. Args:
  60. code_obj: baseline code object to clone.
  61. **changes: keys should be names in CODE_ATTRS, values should be the
  62. new value for that attribute name.
  63. Returns:
  64. A new code object.
  65. """
  66. members = []
  67. for attr in CODE_ATTRS:
  68. if attr in changes:
  69. members.append(changes[attr])
  70. else:
  71. full_attr = "co_" + attr
  72. members.append(getattr(code_obj, full_attr))
  73. return types.CodeType(*members)
  74. def random_int(lower=0, upper=10):
  75. return random.randint(lower, upper)
  76. def random_char(lower=1, upper=255):
  77. return chr(random.randint(lower, upper))
  78. def random_string(length=None):
  79. if length is None:
  80. length = random_int(upper=5000)
  81. # Not random, but nothing looks at the contents of the strings.
  82. return "a" * length
  83. def random_list(func, length=None):
  84. if length is None:
  85. length = random_int(upper=500)
  86. return [func() for _ in xrange(length)]
  87. def random_object():
  88. return random.choice([None, True, 3e8, random_list,
  89. "foo", u"bar", (9,), []])
  90. def random_code_object():
  91. correct = (random.random() < 0.5)
  92. argcount = random_int()
  93. nlocals = random_int(upper=100)
  94. stacksize = random_int(upper=10000)
  95. flags = random_int(upper=1024)
  96. codestring = random_string()
  97. constants = tuple(random_list(random_object))
  98. names = tuple(random_list(random_string))
  99. filename = "attack-jit.py"
  100. name = random_string()
  101. firstlineno = random_int(lower=-1000, upper=1000)
  102. lnotab = ""
  103. freevars = tuple(random_list(random_string))
  104. cellvars = tuple(random_list(random_string))
  105. if correct:
  106. varnames = tuple(random_list(random_string, nlocals))
  107. else:
  108. varnames = tuple(random_list(random_string))
  109. code = types.CodeType(argcount, nlocals, stacksize, flags, codestring,
  110. constants, names, varnames, filename, name,
  111. firstlineno, lnotab, freevars, cellvars)
  112. return code
  113. def permute_code_object(baseline):
  114. """Take a code object and change one byte of the bytecode."""
  115. bytecode = list(baseline.co_code)
  116. bytecode[random.randint(0, len(bytecode) - 1)] = random_char()
  117. return clone_code_object(baseline, code="".join(bytecode))
  118. def generate_code():
  119. """Yield new code objects forever."""
  120. while True:
  121. if random.random() < 0.5:
  122. yield random_code_object()
  123. else:
  124. yield permute_code_object(random.choice(CODE_OBJS))
  125. def attack_jit():
  126. # Track how many code objects are approved by the validator. If too many
  127. # are being rejected by the validator, we're not stressing LLVM enough.
  128. valid = 0
  129. rejected = 0
  130. for i, code in enumerate(generate_code()):
  131. code.co_use_jit = True
  132. try:
  133. code.co_optimization = 2
  134. valid += 1
  135. except:
  136. traceback.print_exc()
  137. rejected += 1
  138. if i % 100 == 0:
  139. print
  140. print "### %d attacks successfully repulsed" % i
  141. print "### Validated: %d; rejected: %d" % (valid, rejected)
  142. print
  143. def main(argv):
  144. parser = optparse.OptionParser()
  145. parser.add_option("-r", "--random_seed",
  146. help="Random seed", type="int", default=-1)
  147. options, _ = parser.parse_args(argv)
  148. rand_seed = init_random_seed(options.random_seed)
  149. stderr("Using random seed: %s", rand_seed)
  150. attack_jit()
# Start the fuzzer only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)