PageRenderTime 42ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/tool/memusage/log2gnumeric.py

https://bitbucket.org/bwesterb/pypy
Python | 204 lines | 195 code | 0 blank | 9 comment | 0 complexity | 9cf614cd41c995ffb60da5bf4c22a796 MD5 | raw file
  1. #! /usr/bin/env python
  2. """
  3. Produces a logfile.gnumeric file which contains the data extracted from the
  4. logfile generated with the PYPYLOG env variable.
  5. Run your program like this::
  6. $ PYPYLOG=gc-collect,jit-mem:logfile pypy your-program.py
  7. This will produce "logfile", containing information about the memory used by
  8. the GC and the number of loops created/freed by the JIT.
  9. If you want, you can also measure the amout of used memory as seen by the OS
  10. (the VmRSS) using memusage.py::
  11. $ PYPYLOG=gc-collect,jit-mem:logfile ./memusage.py -o logfile.vmrss /path/to/pypy your-program.py
  12. log2gnumeric will automatically pick logfile.vmrss, if present.
  13. If you want to compare PyPy to CPython, you can add its VmRSS to the graph, by
  14. using the -c option. To produce the .vmrss file, use again ./memusage.py::
  15. $ ./memusage.py -o cpython.vmrss python your-program.py
  16. $ ./log2gnumeric.py -c cpython.vmrss logfile
  17. Note that on CPython it will take a different amout of time to complete, but
  18. on the graph the plot will be scaled to match the duration of the PyPy run
  19. (i.e., the two lines will end "at the same time").
  20. If you are benchmarking translate.py, you can add the "translation-task"
  21. category to the log, by setting PYPYLOG=gc-collect,jit-mem,translation-task.
  22. You can freely edit the graph in log-template.gnumeric: this script will
  23. create a new file replacing the 'translation-task' and 'gc-collect' sheets.
  24. """
  25. import re, sys
  26. import gzip
  27. import optparse
  28. def main(logname, options):
  29. outname = logname + '.gnumeric'
  30. data = open(logname).read()
  31. data = data.replace('\n', '')
  32. minclock, maxclock = get_clock_range(data)
  33. time0 = minclock # we want "relative clocks"
  34. maxtime = maxclock-time0
  35. #
  36. xml = gzip.open('log-template.gnumeric').read()
  37. xml = replace_sheet(xml, 'translation-task', tasks_rows(time0, data))
  38. xml = replace_sheet(xml, 'gc-collect', gc_collect_rows(time0, data))
  39. xml = replace_sheet(xml, 'loops', loops_rows(time0, data))
  40. xml = replace_sheet(xml, 'vmrss', vmrss_rows(logname + '.vmrss', maxtime))
  41. xml = replace_sheet(xml, 'cpython-vmrss', vmrss_rows(options.cpython_vmrss, maxtime))
  42. #
  43. out = gzip.open(outname, 'wb')
  44. out.write(xml)
  45. out.close()
  46. # ========================================================================
  47. # functions to manipulate gnumeric files
  48. # ========================================================================
  49. def replace_sheet(xml, sheet_name, data):
  50. pattern = '<gnm:Sheet .*?<gnm:Name>%s</gnm:Name>.*?(<gnm:Cells>.*?</gnm:Cells>)'
  51. regex = re.compile(pattern % sheet_name, re.DOTALL)
  52. cells = gen_cells(data)
  53. match = regex.search(xml)
  54. if not match:
  55. print 'Cannot find sheet %s' % sheet_name
  56. return xml
  57. a, b = match.span(1)
  58. xml2 = xml[:a] + cells + xml[b:]
  59. return xml2
  60. def gen_cells(data):
  61. # values for the ValueType attribute
  62. ValueType_Empty = 'ValueType="10"'
  63. ValueType_Number = 'ValueType="40"'
  64. ValueType_String = 'ValueType="60"'
  65. #
  66. parts = []
  67. parts.append('<gnm:Cells>')
  68. for i, row in enumerate(data):
  69. for j, val in enumerate(row):
  70. if val is None:
  71. attr = ValueType_Empty
  72. val = ''
  73. elif isinstance(val, (int, long, float)):
  74. attr = ValueType_Number
  75. else:
  76. attr = ValueType_String
  77. cell = ' <gnm:Cell Row="%d" Col="%d" %s>%s</gnm:Cell>'
  78. parts.append(cell % (i, j, attr, val))
  79. parts.append(' </gnm:Cells>')
  80. return '\n'.join(parts)
  81. # ========================================================================
  82. # functions to extract various data from the logs
  83. # ========================================================================
  84. CLOCK_FACTOR = 1
  85. def read_clock(x):
  86. timestamp = int(x, 16)
  87. return timestamp / CLOCK_FACTOR
  88. def get_clock_range(data):
  89. s = r"\[([0-9a-f]+)\] "
  90. r = re.compile(s)
  91. clocks = [read_clock(x) for x in r.findall(data)]
  92. return min(clocks), max(clocks)
  93. def gc_collect_rows(time0, data):
  94. s = r"""
  95. ----------- Full collection ------------------
  96. \| used before collection:
  97. \| in ArenaCollection: (\d+) bytes
  98. \| raw_malloced: (\d+) bytes
  99. \| used after collection:
  100. \| in ArenaCollection: (\d+) bytes
  101. \| raw_malloced: (\d+) bytes
  102. \| number of major collects: (\d+)
  103. `----------------------------------------------
  104. \[([0-9a-f]+)\] gc-collect\}"""
  105. #
  106. r = re.compile(s.replace('\n', ''))
  107. yield 'clock', 'gc-before', 'gc-after'
  108. for a,b,c,d,e,f in r.findall(data):
  109. clock = read_clock(f) - time0
  110. yield clock, int(a)+int(b), int(c)+int(d)
  111. def tasks_rows(time0, data):
  112. s = r"""
  113. \[([0-9a-f]+)\] \{translation-task
  114. starting ([\w-]+)
  115. """
  116. #
  117. r = re.compile(s.replace('\n', ''))
  118. yield 'clock', None, 'task'
  119. for a,b in r.findall(data):
  120. clock = read_clock(a) - time0
  121. yield clock, 1, b
  122. def loops_rows(time0, data):
  123. s = r"""
  124. \[([0-9a-f]+)\] \{jit-mem-looptoken-(alloc|free)
  125. (.*?)\[
  126. """
  127. #
  128. r = re.compile(s.replace('\n', ''))
  129. yield 'clock', 'total', 'loops', 'bridges'
  130. loops = 0
  131. bridges = 0
  132. fake_total = 0
  133. for clock, action, text in r.findall(data):
  134. clock = read_clock(clock) - time0
  135. if text.startswith('allocating Loop #'):
  136. loops += 1
  137. elif text.startswith('allocating Bridge #'):
  138. bridges += 1
  139. elif text.startswith('freeing Loop #'):
  140. match = re.match('freeing Loop # .* with ([0-9]*) attached bridges', text)
  141. loops -=1
  142. bridges -= int(match.group(1))
  143. total = loops+bridges
  144. yield clock, loops+bridges, loops, bridges
  145. def vmrss_rows(filename, maxtime):
  146. lines = []
  147. if filename:
  148. try:
  149. lines = open(filename).readlines()
  150. except IOError:
  151. print 'Warning: cannot find file %s, skipping this sheet' % filename
  152. for row in vmrss_rows_impl(lines, maxtime):
  153. yield row
  154. def vmrss_rows_impl(lines, maxtime):
  155. yield 'inferred clock', 'VmRSS'
  156. numlines = len(lines)
  157. for i, line in enumerate(lines):
  158. mem = int(line)
  159. clock = maxtime * i // (numlines-1)
  160. yield clock, mem
  161. if __name__ == '__main__':
  162. CLOCK_FACTOR = 1000000000.0 # report GigaTicks instead of Ticks
  163. parser = optparse.OptionParser(usage="%prog logfile [options]")
  164. parser.format_description = lambda fmt: __doc__
  165. parser.description = __doc__
  166. parser.add_option('-c', '--cpython-vmrss', dest='cpython_vmrss', default=None, metavar='FILE', type=str,
  167. help='the .vmrss file produced by CPython')
  168. options, args = parser.parse_args()
  169. if len(args) != 1:
  170. parser.print_help()
  171. sys.exit(2)
  172. main(args[0], options)