PageRenderTime 27ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/xdebugtoolkit/cgparser.py

http://xdebugtoolkit.googlecode.com/
Python | 400 lines | 393 code | 4 blank | 3 comment | 4 complexity | 75eac2d17d4603a064f117bb9889d22d MD5 | raw file
  1. """
  2. The cgparser package parses xdebug's callgrind files into an
  3. in-memory structure. The structure is kept flat, i.e. no trees
  4. are built. It also does not compensate for the fact that xdebug's
  5. callgrind files record only the ends (not the starts) of calls;
  6. code using this package has to handle that itself.
  7. The supported format is currently limited by these requirements:
  8. - only non-appended files are supported: xdebug.profiler_append=0
  9. - the file version must be 0.9.6 (compatible at least with
  10. xdebug 2.0.0..2.0.4, probably with earlier versions too)
  11. http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
  12. """
  13. import weakref
  class CgParseError(Exception):
      """Error raised by the parser when a callgrind file cannot be parsed."""
  class FileName(object):
      """
      Flyweight pattern implementation for storing file names.

      Instances are pooled by value in a weak-value dictionary, so asking
      for a value whose instance is still alive returns that same object.
      """

      _FileNamePool = weakref.WeakValueDictionary()

      def __new__(cls, value):
          pool = FileName._FileNamePool
          try:
              # Reuse the pooled instance when one is still alive.
              return pool[value]
          except KeyError:
              instance = object.__new__(cls)
              pool[value] = instance
              return instance

      def __init__(self, value):
          # Runs on every construction, including when __new__ returned
          # a pooled instance; it simply stores the same value again.
          self._value = value

      def __str__(self):
          return self._value
  class FunctionName(object):
      """
      Flyweight pattern implementation for storing function names.

      Besides the raw name, the instance keeps a "clean" form with a known
      prefix removed: 'php::' names keep the remainder as a plain string,
      while require/require_once/include/include_once names wrap the
      remainder in a FileName.
      """

      _FunctionNamePool = weakref.WeakValueDictionary()

      def __new__(cls, value):
          pool = FunctionName._FunctionNamePool
          try:
              # Reuse the pooled instance when one is still alive.
              return pool[value]
          except KeyError:
              instance = object.__new__(cls)
              pool[value] = instance
              return instance

      def __init__(self, value):
          self._value = value
          self._type = None
          self._clean = value

          if value.startswith('php::'):
              self._type = 'php'
              self._clean = value[5:]
              return

          # The remaining known prefixes all name a file, so their clean
          # form is a FileName built from whatever follows the '::' marker.
          for kind in ('require', 'require_once', 'include', 'include_once'):
              marker = kind + '::'
              if value.startswith(marker):
                  self._type = kind
                  self._clean = FileName(value[len(marker):])
                  break

      def __str__(self):
          return self._value

      def get_clean(self):
          """Return the name with its recognised prefix (if any) removed."""
          return self._clean
  class RawHeader:
      """Holds the version, cmd, part and events values of a callgrind header."""

      def __init__(self, version, cmd, part, events):
          self._version, self._cmd = version, cmd
          self._part, self._events = part, events

      def get_version(self):
          return self._version

      def get_cmd(self):
          return self._cmd

      def get_part(self):
          return self._part

      def get_events(self):
          return self._events

      def to_cg(self):
          """Serialize the header back into callgrind text."""
          pieces = [
              'version: ', self._version, "\n",
              'cmd: ', self._cmd, "\n",
              'part: ', self._part, "\n",
              "\n",
              'events: ', self._events, "\n",
              "\n",
          ]
          return ''.join(pieces)
  class RawEntry(object):
      """
      The RawEntry class maps the following pieces of an entry in a
      callgrind file:
      - fl=
      - fn=
      - position
      - self time
      - the collection of subcalls, each represented by a RawCall
      """

      __slots__ = ('fn', 'fl', 'self_time', '_subcalls', 'summary', 'position')

      def __init__(self):
          self.fn = self.fl = None
          self.self_time = None
          self._subcalls = []
          self.summary = None
          self.position = None

      def add_subcall(self, call):
          self._subcalls.append(call)

      def get_subcalls(self):
          return self._subcalls

      def to_cg(self):
          """Serialize the entry, including its subcalls, into callgrind text."""
          fl_line = 'fl=' + str(self.fl) + "\n"
          fn_label = str(self.fn)
          chunks = [fl_line, 'fn=' + fn_label + "\n"]
          if fn_label == '{main}':
              # Only the {main} entry carries the summary line.
              chunks.extend(["\n", 'summary: ' + str(self.summary) + "\n", "\n"])
          chunks.append(str(self.position) + ' ' + str(self.self_time) + "\n")
          chunks.extend(call.to_cg() for call in self.get_subcalls())
          chunks.append("\n")
          return ''.join(chunks)
  class RawCall(object):
      """
      The RawCall class maps a subcall in a callgrind file and holds:
      - cfn=
      - calls=
      - the call's position
      - the call's inclusive time
      """

      __slots__ = ('cfn', 'position', 'inclusive_time')

      def __init__(self):
          self.cfn = self.position = self.inclusive_time = None

      def to_cg(self):
          """Serialize the subcall into callgrind text."""
          return ''.join([
              'cfn=' + str(self.cfn) + "\n",
              # The calls= line is always written with this fixed value.
              'calls=' + '1 0 0' + "\n",
              str(self.position) + ' ' + str(self.inclusive_time) + "\n",
          ])
  class RawBody:
      """Pairs a header object with the sequence of parsed body entries."""

      def __init__(self, header, body):
          self._header, self._body = header, body

      def get_header(self):
          return self._header

      def get_body(self):
          return self._body

      def to_cg(self):
          """Serialize the header followed by every entry, in order."""
          parts = [self._header.to_cg()]
          parts.extend(entry.to_cg() for entry in self._body)
          return ''.join(parts)
  class XdebugCachegrindFsaParser:
      """
      A low-level FSA based lexer.

      The file is opened in the constructor and stays open until close()
      is called; the instance can also be used as a context manager, which
      closes the file on exit.
      """

      # header states
      # -2 got eof or fl, finish parsing
      # -1 error, finish parsing
      # 0 start
      # 1 got version, expecting cmd
      # 2 got cmd, expecting part
      # 3 got part, expecting events
      # 4 got events, expecting fl or eof
      #
      # Indexed as header_fsm[token][current_state] -> next state.
      header_fsm = {
          #    0   1   2   3   4
          0: [ 1, -1, -1, -1, -1],  # version
          1: [-1,  2, -1, -1, -1],  # cmd
          2: [-1, -1,  3, -1, -1],  # part
          3: [-1, -1, -1,  4, -1],  # events
          4: [-1, -1, -1, -1, -2],  # fl
          5: [-1, -1, -1, -1, -2],  # eof
      }

      # body states:
      # -2 got eof, finish parsing
      # -1 error, finish parsing
      # 0 got header line, expecting more header lines or fl or eof
      # 1 got fl, expecting fn
      # 2 got fn, expecting num or summary
      # 3 got num, expecting fl or cfn or eof
      # 4 got cfn, expecting calls
      # 5 got calls, expecting subcall num
      # 6 got subcall num, expecting fl or cfn or eof
      # 7 got summary, expecting num
      #
      # Indexed as body_fsm[token][current_state] -> next state.
      body_fsm = {
          #    0   1   2   3   4   5   6   7
          0: [ 0, -1, -1, -1, -1, -1, -1, -1],  # header
          1: [ 1, -1, -1,  1, -1, -1,  1, -1],  # fl
          2: [-1,  2, -1, -1, -1, -1, -1, -1],  # fn
          3: [-1, -1,  3, -1, -1,  6, -1,  3],  # num
          4: [-1, -1, -1,  4, -1, -1,  4, -1],  # cfn
          5: [-1, -1, -1, -1,  5, -1, -1, -1],  # calls
          6: [-1, -1,  7, -1, -1, -1, -1, -1],  # summary
          7: [-2, -1, -1, -2, -1, -1, -2, -1],  # eof
      }

      def __init__(self, filename):
          """Open *filename* for parsing."""
          # open() instead of the Python 2-only file() builtin. The 'U'
          # mode flag is not used because Python 3.11 rejects it; line
          # terminators are stripped explicitly while reading instead.
          self.fh = open(filename, 'r')

      def close(self):
          """Close the underlying file handle."""
          self.fh.close()

      def __enter__(self):
          return self

      def __exit__(self, exc_type, exc_value, traceback):
          self.close()
          return False

      def _next_line(self):
          """Return the next raw line of the file, or None at end of file."""
          try:
              # next() works on both Python 2.6+ and Python 3 file objects,
              # unlike the Python 2-only fh.next() method.
              return next(self.fh)
          except StopIteration:
              return None

      @staticmethod
      def _header_token(text):
          """Map a header line (terminator already stripped) to its token.

          Returns None for a line that is not a recognised header token.
          """
          if text == 'version: 0.9.6':
              return 0
          if text.startswith('cmd: '):
              return 1
          if text == 'part: 1':
              return 2
          if text == 'events: Time':
              return 3
          if text.startswith('fl='):
              return 4
          return None

      @staticmethod
      def _body_token(text, state):
          """Map a non-empty body line (terminator stripped) to its token.

          A line matching no known prefix counts as a header line (token 0)
          only while the lexer is still in state 0; otherwise None.
          """
          if text[0].isdigit():
              return 3
          if text.startswith('fl='):
              return 1
          if text.startswith('fn='):
              return 2
          if text.startswith('cfn='):
              return 4
          if text.startswith('calls='):
              return 5
          if text.startswith('summary: '):
              return 6
          if state == 0:
              return 0
          return None

      def get_header(self):
          """Parse the header lines from the start of the file.

          Returns:
              A RawHeader built from the fixed version/part/events values
              and the text following 'cmd: '.

          Raises:
              CgParseError: with (line_no, line, token) when the lines do
                  not follow the order the header state machine expects.
                  line is None when the error is hit at end of file.
          """
          self.fh.seek(0)
          state = 0
          line_no = 0
          cmd = None
          while True:
              line = self._next_line()
              if line is None:
                  token = 5
              else:
                  line_no += 1
                  text = line.rstrip('\r\n')
                  if not text:
                      continue
                  token = self._header_token(text)
              try:
                  state = self.header_fsm[token][state]
              except KeyError:
                  # Unrecognised line: token is None, which has no row.
                  state = -1
              if state == -2:
                  break
              elif state == -1:
                  raise CgParseError(line_no, line, token)
              elif state == 2:
                  cmd = text[5:]
          return RawHeader('0.9.6', cmd, '1', 'Time')

      def get_body(self):
          """Parse the whole file into a flat list of entries.

          Returns:
              A RawBody holding the parsed header and a list of RawEntry
              objects, each carrying its RawCall subcalls.

          Raises:
              CgParseError: with (line_no, line, token) when a line is not
                  valid in the current lexer state. line is None when the
                  error is hit at end of file.
          """
          body = []
          # Strong references keep the pooled name objects alive (and
          # shared) for the duration of the parse.
          fl_cache = {}
          fn_cache = {}
          header = self.get_header()
          self.fh.seek(0)
          state = 0
          line_no = 0
          raw_entry = None
          raw_call = None
          while True:
              line = self._next_line()
              if line is None:
                  token = 7
              else:
                  line_no += 1
                  text = line.rstrip('\r\n')
                  if not text:
                      continue
                  token = self._body_token(text, state)
              try:
                  state = self.body_fsm[token][state]
              except KeyError:
                  # Unrecognised line: token is None, which has no row.
                  state = -1
              if state == 1:
                  fl = text[3:]
                  # every fl= line starts a new entry
                  raw_entry = RawEntry()
                  body.append(raw_entry)
                  try:
                      raw_entry.fl = fl_cache[fl]
                  except KeyError:
                      raw_entry.fl = fl_cache[fl] = FileName(fl)
              elif state == 2:
                  fn = text[3:]
                  try:
                      raw_entry.fn = fn_cache[fn]
                  except KeyError:
                      raw_entry.fn = fn_cache[fn] = FunctionName(fn)
              elif state == 3:
                  position, time_taken = map(int, text.split(' '))
                  raw_entry.position = position
                  raw_entry.self_time = time_taken
              elif state == 4:
                  cfn = text[4:]
                  # every cfn= line starts a new subcall of the entry
                  raw_call = RawCall()
                  raw_entry.add_subcall(raw_call)
                  try:
                      raw_call.cfn = fn_cache[cfn]
                  except KeyError:
                      raw_call.cfn = fn_cache[cfn] = FunctionName(cfn)
              elif state == 5:
                  # The value after 'calls=' is not stored anywhere.
                  pass
              elif state == 6:
                  position, time_taken = map(int, text.split(' '))
                  # set raw_call's time and position
                  raw_call.position = position
                  raw_call.inclusive_time = time_taken
              elif state == 7:
                  raw_entry.summary = int(text[9:])
              elif state == -2:
                  break
              elif state == -1:
                  raise CgParseError(line_no, line, token)
          return RawBody(header, body)
  317. raw_entry.summary = summary
  318. elif state == -2:
  319. break
  320. elif state == -1:
  321. raise CgParseError(line_no, line, token)
  322. return RawBody(header, body)