PageRenderTime 41ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/dotviewer/graphparse.py

https://bitbucket.org/pypy/pypy/
Python | 160 lines | 140 code | 9 blank | 11 comment | 23 complexity | 9f06ffc58d37630764bb014c735f5e37 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. """
  2. Graph file parsing.
  3. """
  4. import sys, re
  5. import subprocess
  6. import msgstruct
  7. re_nonword = re.compile(r'([^0-9a-zA-Z_.]+)')
  8. re_plain = re.compile(r'graph [-0-9.]+ [-0-9.]+ [-0-9.]+$', re.MULTILINE)
  9. re_digraph = re.compile(r'\b(graph|digraph)\b', re.IGNORECASE)
  10. def guess_type(content):
  11. # try to see whether it is a directed graph or not,
  12. # or already a .plain file
  13. # XXX not a perfect heursitic
  14. if re_plain.match(content):
  15. return 'plain' # already a .plain file
  16. # look for the word 'graph' or 'digraph' followed by a '{'.
  17. bracepos = None
  18. lastfound = ''
  19. for match in re_digraph.finditer(content):
  20. position = match.start()
  21. if bracepos is None:
  22. bracepos = content.find('{', position)
  23. if bracepos < 0:
  24. break
  25. elif position > bracepos:
  26. break
  27. lastfound = match.group()
  28. if lastfound.lower() == 'digraph':
  29. return 'dot'
  30. if lastfound.lower() == 'graph':
  31. return 'neato'
  32. print >> sys.stderr, "Warning: could not guess file type, using 'dot'"
  33. return 'unknown'
  34. def dot2plain_graphviz(content, contenttype, use_codespeak=False):
  35. if contenttype != 'neato':
  36. cmdline = 'dot -Tplain'
  37. else:
  38. cmdline = 'neato -Tplain'
  39. #print >> sys.stderr, '* running:', cmdline
  40. close_fds = sys.platform != 'win32'
  41. p = subprocess.Popen(cmdline, shell=True, close_fds=close_fds,
  42. stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  43. (child_in, child_out) = (p.stdin, p.stdout)
  44. try:
  45. import thread
  46. except ImportError:
  47. bkgndwrite(child_in, content)
  48. else:
  49. thread.start_new_thread(bkgndwrite, (child_in, content))
  50. plaincontent = child_out.read()
  51. child_out.close()
  52. if not plaincontent: # 'dot' is likely not installed
  53. raise PlainParseError("no result from running 'dot'")
  54. return plaincontent
  55. def dot2plain_codespeak(content, contenttype):
  56. import urllib
  57. request = urllib.urlencode({'dot': content})
  58. url = 'http://codespeak.net/pypy/convertdot.cgi'
  59. print >> sys.stderr, '* posting:', url
  60. g = urllib.urlopen(url, data=request)
  61. result = []
  62. while True:
  63. data = g.read(16384)
  64. if not data:
  65. break
  66. result.append(data)
  67. g.close()
  68. plaincontent = ''.join(result)
  69. # very simple-minded way to give a somewhat better error message
  70. if plaincontent.startswith('<body'):
  71. raise Exception("the dot on codespeak has very likely crashed")
  72. return plaincontent
  73. def bkgndwrite(f, data):
  74. f.write(data)
  75. f.close()
  76. class PlainParseError(Exception):
  77. pass
  78. def splitline(line, re_word = re.compile(r'[^\s"]\S*|["]["]|["].*?[^\\]["]')):
  79. import ast
  80. result = []
  81. for word in re_word.findall(line):
  82. if word.startswith('"'):
  83. word = ast.literal_eval(word)
  84. result.append(word)
  85. return result
  86. def parse_plain(graph_id, plaincontent, links={}, fixedfont=False):
  87. plaincontent = plaincontent.replace('\r\n', '\n') # fix Windows EOL
  88. lines = plaincontent.splitlines(True)
  89. for i in range(len(lines)-2, -1, -1):
  90. if lines[i].endswith('\\\n'): # line ending in '\'
  91. lines[i] = lines[i][:-2] + lines[i+1]
  92. del lines[i+1]
  93. header = splitline(lines.pop(0))
  94. if header[0] != 'graph':
  95. raise PlainParseError("should start with 'graph'")
  96. yield (msgstruct.CMSG_START_GRAPH, graph_id) + tuple(header[1:])
  97. texts = []
  98. for line in lines:
  99. line = splitline(line)
  100. if line[0] == 'node':
  101. if len(line) != 11:
  102. raise PlainParseError("bad 'node'")
  103. yield (msgstruct.CMSG_ADD_NODE,) + tuple(line[1:])
  104. texts.append(line[6])
  105. if line[0] == 'edge':
  106. yield (msgstruct.CMSG_ADD_EDGE,) + tuple(line[1:])
  107. i = 4 + 2 * int(line[3])
  108. if len(line) > i + 2:
  109. texts.append(line[i])
  110. if line[0] == 'stop':
  111. break
  112. if links:
  113. # only include the links that really appear in the graph
  114. seen = {}
  115. for text in texts:
  116. for word in re_nonword.split(text):
  117. if word and word in links and word not in seen:
  118. t = links[word]
  119. if isinstance(t, tuple):
  120. statusbartext, color = t
  121. else:
  122. statusbartext = t
  123. color = None
  124. if color is not None:
  125. yield (msgstruct.CMSG_ADD_LINK, word,
  126. statusbartext, color[0], color[1], color[2])
  127. else:
  128. yield (msgstruct.CMSG_ADD_LINK, word, statusbartext)
  129. seen[word] = True
  130. if fixedfont:
  131. yield (msgstruct.CMSG_FIXED_FONT,)
  132. yield (msgstruct.CMSG_STOP_GRAPH,)
  133. def parse_dot(graph_id, content, links={}, fixedfont=False):
  134. contenttype = guess_type(content)
  135. if contenttype == 'plain':
  136. plaincontent = content
  137. else:
  138. try:
  139. plaincontent = dot2plain_graphviz(content, contenttype)
  140. except PlainParseError, e:
  141. raise
  142. ##print e
  143. ### failed, retry via codespeak
  144. ##plaincontent = dot2plain_codespeak(content, contenttype)
  145. return list(parse_plain(graph_id, plaincontent, links, fixedfont))