PageRenderTime 241ms CodeModel.GetById 19ms RepoModel.GetById 3ms app.codeStats 0ms

/pypy/tool/import_graph.py

https://bitbucket.org/pypy/pypy/
Python | 211 lines | 205 code | 6 blank | 0 comment | 17 complexity | b8513db001f58211a9778300fc1ed7a9 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from __future__ import division
  2. import py
  3. import random
# Base names that include_file() always rejects, whatever directory they are in.
exclude_files = ["__init__.py", "conftest.py"]
  5. def include_file(path):
  6. if ("test" in str(path) or "tool" in str(path) or
  7. "documentation" in str(path) or
  8. "_cache" in str(path)):
  9. return False
  10. if path.basename in exclude_files:
  11. return False
  12. return True
  13. def get_mod_from_path(path):
  14. dirs = path.get("dirname")[0].split("/")
  15. pypyindex = dirs.index("pypy")
  16. return ".".join(dirs[pypyindex:] + path.get("purebasename"))
  17. def find_references(path):
  18. refs = []
  19. for line in path.open("r"):
  20. if line.startswith(" "): # ignore local imports to reduce graph size
  21. continue
  22. if "\\" in line: #ignore line continuations
  23. continue
  24. line = line.strip()
  25. line = line.split("#")[0].strip()
  26. if line.startswith("import pypy."): # import pypy.bla.whatever
  27. if " as " not in line:
  28. refs.append((line[7:].strip(), None))
  29. else: # import pypy.bla.whatever as somethingelse
  30. assert line.count(" as ") == 1
  31. line = line.split(" as ")
  32. refs.append((line[0][7:].strip(), line[1].strip()))
  33. elif line.startswith("from ") and "pypy" in line: #from pypy.b import a
  34. line = line[5:]
  35. if " as " not in line:
  36. line = line.split(" import ")
  37. what = line[1].split(",")
  38. for w in what:
  39. refs.append((line[0].strip() + "." + w.strip(), None))
  40. else: # prom pypy.b import a as c
  41. if line.count(" as ") != 1 or "," in line:
  42. print"can't handle this: " + line
  43. continue
  44. line = line.split(" as ")
  45. what = line[0].replace(" import ", ".").replace(" ", "")
  46. refs.append((what, line[1].strip()))
  47. return refs
  48. def get_module(ref, imports):
  49. ref = ref.split(".")
  50. i = len(ref)
  51. while i:
  52. possible_mod = ".".join(ref[:i])
  53. if possible_mod in imports:
  54. return possible_mod
  55. i -= 1
  56. return None
  57. def casteljeau(points, t):
  58. points = points[:]
  59. while len(points) > 1:
  60. for i in range(len(points) - 1):
  61. points[i] = points[i] * (1 - t) + points[i + 1] * t
  62. del points[-1]
  63. return points[0]
  64. def color(t):
  65. casteljeau([0, 0, 1, 0, 0], t) / 0.375
  66. class ModuleGraph(object):
  67. def __init__(self, path):
  68. self.imports = {}
  69. self.clusters = {}
  70. self.mod_to_cluster = {}
  71. for f in path.visit("*.py"):
  72. if include_file(f):
  73. self.imports[get_mod_from_path(f)] = find_references(f)
  74. self.remove_object_refs()
  75. self.remove_double_refs()
  76. self.incoming = {}
  77. for mod in self.imports:
  78. self.incoming[mod] = set()
  79. for mod, refs in self.imports.iteritems():
  80. for ref in refs:
  81. if ref[0] in self.incoming:
  82. self.incoming[ref[0]].add(mod)
  83. self.remove_single_nodes()
  84. self.topgraph_properties = ["rankdir=LR"]
  85. def remove_object_refs(self):
  86. # reduces cases like import rpython.translator.genc.basetype.CType to
  87. # import rpython.translator.genc.basetype
  88. for mod, refs in self.imports.iteritems():
  89. i = 0
  90. while i < len(refs):
  91. if refs[i][0] in self.imports:
  92. i += 1
  93. else:
  94. nref = get_module(refs[i][0], self.imports)
  95. if nref is None:
  96. print "removing", repr(refs[i])
  97. del refs[i]
  98. else:
  99. refs[i] = (nref, None)
  100. i += 1
  101. def remove_double_refs(self):
  102. # remove several references to the same module
  103. for mod, refs in self.imports.iteritems():
  104. i = 0
  105. seen_refs = set()
  106. while i < len(refs):
  107. if refs[i] not in seen_refs:
  108. seen_refs.add(refs[i])
  109. i += 1
  110. else:
  111. del refs[i]
  112. def remove_single_nodes(self):
  113. # remove nodes that have no attached edges
  114. rem = []
  115. for mod, refs in self.imports.iteritems():
  116. if len(refs) == 0 and len(self.incoming[mod]) == 0:
  117. rem.append(mod)
  118. for m in rem:
  119. del self.incoming[m]
  120. del self.imports[m]
  121. def create_clusters(self):
  122. self.topgraph_properties.append("compound=true;")
  123. self.clustered = True
  124. hierarchy = [set() for i in range(6)]
  125. for mod in self.imports:
  126. for i, d in enumerate(mod.split(".")):
  127. hierarchy[i].add(d)
  128. for i in range(6):
  129. if len(hierarchy[i]) != 1:
  130. break
  131. for mod in self.imports:
  132. cluster = mod.split(".")[i]
  133. if i == len(mod.split(".")) - 1:
  134. continue
  135. if cluster not in self.clusters:
  136. self.clusters[cluster] = set()
  137. self.clusters[cluster].add(mod)
  138. self.mod_to_cluster[mod] = cluster
  139. def remove_tangling_randomly(self):
  140. # remove edges to nodes that have a lot incoming edges randomly
  141. tangled = []
  142. for mod, incoming in self.incoming.iteritems():
  143. if len(incoming) > 10:
  144. tangled.append(mod)
  145. for mod in tangled:
  146. remove = set()
  147. incoming = self.incoming[mod]
  148. while len(remove) < len(incoming) * 0.80:
  149. remove.add(random.choice(list(incoming)))
  150. for rem in remove:
  151. for i in range(len(self.imports[rem])):
  152. if self.imports[rem][i][1] == mod:
  153. break
  154. del self.imports[rem][i]
  155. incoming.remove(rem)
  156. print "removing", mod, "<-", rem
  157. self.remove_single_nodes()
  158. def dotfile(self, dot):
  159. f = dot.open("w")
  160. f.write("digraph G {\n")
  161. for prop in self.topgraph_properties:
  162. f.write("\t%s\n" % prop)
  163. #write clusters and inter-cluster edges
  164. for cluster, nodes in self.clusters.iteritems():
  165. f.write("\tsubgraph cluster_%s {\n" % cluster)
  166. f.write("\t\tstyle=filled;\n\t\tcolor=lightgrey\n")
  167. for node in nodes:
  168. f.write('\t\t"%s";\n' % node[5:])
  169. for mod, refs in self.imports.iteritems():
  170. for ref in refs:
  171. if mod in nodes and ref[0] in nodes:
  172. f.write('\t\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
  173. f.write("\t}\n")
  174. #write edges between clusters
  175. for mod, refs in self.imports.iteritems():
  176. try:
  177. nodes = self.clusters[self.mod_to_cluster[mod]]
  178. except KeyError:
  179. nodes = set()
  180. for ref in refs:
  181. if ref[0] not in nodes:
  182. f.write('\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
  183. f.write("}")
  184. f.close()
  185. if __name__ == "__main__":
  186. import sys
  187. if len(sys.argv) > 1:
  188. path = py.path.local(sys.argv[1])
  189. else:
  190. path = py.path.local(".")
  191. gr = ModuleGraph(path)
  192. gr.create_clusters()
  193. dot = path.join("import_graph.dot")
  194. gr.dotfile(dot)