/Lib/modulefinder.py

http://unladen-swallow.googlecode.com/ · Python · 646 lines · 530 code · 56 blank · 60 comment · 156 complexity · ca9755f3abcf7bdf84ad447872cf1fb0 MD5 · raw file

  1. """Find modules used by a script, using introspection."""
  2. from __future__ import generators
  3. import dis
  4. import imp
  5. import marshal
  6. import os
  7. import sys
  8. import types
  9. import struct
# Mode used when opening Python source files for reading.
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"  # universal line endings
else:
    # remain compatible with Python < 2.3
    READ_MODE = "r"

# Opcode values as one-character strings, so that they can be compared
# directly against individual characters of a code object's co_code.
IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME'))
LOAD_GLOBAL = chr(dis.opname.index('LOAD_GLOBAL'))
LOAD_CONST = chr(dis.opname.index('LOAD_CONST'))
STORE_NAME = chr(dis.opname.index('STORE_NAME'))
STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL'))
# Store opcodes whose target names the scanner reports as "store" events.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Opcodes comparing >= this value are treated as carrying a 2-byte argument.
HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT)
  22. # Modulefinder does a good job at simulating Python's, but it can not
  23. # handle __path__ modifications packages make at runtime. Therefore there
  24. # is a mechanism whereby you can register extra paths in this map for a
  25. # package, and it will be honored.
  26. # Note this is a mapping is lists of paths.
  27. packagePathMap = {}
  28. # A Public interface
  29. def AddPackagePath(packagename, path):
  30. paths = packagePathMap.get(packagename, [])
  31. paths.append(path)
  32. packagePathMap[packagename] = paths
  33. replacePackageMap = {}
  34. # This ReplacePackage mechanism allows modulefinder to work around the
  35. # way the _xmlplus package injects itself under the name "xml" into
  36. # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
  37. # before running ModuleFinder.
  38. def ReplacePackage(oldname, newname):
  39. replacePackageMap[oldname] = newname
  40. class Module:
  41. def __init__(self, name, file=None, path=None):
  42. self.__name__ = name
  43. self.__file__ = file
  44. self.__path__ = path
  45. self.__code__ = None
  46. # The set of global names that are assigned to in the module.
  47. # This includes those names imported through starimports of
  48. # Python modules.
  49. self.globalnames = {}
  50. # The set of starimports this module did that could not be
  51. # resolved, ie. a starimport from a non-Python module.
  52. self.starimports = {}
  53. def __repr__(self):
  54. s = "Module(%r" % (self.__name__,)
  55. if self.__file__ is not None:
  56. s = s + ", %r" % (self.__file__,)
  57. if self.__path__ is not None:
  58. s = s + ", %r" % (self.__path__,)
  59. s = s + ")"
  60. return s
  61. class ModuleFinder:
  62. def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
  63. if path is None:
  64. path = sys.path
  65. self.path = path
  66. self.modules = {}
  67. self.badmodules = {}
  68. self.debug = debug
  69. self.indent = 0
  70. self.excludes = excludes
  71. self.replace_paths = replace_paths
  72. self.processed_paths = [] # Used in debugging only
  73. def msg(self, level, str, *args):
  74. if level <= self.debug:
  75. for i in range(self.indent):
  76. print " ",
  77. print str,
  78. for arg in args:
  79. print repr(arg),
  80. print
  81. def msgin(self, *args):
  82. level = args[0]
  83. if level <= self.debug:
  84. self.indent = self.indent + 1
  85. self.msg(*args)
  86. def msgout(self, *args):
  87. level = args[0]
  88. if level <= self.debug:
  89. self.indent = self.indent - 1
  90. self.msg(*args)
  91. def run_script(self, pathname):
  92. self.msg(2, "run_script", pathname)
  93. fp = open(pathname, READ_MODE)
  94. stuff = ("", "r", imp.PY_SOURCE)
  95. self.load_module('__main__', fp, pathname, stuff)
  96. def load_file(self, pathname):
  97. dir, name = os.path.split(pathname)
  98. name, ext = os.path.splitext(name)
  99. fp = open(pathname, READ_MODE)
  100. stuff = (ext, "r", imp.PY_SOURCE)
  101. self.load_module(name, fp, pathname, stuff)
  102. def import_hook(self, name, caller=None, fromlist=None, level=-1):
  103. self.msg(3, "import_hook", name, caller, fromlist, level)
  104. parent = self.determine_parent(caller, level=level)
  105. q, tail = self.find_head_package(parent, name)
  106. m = self.load_tail(q, tail)
  107. if not fromlist:
  108. return q
  109. if m.__path__:
  110. self.ensure_fromlist(m, fromlist)
  111. return None
  112. def determine_parent(self, caller, level=-1):
  113. self.msgin(4, "determine_parent", caller, level)
  114. if not caller or level == 0:
  115. self.msgout(4, "determine_parent -> None")
  116. return None
  117. pname = caller.__name__
  118. if level >= 1: # relative import
  119. if caller.__path__:
  120. level -= 1
  121. if level == 0:
  122. parent = self.modules[pname]
  123. assert parent is caller
  124. self.msgout(4, "determine_parent ->", parent)
  125. return parent
  126. if pname.count(".") < level:
  127. raise ImportError, "relative importpath too deep"
  128. pname = ".".join(pname.split(".")[:-level])
  129. parent = self.modules[pname]
  130. self.msgout(4, "determine_parent ->", parent)
  131. return parent
  132. if caller.__path__:
  133. parent = self.modules[pname]
  134. assert caller is parent
  135. self.msgout(4, "determine_parent ->", parent)
  136. return parent
  137. if '.' in pname:
  138. i = pname.rfind('.')
  139. pname = pname[:i]
  140. parent = self.modules[pname]
  141. assert parent.__name__ == pname
  142. self.msgout(4, "determine_parent ->", parent)
  143. return parent
  144. self.msgout(4, "determine_parent -> None")
  145. return None
  146. def find_head_package(self, parent, name):
  147. self.msgin(4, "find_head_package", parent, name)
  148. if '.' in name:
  149. i = name.find('.')
  150. head = name[:i]
  151. tail = name[i+1:]
  152. else:
  153. head = name
  154. tail = ""
  155. if parent:
  156. qname = "%s.%s" % (parent.__name__, head)
  157. else:
  158. qname = head
  159. q = self.import_module(head, qname, parent)
  160. if q:
  161. self.msgout(4, "find_head_package ->", (q, tail))
  162. return q, tail
  163. if parent:
  164. qname = head
  165. parent = None
  166. q = self.import_module(head, qname, parent)
  167. if q:
  168. self.msgout(4, "find_head_package ->", (q, tail))
  169. return q, tail
  170. self.msgout(4, "raise ImportError: No module named", qname)
  171. raise ImportError, "No module named " + qname
  172. def load_tail(self, q, tail):
  173. self.msgin(4, "load_tail", q, tail)
  174. m = q
  175. while tail:
  176. i = tail.find('.')
  177. if i < 0: i = len(tail)
  178. head, tail = tail[:i], tail[i+1:]
  179. mname = "%s.%s" % (m.__name__, head)
  180. m = self.import_module(head, mname, m)
  181. if not m:
  182. self.msgout(4, "raise ImportError: No module named", mname)
  183. raise ImportError, "No module named " + mname
  184. self.msgout(4, "load_tail ->", m)
  185. return m
  186. def ensure_fromlist(self, m, fromlist, recursive=0):
  187. self.msg(4, "ensure_fromlist", m, fromlist, recursive)
  188. for sub in fromlist:
  189. if sub == "*":
  190. if not recursive:
  191. all = self.find_all_submodules(m)
  192. if all:
  193. self.ensure_fromlist(m, all, 1)
  194. elif not hasattr(m, sub):
  195. subname = "%s.%s" % (m.__name__, sub)
  196. submod = self.import_module(sub, subname, m)
  197. if not submod:
  198. raise ImportError, "No module named " + subname
  199. def find_all_submodules(self, m):
  200. if not m.__path__:
  201. return
  202. modules = {}
  203. # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
  204. # But we must also collect Python extension modules - although
  205. # we cannot separate normal dlls from Python extensions.
  206. suffixes = []
  207. for triple in imp.get_suffixes():
  208. suffixes.append(triple[0])
  209. for dir in m.__path__:
  210. try:
  211. names = os.listdir(dir)
  212. except os.error:
  213. self.msg(2, "can't list directory", dir)
  214. continue
  215. for name in names:
  216. mod = None
  217. for suff in suffixes:
  218. n = len(suff)
  219. if name[-n:] == suff:
  220. mod = name[:-n]
  221. break
  222. if mod and mod != "__init__":
  223. modules[mod] = mod
  224. return modules.keys()
  225. def import_module(self, partname, fqname, parent):
  226. self.msgin(3, "import_module", partname, fqname, parent)
  227. try:
  228. m = self.modules[fqname]
  229. except KeyError:
  230. pass
  231. else:
  232. self.msgout(3, "import_module ->", m)
  233. return m
  234. if fqname in self.badmodules:
  235. self.msgout(3, "import_module -> None")
  236. return None
  237. if parent and parent.__path__ is None:
  238. self.msgout(3, "import_module -> None")
  239. return None
  240. try:
  241. fp, pathname, stuff = self.find_module(partname,
  242. parent and parent.__path__, parent)
  243. except ImportError:
  244. self.msgout(3, "import_module ->", None)
  245. return None
  246. try:
  247. m = self.load_module(fqname, fp, pathname, stuff)
  248. finally:
  249. if fp: fp.close()
  250. if parent:
  251. setattr(parent, partname, m)
  252. self.msgout(3, "import_module ->", m)
  253. return m
  254. def load_module(self, fqname, fp, pathname, file_info):
  255. suffix, mode, type = file_info
  256. self.msgin(2, "load_module", fqname, fp and "fp", pathname)
  257. if type == imp.PKG_DIRECTORY:
  258. m = self.load_package(fqname, pathname)
  259. self.msgout(2, "load_module ->", m)
  260. return m
  261. if type == imp.PY_SOURCE:
  262. co = compile(fp.read()+'\n', pathname, 'exec')
  263. elif type == imp.PY_COMPILED:
  264. if fp.read(4) != imp.get_magic():
  265. self.msgout(2, "raise ImportError: Bad magic number", pathname)
  266. raise ImportError, "Bad magic number in %s" % pathname
  267. fp.read(4)
  268. co = marshal.load(fp)
  269. else:
  270. co = None
  271. m = self.add_module(fqname)
  272. m.__file__ = pathname
  273. if co:
  274. if self.replace_paths:
  275. co = self.replace_paths_in_code(co)
  276. m.__code__ = co
  277. self.scan_code(co, m)
  278. self.msgout(2, "load_module ->", m)
  279. return m
  280. def _add_badmodule(self, name, caller):
  281. if name not in self.badmodules:
  282. self.badmodules[name] = {}
  283. if caller:
  284. self.badmodules[name][caller.__name__] = 1
  285. else:
  286. self.badmodules[name]["-"] = 1
  287. def _safe_import_hook(self, name, caller, fromlist, level=-1):
  288. # wrapper for self.import_hook() that won't raise ImportError
  289. if name in self.badmodules:
  290. self._add_badmodule(name, caller)
  291. return
  292. try:
  293. self.import_hook(name, caller, level=level)
  294. except ImportError, msg:
  295. self.msg(2, "ImportError:", str(msg))
  296. self._add_badmodule(name, caller)
  297. else:
  298. if fromlist:
  299. for sub in fromlist:
  300. if sub in self.badmodules:
  301. self._add_badmodule(sub, caller)
  302. continue
  303. try:
  304. self.import_hook(name, caller, [sub], level=level)
  305. except ImportError, msg:
  306. self.msg(2, "ImportError:", str(msg))
  307. fullname = name + "." + sub
  308. self._add_badmodule(fullname, caller)
    def scan_opcodes(self, co, unpack=struct.unpack):
        # Scan the code, and yield 'interesting' opcode combinations.
        # This supports the absolute and relative imports introduced in
        # Python 2.5.
        #
        # Generator yielding (kind, args) pairs:
        #   "store",           (name,)
        #   "import",          (const2, const3)         when const1 == -1
        #   "absolute_import", (const2, const3)         when const1 == 0
        #   "relative_import", (const1, const2, const3) otherwise
        # where const1..const3 are the three constants loaded immediately
        # before an IMPORT_NAME opcode.
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        # An import is recognised as three LOAD_CONSTs followed directly
        # by IMPORT_NAME.
        OPCODE_SIG = LOAD_CONST + LOAD_CONST + LOAD_CONST + IMPORT_NAME
        while code:
            c = code[0]
            if c in STORE_OPS:
                # One opcode byte followed by a little-endian 16-bit index
                # into co_names.
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            # Taking every third byte of the next 12 picks out the opcode
            # bytes of four consecutive 3-byte instructions.
            if code[:12:3] == OPCODE_SIG:
                # Decode the three LOAD_CONST arguments, skipping ('x')
                # each opcode byte.
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == -1: # normal import
                    yield "import", (consts[oparg_2], consts[oparg_3])
                elif level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2],
                                              consts[oparg_3])
                else: # relative import
                    yield "relative_import", (level,
                                              consts[oparg_2],
                                              consts[oparg_3])
                # Only the three LOAD_CONSTs are consumed here; the
                # IMPORT_NAME itself is skipped by the generic step below
                # on the next iteration.
                code = code[9:]
                continue
            # Generic step: compare the opcode characters to decide whether
            # this instruction has a 2-byte argument.
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
  342. def scan_code(self, co, m):
  343. code = co.co_code
  344. for what, args in self.scan_opcodes(co):
  345. if what == "store":
  346. name, = args
  347. m.globalnames[name] = 1
  348. elif what in ("import", "absolute_import"):
  349. fromlist, name = args
  350. have_star = 0
  351. if fromlist is not None:
  352. if "*" in fromlist:
  353. have_star = 1
  354. fromlist = [f for f in fromlist if f != "*"]
  355. if what == "absolute_import": level = 0
  356. else: level = -1
  357. self._safe_import_hook(name, m, fromlist, level=level)
  358. if have_star:
  359. # We've encountered an "import *". If it is a Python
  360. # module, the code has already been parsed and we can suck
  361. # out the global names.
  362. mm = None
  363. if m.__path__:
  364. # At this point we don't know whether 'name' is a
  365. # submodule of 'm' or a global module. Let's just try
  366. # the full name first.
  367. mm = self.modules.get(m.__name__ + "." + name)
  368. if mm is None:
  369. mm = self.modules.get(name)
  370. if mm is not None:
  371. m.globalnames.update(mm.globalnames)
  372. m.starimports.update(mm.starimports)
  373. if mm.__code__ is None:
  374. m.starimports[name] = 1
  375. else:
  376. m.starimports[name] = 1
  377. elif what == "relative_import":
  378. level, fromlist, name = args
  379. if name:
  380. self._safe_import_hook(name, m, fromlist, level=level)
  381. else:
  382. parent = self.determine_parent(m, level=level)
  383. self._safe_import_hook(parent.__name__, None, fromlist, level=0)
  384. else:
  385. # We don't expect anything else from the generator.
  386. raise RuntimeError(what)
  387. for c in co.co_consts:
  388. if isinstance(c, type(co)):
  389. self.scan_code(c, m)
  390. def load_package(self, fqname, pathname):
  391. self.msgin(2, "load_package", fqname, pathname)
  392. newname = replacePackageMap.get(fqname)
  393. if newname:
  394. fqname = newname
  395. m = self.add_module(fqname)
  396. m.__file__ = pathname
  397. m.__path__ = [pathname]
  398. # As per comment at top of file, simulate runtime __path__ additions.
  399. m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
  400. fp, buf, stuff = self.find_module("__init__", m.__path__)
  401. self.load_module(fqname, fp, buf, stuff)
  402. self.msgout(2, "load_package ->", m)
  403. return m
  404. def add_module(self, fqname):
  405. if fqname in self.modules:
  406. return self.modules[fqname]
  407. self.modules[fqname] = m = Module(fqname)
  408. return m
  409. def find_module(self, name, path, parent=None):
  410. if parent is not None:
  411. # assert path is not None
  412. fullname = parent.__name__+'.'+name
  413. else:
  414. fullname = name
  415. if fullname in self.excludes:
  416. self.msgout(3, "find_module -> Excluded", fullname)
  417. raise ImportError, name
  418. if path is None:
  419. if name in sys.builtin_module_names:
  420. return (None, None, ("", "", imp.C_BUILTIN))
  421. path = self.path
  422. return imp.find_module(name, path)
  423. def report(self):
  424. """Print a report to stdout, listing the found modules with their
  425. paths, as well as modules that are missing, or seem to be missing.
  426. """
  427. print
  428. print " %-25s %s" % ("Name", "File")
  429. print " %-25s %s" % ("----", "----")
  430. # Print modules found
  431. keys = self.modules.keys()
  432. keys.sort()
  433. for key in keys:
  434. m = self.modules[key]
  435. if m.__path__:
  436. print "P",
  437. else:
  438. print "m",
  439. print "%-25s" % key, m.__file__ or ""
  440. # Print missing modules
  441. missing, maybe = self.any_missing_maybe()
  442. if missing:
  443. print
  444. print "Missing modules:"
  445. for name in missing:
  446. mods = self.badmodules[name].keys()
  447. mods.sort()
  448. print "?", name, "imported from", ', '.join(mods)
  449. # Print modules that may be missing, but then again, maybe not...
  450. if maybe:
  451. print
  452. print "Submodules thay appear to be missing, but could also be",
  453. print "global names in the parent package:"
  454. for name in maybe:
  455. mods = self.badmodules[name].keys()
  456. mods.sort()
  457. print "?", name, "imported from", ', '.join(mods)
  458. def any_missing(self):
  459. """Return a list of modules that appear to be missing. Use
  460. any_missing_maybe() if you want to know which modules are
  461. certain to be missing, and which *may* be missing.
  462. """
  463. missing, maybe = self.any_missing_maybe()
  464. return missing + maybe
  465. def any_missing_maybe(self):
  466. """Return two lists, one with modules that are certainly missing
  467. and one with modules that *may* be missing. The latter names could
  468. either be submodules *or* just global names in the package.
  469. The reason it can't always be determined is that it's impossible to
  470. tell which names are imported when "from module import *" is done
  471. with an extension module, short of actually importing it.
  472. """
  473. missing = []
  474. maybe = []
  475. for name in self.badmodules:
  476. if name in self.excludes:
  477. continue
  478. i = name.rfind(".")
  479. if i < 0:
  480. missing.append(name)
  481. continue
  482. subname = name[i+1:]
  483. pkgname = name[:i]
  484. pkg = self.modules.get(pkgname)
  485. if pkg is not None:
  486. if pkgname in self.badmodules[name]:
  487. # The package tried to import this module itself and
  488. # failed. It's definitely missing.
  489. missing.append(name)
  490. elif subname in pkg.globalnames:
  491. # It's a global in the package: definitely not missing.
  492. pass
  493. elif pkg.starimports:
  494. # It could be missing, but the package did an "import *"
  495. # from a non-Python module, so we simply can't be sure.
  496. maybe.append(name)
  497. else:
  498. # It's not a global in the package, the package didn't
  499. # do funny star imports, it's very likely to be missing.
  500. # The symbol could be inserted into the package from the
  501. # outside, but since that's not good style we simply list
  502. # it missing.
  503. missing.append(name)
  504. else:
  505. missing.append(name)
  506. missing.sort()
  507. maybe.sort()
  508. return missing, maybe
  509. def replace_paths_in_code(self, co):
  510. new_filename = original_filename = os.path.normpath(co.co_filename)
  511. for f, r in self.replace_paths:
  512. if original_filename.startswith(f):
  513. new_filename = r + original_filename[len(f):]
  514. break
  515. if self.debug and original_filename not in self.processed_paths:
  516. if new_filename != original_filename:
  517. self.msgout(2, "co_filename %r changed to %r" \
  518. % (original_filename,new_filename,))
  519. else:
  520. self.msgout(2, "co_filename %r remains unchanged" \
  521. % (original_filename,))
  522. self.processed_paths.append(original_filename)
  523. consts = list(co.co_consts)
  524. for i in range(len(consts)):
  525. if isinstance(consts[i], type(co)):
  526. consts[i] = self.replace_paths_in_code(consts[i])
  527. return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
  528. co.co_flags, co.co_code, tuple(consts), co.co_names,
  529. co.co_varnames, new_filename, co.co_name,
  530. co.co_firstlineno, co.co_lnotab,
  531. co.co_freevars, co.co_cellvars)
  532. def test():
  533. # Parse command line
  534. import getopt
  535. try:
  536. opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
  537. except getopt.error, msg:
  538. print msg
  539. return
  540. # Process options
  541. debug = 1
  542. domods = 0
  543. addpath = []
  544. exclude = []
  545. for o, a in opts:
  546. if o == '-d':
  547. debug = debug + 1
  548. if o == '-m':
  549. domods = 1
  550. if o == '-p':
  551. addpath = addpath + a.split(os.pathsep)
  552. if o == '-q':
  553. debug = 0
  554. if o == '-x':
  555. exclude.append(a)
  556. # Provide default arguments
  557. if not args:
  558. script = "hello.py"
  559. else:
  560. script = args[0]
  561. # Set the path based on sys.path and the script directory
  562. path = sys.path[:]
  563. path[0] = os.path.dirname(script)
  564. path = addpath + path
  565. if debug > 1:
  566. print "path:"
  567. for item in path:
  568. print " ", repr(item)
  569. # Create the module finder and turn its crank
  570. mf = ModuleFinder(path, debug, exclude)
  571. for arg in args[1:]:
  572. if arg == '-m':
  573. domods = 1
  574. continue
  575. if domods:
  576. if arg[-2:] == '.*':
  577. mf.import_hook(arg[:-2], None, ["*"])
  578. else:
  579. mf.import_hook(arg)
  580. else:
  581. mf.load_file(arg)
  582. mf.run_script(script)
  583. mf.report()
  584. return mf # for -i debugging
# Script entry point: run the command-line driver.
if __name__ == '__main__':
    try:
        # Keep the returned finder in a module-level name so it can be
        # inspected afterwards (e.g. when run with "python -i").
        mf = test()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        print "\n[interrupt]"