PageRenderTime 47ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 1ms

/python/helpers/epydoc/docbuilder.py

http://github.com/JetBrains/intellij-community
Python | 1358 lines | 979 code | 118 blank | 261 comment | 223 complexity | fcad6cd3f0bcc933dc3a14e80ec2b7ec MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, MPL-2.0-no-copyleft-exception, MIT, EPL-1.0, AGPL-1.0

Large files are truncated, but you can click here to view the full file

  1. # epydoc -- Documentation Builder
  2. #
  3. # Copyright (C) 2005 Edward Loper
  4. # Author: Edward Loper <edloper@loper.org>
  5. # URL: <http://epydoc.sf.net>
  6. #
  7. # $Id: docbuilder.py 1683 2008-01-29 22:17:39Z edloper $
  8. """
  9. Construct data structures that encode the API documentation for Python
  10. objects. These data structures are created using a series of steps:
  11. 1. B{Building docs}: Extract basic information about the objects,
  12. and objects that are related to them. This can be done by
  13. introspecting the objects' values (with L{epydoc.docintrospecter}); or
  14. by parsing their source code (with L{epydoc.docparser}).
  15. 2. B{Merging}: Combine the information obtained from introspection &
  16. parsing each object into a single structure.
  17. 3. B{Linking}: Replace any 'pointers' that were created for imported
  18. variables by their target (if it's available).
  19. 4. B{Naming}: Choose a unique 'canonical name' for each
  20. object.
  21. 5. B{Docstring Parsing}: Parse the docstring of each object, and
  22. extract any pertinent information.
  23. 6. B{Inheritance}: Add information about variables that classes
  24. inherit from their base classes.
  25. The documentation information for each individual object is
  26. represented using an L{APIDoc}; and the documentation for a collection
  27. of objects is represented using a L{DocIndex}.
  28. The main interface to C{epydoc.docbuilder} consists of two functions:
  29. - L{build_doc()} -- Builds documentation for a single item, and
  30. returns it as an L{APIDoc} object.
  31. - L{build_doc_index()} -- Builds documentation for a collection of
  32. items, and returns it as a L{DocIndex} object.
  33. The remaining functions are used by these two main functions to
  34. perform individual steps in the creation of the documentation.
  35. @group Documentation Construction: build_doc, build_doc_index,
  36. _get_docs_from_*, _report_valdoc_progress
  37. @group Merging: *MERGE*, *merge*
  38. @group Linking: link_imports
  39. @group Naming: _name_scores, _unreachable_names, assign_canonical_names,
  40. _var_shadows_self, _fix_self_shadowing_var, _unreachable_name_for
  41. @group Inheritance: inherit_docs, _inherit_info
  42. """
  43. __docformat__ = 'epytext en'
  44. ######################################################################
  45. ## Contents
  46. ######################################################################
  47. ## 1. build_doc() & build_doc_index() -- the main interface.
  48. ## 2. merge_docs() -- helper, used to merge parse & introspect info
  49. ## 3. link_imports() -- helper, used to connect imported vars w/ values
  50. ## 4. assign_canonical_names() -- helper, used to set canonical names
  51. ## 5. inherit_docs() -- helper, used to inherit docs from base classes
  52. ######################################################################
  53. ## Imports
  54. ######################################################################
  55. import sys, os, os.path, __builtin__, imp, re, inspect
  56. from epydoc.apidoc import *
  57. from epydoc.docintrospecter import introspect_docs
  58. from epydoc.docparser import parse_docs, ParseError
  59. from epydoc.docstringparser import parse_docstring
  60. from epydoc import log
  61. from epydoc.util import *
  62. from epydoc.compat import * # Backwards compatibility
  63. ######################################################################
  64. ## 1. build_doc()
  65. ######################################################################
  66. class BuildOptions:
  67. """
  68. Holds the parameters for a documentation building process.
  69. """
  70. def __init__(self, introspect=True, parse=True,
  71. exclude_introspect=None, exclude_parse=None,
  72. add_submodules=True):
  73. self.introspect = introspect
  74. self.parse = parse
  75. self.exclude_introspect = exclude_introspect
  76. self.exclude_parse = exclude_parse
  77. self.add_submodules = add_submodules
  78. # Test for pattern syntax and compile them into pattern objects.
  79. try:
  80. self._introspect_regexp = (exclude_introspect
  81. and re.compile(exclude_introspect) or None)
  82. self._parse_regexp = (exclude_parse
  83. and re.compile(exclude_parse) or None)
  84. except Exception, exc:
  85. log.error('Error in regular expression pattern: %s' % exc)
  86. raise
  87. def must_introspect(self, name):
  88. """
  89. Return C{True} if a module is to be introsepcted with the current
  90. settings.
  91. @param name: The name of the module to test
  92. @type name: L{DottedName} or C{str}
  93. """
  94. return self.introspect \
  95. and not self._matches_filter(name, self._introspect_regexp)
  96. def must_parse(self, name):
  97. """
  98. Return C{True} if a module is to be parsed with the current settings.
  99. @param name: The name of the module to test
  100. @type name: L{DottedName} or C{str}
  101. """
  102. return self.parse \
  103. and not self._matches_filter(name, self._parse_regexp)
  104. def _matches_filter(self, name, regexp):
  105. """
  106. Test if a module name matches a pattern.
  107. @param name: The name of the module to test
  108. @type name: L{DottedName} or C{str}
  109. @param regexp: The pattern object to match C{name} against.
  110. If C{None}, return C{False}
  111. @type regexp: C{pattern}
  112. @return: C{True} if C{name} in dotted format matches C{regexp},
  113. else C{False}
  114. @rtype: C{bool}
  115. """
  116. if regexp is None: return False
  117. if isinstance(name, DottedName):
  118. name = str(name)
  119. return bool(regexp.search(name))
  120. def build_doc(item, introspect=True, parse=True, add_submodules=True,
  121. exclude_introspect=None, exclude_parse=None):
  122. """
  123. Build API documentation for a given item, and return it as
  124. an L{APIDoc} object.
  125. @rtype: L{APIDoc}
  126. @param item: The item to document, specified using any of the
  127. following:
  128. - A string, naming a python package directory
  129. (e.g., C{'epydoc/markup'})
  130. - A string, naming a python file
  131. (e.g., C{'epydoc/docparser.py'})
  132. - A string, naming a python object
  133. (e.g., C{'epydoc.docparser.DocParser'})
  134. - Any (non-string) python object
  135. (e.g., C{list.append})
  136. @param introspect: If true, then use introspection to examine the
  137. specified items. Otherwise, just use parsing.
  138. @param parse: If true, then use parsing to examine the specified
  139. items. Otherwise, just use introspection.
  140. """
  141. docindex = build_doc_index([item], introspect, parse, add_submodules,
  142. exclude_introspect=exclude_introspect,
  143. exclude_parse=exclude_parse)
  144. return docindex.root[0]
  145. def build_doc_index(items, introspect=True, parse=True, add_submodules=True,
  146. exclude_introspect=None, exclude_parse=None):
  147. """
  148. Build API documentation for the given list of items, and
  149. return it in the form of a L{DocIndex}.
  150. @rtype: L{DocIndex}
  151. @param items: The items to document, specified using any of the
  152. following:
  153. - A string, naming a python package directory
  154. (e.g., C{'epydoc/markup'})
  155. - A string, naming a python file
  156. (e.g., C{'epydoc/docparser.py'})
  157. - A string, naming a python object
  158. (e.g., C{'epydoc.docparser.DocParser'})
  159. - Any (non-string) python object
  160. (e.g., C{list.append})
  161. @param introspect: If true, then use introspection to examine the
  162. specified items. Otherwise, just use parsing.
  163. @param parse: If true, then use parsing to examine the specified
  164. items. Otherwise, just use introspection.
  165. """
  166. try:
  167. options = BuildOptions(parse=parse, introspect=introspect,
  168. exclude_introspect=exclude_introspect, exclude_parse=exclude_parse,
  169. add_submodules=add_submodules)
  170. except Exception, e:
  171. # log.error already reported by constructor.
  172. return None
  173. # Get the basic docs for each item.
  174. doc_pairs = _get_docs_from_items(items, options)
  175. # Merge the introspection & parse docs.
  176. if options.parse and options.introspect:
  177. log.start_progress('Merging parsed & introspected information')
  178. docs = []
  179. for i, (introspect_doc, parse_doc) in enumerate(doc_pairs):
  180. if introspect_doc is not None and parse_doc is not None:
  181. if introspect_doc.canonical_name not in (None, UNKNOWN):
  182. name = introspect_doc.canonical_name
  183. else:
  184. name = parse_doc.canonical_name
  185. log.progress(float(i)/len(doc_pairs), name)
  186. docs.append(merge_docs(introspect_doc, parse_doc))
  187. elif introspect_doc is not None:
  188. docs.append(introspect_doc)
  189. elif parse_doc is not None:
  190. docs.append(parse_doc)
  191. log.end_progress()
  192. elif options.introspect:
  193. docs = [doc_pair[0] for doc_pair in doc_pairs if doc_pair[0]]
  194. else:
  195. docs = [doc_pair[1] for doc_pair in doc_pairs if doc_pair[1]]
  196. if len(docs) == 0:
  197. log.error('Nothing left to document!')
  198. return None
  199. # Collect the docs into a single index.
  200. docindex = DocIndex(docs)
  201. # Replace any proxy valuedocs that we got from importing with
  202. # their targets.
  203. if options.parse:
  204. log.start_progress('Linking imported variables')
  205. valdocs = sorted(docindex.reachable_valdocs(
  206. imports=False, submodules=False, packages=False, subclasses=False))
  207. for i, val_doc in enumerate(valdocs):
  208. _report_valdoc_progress(i, val_doc, valdocs)
  209. link_imports(val_doc, docindex)
  210. log.end_progress()
  211. # Assign canonical names.
  212. log.start_progress('Indexing documentation')
  213. for i, val_doc in enumerate(docindex.root):
  214. log.progress(float(i)/len(docindex.root), val_doc.canonical_name)
  215. assign_canonical_names(val_doc, val_doc.canonical_name, docindex)
  216. log.end_progress()
  217. # Set overrides pointers
  218. log.start_progress('Checking for overridden methods')
  219. valdocs = sorted(docindex.reachable_valdocs(
  220. imports=False, submodules=False, packages=False, subclasses=False))
  221. for i, val_doc in enumerate(valdocs):
  222. if isinstance(val_doc, ClassDoc):
  223. percent = float(i)/len(valdocs)
  224. log.progress(percent, val_doc.canonical_name)
  225. find_overrides(val_doc)
  226. log.end_progress()
  227. # Parse the docstrings for each object.
  228. log.start_progress('Parsing docstrings')
  229. suppress_warnings = set(valdocs).difference(
  230. docindex.reachable_valdocs(
  231. imports=False, submodules=False, packages=False, subclasses=False,
  232. bases=False, overrides=True))
  233. for i, val_doc in enumerate(valdocs):
  234. _report_valdoc_progress(i, val_doc, valdocs)
  235. # the value's docstring
  236. parse_docstring(val_doc, docindex, suppress_warnings)
  237. # the value's variables' docstrings
  238. if (isinstance(val_doc, NamespaceDoc) and
  239. val_doc.variables not in (None, UNKNOWN)):
  240. for var_doc in val_doc.variables.values():
  241. # Now we have a chance to propagate the defining module
  242. # to objects for which introspection is not possible,
  243. # such as properties.
  244. if (isinstance(var_doc.value, ValueDoc)
  245. and var_doc.value.defining_module is UNKNOWN):
  246. var_doc.value.defining_module = val_doc.defining_module
  247. parse_docstring(var_doc, docindex, suppress_warnings)
  248. log.end_progress()
  249. # Take care of inheritance.
  250. log.start_progress('Inheriting documentation')
  251. for i, val_doc in enumerate(valdocs):
  252. if isinstance(val_doc, ClassDoc):
  253. percent = float(i)/len(valdocs)
  254. log.progress(percent, val_doc.canonical_name)
  255. inherit_docs(val_doc)
  256. log.end_progress()
  257. # Initialize the groups & sortedvars attributes.
  258. log.start_progress('Sorting & Grouping')
  259. for i, val_doc in enumerate(valdocs):
  260. if isinstance(val_doc, NamespaceDoc):
  261. percent = float(i)/len(valdocs)
  262. log.progress(percent, val_doc.canonical_name)
  263. val_doc.init_sorted_variables()
  264. val_doc.init_variable_groups()
  265. if isinstance(val_doc, ModuleDoc):
  266. val_doc.init_submodule_groups()
  267. val_doc.report_unused_groups()
  268. log.end_progress()
  269. return docindex
  270. def _report_valdoc_progress(i, val_doc, val_docs):
  271. if (isinstance(val_doc, (ModuleDoc, ClassDoc)) and
  272. val_doc.canonical_name is not UNKNOWN and
  273. not val_doc.canonical_name[0].startswith('??')):
  274. log.progress(float(i)/len(val_docs), val_doc.canonical_name)
  275. #/////////////////////////////////////////////////////////////////
  276. # Documentation Generation
  277. #/////////////////////////////////////////////////////////////////
  278. def _get_docs_from_items(items, options):
  279. # Start the progress bar.
  280. log.start_progress('Building documentation')
  281. progress_estimator = _ProgressEstimator(items)
  282. # Check for duplicate item names.
  283. item_set = set()
  284. for item in items[:]:
  285. if item in item_set:
  286. log.warning("Name %r given multiple times" % item)
  287. items.remove(item)
  288. item_set.add(item)
  289. # Keep track of what top-level canonical names we've assigned, to
  290. # make sure there are no naming conflicts. This dict maps
  291. # canonical names to the item names they came from (so we can print
  292. # useful error messages).
  293. canonical_names = {}
  294. # Collect (introspectdoc, parsedoc) pairs for each item.
  295. doc_pairs = []
  296. for item in items:
  297. if isinstance(item, basestring):
  298. if is_module_file(item):
  299. doc_pairs.append(_get_docs_from_module_file(
  300. item, options, progress_estimator))
  301. elif is_package_dir(item):
  302. pkgfile = os.path.abspath(os.path.join(item, '__init__'))
  303. doc_pairs.append(_get_docs_from_module_file(
  304. pkgfile, options, progress_estimator))
  305. elif os.path.isfile(item):
  306. doc_pairs.append(_get_docs_from_pyscript(
  307. item, options, progress_estimator))
  308. elif hasattr(__builtin__, item):
  309. val = getattr(__builtin__, item)
  310. doc_pairs.append(_get_docs_from_pyobject(
  311. val, options, progress_estimator))
  312. elif is_pyname(item):
  313. doc_pairs.append(_get_docs_from_pyname(
  314. item, options, progress_estimator))
  315. elif os.path.isdir(item):
  316. log.error("Directory %r is not a package" % item)
  317. continue
  318. elif os.path.isfile(item):
  319. log.error("File %s is not a Python module" % item)
  320. continue
  321. else:
  322. log.error("Could not find a file or object named %s" %
  323. item)
  324. continue
  325. else:
  326. doc_pairs.append(_get_docs_from_pyobject(
  327. item, options, progress_estimator))
  328. # Make sure there are no naming conflicts.
  329. name = (getattr(doc_pairs[-1][0], 'canonical_name', None) or
  330. getattr(doc_pairs[-1][1], 'canonical_name', None))
  331. if name in canonical_names:
  332. log.error(
  333. 'Two of the specified items, %r and %r, have the same '
  334. 'canonical name ("%s"). This may mean that you specified '
  335. 'two different files that both use the same module name. '
  336. 'Ignoring the second item (%r)' %
  337. (canonical_names[name], item, name, canonical_names[name]))
  338. doc_pairs.pop()
  339. else:
  340. canonical_names[name] = item
  341. # This will only have an effect if doc_pairs[-1] contains a
  342. # package's docs. The 'not is_module_file(item)' prevents
  343. # us from adding subdirectories if they explicitly specify
  344. # a package's __init__.py file.
  345. if options.add_submodules and not is_module_file(item):
  346. doc_pairs += _get_docs_from_submodules(
  347. item, doc_pairs[-1], options, progress_estimator)
  348. log.end_progress()
  349. return doc_pairs
  350. def _get_docs_from_pyobject(obj, options, progress_estimator):
  351. progress_estimator.complete += 1
  352. log.progress(progress_estimator.progress(), repr(obj))
  353. if not options.introspect:
  354. log.error("Cannot get docs for Python objects without "
  355. "introspecting them.")
  356. introspect_doc = parse_doc = None
  357. introspect_error = parse_error = None
  358. try:
  359. introspect_doc = introspect_docs(value=obj)
  360. except ImportError, e:
  361. log.error(e)
  362. return (None, None)
  363. if options.parse:
  364. if introspect_doc.canonical_name is not None:
  365. prev_introspect = options.introspect
  366. options.introspect = False
  367. try:
  368. _, parse_docs = _get_docs_from_pyname(
  369. str(introspect_doc.canonical_name), options,
  370. progress_estimator, suppress_warnings=True)
  371. finally:
  372. options.introspect = prev_introspect
  373. # We need a name:
  374. if introspect_doc.canonical_name in (None, UNKNOWN):
  375. if hasattr(obj, '__name__'):
  376. introspect_doc.canonical_name = DottedName(
  377. DottedName.UNREACHABLE, obj.__name__)
  378. else:
  379. introspect_doc.canonical_name = DottedName(
  380. DottedName.UNREACHABLE)
  381. return (introspect_doc, parse_doc)
  382. def _get_docs_from_pyname(name, options, progress_estimator,
  383. suppress_warnings=False):
  384. progress_estimator.complete += 1
  385. if options.must_introspect(name) or options.must_parse(name):
  386. log.progress(progress_estimator.progress(), name)
  387. introspect_doc = parse_doc = None
  388. introspect_error = parse_error = None
  389. if options.must_introspect(name):
  390. try:
  391. introspect_doc = introspect_docs(name=name)
  392. except ImportError, e:
  393. introspect_error = str(e)
  394. if options.must_parse(name):
  395. try:
  396. parse_doc = parse_docs(name=name)
  397. except ParseError, e:
  398. parse_error = str(e)
  399. except ImportError, e:
  400. # If we get here, then there' probably no python source
  401. # available; don't bother to generate a warnining.
  402. pass
  403. # Report any errors we encountered.
  404. if not suppress_warnings:
  405. _report_errors(name, introspect_doc, parse_doc,
  406. introspect_error, parse_error)
  407. # Return the docs we found.
  408. return (introspect_doc, parse_doc)
  409. def _get_docs_from_pyscript(filename, options, progress_estimator):
  410. # [xx] I should be careful about what names I allow as filenames,
  411. # and maybe do some munging to prevent problems.
  412. introspect_doc = parse_doc = None
  413. introspect_error = parse_error = None
  414. if options.introspect:
  415. try:
  416. introspect_doc = introspect_docs(filename=filename, is_script=True)
  417. if introspect_doc.canonical_name is UNKNOWN:
  418. introspect_doc.canonical_name = munge_script_name(filename)
  419. except ImportError, e:
  420. introspect_error = str(e)
  421. if options.parse:
  422. try:
  423. parse_doc = parse_docs(filename=filename, is_script=True)
  424. except ParseError, e:
  425. parse_error = str(e)
  426. except ImportError, e:
  427. parse_error = str(e)
  428. # Report any errors we encountered.
  429. _report_errors(filename, introspect_doc, parse_doc,
  430. introspect_error, parse_error)
  431. # Return the docs we found.
  432. return (introspect_doc, parse_doc)
  433. def _get_docs_from_module_file(filename, options, progress_estimator,
  434. parent_docs=(None,None)):
  435. """
  436. Construct and return the API documentation for the python
  437. module with the given filename.
  438. @param parent_docs: The C{ModuleDoc} of the containing package.
  439. If C{parent_docs} is not provided, then this method will
  440. check if the given filename is contained in a package; and
  441. if so, it will construct a stub C{ModuleDoc} for the
  442. containing package(s). C{parent_docs} is a tuple, where
  443. the first element is the parent from introspection, and
  444. the second element is the parent from parsing.
  445. """
  446. # Record our progress.
  447. modulename = os.path.splitext(os.path.split(filename)[1])[0]
  448. if modulename == '__init__':
  449. modulename = os.path.split(os.path.split(filename)[0])[1]
  450. if parent_docs[0]:
  451. modulename = DottedName(parent_docs[0].canonical_name, modulename)
  452. elif parent_docs[1]:
  453. modulename = DottedName(parent_docs[1].canonical_name, modulename)
  454. if options.must_introspect(modulename) or options.must_parse(modulename):
  455. log.progress(progress_estimator.progress(),
  456. '%s (%s)' % (modulename, filename))
  457. progress_estimator.complete += 1
  458. # Normalize the filename.
  459. filename = os.path.normpath(os.path.abspath(filename))
  460. # When possible, use the source version of the file.
  461. try:
  462. filename = py_src_filename(filename)
  463. src_file_available = True
  464. except ValueError:
  465. src_file_available = False
  466. # Get the introspected & parsed docs (as appropriate)
  467. introspect_doc = parse_doc = None
  468. introspect_error = parse_error = None
  469. if options.must_introspect(modulename):
  470. try:
  471. introspect_doc = introspect_docs(
  472. filename=filename, context=parent_docs[0])
  473. if introspect_doc.canonical_name is UNKNOWN:
  474. introspect_doc.canonical_name = modulename
  475. except ImportError, e:
  476. introspect_error = str(e)
  477. if src_file_available and options.must_parse(modulename):
  478. try:
  479. parse_doc = parse_docs(
  480. filename=filename, context=parent_docs[1])
  481. except ParseError, e:
  482. parse_error = str(e)
  483. except ImportError, e:
  484. parse_error = str(e)
  485. # Report any errors we encountered.
  486. _report_errors(filename, introspect_doc, parse_doc,
  487. introspect_error, parse_error)
  488. # Return the docs we found.
  489. return (introspect_doc, parse_doc)
  490. def _get_docs_from_submodules(item, pkg_docs, options, progress_estimator):
  491. # Extract the package's __path__.
  492. if isinstance(pkg_docs[0], ModuleDoc) and pkg_docs[0].is_package:
  493. pkg_path = pkg_docs[0].path
  494. package_dir = os.path.split(pkg_docs[0].filename)[0]
  495. elif isinstance(pkg_docs[1], ModuleDoc) and pkg_docs[1].is_package:
  496. pkg_path = pkg_docs[1].path
  497. package_dir = os.path.split(pkg_docs[1].filename)[0]
  498. else:
  499. return []
  500. module_filenames = {}
  501. subpackage_dirs = set()
  502. for subdir in pkg_path:
  503. if os.path.isdir(subdir):
  504. for name in os.listdir(subdir):
  505. filename = os.path.join(subdir, name)
  506. # Is it a valid module filename?
  507. if is_module_file(filename):
  508. basename = os.path.splitext(filename)[0]
  509. if os.path.split(basename)[1] != '__init__':
  510. module_filenames[basename] = filename
  511. # Is it a valid package filename?
  512. if is_package_dir(filename):
  513. subpackage_dirs.add(filename)
  514. # Update our estimate of the number of modules in this package.
  515. progress_estimator.revise_estimate(item, module_filenames.items(),
  516. subpackage_dirs)
  517. docs = [pkg_docs]
  518. for module_filename in module_filenames.values():
  519. d = _get_docs_from_module_file(
  520. module_filename, options, progress_estimator, pkg_docs)
  521. docs.append(d)
  522. for subpackage_dir in subpackage_dirs:
  523. subpackage_file = os.path.join(subpackage_dir, '__init__')
  524. docs.append(_get_docs_from_module_file(
  525. subpackage_file, options, progress_estimator, pkg_docs))
  526. docs += _get_docs_from_submodules(
  527. subpackage_dir, docs[-1], options, progress_estimator)
  528. return docs
  529. def _report_errors(name, introspect_doc, parse_doc,
  530. introspect_error, parse_error):
  531. hdr = 'In %s:\n' % name
  532. if introspect_doc == parse_doc == None:
  533. log.start_block('%sNo documentation available!' % hdr)
  534. if introspect_error:
  535. log.error('Import failed:\n%s' % introspect_error)
  536. if parse_error:
  537. log.error('Source code parsing failed:\n%s' % parse_error)
  538. log.end_block()
  539. elif introspect_error:
  540. log.start_block('%sImport failed (but source code parsing '
  541. 'was successful).' % hdr)
  542. log.error(introspect_error)
  543. log.end_block()
  544. elif parse_error:
  545. log.start_block('%sSource code parsing failed (but '
  546. 'introspection was successful).' % hdr)
  547. log.error(parse_error)
  548. log.end_block()
  549. #/////////////////////////////////////////////////////////////////
  550. # Progress Estimation (for Documentation Generation)
  551. #/////////////////////////////////////////////////////////////////
  552. class _ProgressEstimator:
  553. """
  554. Used to keep track of progress when generating the initial docs
  555. for the given items. (It is not known in advance how many items a
  556. package directory will contain, since it might depend on those
  557. packages' __path__ values.)
  558. """
  559. def __init__(self, items):
  560. self.est_totals = {}
  561. self.complete = 0
  562. for item in items:
  563. if is_package_dir(item):
  564. self.est_totals[item] = self._est_pkg_modules(item)
  565. else:
  566. self.est_totals[item] = 1
  567. def progress(self):
  568. total = sum(self.est_totals.values())
  569. return float(self.complete) / total
  570. def revise_estimate(self, pkg_item, modules, subpackages):
  571. del self.est_totals[pkg_item]
  572. for item in modules:
  573. self.est_totals[item] = 1
  574. for item in subpackages:
  575. self.est_totals[item] = self._est_pkg_modules(item)
  576. def _est_pkg_modules(self, package_dir):
  577. num_items = 0
  578. if is_package_dir(package_dir):
  579. for name in os.listdir(package_dir):
  580. filename = os.path.join(package_dir, name)
  581. if is_module_file(filename):
  582. num_items += 1
  583. elif is_package_dir(filename):
  584. num_items += self._est_pkg_modules(filename)
  585. return num_items
  586. ######################################################################
  587. ## Doc Merger
  588. ######################################################################
  589. MERGE_PRECEDENCE = {
  590. 'repr': 'parse',
  591. # The names we get from introspection match the names that users
  592. # can actually use -- i.e., they take magic into account.
  593. 'canonical_name': 'introspect',
  594. # Only fall-back on the parser for is_imported if the introspecter
  595. # isn't sure. Otherwise, we can end up thinking that vars
  596. # containing modules are not imported, which can cause external
  597. # modules to show up in the docs (sf bug #1653486)
  598. 'is_imported': 'introspect',
  599. # The parser can tell if an assignment creates an alias or not.
  600. 'is_alias': 'parse',
  601. # The parser is better able to determine what text file something
  602. # came from; e.g., it can't be fooled by 'covert' imports.
  603. 'docformat': 'parse',
  604. # The parse should be able to tell definitively whether a module
  605. # is a package or not.
  606. 'is_package': 'parse',
  607. # Extract the sort spec from the order in which values are defined
  608. # in the source file.
  609. 'sort_spec': 'parse',
  610. 'submodules': 'introspect',
  611. # The filename used by 'parse' is the source file.
  612. 'filename': 'parse',
  613. # 'parse' is more likely to get the encoding right, but
  614. # 'introspect' will handle programatically generated docstrings.
  615. # Which is better?
  616. 'docstring': 'introspect',
  617. }
  618. """Indicates whether information from introspection or parsing should be
  619. given precedence, for specific attributes. This dictionary maps from
  620. attribute names to either C{'introspect'} or C{'parse'}."""
  621. DEFAULT_MERGE_PRECEDENCE = 'introspect'
  622. """Indicates whether information from introspection or parsing should be
  623. given precedence. Should be either C{'introspect'} or C{'parse'}"""
  624. _attribute_mergefunc_registry = {}
  625. def register_attribute_mergefunc(attrib, mergefunc):
  626. """
  627. Register an attribute merge function. This function will be
  628. called by L{merge_docs()} when it needs to merge the attribute
  629. values of two C{APIDoc}s.
  630. @param attrib: The name of the attribute whose values are merged
  631. by C{mergefunc}.
  632. @param mergefunc: The merge function, whose sinature is:
  633. >>> def mergefunc(introspect_val, parse_val, precedence, cyclecheck, path):
  634. ... return calculate_merged_value(introspect_val, parse_val)
  635. Where C{introspect_val} and C{parse_val} are the two values to
  636. combine; C{precedence} is a string indicating which value takes
  637. precedence for this attribute (C{'introspect'} or C{'parse'});
  638. C{cyclecheck} is a value used by C{merge_docs()} to make sure that
  639. it only visits each pair of docs once; and C{path} is a string
  640. describing the path that was taken from the root to this
  641. attribute (used to generate log messages).
  642. If the merge function needs to call C{merge_docs}, then it should
  643. pass C{cyclecheck} and C{path} back in. (When appropriate, a
  644. suffix should be added to C{path} to describe the path taken to
  645. the merged values.)
  646. """
  647. _attribute_mergefunc_registry[attrib] = mergefunc
  648. def merge_docs(introspect_doc, parse_doc, cyclecheck=None, path=None):
  649. """
  650. Merge the API documentation information that was obtained from
  651. introspection with information that was obtained from parsing.
  652. C{introspect_doc} and C{parse_doc} should be two C{APIDoc} instances
  653. that describe the same object. C{merge_docs} combines the
  654. information from these two instances, and returns the merged
  655. C{APIDoc}.
  656. If C{introspect_doc} and C{parse_doc} are compatible, then they will
  657. be I{merged} -- i.e., they will be coerced to a common class, and
  658. their state will be stored in a shared dictionary. Once they have
  659. been merged, any change made to the attributes of one will affect
  660. the other. The value for the each of the merged C{APIDoc}'s
  661. attributes is formed by combining the values of the source
  662. C{APIDoc}s' attributes, as follows:
  663. - If either of the source attributes' value is C{UNKNOWN}, then
  664. use the other source attribute's value.
  665. - Otherwise, if an attribute merge function has been registered
  666. for the attribute, then use that function to calculate the
  667. merged value from the two source attribute values.
  668. - Otherwise, if L{MERGE_PRECEDENCE} is defined for the
  669. attribute, then use the attribute value from the source that
  670. it indicates.
  671. - Otherwise, use the attribute value from the source indicated
  672. by L{DEFAULT_MERGE_PRECEDENCE}.
  673. If C{introspect_doc} and C{parse_doc} are I{not} compatible (e.g., if
  674. their values have incompatible types), then C{merge_docs()} will
  675. simply return either C{introspect_doc} or C{parse_doc}, depending on
  676. the value of L{DEFAULT_MERGE_PRECEDENCE}. The two input
  677. C{APIDoc}s will not be merged or modified in any way.
  678. @param cyclecheck, path: These arguments should only be provided
  679. when C{merge_docs()} is called by an attribute merge
  680. function. See L{register_attribute_mergefunc()} for more
  681. details.
  682. """
  683. assert isinstance(introspect_doc, APIDoc)
  684. assert isinstance(parse_doc, APIDoc)
  685. if cyclecheck is None:
  686. cyclecheck = set()
  687. if introspect_doc.canonical_name not in (None, UNKNOWN):
  688. path = '%s' % introspect_doc.canonical_name
  689. elif parse_doc.canonical_name not in (None, UNKNOWN):
  690. path = '%s' % parse_doc.canonical_name
  691. else:
  692. path = '??'
  693. # If we've already examined this pair, then there's nothing
  694. # more to do. The reason that we check id's here is that we
  695. # want to avoid hashing the APIDoc objects for now, so we can
  696. # use APIDoc.merge_and_overwrite() later.
  697. if (id(introspect_doc), id(parse_doc)) in cyclecheck:
  698. return introspect_doc
  699. cyclecheck.add( (id(introspect_doc), id(parse_doc)) )
  700. # If these two are already merged, then we're done. (Two
  701. # APIDoc's compare equal iff they are identical or have been
  702. # merged.)
  703. if introspect_doc == parse_doc:
  704. return introspect_doc
  705. # If both values are GenericValueDoc, then we don't want to merge
  706. # them. E.g., we don't want to merge 2+2 with 4. So just copy
  707. # the parse_doc's parse_repr to introspect_doc, & return it.
  708. # (In particular, do *not* call merge_and_overwrite.)
  709. if type(introspect_doc) == type(parse_doc) == GenericValueDoc:
  710. if parse_doc.parse_repr is not UNKNOWN:
  711. introspect_doc.parse_repr = parse_doc.parse_repr
  712. introspect_doc.docs_extracted_by = 'both'
  713. return introspect_doc
  714. # Perform several sanity checks here -- if we accidentally
  715. # merge values that shouldn't get merged, then bad things can
  716. # happen.
  717. mismatch = None
  718. if (introspect_doc.__class__ != parse_doc.__class__ and
  719. not (issubclass(introspect_doc.__class__, parse_doc.__class__) or
  720. issubclass(parse_doc.__class__, introspect_doc.__class__))):
  721. mismatch = ("value types don't match -- i=%r, p=%r." %
  722. (introspect_doc.__class__, parse_doc.__class__))
  723. if (isinstance(introspect_doc, ValueDoc) and
  724. isinstance(parse_doc, ValueDoc)):
  725. if (introspect_doc.pyval is not UNKNOWN and
  726. parse_doc.pyval is not UNKNOWN and
  727. introspect_doc.pyval is not parse_doc.pyval):
  728. mismatch = "values don't match."
  729. elif (introspect_doc.canonical_name not in (None, UNKNOWN) and
  730. parse_doc.canonical_name not in (None, UNKNOWN) and
  731. introspect_doc.canonical_name != parse_doc.canonical_name):
  732. mismatch = "canonical names don't match."
  733. if mismatch is not None:
  734. log.info("Not merging the parsed & introspected values of %s, "
  735. "since their %s" % (path, mismatch))
  736. if DEFAULT_MERGE_PRECEDENCE == 'introspect':
  737. return introspect_doc
  738. else:
  739. return parse_doc
  740. # If one apidoc's class is a superclass of the other's, then
  741. # specialize it to the more specific class.
  742. if introspect_doc.__class__ is not parse_doc.__class__:
  743. if issubclass(introspect_doc.__class__, parse_doc.__class__):
  744. parse_doc.specialize_to(introspect_doc.__class__)
  745. if issubclass(parse_doc.__class__, introspect_doc.__class__):
  746. introspect_doc.specialize_to(parse_doc.__class__)
  747. assert introspect_doc.__class__ is parse_doc.__class__
  748. # The posargs and defaults are tied together -- if we merge
  749. # the posargs one way, then we need to merge the defaults the
  750. # same way. So check them first. (This is a minor hack)
  751. if (isinstance(introspect_doc, RoutineDoc) and
  752. isinstance(parse_doc, RoutineDoc)):
  753. _merge_posargs_and_defaults(introspect_doc, parse_doc, path)
  754. # Merge the two api_doc's attributes.
  755. for attrib in set(introspect_doc.__dict__.keys() +
  756. parse_doc.__dict__.keys()):
  757. # Be sure not to merge any private attributes (especially
  758. # __mergeset or __has_been_hashed!)
  759. if attrib.startswith('_'): continue
  760. merge_attribute(attrib, introspect_doc, parse_doc,
  761. cyclecheck, path)
  762. # Set the dictionaries to be shared.
  763. return introspect_doc.merge_and_overwrite(parse_doc)
  764. def _merge_posargs_and_defaults(introspect_doc, parse_doc, path):
  765. # If either is unknown, then let merge_attrib handle it.
  766. if introspect_doc.posargs is UNKNOWN or parse_doc.posargs is UNKNOWN:
  767. return
  768. # If the introspected doc just has '...', then trust the parsed doc.
  769. if introspect_doc.posargs == ['...'] and parse_doc.posargs != ['...']:
  770. introspect_doc.posargs = parse_doc.posargs
  771. introspect_doc.posarg_defaults = parse_doc.posarg_defaults
  772. # If they are incompatible, then check the precedence.
  773. elif introspect_doc.posargs != parse_doc.posargs:
  774. log.info("Not merging the parsed & introspected arg "
  775. "lists for %s, since they don't match (%s vs %s)"
  776. % (path, introspect_doc.posargs, parse_doc.posargs))
  777. if (MERGE_PRECEDENCE.get('posargs', DEFAULT_MERGE_PRECEDENCE) ==
  778. 'introspect'):
  779. parse_doc.posargs = introspect_doc.posargs
  780. parse_doc.posarg_defaults = introspect_doc.posarg_defaults
  781. else:
  782. introspect_doc.posargs = parse_doc.posargs
  783. introspect_doc.posarg_defaults = parse_doc.posarg_defaults
  784. def merge_attribute(attrib, introspect_doc, parse_doc, cyclecheck, path):
  785. precedence = MERGE_PRECEDENCE.get(attrib, DEFAULT_MERGE_PRECEDENCE)
  786. if precedence not in ('parse', 'introspect'):
  787. raise ValueError('Bad precedence value %r' % precedence)
  788. if (getattr(introspect_doc, attrib) is UNKNOWN and
  789. getattr(parse_doc, attrib) is not UNKNOWN):
  790. setattr(introspect_doc, attrib, getattr(parse_doc, attrib))
  791. elif (getattr(introspect_doc, attrib) is not UNKNOWN and
  792. getattr(parse_doc, attrib) is UNKNOWN):
  793. setattr(parse_doc, attrib, getattr(introspect_doc, attrib))
  794. elif (getattr(introspect_doc, attrib) is UNKNOWN and
  795. getattr(parse_doc, attrib) is UNKNOWN):
  796. pass
  797. else:
  798. # Both APIDoc objects have values; we need to merge them.
  799. introspect_val = getattr(introspect_doc, attrib)
  800. parse_val = getattr(parse_doc, attrib)
  801. if attrib in _attribute_mergefunc_registry:
  802. handler = _attribute_mergefunc_registry[attrib]
  803. merged_val = handler(introspect_val, parse_val, precedence,
  804. cyclecheck, path)
  805. elif precedence == 'introspect':
  806. merged_val = introspect_val
  807. elif precedence == 'parse':
  808. merged_val = parse_val
  809. setattr(introspect_doc, attrib, merged_val)
  810. setattr(parse_doc, attrib, merged_val)
  811. def merge_variables(varlist1, varlist2, precedence, cyclecheck, path):
  812. # Merge all variables that are in both sets.
  813. for varname, var1 in varlist1.items():
  814. var2 = varlist2.get(varname)
  815. if var2 is not None:
  816. var = merge_docs(var1, var2, cyclecheck, path+'.'+varname)
  817. varlist1[varname] = var
  818. varlist2[varname] = var
  819. # Copy any variables that are not in varlist1 over.
  820. for varname, var in varlist2.items():
  821. varlist1.setdefault(varname, var)
  822. return varlist1
  823. def merge_value(value1, value2, precedence, cyclecheck, path):
  824. assert value1 is not None and value2 is not None
  825. return merge_docs(value1, value2, cyclecheck, path)
  826. def merge_overrides(v1, v2, precedence, cyclecheck, path):
  827. return merge_value(v1, v2, precedence, cyclecheck, path+'.<overrides>')
  828. def merge_fget(v1, v2, precedence, cyclecheck, path):
  829. return merge_value(v1, v2, precedence, cyclecheck, path+'.fget')
  830. def merge_fset(v1, v2, precedence, cyclecheck, path):
  831. return merge_value(v1, v2, precedence, cyclecheck, path+'.fset')
  832. def merge_fdel(v1, v2, precedence, cyclecheck, path):
  833. return merge_value(v1, v2, precedence, cyclecheck, path+'.fdel')
  834. def merge_proxy_for(v1, v2, precedence, cyclecheck, path):
  835. # Anything we got from introspection shouldn't have a proxy_for
  836. # attribute -- it should be the actual object's documentation.
  837. return v1
  838. def merge_bases(baselist1, baselist2, precedence, cyclecheck, path):
  839. # Be careful here -- if we get it wrong, then we could end up
  840. # merging two unrelated classes, which could lead to bad
  841. # things (e.g., a class that's its own subclass). So only
  842. # merge two bases if we're quite sure they're the same class.
  843. # (In particular, if they have the same canonical name.)
  844. # If the lengths don't match up, then give up. This is most
  845. # often caused by __metaclass__.
  846. if len(baselist1) != len(baselist2):
  847. log.info("Not merging the introspected & parsed base lists "
  848. "for %s, since their lengths don't match (%s vs %s)" %
  849. (path, len(baselist1), len(baselist2)))
  850. if precedence == 'introspect': return baselist1
  851. else: return baselist2
  852. # If any names disagree, then give up.
  853. for base1, base2 in zip(baselist1, baselist2):
  854. if ((base1.canonical_name not in (None, UNKNOWN) and
  855. base2.canonical_name not in (None, UNKNOWN)) and
  856. base1.canonical_name != base2.canonical_name):
  857. log.info("Not merging the parsed & introspected base "
  858. "lists for %s, since the bases' names don't match "
  859. "(%s vs %s)" % (path, base1.canonical_name,
  860. base2.canonical_name))
  861. if precedence == 'introspect': return baselist1
  862. else: return baselist2
  863. for i, (base1, base2) in enumerate(zip(baselist1, baselist2)):
  864. base = merge_docs(base1, base2, cyclecheck,
  865. '%s.__bases__[%d]' % (path, i))
  866. baselist1[i] = baselist2[i] = base
  867. return baselist1
  868. def merge_posarg_defaults(defaults1, defaults2, precedence, cyclecheck, path):
  869. if len(defaults1) != len(defaults2):
  870. if precedence == 'introspect': return defaults1
  871. else: return defaults2
  872. defaults = []
  873. for i, (d1, d2) in enumerate(zip(defaults1, defaults2)):
  874. if d1 is not None and d2 is not None:
  875. d_path = '%s.<default-arg-val>[%d]' % (path, i)
  876. defaults.append(merge_docs(d1, d2, cyclecheck, d_path))
  877. elif precedence == 'introspect':
  878. defaults.append(d1)
  879. else:
  880. defaults.append(d2)
  881. return defaults
  882. def merge_docstring(docstring1, docstring2, precedence, cyclecheck, path):
  883. if docstring1 is None or docstring1 is UNKNOWN or precedence=='parse':
  884. return docstring2
  885. else:
  886. return docstring1
  887. def merge_docs_extracted_by(v1, v2, precedence, cyclecheck, path):
  888. return 'both'
  889. def merge_submodules(v1, v2, precedence, cyclecheck, path):
  890. n1 = sorted([m.canonical_name for m in v1])
  891. n2 = sorted([m.canonical_name for m in v2])
  892. if (n1 != n2) and (n2 != []):
  893. log.info('Introspector & parser disagree about submodules '
  894. 'for %s: (%s) vs (%s)' % (path,
  895. ', '.join([str(n) for n in n1]),
  896. ', '.join([str(n) for n in n2])))
  897. return v1 + [m for m in v2 if m.canonical_name not in n1]
  898. return v1
  899. register_attribute_mergefunc('variables', merge_variables)
  900. register_attribute_mergefunc('value', merge_value)
  901. register_attribute_mergefunc('overrides', merge_overrides)
  902. register_attribute_mergefunc('fget', merge_fget)
  903. register_attribute_mergefunc('fset', merge_fset)
  904. register_attribute_mergefunc('fdel', merge_fdel)
  905. register_attribute_mergefunc('proxy_for', merge_proxy_for)
  906. register_attribute_mergefunc('bases', merge_bases)
  907. register_attribute_mergefunc('posarg_defaults', merge_posarg_defaults)
  908. register_attribute_mergefunc('docstring', merge_docstring)
  909. register_attribute_mergefunc('docs_extracted_by', merge_docs_extracted_by)
  910. register_attribute_mergefunc('submodules', merge_submodules)
######################################################################
## Import Linking
######################################################################
  914. def link_imports(val_doc, docindex):
  915. # Check if the ValueDoc has an unresolved proxy_for link.
  916. # If so, then resolve it.
  917. while val_doc.proxy_for not in (UNKNOWN, None):
  918. # Find the valuedoc that the proxy_for name points to.
  919. src_doc = docindex.get_valdoc(val_doc.proxy_for)
  920. # If we don't have any valuedoc at that address, then
  921. # set that address as its canonical name.
  922. # [XXX] Do I really want to do this?
  923. if src_doc is None:
  924. val_doc.canonical_name = val_doc.proxy_for
  925. return
  926. # If we *do* have something at that address, then
  927. # merge the proxy `val_doc` with it.
  928. elif src_doc != val_doc:
  929. # Copy any subclass information from val_doc->src_doc.
  930. if (isinstance(val_doc, ClassDoc) and
  931. isinstance(src_doc, ClassDoc)):
  932. for subclass in val_doc.subclasses:
  933. if subclass not in src_doc.subclasses:
  934. src_doc.subclasses.append(subclass)
  935. # Then overwrite val_doc with the contents of src_doc.
  936. src_doc.merge_and_overwrite(val_doc, ignore_hash_conflict=True)
  937. # If the proxy_for link points back at src_doc
  938. # itself, then we most likely have a variable that's
  939. # shadowing a submodule that it should be equal to.
  940. # So just get rid of the variable.
  941. elif src_doc == val_doc:
  942. parent_name = val_doc.proxy_for[:-1]
  943. var_name = val_doc.proxy_for[-1]
  944. parent = docindex.get_valdoc(parent_name)
  945. if parent is not None and var_name in parent.variables:
  946. del parent.variables[var_name]
  947. src_doc.proxy_for = None
######################################################################
## Canonical Name Assignment
######################################################################
  951. _name_scores = {}
  952. """A dictionary mapping from each C{ValueDoc} to the score that has
  953. been assigned to its current cannonical name. If
  954. L{assign_canonical_names()} finds a canonical name with a better
  955. score, then it will replace the old name."""
  956. _unreachable_names = {DottedName(DottedName.UNREACHABLE):1}
  957. """The set of names that have been used for unreachable objects. This
  958. is used to ensure there are no duplicate cannonical names assigned.
  959. C{_unreachable_names} is a dictionary mapping from dotted names to
  960. integer ids, where the next unused unreachable name derived from
  961. dotted name C{n} is
  962. C{DottedName('%s-%s' % (n, str(_unreachable_names[n]+1))}"""
  963. def assign_canonical_names(val_doc, name, docindex, score=0):
  964. """
  965. Assign a canonical name to C{val_doc} (if it doesn't have one
  966. already), and (recursively) to each variable in C{val_doc}.
  967. In particular, C{val_doc} will be assigned the canonical name
  968. C{name} iff either:
  969. - C{val_doc}'s canonical name is C{UNKNOWN}; or
  970. - C{val_doc}'s current canonical name was assigned by this
  971. method; but the score of the new name (C{score}) is higher
  972. than the score of the current name (C{score_dict[val_doc]}).
  973. Note that canonical names will even be assigned to values
  974. like integers and C{None}; but these should be harmless.
  975. """
  976. # If we've already visited this node, and our new score
  977. # doesn't beat our old score, then there's nothing more to do.
  978. # Note that since score increases strictly monotonically, this
  979. # also prevents us from going in cycles.
  980. if val_doc in _name_scores and score <= _name_scores[val_doc]:
  981. return
  982. # Update val_doc's canonical name, if appropriate.
  983. if (val_doc not in _name_scores and
  984. val_doc.canonical_name is not UNKNOWN):
  985. # If this is the first time we've seen val_doc, and it
  986. # already has a name, then don't change that name.
  987. _name_scores[val_doc] = sys.maxint
  988. name = val_doc.canonical_name
  989. score = 0
  990. else:
  991. # Otherwise, update the name iff the new score is better
  992. # than the old one.
  993. if (val_doc not in _name_scores or
  994. score > _name_scores[val_doc]):
  995. val_doc.canonical_name = name
  996. _name_scores[val_doc] = score
  997. # Recurse to any contained values.
  998. if isinstance(val_doc, NamespaceDoc):
  999. for var_doc in val_doc.variables.values():
  1000. # Set the variable's canonical name.
  1001. varname = DottedName(name, var_doc.name)
  1002. var_doc.canonical_name = varname
  1003. # If the value is unknown, or is a generic value doc, then
  1004. # the valuedoc doesn't get assigned a name; move on.
  1005. if (var_doc.value is UNKNOWN
  1006. or isinstance(var_doc.value, GenericValueDoc)):
  1007. continue
  1008. # [XX] After svn commit 1644-1647, I'm not sure if this
  1009. # ever gets used: This check is for cases like
  1010. # curses.wrapper, where an imported variable shadows its
  1011. # value's "real" location.
  1012. if _var_shadows_self(var_doc, varname):
  1013. _fix_self_shadowing_var(var_doc, varname, docindex)
  1014. # Find the score for this new name.
  1015. vardoc_score = score-1
  1016. if var_doc.is_imported is UNKNOWN: vardoc_score -= 10
  1017. elif var_doc.is_imported: vardoc_score -= 100
  1018. if var_doc.is_alias is UNKNOWN: vardoc_score -= 10
  1019. elif var_doc.is_alias: vardoc_score -= 1000
  1020. assign_canonical_names(var_doc.value, varname,
  1021. docindex, vardoc_score)
  1022. # Recurse to any directly reachable values.
  1023. for val_doc_2 in val_doc.apidoc_links(variables=False):
  1024. val

Large files files are truncated, but you can click here to view the full file