/python/helpers/epydoc/docbuilder.py
Python | 1358 lines | 979 code | 118 blank | 261 comment | 223 complexity | fcad6cd3f0bcc933dc3a14e80ec2b7ec MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, MPL-2.0-no-copyleft-exception, MIT, EPL-1.0, AGPL-1.0
Large files are truncated, but you can click here to view the full file
- # epydoc -- Documentation Builder
- #
- # Copyright (C) 2005 Edward Loper
- # Author: Edward Loper <edloper@loper.org>
- # URL: <http://epydoc.sf.net>
- #
- # $Id: docbuilder.py 1683 2008-01-29 22:17:39Z edloper $
- """
- Construct data structures that encode the API documentation for Python
- objects. These data structures are created using a series of steps:
- 1. B{Building docs}: Extract basic information about the objects,
- and objects that are related to them. This can be done by
- introspecting the objects' values (with L{epydoc.docintrospecter}); or
- by parsing their source code (with L{epydoc.docparser}).
- 2. B{Merging}: Combine the information obtained from introspection &
- parsing each object into a single structure.
- 3. B{Linking}: Replace any 'pointers' that were created for imported
- variables by their target (if it's available).
-
- 4. B{Naming}: Choose a unique 'canonical name' for each
- object.
-
- 5. B{Docstring Parsing}: Parse the docstring of each object, and
- extract any pertinent information.
-
- 6. B{Inheritance}: Add information about variables that classes
- inherit from their base classes.
- The documentation information for each individual object is
- represented using an L{APIDoc}; and the documentation for a collection
- of objects is represented using a L{DocIndex}.
- The main interface to C{epydoc.docbuilder} consists of two functions:
- - L{build_doc()} -- Builds documentation for a single item, and
- returns it as an L{APIDoc} object.
- - L{build_doc_index()} -- Builds documentation for a collection of
- items, and returns it as a L{DocIndex} object.
- The remaining functions are used by these two main functions to
- perform individual steps in the creation of the documentation.
- @group Documentation Construction: build_doc, build_doc_index,
- _get_docs_from_*, _report_valdoc_progress
- @group Merging: *MERGE*, *merge*
- @group Linking: link_imports
- @group Naming: _name_scores, _unreachable_names, assign_canonical_names,
- _var_shadows_self, _fix_self_shadowing_var, _unreachable_name_for
- @group Inheritance: inherit_docs, _inherit_info
- """
- __docformat__ = 'epytext en'
- ######################################################################
- ## Contents
- ######################################################################
- ## 1. build_doc() & build_doc_index() -- the main interface.
- ## 2. merge_docs() -- helper, used to merge parse & introspect info
- ## 3. link_imports() -- helper, used to connect imported vars w/ values
- ## 4. assign_canonical_names() -- helper, used to set canonical names
- ## 5. inherit_docs() -- helper, used to inherit docs from base classes
- ######################################################################
- ## Imports
- ######################################################################
- import sys, os, os.path, __builtin__, imp, re, inspect
- from epydoc.apidoc import *
- from epydoc.docintrospecter import introspect_docs
- from epydoc.docparser import parse_docs, ParseError
- from epydoc.docstringparser import parse_docstring
- from epydoc import log
- from epydoc.util import *
- from epydoc.compat import * # Backwards compatibility
- ######################################################################
- ## 1. build_doc()
- ######################################################################
class BuildOptions:
    """
    Holds the parameters for a documentation building process.

    The two exclusion patterns are compiled when the object is
    created, so a malformed pattern is reported (and re-raised)
    immediately rather than when the first module is tested.
    """
    def __init__(self, introspect=True, parse=True,
                 exclude_introspect=None, exclude_parse=None,
                 add_submodules=True):
        """
        @param introspect: If true, modules may be introspected.
        @param parse: If true, modules may be parsed.
        @param exclude_introspect: A regular expression; modules whose
            name matches it are never introspected.  A false value
            disables the filter.
        @param exclude_parse: A regular expression; modules whose
            name matches it are never parsed.  A false value disables
            the filter.
        @param add_submodules: Stored as-is on the instance for the
            builder functions to consult.
        @raise Exception: Whatever C{re.compile} raises for a bad
            pattern; the error is logged before it is re-raised.
        """
        self.introspect = introspect
        self.parse = parse
        self.exclude_introspect = exclude_introspect
        self.exclude_parse = exclude_parse
        self.add_submodules = add_submodules

        # Compiling here doubles as a syntax check of the patterns.
        try:
            introspect_regexp = None
            if exclude_introspect:
                introspect_regexp = re.compile(exclude_introspect)
            self._introspect_regexp = introspect_regexp

            parse_regexp = None
            if exclude_parse:
                parse_regexp = re.compile(exclude_parse)
            self._parse_regexp = parse_regexp
        except Exception:
            # sys.exc_info() fetches the active exception without
            # relying on version-specific 'except' binding syntax.
            log.error('Error in regular expression pattern: %s'
                      % sys.exc_info()[1])
            raise

    def must_introspect(self, name):
        """
        Return C{True} if a module is to be introspected with the current
        settings.

        @param name: The name of the module to test
        @type name: L{DottedName} or C{str}
        """
        if not self.introspect:
            return self.introspect
        return not self._matches_filter(name, self._introspect_regexp)

    def must_parse(self, name):
        """
        Return C{True} if a module is to be parsed with the current settings.

        @param name: The name of the module to test
        @type name: L{DottedName} or C{str}
        """
        if not self.parse:
            return self.parse
        return not self._matches_filter(name, self._parse_regexp)

    def _matches_filter(self, name, regexp):
        """
        Test if a module name matches a pattern.

        @param name: The name of the module to test
        @type name: L{DottedName} or C{str}
        @param regexp: The pattern object to match C{name} against.
            If C{None}, return C{False}
        @type regexp: C{pattern}
        @return: C{True} if C{name} in dotted format matches C{regexp},
            else C{False}
        @rtype: C{bool}
        """
        if regexp is None:
            return False
        # Pattern objects search strings, so flatten dotted names first.
        if isinstance(name, DottedName):
            name = str(name)
        return regexp.search(name) is not None
def build_doc(item, introspect=True, parse=True, add_submodules=True,
              exclude_introspect=None, exclude_parse=None):
    """
    Build API documentation for a given item, and return it as
    an L{APIDoc} object.

    @rtype: L{APIDoc} or C{None}
    @return: The documentation for C{item}, or C{None} if
        L{build_doc_index()} could not build an index (it returns
        C{None} when the options are invalid or when nothing is
        left to document).
    @param item: The item to document, specified using any of the
        following:
          - A string, naming a python package directory
            (e.g., C{'epydoc/markup'})
          - A string, naming a python file
            (e.g., C{'epydoc/docparser.py'})
          - A string, naming a python object
            (e.g., C{'epydoc.docparser.DocParser'})
          - Any (non-string) python object
            (e.g., C{list.append})
    @param introspect: If true, then use introspection to examine the
        specified items.  Otherwise, just use parsing.
    @param parse: If true, then use parsing to examine the specified
        items.  Otherwise, just use introspection.
    @param add_submodules: Passed through to L{build_doc_index()}.
    @param exclude_introspect: Passed through to L{build_doc_index()}.
    @param exclude_parse: Passed through to L{build_doc_index()}.
    """
    docindex = build_doc_index([item], introspect, parse, add_submodules,
                               exclude_introspect=exclude_introspect,
                               exclude_parse=exclude_parse)
    # build_doc_index() signals failure by returning None; propagate
    # that instead of failing on the attribute access below.
    if docindex is None:
        return None
    return docindex.root[0]
def build_doc_index(items, introspect=True, parse=True, add_submodules=True,
                    exclude_introspect=None, exclude_parse=None):
    """
    Build API documentation for the given list of items, and
    return it in the form of a L{DocIndex}.

    @rtype: L{DocIndex}
    @return: The index, or C{None} if the build options could not be
        constructed or if no documentation was produced.
    @param items: The items to document, specified using any of the
        following:
          - A string, naming a python package directory
            (e.g., C{'epydoc/markup'})
          - A string, naming a python file
            (e.g., C{'epydoc/docparser.py'})
          - A string, naming a python object
            (e.g., C{'epydoc.docparser.DocParser'})
          - Any (non-string) python object
            (e.g., C{list.append})
    @param introspect: If true, then use introspection to examine the
        specified items.  Otherwise, just use parsing.
    @param parse: If true, then use parsing to examine the specified
        items.  Otherwise, just use introspection.
    @param add_submodules: Stored in the L{BuildOptions} used for
        this build.
    @param exclude_introspect: Regular expression handed to
        L{BuildOptions}; matching modules are not introspected.
    @param exclude_parse: Regular expression handed to
        L{BuildOptions}; matching modules are not parsed.
    """
    try:
        options = BuildOptions(parse=parse, introspect=introspect,
                               exclude_introspect=exclude_introspect,
                               exclude_parse=exclude_parse,
                               add_submodules=add_submodules)
    except Exception:
        # The constructor has already logged the problem.
        return None

    # --- Build: one (introspected, parsed) pair per item. ---
    doc_pairs = _get_docs_from_items(items, options)

    # --- Merge: collapse each pair into a single doc. ---
    if options.parse and options.introspect:
        log.start_progress('Merging parsed & introspected information')
        docs = []
        for index, (introspect_doc, parse_doc) in enumerate(doc_pairs):
            if introspect_doc is None or parse_doc is None:
                # At most one side exists; keep it untouched.
                survivor = introspect_doc
                if survivor is None:
                    survivor = parse_doc
                if survivor is not None:
                    docs.append(survivor)
                continue
            name = introspect_doc.canonical_name
            if name in (None, UNKNOWN):
                name = parse_doc.canonical_name
            log.progress(float(index)/len(doc_pairs), name)
            docs.append(merge_docs(introspect_doc, parse_doc))
        log.end_progress()
    elif options.introspect:
        docs = [pair[0] for pair in doc_pairs if pair[0]]
    else:
        docs = [pair[1] for pair in doc_pairs if pair[1]]

    if not docs:
        log.error('Nothing left to document!')
        return None

    # Collect the docs into a single index.
    docindex = DocIndex(docs)

    # --- Link: swap import proxies for their targets. ---
    if options.parse:
        log.start_progress('Linking imported variables')
        valdocs = sorted(docindex.reachable_valdocs(
            imports=False, submodules=False, packages=False,
            subclasses=False))
        for index, val_doc in enumerate(valdocs):
            _report_valdoc_progress(index, val_doc, valdocs)
            link_imports(val_doc, docindex)
        log.end_progress()

    # --- Name: assign canonical names from each root. ---
    log.start_progress('Indexing documentation')
    for index, val_doc in enumerate(docindex.root):
        log.progress(float(index)/len(docindex.root),
                     val_doc.canonical_name)
        assign_canonical_names(val_doc, val_doc.canonical_name, docindex)
    log.end_progress()

    # --- Overrides: the reachable set is recomputed here and reused
    # by every later stage. ---
    log.start_progress('Checking for overridden methods')
    valdocs = sorted(docindex.reachable_valdocs(
        imports=False, submodules=False, packages=False,
        subclasses=False))
    for index, val_doc in enumerate(valdocs):
        if not isinstance(val_doc, ClassDoc):
            continue
        log.progress(float(index)/len(valdocs), val_doc.canonical_name)
        find_overrides(val_doc)
    log.end_progress()

    # --- Docstrings. ---
    log.start_progress('Parsing docstrings')
    # Warnings are suppressed for values that are in valdocs but not
    # in this narrower reachable set.
    warned_valdocs = docindex.reachable_valdocs(
        imports=False, submodules=False, packages=False,
        subclasses=False, bases=False, overrides=True)
    suppress_warnings = set(valdocs).difference(warned_valdocs)
    for index, val_doc in enumerate(valdocs):
        _report_valdoc_progress(index, val_doc, valdocs)
        # The value's own docstring.
        parse_docstring(val_doc, docindex, suppress_warnings)
        # The docstrings of the value's variables.
        if not isinstance(val_doc, NamespaceDoc):
            continue
        if val_doc.variables in (None, UNKNOWN):
            continue
        for var_doc in val_doc.variables.values():
            # Now we have a chance to propagate the defining module
            # to objects for which introspection is not possible,
            # such as properties.
            if (isinstance(var_doc.value, ValueDoc)
                and var_doc.value.defining_module is UNKNOWN):
                var_doc.value.defining_module = val_doc.defining_module
            parse_docstring(var_doc, docindex, suppress_warnings)
    log.end_progress()

    # --- Inheritance. ---
    log.start_progress('Inheriting documentation')
    for index, val_doc in enumerate(valdocs):
        if not isinstance(val_doc, ClassDoc):
            continue
        log.progress(float(index)/len(valdocs), val_doc.canonical_name)
        inherit_docs(val_doc)
    log.end_progress()

    # --- Groups & sorted variables. ---
    log.start_progress('Sorting & Grouping')
    for index, val_doc in enumerate(valdocs):
        if not isinstance(val_doc, NamespaceDoc):
            continue
        log.progress(float(index)/len(valdocs), val_doc.canonical_name)
        val_doc.init_sorted_variables()
        val_doc.init_variable_groups()
        if isinstance(val_doc, ModuleDoc):
            val_doc.init_submodule_groups()
        val_doc.report_unused_groups()
    log.end_progress()

    return docindex
def _report_valdoc_progress(i, val_doc, val_docs):
    """
    Log progress for C{val_doc}, the C{i}th entry of C{val_docs}.

    Only modules and classes are reported, and only when their
    canonical name is known and its first component does not start
    with C{'??'}.
    """
    if not isinstance(val_doc, (ModuleDoc, ClassDoc)):
        return
    if val_doc.canonical_name is UNKNOWN:
        return
    if val_doc.canonical_name[0].startswith('??'):
        return
    log.progress(float(i)/len(val_docs), val_doc.canonical_name)
- #/////////////////////////////////////////////////////////////////
- # Documentation Generation
- #/////////////////////////////////////////////////////////////////
def _get_docs_from_items(items, options):
    """
    Build the C{(introspect_doc, parse_doc)} pair for each item (and,
    when C{options.add_submodules} is set, for the submodules of each
    package item).

    @param items: The items to document (strings or python objects).
        Duplicate entries are removed from this list in place.
    @param options: The L{BuildOptions} for this build.
    @return: A list of C{(introspect_doc, parse_doc)} tuples; either
        element of a tuple may be C{None}.
    """
    # Start the progress bar.
    log.start_progress('Building documentation')
    progress_estimator = _ProgressEstimator(items)

    # Check for duplicate item names.
    item_set = set()
    for item in items[:]:
        if item in item_set:
            log.warning("Name %r given multiple times" % item)
            items.remove(item)
        item_set.add(item)

    # Keep track of what top-level canonical names we've assigned, to
    # make sure there are no naming conflicts.  This dict maps
    # canonical names to the item names they came from (so we can print
    # useful error messages).
    canonical_names = {}

    # Collect (introspectdoc, parsedoc) pairs for each item.
    doc_pairs = []
    for item in items:
        if isinstance(item, basestring):
            if is_module_file(item):
                doc_pairs.append(_get_docs_from_module_file(
                    item, options, progress_estimator))
            elif is_package_dir(item):
                pkgfile = os.path.abspath(os.path.join(item, '__init__'))
                doc_pairs.append(_get_docs_from_module_file(
                    pkgfile, options, progress_estimator))
            elif os.path.isfile(item):
                # Any other existing file is treated as a script.
                doc_pairs.append(_get_docs_from_pyscript(
                    item, options, progress_estimator))
            elif hasattr(__builtin__, item):
                val = getattr(__builtin__, item)
                doc_pairs.append(_get_docs_from_pyobject(
                    val, options, progress_estimator))
            elif is_pyname(item):
                doc_pairs.append(_get_docs_from_pyname(
                    item, options, progress_estimator))
            elif os.path.isdir(item):
                log.error("Directory %r is not a package" % item)
                continue
            else:
                # (A second os.path.isfile() test used to live here;
                # it could never fire because every existing file is
                # already handled by the script branch above.)
                log.error("Could not find a file or object named %s" %
                          item)
                continue
        else:
            doc_pairs.append(_get_docs_from_pyobject(
                item, options, progress_estimator))

        # Make sure there are no naming conflicts.  A name of None
        # means no named doc was built for this item; such items
        # cannot conflict with each other, so they are not registered.
        name = (getattr(doc_pairs[-1][0], 'canonical_name', None) or
                getattr(doc_pairs[-1][1], 'canonical_name', None))
        if name is not None:
            if name in canonical_names:
                log.error(
                    'Two of the specified items, %r and %r, have the same '
                    'canonical name ("%s"). This may mean that you specified '
                    'two different files that both use the same module name. '
                    'Ignoring the second item (%r)' %
                    (canonical_names[name], item, name, item))
                doc_pairs.pop()
                # The item's docs are gone, so doc_pairs[-1] now belongs
                # to an earlier item; do not collect submodules from it.
                continue
            canonical_names[name] = item

        # This will only have an effect if doc_pairs[-1] contains a
        # package's docs.  The 'not is_module_file(item)' prevents
        # us from adding subdirectories if they explicitly specify
        # a package's __init__.py file.
        if options.add_submodules and not is_module_file(item):
            doc_pairs += _get_docs_from_submodules(
                item, doc_pairs[-1], options, progress_estimator)

    log.end_progress()
    return doc_pairs
def _get_docs_from_pyobject(obj, options, progress_estimator):
    """
    Build the C{(introspect_doc, parse_doc)} pair for a live python
    object.

    The object is always introspected.  If parsing is enabled and
    introspection produced a canonical name, that name is looked up
    with L{_get_docs_from_pyname()} (introspection temporarily
    disabled, warnings suppressed) to obtain the parsed docs.

    @return: C{(introspect_doc, parse_doc)}, or C{(None, None)} if
        introspection raised C{ImportError}.
    """
    progress_estimator.complete += 1
    log.progress(progress_estimator.progress(), repr(obj))

    if not options.introspect:
        log.error("Cannot get docs for Python objects without "
                  "introspecting them.")

    introspect_doc = parse_doc = None
    try:
        introspect_doc = introspect_docs(value=obj)
    except ImportError:
        log.error(sys.exc_info()[1])
        return (None, None)

    # Only look the object up by name when there is a real name.
    if (options.parse and
        introspect_doc.canonical_name not in (None, UNKNOWN)):
        prev_introspect = options.introspect
        options.introspect = False
        try:
            # Keep the parsed half of the pair.  (This used to be
            # bound to a different local name, so the parsed docs
            # were silently thrown away.)
            _, parse_doc = _get_docs_from_pyname(
                str(introspect_doc.canonical_name), options,
                progress_estimator, suppress_warnings=True)
        finally:
            options.introspect = prev_introspect

    # We need a name:
    if introspect_doc.canonical_name in (None, UNKNOWN):
        if hasattr(obj, '__name__'):
            introspect_doc.canonical_name = DottedName(
                DottedName.UNREACHABLE, obj.__name__)
        else:
            introspect_doc.canonical_name = DottedName(
                DottedName.UNREACHABLE)
    return (introspect_doc, parse_doc)
def _get_docs_from_pyname(name, options, progress_estimator,
                          suppress_warnings=False):
    """
    Build the C{(introspect_doc, parse_doc)} pair for the object with
    the given dotted name.

    @param suppress_warnings: If true, errors encountered while
        introspecting or parsing are not reported.
    @return: C{(introspect_doc, parse_doc)}; either may be C{None}.
    """
    progress_estimator.complete += 1

    wants_introspect = options.must_introspect(name)
    wants_parse = options.must_parse(name)
    if wants_introspect or wants_parse:
        log.progress(progress_estimator.progress(), name)

    introspect_doc = None
    parse_doc = None
    introspect_error = None
    parse_error = None

    if wants_introspect:
        try:
            introspect_doc = introspect_docs(name=name)
        except ImportError:
            introspect_error = str(sys.exc_info()[1])

    if wants_parse:
        try:
            parse_doc = parse_docs(name=name)
        except ParseError:
            parse_error = str(sys.exc_info()[1])
        except ImportError:
            # If we get here, then there's probably no python source
            # available; don't bother to generate a warning.
            pass

    # Report any errors we encountered.
    if not suppress_warnings:
        _report_errors(name, introspect_doc, parse_doc,
                       introspect_error, parse_error)

    # Return the docs we found.
    return (introspect_doc, parse_doc)
def _get_docs_from_pyscript(filename, options, progress_estimator):
    """
    Build the C{(introspect_doc, parse_doc)} pair for a python script
    (a file that is not an importable module).

    @return: C{(introspect_doc, parse_doc)}; either may be C{None}.
    """
    # [xx] I should be careful about what names I allow as filenames,
    # and maybe do some munging to prevent problems.

    # Record our progress, as the other _get_docs_from_* builders do;
    # otherwise a script counts toward the total but never toward
    # the completed work.
    progress_estimator.complete += 1
    log.progress(progress_estimator.progress(), filename)

    introspect_doc = parse_doc = None
    introspect_error = parse_error = None
    if options.introspect:
        try:
            introspect_doc = introspect_docs(filename=filename,
                                             is_script=True)
            if introspect_doc.canonical_name is UNKNOWN:
                introspect_doc.canonical_name = munge_script_name(filename)
        except ImportError:
            introspect_error = str(sys.exc_info()[1])
    if options.parse:
        try:
            parse_doc = parse_docs(filename=filename, is_script=True)
        except ParseError:
            parse_error = str(sys.exc_info()[1])
        except ImportError:
            parse_error = str(sys.exc_info()[1])

    # Report any errors we encountered.
    _report_errors(filename, introspect_doc, parse_doc,
                   introspect_error, parse_error)

    # Return the docs we found.
    return (introspect_doc, parse_doc)
-
def _get_docs_from_module_file(filename, options, progress_estimator,
                               parent_docs=(None,None)):
    """
    Construct and return the API documentation for the python
    module with the given filename.

    @param parent_docs: The C{ModuleDoc} of the containing package.
        If C{parent_docs} is not provided, then this method will
        check if the given filename is contained in a package; and
        if so, it will construct a stub C{ModuleDoc} for the
        containing package(s).  C{parent_docs} is a tuple, where
        the first element is the parent from introspection, and
        the second element is the parent from parsing.
    @return: C{(introspect_doc, parse_doc)}; either may be C{None}.
    """
    # Work out the module's name: the file's base name, or the
    # directory's name for a package's __init__ file.
    modulename = os.path.splitext(os.path.basename(filename))[0]
    if modulename == '__init__':
        modulename = os.path.basename(os.path.dirname(filename))
    # Qualify the name with the parent package, preferring the
    # introspected parent over the parsed one.
    introspect_parent, parse_parent = parent_docs[0], parent_docs[1]
    if introspect_parent:
        modulename = DottedName(introspect_parent.canonical_name,
                                modulename)
    elif parse_parent:
        modulename = DottedName(parse_parent.canonical_name, modulename)

    # Record our progress.
    if (options.must_introspect(modulename) or
        options.must_parse(modulename)):
        log.progress(progress_estimator.progress(),
                     '%s (%s)' % (modulename, filename))
    progress_estimator.complete += 1

    # Normalize the filename.
    filename = os.path.normpath(os.path.abspath(filename))

    # When possible, use the source version of the file.
    src_file_available = True
    try:
        filename = py_src_filename(filename)
    except ValueError:
        src_file_available = False

    # Get the introspected & parsed docs (as appropriate)
    introspect_doc = None
    parse_doc = None
    introspect_error = None
    parse_error = None

    if options.must_introspect(modulename):
        try:
            introspect_doc = introspect_docs(
                filename=filename, context=introspect_parent)
            if introspect_doc.canonical_name is UNKNOWN:
                introspect_doc.canonical_name = modulename
        except ImportError:
            introspect_error = str(sys.exc_info()[1])

    if src_file_available and options.must_parse(modulename):
        try:
            parse_doc = parse_docs(
                filename=filename, context=parse_parent)
        except ParseError:
            parse_error = str(sys.exc_info()[1])
        except ImportError:
            parse_error = str(sys.exc_info()[1])

    # Report any errors we encountered.
    _report_errors(filename, introspect_doc, parse_doc,
                   introspect_error, parse_error)

    # Return the docs we found.
    return (introspect_doc, parse_doc)
def _get_docs_from_submodules(item, pkg_docs, options, progress_estimator):
    """
    Build the docs for every module and subpackage found on a
    package's path, recursing into subpackages.

    @param item: The key under which the package is registered in
        C{progress_estimator}; its estimate is replaced by estimates
        for the modules and subpackages actually found.
    @param pkg_docs: The package's C{(introspect_doc, parse_doc)} pair.
    @return: A list of doc pairs whose first element is C{pkg_docs}
        itself, or an empty list if C{pkg_docs} does not describe a
        package.
    """
    # Extract the package's __path__, preferring the introspected doc.
    if isinstance(pkg_docs[0], ModuleDoc) and pkg_docs[0].is_package:
        pkg_path = pkg_docs[0].path
    elif isinstance(pkg_docs[1], ModuleDoc) and pkg_docs[1].is_package:
        pkg_path = pkg_docs[1].path
    else:
        return []

    # Maps extension-less path -> filename, so that several files for
    # the same module collapse to one entry.
    module_filenames = {}
    subpackage_dirs = set()
    for subdir in pkg_path:
        if os.path.isdir(subdir):
            for name in os.listdir(subdir):
                filename = os.path.join(subdir, name)
                # Is it a valid module filename?
                if is_module_file(filename):
                    basename = os.path.splitext(filename)[0]
                    if os.path.split(basename)[1] != '__init__':
                        module_filenames[basename] = filename
                # Is it a valid package filename?
                if is_package_dir(filename):
                    subpackage_dirs.add(filename)

    # Update our estimate of the number of modules in this package.
    progress_estimator.revise_estimate(item, module_filenames.items(),
                                       subpackage_dirs)

    docs = [pkg_docs]
    for module_filename in module_filenames.values():
        docs.append(_get_docs_from_module_file(
            module_filename, options, progress_estimator, pkg_docs))
    for subpackage_dir in subpackage_dirs:
        subpackage_file = os.path.join(subpackage_dir, '__init__')
        docs.append(_get_docs_from_module_file(
            subpackage_file, options, progress_estimator, pkg_docs))
        # docs[-1] is the subpackage's own pair, just appended above.
        docs += _get_docs_from_submodules(
            subpackage_dir, docs[-1], options, progress_estimator)
    return docs
def _report_errors(name, introspect_doc, parse_doc,
                   introspect_error, parse_error):
    """
    Log whatever went wrong while building the docs for C{name}.

    @param name: The item's name, used in the message header.
    @param introspect_doc: The introspected doc, or C{None}.
    @param parse_doc: The parsed doc, or C{None}.
    @param introspect_error: The import error message, or a false
        value if introspection did not fail.
    @param parse_error: The parse error message, or a false value if
        parsing did not fail.
    """
    hdr = 'In %s:\n' % name
    # Identity tests: "no doc" means None, and '==' would go through
    # whatever equality the doc objects define.
    if introspect_doc is None and parse_doc is None:
        log.start_block('%sNo documentation available!' % hdr)
        if introspect_error:
            log.error('Import failed:\n%s' % introspect_error)
        if parse_error:
            log.error('Source code parsing failed:\n%s' % parse_error)
        log.end_block()
    elif introspect_error:
        log.start_block('%sImport failed (but source code parsing '
                        'was successful).' % hdr)
        log.error(introspect_error)
        log.end_block()
    elif parse_error:
        log.start_block('%sSource code parsing failed (but '
                        'introspection was successful).' % hdr)
        log.error(parse_error)
        log.end_block()
- #/////////////////////////////////////////////////////////////////
- # Progress Estimation (for Documentation Generation)
- #/////////////////////////////////////////////////////////////////
class _ProgressEstimator:
    """
    Used to keep track of progress when generating the initial docs
    for the given items.  (It is not known in advance how many items a
    package directory will contain, since it might depend on those
    packages' __path__ values.)
    """
    def __init__(self, items):
        """
        Seed the estimate: each package directory counts for the
        number of modules found under it, everything else counts
        for one.
        """
        # Maps an item to the number of modules it is expected to
        # contribute.
        self.est_totals = {}
        # Number of modules processed so far; bumped by the builders.
        self.complete = 0

        for item in items:
            if is_package_dir(item):
                estimate = self._est_pkg_modules(item)
            else:
                estimate = 1
            self.est_totals[item] = estimate

    def progress(self):
        """Return completed work as a fraction of the estimated total."""
        return float(self.complete) / sum(self.est_totals.values())

    def revise_estimate(self, pkg_item, modules, subpackages):
        """
        Replace the estimate for C{pkg_item} with one entry per module
        and one (recursively estimated) entry per subpackage.
        """
        del self.est_totals[pkg_item]
        for module in modules:
            self.est_totals[module] = 1
        for subpackage in subpackages:
            self.est_totals[subpackage] = self._est_pkg_modules(subpackage)

    def _est_pkg_modules(self, package_dir):
        """
        Count the module files under C{package_dir}, descending into
        nested package directories.  Returns 0 if C{package_dir} is
        not a package directory.
        """
        if not is_package_dir(package_dir):
            return 0

        count = 0
        for entry in os.listdir(package_dir):
            path = os.path.join(package_dir, entry)
            if is_module_file(path):
                count += 1
            elif is_package_dir(path):
                count += self._est_pkg_modules(path)
        return count
-
- ######################################################################
- ## Doc Merger
- ######################################################################
MERGE_PRECEDENCE = {
    'repr': 'parse',

    # Introspected names are the ones users can actually type --
    # i.e., they take import magic into account.
    'canonical_name': 'introspect',

    # Only fall back on the parser for is_imported if the introspecter
    # isn't sure.  Otherwise, we can end up thinking that vars
    # containing modules are not imported, which can cause external
    # modules to show up in the docs (sf bug #1653486)
    'is_imported': 'introspect',

    # Only the parser can see whether an assignment creates an alias.
    'is_alias': 'parse',

    # The parser is better able to determine what text file something
    # came from; e.g., it can't be fooled by 'covert' imports.
    'docformat': 'parse',

    # The parser should be able to tell definitively whether a module
    # is a package or not.
    'is_package': 'parse',

    # The sort spec comes from the order in which values are defined
    # in the source file.
    'sort_spec': 'parse',

    'submodules': 'introspect',

    # The filename used by 'parse' is the source file.
    'filename': 'parse',

    # 'parse' is more likely to get the encoding right, but
    # 'introspect' will handle programmatically generated docstrings.
    # Which is better?
    'docstring': 'introspect',
    }
"""Indicates whether information from introspection or parsing should be
given precedence, for specific attributes.  This dictionary maps from
attribute names to either C{'introspect'} or C{'parse'}."""

DEFAULT_MERGE_PRECEDENCE = 'introspect'
"""Indicates whether information from introspection or parsing should be
given precedence.  Should be either C{'introspect'} or C{'parse'}"""

# Maps attribute names to the merge functions registered for them.
_attribute_mergefunc_registry = {}
def register_attribute_mergefunc(attrib, mergefunc):
    """
    Register an attribute merge function.  This function will be
    called by L{merge_docs()} when it needs to merge the attribute
    values of two C{APIDoc}s.  Registering a second function for the
    same attribute replaces the first.

    @param attrib: The name of the attribute whose values are merged
        by C{mergefunc}.

    @param mergefunc: The merge function, whose signature is:

        >>> def mergefunc(introspect_val, parse_val, precedence, cyclecheck, path):
        ...     return calculate_merged_value(introspect_val, parse_val)

        Where C{introspect_val} and C{parse_val} are the two values to
        combine; C{precedence} is a string indicating which value takes
        precedence for this attribute (C{'introspect'} or C{'parse'});
        C{cyclecheck} is a value used by C{merge_docs()} to make sure that
        it only visits each pair of docs once; and C{path} is a string
        describing the path that was taken from the root to this
        attribute (used to generate log messages).

        If the merge function needs to call C{merge_docs}, then it should
        pass C{cyclecheck} and C{path} back in.  (When appropriate, a
        suffix should be added to C{path} to describe the path taken to
        the merged values.)
    """
    _attribute_mergefunc_registry[attrib] = mergefunc
def merge_docs(introspect_doc, parse_doc, cyclecheck=None, path=None):
    """
    Merge the API documentation information that was obtained from
    introspection with information that was obtained from parsing.
    C{introspect_doc} and C{parse_doc} should be two C{APIDoc} instances
    that describe the same object.  C{merge_docs} combines the
    information from these two instances, and returns the merged
    C{APIDoc}.

    If C{introspect_doc} and C{parse_doc} are compatible, then they will
    be I{merged} -- i.e., they will be coerced to a common class, and
    their state will be stored in a shared dictionary.  Once they have
    been merged, any change made to the attributes of one will affect
    the other.  The value for each of the merged C{APIDoc}'s
    attributes is formed by combining the values of the source
    C{APIDoc}s' attributes, as follows:

      - If either of the source attributes' value is C{UNKNOWN}, then
        use the other source attribute's value.
      - Otherwise, if an attribute merge function has been registered
        for the attribute, then use that function to calculate the
        merged value from the two source attribute values.
      - Otherwise, if L{MERGE_PRECEDENCE} is defined for the
        attribute, then use the attribute value from the source that
        it indicates.
      - Otherwise, use the attribute value from the source indicated
        by L{DEFAULT_MERGE_PRECEDENCE}.

    If C{introspect_doc} and C{parse_doc} are I{not} compatible (e.g., if
    their values have incompatible types), then C{merge_docs()} will
    simply return either C{introspect_doc} or C{parse_doc}, depending on
    the value of L{DEFAULT_MERGE_PRECEDENCE}.  The two input
    C{APIDoc}s will not be merged or modified in any way.

    @param cyclecheck, path: These arguments should only be provided
        when C{merge_docs()} is called by an attribute merge
        function.  See L{register_attribute_mergefunc()} for more
        details.
    """
    assert isinstance(introspect_doc, APIDoc)
    assert isinstance(parse_doc, APIDoc)

    # No cyclecheck means this is the outermost call: start a fresh
    # visited-set and derive the log path from the first usable name.
    if cyclecheck is None:
        cyclecheck = set()
        path = '??'
        for doc in (introspect_doc, parse_doc):
            if doc.canonical_name not in (None, UNKNOWN):
                path = '%s' % doc.canonical_name
                break

    # If we've already examined this pair, then there's nothing
    # more to do.  The pair is keyed on id()s because we want to
    # avoid hashing the APIDoc objects for now, so we can
    # use APIDoc.merge_and_overwrite() later.
    pair_key = (id(introspect_doc), id(parse_doc))
    if pair_key in cyclecheck:
        return introspect_doc
    cyclecheck.add(pair_key)

    # If these two are already merged, then we're done.  (Two
    # APIDoc's compare equal iff they are identical or have been
    # merged.)
    if introspect_doc == parse_doc:
        return introspect_doc

    # If both values are GenericValueDoc, then we don't want to merge
    # them.  E.g., we don't want to merge 2+2 with 4.  So just copy
    # the parse_doc's parse_repr to introspect_doc, & return it.
    # (In particular, do *not* call merge_and_overwrite.)
    if type(introspect_doc) == type(parse_doc) == GenericValueDoc:
        if parse_doc.parse_repr is not UNKNOWN:
            introspect_doc.parse_repr = parse_doc.parse_repr
        introspect_doc.docs_extracted_by = 'both'
        return introspect_doc

    # Perform several sanity checks here -- if we accidentally
    # merge values that shouldn't get merged, then bad things can
    # happen.
    mismatch = None
    introspect_class = introspect_doc.__class__
    parse_class = parse_doc.__class__
    if (introspect_class != parse_class and
        not (issubclass(introspect_class, parse_class) or
             issubclass(parse_class, introspect_class))):
        mismatch = ("value types don't match -- i=%r, p=%r." %
                    (introspect_class, parse_class))
    if (isinstance(introspect_doc, ValueDoc) and
        isinstance(parse_doc, ValueDoc)):
        if (introspect_doc.pyval is not UNKNOWN and
            parse_doc.pyval is not UNKNOWN and
            introspect_doc.pyval is not parse_doc.pyval):
            mismatch = "values don't match."
        elif (introspect_doc.canonical_name not in (None, UNKNOWN) and
              parse_doc.canonical_name not in (None, UNKNOWN) and
              introspect_doc.canonical_name != parse_doc.canonical_name):
            mismatch = "canonical names don't match."
    if mismatch is not None:
        log.info("Not merging the parsed & introspected values of %s, "
                 "since their %s" % (path, mismatch))
        if DEFAULT_MERGE_PRECEDENCE == 'introspect':
            return introspect_doc
        return parse_doc

    # If one apidoc's class is a superclass of the other's, then
    # specialize it to the more specific class.  (The classes are
    # re-read for each test because specialize_to() changes them.)
    if introspect_doc.__class__ is not parse_doc.__class__:
        if issubclass(introspect_doc.__class__, parse_doc.__class__):
            parse_doc.specialize_to(introspect_doc.__class__)
        if issubclass(parse_doc.__class__, introspect_doc.__class__):
            introspect_doc.specialize_to(parse_doc.__class__)
    assert introspect_doc.__class__ is parse_doc.__class__

    # The posargs and defaults are tied together -- if we merge
    # the posargs one way, then we need to merge the defaults the
    # same way.  So check them first.  (This is a minor hack)
    if (isinstance(introspect_doc, RoutineDoc) and
        isinstance(parse_doc, RoutineDoc)):
        _merge_posargs_and_defaults(introspect_doc, parse_doc, path)

    # Merge the two api_doc's attributes.  The attribute names are
    # snapshotted first, since merging assigns attributes.
    attribs = set(list(introspect_doc.__dict__) +
                  list(parse_doc.__dict__))
    for attrib in attribs:
        # Be sure not to merge any private attributes (especially
        # __mergeset or __has_been_hashed!)
        if not attrib.startswith('_'):
            merge_attribute(attrib, introspect_doc, parse_doc,
                            cyclecheck, path)

    # Set the dictionaries to be shared.
    return introspect_doc.merge_and_overwrite(parse_doc)
def _merge_posargs_and_defaults(introspect_doc, parse_doc, path):
    """
    Make the C{posargs} and C{posarg_defaults} attributes of two
    routine docs agree, always copying the two attributes together so
    that the defaults stay paired with the argument list they belong
    to.

    Nothing is changed if either C{posargs} is C{UNKNOWN} (the regular
    attribute merge deals with that case) or if the two argument lists
    are already equal.

    @param introspect_doc: The routine doc obtained by introspection.
    @param parse_doc: The routine doc obtained by parsing.
    @param path: Name of the object being merged; only used in the
        log message.
    """
    introspect_args = introspect_doc.posargs
    parse_args = parse_doc.posargs
    if introspect_args is UNKNOWN or parse_args is UNKNOWN:
        return

    if introspect_args == ['...'] and parse_args != ['...']:
        # Introspection only produced the '...' placeholder, so the
        # parsed signature is the only real information available.
        source, target = parse_doc, introspect_doc
    elif introspect_args != parse_args:
        # The two signatures are incompatible: report it, and let the
        # configured precedence pick the winner.
        log.info("Not merging the parsed & introspected arg "
                 "lists for %s, since they don't match (%s vs %s)"
                 % (path, introspect_args, parse_args))
        chosen = MERGE_PRECEDENCE.get('posargs', DEFAULT_MERGE_PRECEDENCE)
        if chosen == 'introspect':
            source, target = introspect_doc, parse_doc
        else:
            source, target = parse_doc, introspect_doc
    else:
        # Identical argument lists; nothing to reconcile.
        return

    target.posargs = source.posargs
    target.posarg_defaults = source.posarg_defaults
def merge_attribute(attrib, introspect_doc, parse_doc, cyclecheck, path):
    """
    Merge a single attribute of two API docs, leaving both docs with
    the same value for that attribute.

    If only one side has a known value, it is copied to the other
    side.  If both sides are C{UNKNOWN}, nothing happens.  If both
    have values, the merge function registered for C{attrib} in
    C{_attribute_mergefunc_registry} is used when there is one;
    otherwise the value from the side named by the attribute's
    precedence wins.

    @param attrib: Name of the attribute to merge.
    @param introspect_doc: The doc obtained by introspection.
    @param parse_doc: The doc obtained by parsing.
    @param cyclecheck: Passed through unchanged to the merge function.
    @param path: Name of the object being merged; passed through to
        the merge function.
    @raise ValueError: If the configured precedence for C{attrib} is
        neither C{'parse'} nor C{'introspect'}.
    """
    precedence = MERGE_PRECEDENCE.get(attrib, DEFAULT_MERGE_PRECEDENCE)
    if precedence not in ('parse', 'introspect'):
        raise ValueError('Bad precedence value %r' % precedence)

    introspect_val = getattr(introspect_doc, attrib)
    parse_val = getattr(parse_doc, attrib)
    introspect_known = introspect_val is not UNKNOWN
    parse_known = parse_val is not UNKNOWN

    # Neither side knows anything: leave both untouched.
    if not (introspect_known or parse_known):
        return

    # Exactly one side knows the value: share it with the other.
    if not introspect_known:
        setattr(introspect_doc, attrib, parse_val)
        return
    if not parse_known:
        setattr(parse_doc, attrib, introspect_val)
        return

    # Both APIDoc objects have values; we need to merge them.
    if attrib in _attribute_mergefunc_registry:
        merge_func = _attribute_mergefunc_registry[attrib]
        merged_val = merge_func(introspect_val, parse_val, precedence,
                                cyclecheck, path)
    elif precedence == 'introspect':
        merged_val = introspect_val
    else:
        # precedence was validated above, so this is 'parse'.
        merged_val = parse_val

    for doc in (introspect_doc, parse_doc):
        setattr(doc, attrib, merged_val)
def merge_variables(varlist1, varlist2, precedence, cyclecheck, path):
    """
    Merge two dictionaries that map variable names to variable docs.

    Names present in both dictionaries have their docs merged with
    L{merge_docs()}, and the merged doc is stored back in both
    dictionaries.  Names that only occur in C{varlist2} are then
    copied into C{varlist1}.

    @param precedence: Unused; accepted so that all merge functions
        share one signature.
    @return: C{varlist1}, updated in place.
    """
    # Work on a snapshot of the items, since entries are reassigned
    # while we walk over them.
    for name, doc1 in list(varlist1.items()):
        doc2 = varlist2.get(name)
        if doc2 is None:
            continue
        merged = merge_docs(doc1, doc2, cyclecheck, path+'.'+name)
        varlist1[name] = merged
        varlist2[name] = merged

    # Bring over whatever only the second dictionary knows about.
    for name, doc2 in varlist2.items():
        if name not in varlist1:
            varlist1[name] = doc2

    return varlist1
def merge_value(value1, value2, precedence, cyclecheck, path):
    """
    Merge two value docs by handing them to L{merge_docs()}.

    Neither value may be C{None}; this is checked with an assertion.

    @param precedence: Unused; accepted so that all merge functions
        share one signature.
    @return: Whatever L{merge_docs()} returns for the two values.
    """
    assert not (value1 is None or value2 is None)
    merged = merge_docs(value1, value2, cyclecheck, path)
    return merged
def merge_overrides(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{overrides} attribute: delegates to
    L{merge_value()}, extending C{path} with C{'.<overrides>'}.
    """
    overrides_path = path + '.<overrides>'
    return merge_value(v1, v2, precedence, cyclecheck, overrides_path)
def merge_fget(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{fget} attribute: delegates to
    L{merge_value()}, extending C{path} with C{'.fget'}.
    """
    fget_path = path + '.fget'
    return merge_value(v1, v2, precedence, cyclecheck, fget_path)
def merge_fset(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{fset} attribute: delegates to
    L{merge_value()}, extending C{path} with C{'.fset'}.
    """
    fset_path = path + '.fset'
    return merge_value(v1, v2, precedence, cyclecheck, fset_path)
def merge_fdel(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{fdel} attribute: delegates to
    L{merge_value()}, extending C{path} with C{'.fdel'}.
    """
    fdel_path = path + '.fdel'
    return merge_value(v1, v2, precedence, cyclecheck, fdel_path)
def merge_proxy_for(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{proxy_for} attribute.

    Anything we got from introspection shouldn't have a C{proxy_for}
    attribute -- it should be the actual object's documentation -- so
    the first value is kept as it is and the second one is ignored.

    @return: C{v1}, unchanged.
    """
    return v1
def merge_bases(baselist1, baselist2, precedence, cyclecheck, path):
    """
    Merge two lists of base-class docs, position by position.

    Be careful here -- if we get it wrong, then we could end up
    merging two unrelated classes, which could lead to bad things
    (e.g., a class that's its own subclass).  So two bases are only
    merged if we're quite sure they're the same class: the lists must
    have the same length, and no pair of bases may have two known
    canonical names that differ.  If either check fails, nothing is
    merged, and the list selected by C{precedence} is returned
    untouched.

    @return: C{baselist1} with every entry replaced by the merged doc
        (C{baselist2} is updated the same way); or, if the lists
        could not be merged, C{baselist1} when C{precedence} is
        C{'introspect'} and C{baselist2} otherwise.
    """
    if precedence == 'introspect':
        fallback = baselist1
    else:
        fallback = baselist2

    # If the lengths don't match up, then give up.  This is most
    # often caused by __metaclass__.
    count1, count2 = len(baselist1), len(baselist2)
    if count1 != count2:
        log.info("Not merging the introspected & parsed base lists "
                 "for %s, since their lengths don't match (%s vs %s)" %
                 (path, count1, count2))
        return fallback

    pairs = list(zip(baselist1, baselist2))

    # If any names disagree, then give up.
    for base1, base2 in pairs:
        name1 = base1.canonical_name
        name2 = base2.canonical_name
        if name1 in (None, UNKNOWN) or name2 in (None, UNKNOWN):
            # Without two known names there is nothing to contradict.
            continue
        if name1 != name2:
            log.info("Not merging the parsed & introspected base "
                     "lists for %s, since the bases' names don't match "
                     "(%s vs %s)" % (path, name1, name2))
            return fallback

    # Every pair is compatible: merge them and share the results.
    for index, (base1, base2) in enumerate(pairs):
        merged = merge_docs(base1, base2, cyclecheck,
                            '%s.__bases__[%d]' % (path, index))
        baselist1[index] = baselist2[index] = merged

    return baselist1
def merge_posarg_defaults(defaults1, defaults2, precedence, cyclecheck, path):
    """
    Merge two lists of default values for positional arguments.

    If the lists differ in length they are not merged, and the list
    selected by C{precedence} is returned as it is.  Otherwise a new
    list is built: where both entries are not C{None} they are merged
    with L{merge_docs()}; where at least one entry is C{None}, the
    entry from the side selected by C{precedence} is used.

    @return: C{defaults1} or C{defaults2} if the lengths differ;
        otherwise a new list of merged defaults.
    """
    prefer_introspect = (precedence == 'introspect')

    if len(defaults1) != len(defaults2):
        if prefer_introspect:
            return defaults1
        return defaults2

    merged = []
    for index, pair in enumerate(zip(defaults1, defaults2)):
        default1, default2 = pair
        if default1 is None or default2 is None:
            # At least one side has no default here, so there is
            # nothing to merge; take the preferred side's entry.
            if prefer_introspect:
                chosen = default1
            else:
                chosen = default2
        else:
            default_path = '%s.<default-arg-val>[%d]' % (path, index)
            chosen = merge_docs(default1, default2, cyclecheck,
                                default_path)
        merged.append(chosen)
    return merged
- def merge_docstring(docstring1, docstring2, precedence, cyclecheck, path):
- if docstring1 is None or docstring1 is UNKNOWN or precedence=='parse':
- return docstring2
- else:
- return docstring1
def merge_docs_extracted_by(v1, v2, precedence, cyclecheck, path):
    """
    Merge function for the C{docs_extracted_by} attribute.  The
    arguments are ignored: the merged value is always C{'both'}.
    """
    merged_source = 'both'
    return merged_source
def merge_submodules(v1, v2, precedence, cyclecheck, path):
    """
    Merge two lists of submodule docs.

    The lists are compared by the sorted canonical names of their
    members.  If the names agree, or if C{v2} is empty, C{v1} is
    returned as it is.  Otherwise the disagreement is logged, and a
    new list is returned that consists of C{v1} followed by the
    members of C{v2} whose canonical name does not occur in C{v1}.

    @param precedence: Unused; accepted so that all merge functions
        share one signature.
    @param cyclecheck: Unused; accepted so that all merge functions
        share one signature.
    @param path: Name of the object being merged; only used in the
        log message.
    """
    names1 = sorted([module.canonical_name for module in v1])
    names2 = sorted([module.canonical_name for module in v2])

    if names1 == names2 or names2 == []:
        return v1

    log.info('Introspector & parser disagree about submodules '
             'for %s: (%s) vs (%s)' % (path,
                                       ', '.join(map(str, names1)),
                                       ', '.join(map(str, names2))))
    extras = []
    for module in v2:
        if module.canonical_name not in names1:
            extras.append(module)
    return v1 + extras
# Register the attribute-specific merge functions defined above.
# merge_attribute() consults _attribute_mergefunc_registry for these
# names; presumably register_attribute_mergefunc() is what fills that
# registry (its definition is not part of this section).
for _merge_attrib, _merge_handler in (
        ('variables', merge_variables),
        ('value', merge_value),
        ('overrides', merge_overrides),
        ('fget', merge_fget),
        ('fset', merge_fset),
        ('fdel', merge_fdel),
        ('proxy_for', merge_proxy_for),
        ('bases', merge_bases),
        ('posarg_defaults', merge_posarg_defaults),
        ('docstring', merge_docstring),
        ('docs_extracted_by', merge_docs_extracted_by),
        ('submodules', merge_submodules),
        ):
    register_attribute_mergefunc(_merge_attrib, _merge_handler)
# Don't leave the loop variables behind in the module namespace.
del _merge_attrib, _merge_handler
- ######################################################################
- ## Import Linking
- ######################################################################
def link_imports(val_doc, docindex):
    """
    Resolve the C{proxy_for} link of C{val_doc}, repeating for as long
    as C{val_doc.proxy_for} is neither C{UNKNOWN} nor C{None}.

    On each round, the doc that the proxy name refers to is looked up
    with C{docindex.get_valdoc()}:
      - If there is none, the proxy name is stored as C{val_doc}'s
        canonical name, and the function returns.
      - If it is a different doc, C{val_doc} is merged with it (after
        copying over any subclasses the target doesn't list yet, when
        both are class docs).
      - If it is C{val_doc} itself, the variable named by the proxy
        name is removed from its parent's C{variables}, and the
        C{proxy_for} link is cleared.

    @param val_doc: The value doc whose proxy link should be resolved.
    @param docindex: The object used to look docs up by name; it must
        provide C{get_valdoc()}.
    """
    while val_doc.proxy_for not in (UNKNOWN, None):
        proxy_name = val_doc.proxy_for

        # Find the valuedoc that the proxy_for name points to.
        target_doc = docindex.get_valdoc(proxy_name)

        # If we don't have any valuedoc at that address, then
        # set that address as its canonical name.
        # [XXX] Do I really want to do this?
        if target_doc is None:
            val_doc.canonical_name = proxy_name
            return

        if target_doc != val_doc:
            # We *do* have something at that address, so merge the
            # proxy `val_doc` with it.  First copy any subclass
            # information from val_doc->target_doc.
            both_classes = (isinstance(val_doc, ClassDoc) and
                            isinstance(target_doc, ClassDoc))
            if both_classes:
                for subclass in val_doc.subclasses:
                    if subclass not in target_doc.subclasses:
                        target_doc.subclasses.append(subclass)
            # Then overwrite val_doc with the contents of target_doc.
            target_doc.merge_and_overwrite(val_doc,
                                           ignore_hash_conflict=True)

        elif target_doc == val_doc:
            # The proxy_for link points back at the doc itself, so we
            # most likely have a variable that's shadowing a
            # submodule that it should be equal to.  So just get rid
            # of the variable.
            parent_name = proxy_name[:-1]
            var_name = proxy_name[-1]
            parent = docindex.get_valdoc(parent_name)
            if parent is not None and var_name in parent.variables:
                del parent.variables[var_name]
            target_doc.proxy_for = None
- ######################################################################
- ## Canonical Name Assignment
- ######################################################################
_name_scores = {}
"""A dictionary mapping from each C{ValueDoc} to the score that has
been assigned to its current canonical name.  If
L{assign_canonical_names()} finds a canonical name with a better
score, then it will replace the old name."""

_unreachable_names = {DottedName(DottedName.UNREACHABLE):1}
"""The set of names that have been used for unreachable objects.  This
is used to ensure there are no duplicate canonical names assigned.
C{_unreachable_names} is a dictionary mapping from dotted names to
integer ids, where the next unused unreachable name derived from
dotted name C{n} is
C{DottedName('%s-%s' % (n, str(_unreachable_names[n]+1)))}"""
- def assign_canonical_names(val_doc, name, docindex, score=0):
- """
- Assign a canonical name to C{val_doc} (if it doesn't have one
- already), and (recursively) to each variable in C{val_doc}.
- In particular, C{val_doc} will be assigned the canonical name
- C{name} iff either:
- - C{val_doc}'s canonical name is C{UNKNOWN}; or
- - C{val_doc}'s current canonical name was assigned by this
- method; but the score of the new name (C{score}) is higher
- than the score of the current name (C{score_dict[val_doc]}).
-
- Note that canonical names will even be assigned to values
- like integers and C{None}; but these should be harmless.
- """
- # If we've already visited this node, and our new score
- # doesn't beat our old score, then there's nothing more to do.
- # Note that since score increases strictly monotonically, this
- # also prevents us from going in cycles.
- if val_doc in _name_scores and score <= _name_scores[val_doc]:
- return
- # Update val_doc's canonical name, if appropriate.
- if (val_doc not in _name_scores and
- val_doc.canonical_name is not UNKNOWN):
- # If this is the first time we've seen val_doc, and it
- # already has a name, then don't change that name.
- _name_scores[val_doc] = sys.maxint
- name = val_doc.canonical_name
- score = 0
- else:
- # Otherwise, update the name iff the new score is better
- # than the old one.
- if (val_doc not in _name_scores or
- score > _name_scores[val_doc]):
- val_doc.canonical_name = name
- _name_scores[val_doc] = score
- # Recurse to any contained values.
- if isinstance(val_doc, NamespaceDoc):
- for var_doc in val_doc.variables.values():
- # Set the variable's canonical name.
- varname = DottedName(name, var_doc.name)
- var_doc.canonical_name = varname
- # If the value is unknown, or is a generic value doc, then
- # the valuedoc doesn't get assigned a name; move on.
- if (var_doc.value is UNKNOWN
- or isinstance(var_doc.value, GenericValueDoc)):
- continue
-
- # [XX] After svn commit 1644-1647, I'm not sure if this
- # ever gets used: This check is for cases like
- # curses.wrapper, where an imported variable shadows its
- # value's "real" location.
- if _var_shadows_self(var_doc, varname):
- _fix_self_shadowing_var(var_doc, varname, docindex)
-
- # Find the score for this new name.
- vardoc_score = score-1
- if var_doc.is_imported is UNKNOWN: vardoc_score -= 10
- elif var_doc.is_imported: vardoc_score -= 100
- if var_doc.is_alias is UNKNOWN: vardoc_score -= 10
- elif var_doc.is_alias: vardoc_score -= 1000
-
- assign_canonical_names(var_doc.value, varname,
- docindex, vardoc_score)
- # Recurse to any directly reachable values.
- for val_doc_2 in val_doc.apidoc_links(variables=False):
- val…
Large files files are truncated, but you can click here to view the full file