/fix_includes.py
Python | 1561 lines | 1371 code | 44 blank | 146 comment | 34 complexity | f7f006b0350e365fa636d054a706846a MD5 | raw file
Possible License(s): JSON
Large files files are truncated, but you can click here to view the full file
- #!/usr/bin/python
- ##===--- fix_includes.py - rewrite source files based on iwyu output ------===##
- #
- # The LLVM Compiler Infrastructure
- #
- # This file is distributed under the University of Illinois Open Source
- # License. See LICENSE.TXT for details.
- #
- ##===----------------------------------------------------------------------===##
- """Update files with the 'correct' #include and forward-declare lines.
- Given the output of include_what_you_use on stdin -- when run at the
- (default) --v=1 verbosity level or higher -- modify the files
- mentioned in the output, removing their old #include lines and
- replacing them with the lines given by the include_what_you_use
- script.
- We only edit files that are writeable (presumably open for p4 edit),
- unless the user supplies a command to make files writeable via the
- --checkout_command flag (eg '--checkout_command="p4 edit"').
- This script runs in four stages. In the first, it groups physical
- lines together to form 'move spans'. A 'move span' is the atomic unit
- for moving or deleting code. A move span is either a) an #include
- line, along with any comment lines immediately preceding it; b) a
- forward-declare line -- or more if it's a multi-line forward declare
- -- along with preceding comments; c) any other single line. Example:
- // I really am glad I'm forward-declaring this class!
- // If I didn't, I'd have to #include the entire world.
- template<typename A, typename B, typename C, typename D>
- class MyClass;
- Then, it groups move spans together into 'reorder spans'. These are
- spans of code that consist entirely of #includes and forward-declares,
- maybe separated by blank lines and comments. We assume that we can
- arbitrarily reorder #includes and forward-declares within a reorder
- span, without affecting correctness. Things like #ifdefs, #defines,
- namespace declarations, static variable declarations, class
- definitions, etc -- just about anything -- break up reorder spans.
- In stage 3 it deletes all #include and forward-declare lines that iwyu
- says to delete. iwyu includes line numbers for deletion, making this
- part easy. If this step results in "empty" #ifdefs or namespaces
- (#ifdefs or namespaces with no code inside them), we delete those as
- well. We recalculate the reorder spans, which may have gotten bigger
- due to the deleted code.
- In stage 4 it adds new iwyu-dictated #includes and forward-declares
- after the last existing #includes and forward-declares. Then it
- reorders the #includes and forward-declares to match the order
- specified by iwyu. It follows iwyu's instructions as much as
- possible, modulo the constraint that an #include or forward-declare
- cannot leave its current reorder span.
- All this moving messes up the blank lines, which we then need to fix
- up. Then we're done!
- """
# Author tag for the original upstream script.
__author__ = 'csilvers@google.com (Craig Silverstein)'
- import difflib
- import optparse
- import os
- import pipes # For (undocumented) pipes.quote
- import re
- import sys
- import subprocess
# Help text shown by optparse; optparse substitutes the program name for %prog.
_USAGE = """\
%prog [options] [filename] ... < <output from include-what-you-use script>
OR %prog -s [other options] <filename> ...
%prog reads the output from the include-what-you-use
script on stdin -- run with --v=1 (default) verbose or above -- and,
unless --sort_only or --dry_run is specified,
modifies the files mentioned in the output, removing their old
#include lines and replacing them with the lines given by the
include_what_you_use script. It also sorts the #include and
forward-declare lines.
Only writable files (those opened for p4 edit) are modified (unless
--checkout_command is specified). All files mentioned in the
include-what-you-use script are modified, unless filenames are
specified on the commandline, in which case only those files are
modified.
The exit code is the number of files that were modified (or that would
be modified if --dry_run was specified) unless that number exceeds 100,
in which case 100 is returned.
"""
# Matches a trailing C++-style comment (used to strip comments off a line).
_COMMENT_RE = re.compile(r'\s*//.*')

# These are the types of lines a file can have.  These are matched
# using re.match(), so don't need a leading ^.
_C_COMMENT_START_RE = re.compile(r'\s*/\*')
_C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$')
_COMMENT_LINE_RE = re.compile(r'\s*//')
_BLANK_LINE_RE = re.compile(r'\s*$')
_IF_RE = re.compile(r'\s*#\s*if')              # matches #if/#ifdef/#ifndef
_ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b')  # matches #else/#elif
_ENDIF_RE = re.compile(r'\s*#\s*endif\b')
# This is used to delete 'empty' namespaces after fwd-decls are removed.
# Some third-party libraries use macros to start/end namespaces.
_NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|'
                                 r'\s*(U_NAMESPACE_BEGIN)|'
                                 r'\s*(HASH_NAMESPACE_DECLARATION_START)')
_NAMESPACE_END_RE = re.compile(r'\s*(})|'
                               r'\s*(U_NAMESPACE_END)|'
                               r'\s*(HASH_NAMESPACE_DECLARATION_END)')
# The group (in parens) holds the unique 'key' identifying this #include.
_INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^"">]+[>"])')
# We don't need this to actually match forward-declare lines (we get
# that information from the iwyu input), but we do need an RE here to
# serve as an index to _LINE_TYPES.  So we use an RE that never matches.
_FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE')
# Likewise, used to mark an '#ifdef' line of a header guard, or other
# #ifdef that covers an entire file.  Assigned in
# _MarkHeaderGuardIfPresent(), never matched directly.
_HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')
# Marks the '#define' line that comes after a header guard.  Since we
# know the previous line was a header-guard line, we're not that picky
# about this one.
_HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')
# We annotate every line in the source file by the re it matches, or None.
# Note that not all of the above RE's are represented here; for instance,
# we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
_LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE,
               _NAMESPACE_START_RE, _NAMESPACE_END_RE,
               _IF_RE, _ELSE_RE, _ENDIF_RE,
               _INCLUDE_RE, _FORWARD_DECLARE_RE,
               _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
              ]
# A regexp matching #include lines that should be a barrier for
# sorting -- that is, we should never reorganize the code so an
# #include that used to come before this line now comes after, or vice
# versa.  This can be used for 'fragile' #includes that require other
# #includes to happen before them to function properly.
# (Note that the barrier has no effect on where new #includes are
# added; it just affects the reordering of existing #includes.)
_BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(<linux/)')
- def _MayBeHeaderFile(filename):
- """Tries to figure out if filename is a C++ header file. Defaults to yes."""
- # Header files have all sorts of extensions: .h, .hpp, .hxx, or no
- # extension at all. So we say everything is a header file unless it
- # has a known extension that's not.
- extension = os.path.splitext(filename)[1]
- return extension not in ('.c', '.cc', '.cxx', '.cpp', '.C', '.CC')
class FixIncludesError(Exception):
  """Raised on any unrecoverable problem parsing or applying iwyu output."""
  pass
class IWYUOutputRecord(object):
  """Information that the iwyu output file has about one source file."""

  def __init__(self, filename):
    self.filename = filename

    # Integer line numbers that iwyu says to delete.
    self.lines_to_delete = set()

    # Integer line numbers of #includes that iwyu annotated with a
    # line number.  Usually not an exhaustive list, which is fine: we
    # use it only to sanity-check our own analysis (each such line
    # must look like an #include to us too, or the iwyu data is likely
    # stale and we complain).  More entries are welcome, not required.
    self.some_include_lines = set()

    # Integer [start_line, end_line) pairs, one per forward-declare
    # iwyu saw in the source.  (iwyu won't report forward-declares
    # hidden inside '#if 0' or similar.)
    self.seen_forward_declare_lines = set()

    # Every line from iwyu's 'add' section.
    self.includes_and_forward_declares_to_add = set()

    # Maps an include filename (including the ""s or <>s) to the full
    # line iwyu printed for it, comments and all.  Holds both 'to-add'
    # and 'to-keep' #includes.  When flags.comments is False the
    # comments are stripped before the line is stored here.
    self.full_include_lines = {}

  def Merge(self, other):
    """Merges other with this one.  They must share a filename.

    Intended for when the iwyu input contains two records for the same
    file; we fold them into one.  We stay conservative: lines to add
    are unioned, lines to delete are intersected.

    Arguments:
      other: an IWYUOutputRecord to merge into this one.
        It must have the same value for filename that self does.
    """
    assert self.filename == other.filename, "Can't merge distinct files"
    self.lines_to_delete.intersection_update(other.lines_to_delete)
    self.some_include_lines.update(other.some_include_lines)
    self.seen_forward_declare_lines.update(other.seen_forward_declare_lines)
    self.includes_and_forward_declares_to_add.update(
        other.includes_and_forward_declares_to_add)
    self.full_include_lines.update(other.full_include_lines)

  def HasContentfulChanges(self):
    """Returns true iff this record has at least one add or delete."""
    return (self.includes_and_forward_declares_to_add or
            self.lines_to_delete)

  def __str__(self):
    return ('--- iwyu record ---\n FILENAME: %s\n LINES TO DELETE: %s\n'
            ' (SOME) INCLUDE LINES: %s\n (SOME) FWD-DECL LINES: %s\n'
            ' TO ADD: %s\n ALL INCLUDES: %s\n---\n'
            % (self.filename, self.lines_to_delete,
               self.some_include_lines, self.seen_forward_declare_lines,
               self.includes_and_forward_declares_to_add,
               self.full_include_lines))
class IWYUOutputParser(object):
  """Parses the lines in iwyu output corresponding to one source file."""

  # iwyu adds this comment to some lines to map them to the source file.
  _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')

  # The output of include-what-you-use has sections that indicate what
  # #includes and forward-declares should be added to the output file,
  # what should be removed, and what the end result is.  The first line
  # of each section also has the filename.
  _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
  _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
  _TOTAL_SECTION_RE = re.compile(r'^The full include-list for ([^:]*):$')
  _SECTION_END_RE = re.compile(r'^---$')

  # Alternately, if a file does not need any iwyu modifications (though
  # it still may need its #includes sorted), iwyu will emit this:
  _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')

  # Human-readable names for the section REs, used in error messages.
  _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
                 _REMOVE_SECTION_RE: 'remove',
                 _TOTAL_SECTION_RE: 'total',
                 _SECTION_END_RE: 'end',
                 _NO_EDITS_RE: 'no_edits',
                }

  # A small state-transition machine.  key==None indicates the start
  # state.  value==None means that the key is an end state (that is,
  # its presence indicates the record is finished).
  _EXPECTED_NEXT_RE = {
      None: frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
      _ADD_SECTION_RE: frozenset([_REMOVE_SECTION_RE]),
      _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
      _TOTAL_SECTION_RE: frozenset([_SECTION_END_RE]),
      _SECTION_END_RE: None,
      _NO_EDITS_RE: None,
  }

  def __init__(self):
    # This is set to one of the 'section' REs above.  None is the start-state.
    self.current_section = None
    self.filename = '<unknown file>'
    self.lines_by_section = {}    # key is a section RE, value is a list of lines

  def _ProcessOneLine(self, line):
    """Reads one line of input, updates self, and returns False at EORecord.

    If the line matches one of the hard-coded section names, updates
    self.filename and self.current_section.  Otherwise, the line is
    taken to be a member of the currently active section, and is added
    to self.lines_by_section.

    Arguments:
      line: one line from the iwyu input file.

    Returns:
      False if the line is the end-of-record marker, True otherwise.

    Raises:
      FixIncludesError: if there is an out-of-order section or
        mismatched filename.
    """
    line = line.rstrip()    # don't worry about line endings
    if not line:            # just ignore blank lines
      return True
    for (section_re, section_name) in self._RE_TO_NAME.iteritems():
      m = section_re.search(line)
      if m:
        # Check or set the filename (if the re has a group, it's for filename).
        if section_re.groups >= 1:
          this_filename = m.group(1)
          if (self.current_section is not None and
              this_filename != self.filename):
            raise FixIncludesError('"%s" section for %s comes after "%s" for %s'
                                   % (section_name, this_filename,
                                      self._RE_TO_NAME[self.current_section],
                                      self.filename))
          self.filename = this_filename
        # Check and set the new section we're entering.
        if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
          if self.current_section is None:
            raise FixIncludesError('%s: "%s" section unexpectedly comes first'
                                   % (self.filename, section_name))
          else:
            raise FixIncludesError('%s: "%s" section unexpectedly follows "%s"'
                                   % (self.filename, section_name,
                                      self._RE_TO_NAME[self.current_section]))
        self.current_section = section_re
        # We're done parsing this record if this section has nothing after it.
        return self._EXPECTED_NEXT_RE[self.current_section] is not None
    # We're not starting a new section, so just add to the current section.
    # We ignore lines before section-start, they're probably things like
    # compiler messages ("Compiling file foo").
    if self.current_section is not None:
      self.lines_by_section.setdefault(self.current_section, []).append(line)
    return True

  def ParseOneRecord(self, iwyu_output, flags):
    """Given a file object with output from an iwyu run, return per file info.

    For each source file that iwyu_output mentions (because iwyu was run on
    it), we return a structure holding the information in IWYUOutputRecord:
    1) What file these changes apply to
    2) What line numbers hold includes/fwd-declares to remove
    3) What includes/fwd-declares to add
    4) Ordering information for includes and fwd-declares

    Arguments:
      iwyu_output: a File object returning lines from an iwyu run
      flags: commandline flags, as parsed by optparse.  We use
         flags.comments, which controls whether we output comments
         generated by iwyu.

    Returns:
      An IWYUOutputRecord object, or None at EOF.

    Raises:
      FixIncludesError: for malformed-looking lines in the iwyu output.
    """
    for line in iwyu_output:
      if not self._ProcessOneLine(line):   # returns False at end-of-record
        break
    else:    # for/else: the for loop ran off the end of the input
      return None    # at EOF

    # Now set up all the fields in an IWYUOutputRecord.
    # IWYUOutputRecord.filename
    retval = IWYUOutputRecord(self.filename)

    # IWYUOutputRecord.lines_to_delete
    for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if not m:
        raise FixIncludesError('line "%s" (for %s) has no line number'
                               % (line, self.filename))
      # The RE is of the form [start_line, end_line], inclusive.
      for line_number in xrange(int(m.group(1)), int(m.group(2)) + 1):
        retval.lines_to_delete.add(line_number)

    # IWYUOutputRecord.some_include_lines
    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
      if not _INCLUDE_RE.match(line):
        continue
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if not m:
        continue   # not all #include lines have line numbers, but some do
      for line_number in xrange(int(m.group(1)), int(m.group(2)) + 1):
        retval.some_include_lines.add(line_number)

    # IWYUOutputRecord.seen_forward_declare_lines
    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
      # Everything that's not an #include is a forward-declare.
      if line.startswith('- '):    # the 'remove' lines all start with '- '.
        line = line[len('- '):]
      if _INCLUDE_RE.match(line):
        continue
      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
      if m:
        # Stored as a half-open [start_line, end_line) span.
        retval.seen_forward_declare_lines.add((int(m.group(1)),
                                               int(m.group(2))+1))

    # IWYUOutputRecord.includes_and_forward_declares_to_add
    for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
      line = _COMMENT_RE.sub('', line)
      retval.includes_and_forward_declares_to_add.add(line)

    # IWYUOutputRecord.full_include_lines
    for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
      m = _INCLUDE_RE.match(line)
      if m:
        if not flags.comments:
          line = _COMMENT_RE.sub('', line)  # pretend there were no comments
        else:
          # Just remove '// line XX': that's iwyu metadata, not a real comment
          line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
        retval.full_include_lines[m.group(1)] = line

    return retval
class LineInfo(object):
  """Information about a single line of a source file."""

  def __init__(self, line):
    """Initializes the content of the line, but no ancillary fields."""
    # The raw text of this line in the input file.
    self.line = line
    # One of the regular-expression objects in _LINE_TYPES, or None
    # when no regexp in _LINE_TYPES matches this line.
    self.type = None
    # True when no earlier line in the file shares this line's type.
    self.is_first_line_of_this_type = False
    # True when this line should be dropped from the output (for
    # instance, because iwyu says to delete it).  Initially only the
    # 'dummy' line 0 -- whose text is None -- is marked deleted.
    self.deleted = self.line is None
    # For #include and forward-declare lines, [begin, end) pairs
    # naming the spans this line belongs to: the move span (the
    # #include or forward-declare plus its preceding comments) and the
    # reorder span (a contiguous block of move spans, joined only by
    # blank lines and comments).  Arbitrary for all other lines.
    self.move_span = None
    self.reorder_span = None
    # The line's 'key': for an #include, the included filename
    # (including the ""s or <>s); for a forward-declare, the
    # class/struct name; None for everything else.
    self.key = None

  def __str__(self):
    line = ('XX-%s-XX' % self.line) if self.deleted else ('>>>%s<<<' % self.line)
    type_id = None if self.type is None else _LINE_TYPES.index(self.type)
    return ('%s\n -- type: %s (key: %s). move_span: %s. reorder_span: %s'
            % (line, type_id, self.key, self.move_span, self.reorder_span))
- def _ReadFile(filename):
- """Read from filename and return a list of file lines."""
- try:
- return open(filename).read().splitlines()
- except (IOError, OSError), why:
- print "Skipping '%s': %s" % (filename, why)
- return None
- def _ReadWriteableFile(filename, ignore_writeable):
- """Read from filename and return a list of file lines.
- Given a filename, if the file is found and is writable, read
- the file contents and return it as a list of lines (newlines
- removed). If the file is not found or is not writable, or if
- there is another IO error, return None.
- Arguments:
- filename: the name of the file to read.
- ignore_writeable: if True, don't check whether the file is writeable;
- return the contents anyway.
- Returns:
- A list of lines (without trailing newline) from filename, or None
- if the file is not writable, or cannot be read.
- """
- if os.access(filename, os.W_OK) or ignore_writeable:
- return _ReadFile(filename)
- return None
- def _WriteFileContentsToFileObject(f, file_lines):
- """Write the given file-lines to the file."""
- f.write('\n'.join(file_lines))
- f.write('\n')
- def _WriteFileContents(filename, file_lines):
- """Write the given file-lines to the file."""
- try:
- f = open(filename, 'w')
- try:
- _WriteFileContentsToFileObject(f, file_lines)
- finally:
- f.close()
- except (IOError, OSError), why:
- print "Error writing '%s': %s" % (filename, why)
- def _CreateCommandLine(command, args):
- """Join the command with the args in a shell-quoted way."""
- ret = '%s %s' % (command, ' '.join(map(pipes.quote, args)))
- print 'Running:', ret
- return ret
def _GetCommandOutputLines(command, args):
  """Return an iterable over the output lines of the given shell command."""
  # _CreateCommandLine shell-quotes args, so shell=True is safe here.
  child = subprocess.Popen(_CreateCommandLine(command, args),
                           shell=True, stdout=subprocess.PIPE)
  return child.stdout
- def _RunCommand(command, args):
- """Run the given shell command."""
- for line in _GetCommandOutputLines(command, args):
- print line,
- def _GetCommandOutputWithInput(command, stdin_text):
- """Return the output of the given command fed the stdin_text."""
- print command
- proc = subprocess.Popen(command,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- shell=True)
- return proc.communicate(input=stdin_text)[0]
- def PrintFileDiff(old_file_contents, new_file_contents):
- """Print a unified diff between files, specified as lists of lines."""
- diff = difflib.unified_diff(old_file_contents, new_file_contents)
- # skip the '--- <filename>/+++ <filename>' lines at the start
- try:
- diff.next()
- diff.next()
- print '\n'.join(diff)
- except StopIteration:
- pass
def _MarkHeaderGuardIfPresent(file_lines):
  """If any line in file_lines is a header-guard, mark it in file_lines.

  We define a header-guard as follows: an #ifdef where there is
  nothing contentful before or after the #ifdef.  Also, the #ifdef
  should have no #elif in it (though we don't currently test that).
  This catches the common case of an 'ifdef guard' in .h file, such
  as '#ifndef FOO_H\n#define FOO_H\n...contents...\n#endif', but it
  can also catch other whole-program #ifdefs, such as
  '#ifdef __linux\n...\n#endif'.  The issue here is that if an #ifdef
  encloses the entire file, then we are willing to put new
  #includes/fwd-declares inside the #ifdef (which normally we
  wouldn't do).  So we want to mark such #ifdefs with a special label.

  If we find such an #ifdef line -- and a single file can have at most
  one -- we change its type to a special type for header guards.

  Arguments:
    file_lines: an array of LineInfo objects with .type filled in.
  """
  # Pass over blank lines or comments at the top of the file.
  i = 0
  for i in xrange(len(file_lines)):
    if (not file_lines[i].deleted and
        file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
      break
  else:   # for/else: got to EOF without finding any non-blank/comment lines
    return

  # This next line is the candidate header guard-line.
  ifdef_start = i
  if file_lines[ifdef_start].type != _IF_RE:
    # Not a header guard, just return without doing anything.
    return

  # Find the end of this ifdef, to see if it's really a header guard.
  ifdef_depth = 0
  for ifdef_end in xrange(ifdef_start, len(file_lines)):
    if file_lines[ifdef_end].deleted:
      continue
    if file_lines[ifdef_end].type == _IF_RE:
      ifdef_depth += 1
    elif file_lines[ifdef_end].type == _ENDIF_RE:
      ifdef_depth -= 1
      if ifdef_depth == 0:   # The end of our #ifdef!
        break
  else:   # for/else
    # Weird: never found a close to this #ifdef.  (This used to
    # 'return False', inconsistent with every other exit path; the
    # return value is never used, so just return None like the rest.)
    return

  # Finally, all the lines after the end of the ifdef must be blank or comments.
  for i in xrange(ifdef_end + 1, len(file_lines)):
    if (not file_lines[i].deleted and
        file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
      return

  # We passed the gauntlet!
  file_lines[ifdef_start].type = _HEADER_GUARD_RE

  # And the line after the header guard #ifdef is the '#define' (usually).
  if _HEADER_GUARD_DEFINE_RE.match(file_lines[ifdef_start + 1].line):
    file_lines[ifdef_start+1].type = _HEADER_GUARD_DEFINE_RE
def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
  """Fills file_line's type and key fields, where the 'type' is a regexp object.

  We match each line (line_info.line) against every regexp in
  _LINE_TYPES, and assign the first that matches, or None if none
  does.  We also use iwyu_record's some_include_lines and
  seen_forward_declare_lines to identify those lines.  In fact,
  that's the only data source we use for forward-declare lines.

  Sets file_line.type and file_line.is_first_line_of_this_type for
  each file_line in file_lines.

  Arguments:
    file_lines: an array of LineInfo objects with .line fields filled in.
    iwyu_record: the IWYUOutputRecord struct for this source file.

  Raises:
    FixIncludesError: if iwyu_record's line-number information is
      inconsistent with what we see in the file.  (For instance,
      it says line 12 is an #include, but we say it's a blank line,
      or the file only has 11 lines.)
  """
  seen_types = set()
  in_c_style_comment = False
  for line_info in file_lines:
    if line_info.line is None:    # the dummy line 0 has no type
      line_info.type = None
    elif _C_COMMENT_START_RE.match(line_info.line):
      # Note: _C_COMMENT_START_RE only matches a comment at the start
      # of a line.  Comments in the middle of a line are ignored.
      # This can cause problems with multi-line comments that start
      # in the middle of the line, but that's hopefully quite rare.
      # TODO(csilvers): check for that case.
      m = _C_COMMENT_END_RE.match(line_info.line)
      if not m:                # comment continues onto future lines
        line_info.type = _COMMENT_LINE_RE
        in_c_style_comment = True
      elif not m.group(1):     # comment extends across entire line (only)
        line_info.type = _COMMENT_LINE_RE
      else:                    # comment takes only part of line, treat as content
        # TODO(csilvers): this mis-diagnoses lines like '/*comment*/class Foo;'
        line_info.type = None
    elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
      line_info.type = _COMMENT_LINE_RE
      in_c_style_comment = False
    elif in_c_style_comment:
      line_info.type = _COMMENT_LINE_RE
    else:
      for type_re in _LINE_TYPES:
        # header-guard-define-re has a two-part decision criterion: it
        # matches the RE, *and* it comes after a header guard line.
        # That's too complex to figure out now, so we skip over it now
        # and fix it up later in _MarkHeaderGuardIfPresent().
        if type_re in (_HEADER_GUARD_DEFINE_RE,):
          continue
        m = type_re.match(line_info.line)
        if m:
          line_info.type = type_re
          if type_re == _INCLUDE_RE:
            line_info.key = m.group(1)    # get the 'key' for the #include.
          break
      else:    # for/else
        line_info.type = None    # means we didn't match any re
    line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
    seen_types.add(line_info.type)

  # Now double-check against iwyu that we got all the #include lines right.
  for line_number in iwyu_record.some_include_lines:
    if file_lines[line_number].type != _INCLUDE_RE:
      raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
                             % (iwyu_record.filename, line_number,
                                file_lines[line_number].line))

  # We depend entirely on the iwyu_record for the forward-declare lines.
  for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
    for line_number in xrange(start_line, end_line):
      if line_number >= len(file_lines):
        raise FixIncludesError('iwyu line number %s:%d is past file-end'
                               % (iwyu_record.filename, line_number))
      file_lines[line_number].type = _FORWARD_DECLARE_RE

  # While we're at it, let's do a bit more sanity checking on iwyu_record.
  for line_number in iwyu_record.lines_to_delete:
    if line_number >= len(file_lines):
      raise FixIncludesError('iwyu line number %s:%d is past file-end'
                             % (iwyu_record.filename, line_number))
    elif file_lines[line_number].type not in (_INCLUDE_RE,
                                              _FORWARD_DECLARE_RE):
      raise FixIncludesError('iwyu line number %s:%d (%s) is not'
                             ' an #include or forward declare'
                             % (iwyu_record.filename, line_number,
                                file_lines[line_number].line))

  # Check if this file has a header guard, which for our purposes is
  # an #ifdef (or #if) that covers an entire source file.  Usually
  # this will be a standard .h header-guard, but it could be something
  # like '#if __linux/#endif'.  The point here is that if an #ifdef
  # encloses the entire file, then we are willing to put new
  # #includes/fwd-declares inside the #ifdef (which normally we
  # wouldn't do).  So we mark such #ifdefs with a special label.
  _MarkHeaderGuardIfPresent(file_lines)
def _PreviousNondeletedLine(file_lines, line_number):
  """Returns the line number of the previous not-deleted line, or None."""
  candidate = line_number - 1
  while candidate >= 0:
    if not file_lines[candidate].deleted:
      return candidate
    candidate -= 1
  return None
def _NextNondeletedLine(file_lines, line_number):
  """Returns the line number of the next not-deleted line, or None."""
  candidate = line_number + 1
  num_lines = len(file_lines)
  while candidate < num_lines:
    if not file_lines[candidate].deleted:
      return candidate
    candidate += 1
  return None
- def _LineNumberStartingPrecedingComments(file_lines, line_number):
- """Returns the line-number for the comment-lines preceding the given linenum.
- Looking at file_lines, look at the lines immediately preceding the
- given line-number. If they're comment lines, return the first line
- of the comment lines preceding the given line. Otherwise, return
- the given line number.
- As a special case, if the comments go all the way up to the first
- line of the file (line 1), we assume they're comment lines, which
- are special -- they're not associated with any source code line --
- and we return line_number in that case.
- Arguments:
- file_lines: an array of LineInfo objects, with .type fields filled in.
- line_number: an index into file_lines.
- Returns:
- The first line number of the preceding comments, or line_number
- if there are no preceding comments or they appear to be a
- top-of-file copyright notice.
- """
- retval = line_number
- while retval > 0 and file_lines[retval - 1].type == _COMMENT_LINE_RE:
- retval -= 1
- if retval <= 1: # top-of-line comments
- retval = line_number # so ignore all the comment lines
- return retval
def _CalculateMoveSpans(file_lines, forward_declare_spans):
  """Fills each input_line's move_span field.

  A 'move span' is a range of lines (from file_lines) that includes
  an #include or forward-declare, and all the comments preceding it.
  It is the unit we would move if we decided to move (or delete) this
  #include or forward-declare.

  For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the move span
  is set to the tuple [start_of_span, end_of_span).  All other lines
  have the move span kept at None.

  Arguments:
    file_lines: an array of LineInfo objects, with .type fields filled in.
    forward_declare_spans: a set of line-number pairs
       [start_line, end_line), each representing a single namespace.
       In practice this comes from iwyu_record.seen_forward_declare_lines.
  """
  # #includes first: each one's span starts at its preceding comments.
  for (line_number, line_info) in enumerate(file_lines):
    if line_info.type == _INCLUDE_RE:
      span_begin = _LineNumberStartingPrecedingComments(file_lines, line_number)
      include_span = (span_begin, line_number + 1)
      for i in xrange(span_begin, line_number + 1):
        file_lines[i].move_span = include_span
  # Forward-declares next: their spans are handed to us by iwyu, but
  # we still widen them to cover preceding comments.
  for (span_begin, span_end) in forward_declare_spans:
    span_begin = _LineNumberStartingPrecedingComments(file_lines, span_begin)
    fwd_decl_span = (span_begin, span_end)
    for i in xrange(span_begin, span_end):
      file_lines[i].move_span = fwd_decl_span
def _ContainsBarrierInclude(file_lines, line_range):
  """Returns true iff some line in [line_range[0], line_range[1]) is BARRIER.

  Arguments:
    file_lines: an array of LineInfo objects.
    line_range: a (start_line, end_line) pair; end_line is exclusive.
  """
  # The deprecated apply() builtin (apply(xrange, line_range)) is
  # replaced by unpacking the pair directly.
  (start_line, end_line) = line_range
  for line_number in xrange(start_line, end_line):
    if (not file_lines[line_number].deleted and
        _BARRIER_INCLUDES.search(file_lines[line_number].line)):
      return True
  return False
def _LinesAreAllBlank(file_lines, start_line, end_line):
  """Returns true iff every line in [start_line, end_line) is deleted/blank."""
  # A line 'counts' as blank if it has been deleted or was classified as
  # a blank line; any other line makes the whole range non-blank.
  return all(file_lines[i].deleted or file_lines[i].type == _BLANK_LINE_RE
             for i in range(start_line, end_line))
def _CalculateReorderSpans(file_lines):
  """Fills each input_line's reorder_span field.

  A 'reorder span' is a maximal run of lines containing nothing but
  #includes, forward-declares, and the blank lines and comments that
  accompany them: no functions, no variable assignments, no macro
  #defines, no other 'real code'.  Within such a span we are free to
  rearrange #includes and forward-declares at will.

  Since move spans already obey the 'no actual code' rule, a reorder
  span is simply the union of consecutive move spans that are separated
  only by blank lines.  There is one exception: a move span matching
  the _BARRIER_INCLUDES regexp acts as a fence -- nothing may be
  reordered from one side of it to the other (used for #includes that
  must stay after other #includes to work).  Such a move span is placed
  in a reorder span all by itself.

  Lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE get reorder_span set
  to the covering [start_of_span, end_of_span) tuple; all other lines
  end up with an arbitrary value there.

  Arguments:
    file_lines: an array of LineInfo objects with .type and .move_span
      fields filled in.
  """
  # Move spans never overlap, so sorting the de-duplicated set yields
  # them in file order.
  unique_spans = sorted(set(s.move_span for s in file_lines
                            if s.move_span is not None))
  num_spans = len(unique_spans)
  index = 0
  while index < num_spans:
    span_start = unique_spans[index][0]
    # A barrier include always stands alone.  Any other move span
    # absorbs each following move span that is reachable across blank
    # lines only and is itself not a barrier.
    if not _ContainsBarrierInclude(file_lines, unique_spans[index]):
      while index + 1 < num_spans:
        gap_begin = unique_spans[index][1]
        gap_end = unique_spans[index + 1][0]
        if (_LinesAreAllBlank(file_lines, gap_begin, gap_end)
            and not _ContainsBarrierInclude(file_lines,
                                            unique_spans[index + 1])):
          index += 1
        else:
          break
    span_end = unique_spans[index][1]
    # Stamp the full extent onto every line the reorder span covers.
    for line_number in range(span_start, span_end):
      file_lines[line_number].reorder_span = (span_start, span_end)
    index += 1
def ParseOneFile(f, iwyu_record):
  """Given a file object, read and classify the lines of the file.

  For each file that iwyu_output mentions, we return a list of LineInfo
  objects, which is a parsed version of each line, including not only
  its content but its 'type', its 'key', etc.

  Arguments:
    f: an iterable object returning lines from a file.
    iwyu_record: the IWYUOutputRecord struct for this source file.

  Returns:
    An array of LineInfo objects.  Element 0 is always a dummy, so the
    file's first line lives at retval[1] -- matching iwyu's 1-based
    line numbering.
  """
  # The leading LineInfo(None) is the 1-based-numbering placeholder.
  file_lines = [LineInfo(None)]
  file_lines.extend(LineInfo(line) for line in f)
  # Classify each line, then compute the move and reorder spans that
  # later passes use to shuffle #includes and forward-declares around.
  _CalculateLineTypesAndKeys(file_lines, iwyu_record)
  _CalculateMoveSpans(file_lines, iwyu_record.seen_forward_declare_lines)
  _CalculateReorderSpans(file_lines)
  return file_lines
def _DeleteEmptyNamespaces(file_lines):
  """Delete namespaces with nothing in them.

  Empty namespaces could be caused by transformations that removed
  forward-declarations:
        namespace foo {
        class Myclass;
        }
     ->
        namespace foo {
        }
  We want to get rid of the 'empty' namespace in this case.

  This routine 'deletes' lines by setting their 'deleted' field to True.

  Arguments:
    file_lines: an array of LineInfo objects with .type fields filled in.

  Returns:
    The number of namespaces deleted.
  """
  num_namespaces_deleted = 0
  start_line = 0
  # Outer loop: scan for a namespace-start line, then walk forward to
  # decide whether everything up to the matching close is ignorable.
  while start_line < len(file_lines):
    line_info = file_lines[start_line]
    if line_info.deleted or line_info.type != _NAMESPACE_START_RE:
      start_line += 1
      continue
    # Because multiple namespaces can be on one line
    # ("namespace foo { namespace bar { ..."), we need to count.
    # We use the max because line may have 0 '{'s if it's a macro.
    # TODO(csilvers): ignore { in comments.
    namespace_depth = max(line_info.line.count('{'), 1)
    end_line = start_line + 1
    while end_line < len(file_lines):
      line_info = file_lines[end_line]
      if line_info.deleted:
        end_line += 1
      elif line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE):
        end_line += 1                # ignore blank lines
      elif line_info.type == _NAMESPACE_START_RE:     # nested namespace
        namespace_depth += max(line_info.line.count('{'), 1)
        end_line += 1
      elif line_info.type == _NAMESPACE_END_RE:
        namespace_depth -= max(line_info.line.count('}'), 1)
        end_line += 1
        if namespace_depth <= 0:
          # Everything from start_line to here was deleted/blank/comment,
          # so the whole namespace region can go.
          # Delete any comments preceding this namespace as well.
          start_line = _LineNumberStartingPrecedingComments(file_lines,
                                                            start_line)
          # And also blank lines.
          while (start_line > 0 and
                 file_lines[start_line-1].type == _BLANK_LINE_RE):
            start_line -= 1
          for line_number in xrange(start_line, end_line):
            file_lines[line_number].deleted = True
          num_namespaces_deleted += 1
          break
      else:   # bail: we're at a line indicating this isn't an empty namespace
        end_line = start_line + 1  # rewind to try again with nested namespaces
        break
    # Resume the outer scan right after the region we just examined.
    # After a bail, end_line was rewound to start_line + 1, so any
    # nested namespace-start lines get their own chance.
    start_line = end_line
  return num_namespaces_deleted
def _DeleteEmptyIfdefs(file_lines):
  """Deletes ifdefs with nothing in them.

  This could be caused by transformations that removed #includes:
        #ifdef OS_WINDOWS
        # include <windows.h>
        #endif
     ->
        #ifdef OS_WINDOWS
        #endif
  We want to get rid of the 'empty' #ifdef in this case.
  We also handle 'empty' #ifdefs with #else, if both sides of
  the #else are empty.  We also handle #ifndef and #if.

  This routine 'deletes' lines by setting their 'deleted' field to True.

  Arguments:
    file_lines: an array of LineInfo objects with .type fields filled in.

  Returns:
    The number of ifdefs deleted.
  """
  num_ifdefs_deleted = 0
  start_line = 0
  # Scan for an #if/#ifdef (including a header guard), then walk forward
  # to see whether only ignorable lines separate it from its #endif.
  while start_line < len(file_lines):
    if file_lines[start_line].type not in (_IF_RE, _HEADER_GUARD_RE):
      start_line += 1
      continue
    end_line = start_line + 1
    while end_line < len(file_lines):
      line_info = file_lines[end_line]
      if line_info.deleted:
        end_line += 1
      elif line_info.type in (_ELSE_RE, _COMMENT_LINE_RE, _BLANK_LINE_RE):
        end_line += 1                # ignore blank lines and bare #else
      elif line_info.type == _ENDIF_RE:
        end_line += 1
        # Only deleted/blank/comment/#else lines were seen between the
        # #if and this #endif, so the whole conditional block can go.
        # Delete any comments preceding this #ifdef as well.
        start_line = _LineNumberStartingPrecedingComments(file_lines,
                                                          start_line)
        # And also blank lines.
        while (start_line > 0 and
               file_lines[start_line-1].type == _BLANK_LINE_RE):
          start_line -= 1
        for line_number in xrange(start_line, end_line):
          file_lines[line_number].deleted = True
        num_ifdefs_deleted += 1
        break
      else:   # bail: we're at a line indicating this isn't an empty ifdef
        end_line = start_line + 1  # rewind to try again with nested #ifdefs
        break
    # Resume the outer scan right after the region we just examined.
    start_line = end_line
  return num_ifdefs_deleted
def _DeleteDuplicateLines(file_lines, line_ranges):
  """Goes through all lines in line_ranges, and if any are dups, deletes them.

  For all lines in line_ranges, if any is the same as a previously
  seen line, set its deleted bit to True.  The purpose of line_ranges
  is to avoid lines in #ifdefs and namespaces, that may be identical
  syntactically but have different semantics.  Ideally, line_ranges
  should include only 'top-level' lines.

  We ignore lines that consist only of comments (or are blank).  We
  ignore end-of-line comments when comparing lines for equality.
  NOTE: Because our comment-finding RE is primitive, it's best if
  line_ranges covers only #include and forward-declare lines.  In
  particular, it should not cover lines that may have C literal
  strings in them.

  Arguments:
    file_lines: an array of LineInfo objects.
    line_ranges: a list of [start_line, end_line) pairs.
  """
  seen_lines = set()
  for line_range in line_ranges:
    # range(*line_range) replaces the original apply(xrange, line_range):
    # apply() has been deprecated since Python 2.3 and was removed in
    # Python 3; argument unpacking is the direct equivalent.
    for line_number in range(*line_range):
      if file_lines[line_number].type in (_BLANK_LINE_RE, _COMMENT_LINE_RE):
        continue
      # Strip the trailing comment so '#include <x.h>  // for X'
      # compares equal to a bare '#include <x.h>'.
      uncommented_line = _COMMENT_RE.sub('', file_lines[line_number].line)
      if uncommented_line in seen_lines:
        file_lines[line_number].deleted = True
      elif not file_lines[line_number].deleted:
        # Only a still-live line may serve as the 'original' that later
        # duplicates are compared against.
        seen_lines.add(uncommented_line)
def _DeleteExtraneousBlankLines(file_lines, line_range):
  """Deletes extraneous blank lines caused by line deletion.

  Here's an example file:
     class Foo { ... };

     class Bar;

     class Baz { ... }

  If we delete the "class Bar;" line, we also want to delete one of
  the blank lines around it, otherwise we leave two blank lines
  between Foo and Baz which looks bad.  The idea is that if we have
  whitespace on both sides of a deleted span of code, the whitespace
  on one of the sides is 'extraneous'.  In this case, we should delete
  not only 'class Bar;' but also the whitespace line below it.  That
  leaves one blank line between Foo and Bar, like people would expect.

  We're careful to only delete the minimum of the number of blank
  lines that show up on either side.  If 'class Bar' had one blank
  line before it, and one hundred after it, we'd only delete one blank
  line when we delete 'class Bar'.  This matches users' expectations.

  The situation can get tricky when two deleted spans touch (we might
  think it's safe to delete the whitespace between them when it's
  not).  To be safe, we only do this check when an entire reorder-span
  has been deleted.  So we check the given line_range, and only do
  blank-line deletion if every line in the range is deleted.

  Arguments:
    file_lines: an array of LineInfo objects, with .type filled in.
    line_range: a range [start_line, end_line).  It should correspond
      to a reorder-span.
  """
  # First make sure the entire span is deleted; if any line survived,
  # the surrounding blank lines still separate real content.
  # range(*line_range) replaces the original apply(xrange, line_range):
  # apply() has been deprecated since Python 2.3 and was removed in
  # Python 3; argument unpacking is the direct equivalent.
  for line_number in range(*line_range):
    if not file_lines[line_number].deleted:
      return
  before_line = _PreviousNondeletedLine(file_lines, line_range[0])
  after_line = _NextNondeletedLine(file_lines, line_range[1] - 1)
  while (before_line and file_lines[before_line].type == _BLANK_LINE_RE and
         after_line and file_lines[after_line].type == _BLANK_LINE_RE):
    # OK, we've got whitespace on both sides of a deleted span.  We
    # only want to keep whitespace on one side, so delete on the other.
    file_lines[after_line].deleted = True
    before_line = _PreviousNondeletedLine(file_lines, before_line)
    after_line = _NextNondeletedLine(file_lines, after_line)
def _ShouldInsertBlankLine(decorated_move_span, next_decorated_move_span,
                           file_lines, flags):
  """Returns true iff we should insert a blank line between the two spans.

  Given two decorated move-spans, of the form
     (reorder_range, kind, noncomment_lines, all_lines)
  returns true if we should insert a blank line between them.  We
  always put a blank line when transitioning from an #include to a
  forward-declare and back.  When the appropriate commandline flag is
  set, we also put a blank line between the 'main' includes (foo.h)
  and the C/C++ system includes, and another between the system
  includes and the rest of the Google includes.

  If the two move spans are in different reorder_ranges, that means
  the first move_span is at the end of a reorder range.  In that case,
  a different rule for blank lines applies: if the next line is
  contentful (eg 'static int x = 5;'), or a namespace start, we want
  to insert a blank line to separate the move-span from the next
  block.  When figuring out if the next line is contentful, we skip
  over comments.

  Arguments:
    decorated_move_span: a decorated_move_span we may want to put a blank
      line after.
    next_decorated_move_span: the next decorated_move_span, which may
      be a sentinel decorated_move_span at end-of-file.
    file_lines: an array of LineInfo objects with .deleted filled in.
    flags: commandline flags, as parsed by optparse.  We use
      flags.blank_lines, which controls whether we put blank
      lines between different 'kinds' of #includes.

  Returns:
    true if we should insert a blank line after decorated_move_span.
    NOTE(review): the early return below yields the raw short-circuit
    value (which may be None/0 rather than a bool); callers presumably
    only test truthiness -- confirm before tightening to bool().
  """
  # First handle the 'at the end of a reorder range' case.
  if decorated_move_span[0] != next_decorated_move_span[0]:
    next_line = _NextNondeletedLine(file_lines, decorated_move_span[0][1] - 1)
    # Skip over comments to figure out if the next line is contentful.
    # (next_line is falsy when no non-deleted line follows -- the 'and'
    # chains below guard on that before indexing file_lines.)
    while (next_line and next_line < len(file_lines) and
           file_lines[next_line].type == _COMMENT_LINE_RE):
      next_line += 1
    # A type of None marks a line our classifier didn't recognize,
    # i.e. 'real code', which should be set off by a blank line.
    return (next_line and next_line < len(file_lines) and
            file_lines[next_line].type in (_NAMESPACE_START_RE, None))
  # We never insert a blank line between two spans of the same kind.
  # Nor do we ever insert a blank line at EOF.
  (this_kind, next_kind) = (decorated_move_span[1], next_decorated_move_span[1])
  if this_kind == next_kind or next_kind == _EOF_KIND:
    return False
  # We also never insert a blank line between C and C++-style #includes,
  # no matter what the flag value.
  if (this_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND] and
      next_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND]):
    return False
  # Handle the case we're going from an include to fwd declare or
  # back.  If we get here, we can't both be fwd-declares, so it
  # suffices to check if either of us is.
  if this_kind == _FORWARD_DECLARE_KIND or next_kind == _FORWARD_DECLARE_KIND:
    return True
  # Now, depending on the flag, we insert a blank line whenever the
  # kind changes (we handled the one case where a changing kind
  # doesn't introduce a blank line, above).
  if flags.blank_lines:
    return this_kind != next_kind
  return False
- def _GetToplevelReorderSpans(file_lines):
- """Returns a sorted list of all reorder_spans not inside an #ifdef/namespace.
- This routine looks at all the reorder_spans in file_lines, ignores
- reorder spans inside #ifdefs and namespaces -- except for the 'header
- guard' ifdef that encapsulates an entire .h file -- and returns the
- rest in sorted order.
- Arguments:
- file_lines: an array of LineInfo objects with .type and
- .reorder_span filled in.
- Returns:
- A list of [start_line, end_line) reorder_spans.
- """
- in_ifdef = [False] * len(file_lines) # lines insid…
Large files files are truncated, but you can click here to view the full file