/hooks/webkitpy/style/checkers/cpp.py
Python | 3580 lines | 2922 code | 212 blank | 446 comment | 298 complexity | 265650f2e883796370c6cad485c6051a MD5 | raw file
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- #
- # Copyright (C) 2009, 2010 Google Inc. All rights reserved.
- # Copyright (C) 2009 Torch Mobile Inc.
- # Copyright (C) 2009 Apple Inc. All rights reserved.
- # Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met:
- #
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above
- # copyright notice, this list of conditions and the following disclaimer
- # in the documentation and/or other materials provided with the
- # distribution.
- # * Neither the name of Google Inc. nor the names of its
- # contributors may be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- # This is the modified version of Google's cpplint. The original code is
- # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
- """Support for check-webkit-style."""
- import codecs
- import math # for log
- import os
- import os.path
- import re
- import sre_compile
- import string
- import sys
- import unicodedata
- from webkitpy.common.memoized import memoized
# The key to use to provide a class to fake loading a header file.
INCLUDE_IO_INJECTION_KEY = 'include_header_io'

# Headers that we consider STL headers.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
])

# Non-STL C++ system headers.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
])

# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.  Each
# comparison operator has a direct suffix (used by the TRUE-style macros)
# and an inverse suffix (used by the FALSE-style macros).
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)
for _operator, _direct, _inverse in (('==', 'EQ', 'NE'), ('!=', 'NE', 'EQ'),
                                     ('>=', 'GE', 'LT'), ('>', 'GT', 'LE'),
                                     ('<=', 'LE', 'GT'), ('<', 'LT', 'GE')):
    _CHECK_REPLACEMENT['DCHECK'][_operator] = 'DCHECK_%s' % _direct
    _CHECK_REPLACEMENT['CHECK'][_operator] = 'CHECK_%s' % _direct
    _CHECK_REPLACEMENT['EXPECT_TRUE'][_operator] = 'EXPECT_%s' % _direct
    _CHECK_REPLACEMENT['ASSERT_TRUE'][_operator] = 'ASSERT_%s' % _direct
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][_operator] = 'EXPECT_%s_M' % _direct
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][_operator] = 'ASSERT_%s_M' % _direct
    _CHECK_REPLACEMENT['EXPECT_FALSE'][_operator] = 'EXPECT_%s' % _inverse
    _CHECK_REPLACEMENT['ASSERT_FALSE'][_operator] = 'ASSERT_%s' % _inverse
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][_operator] = 'EXPECT_%s_M' % _inverse
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][_operator] = 'ASSERT_%s_M' % _inverse

# These constants define types of headers for use with
# _IncludeState.check_next_include_order().
_CONFIG_HEADER = 0
_PRIMARY_HEADER = 1
_OTHER_HEADER = 2
_MOC_HEADER = 3

# A dictionary of items that customize behavior for unit tests.  For example,
# INCLUDE_IO_INJECTION_KEY allows providing a custom io class which allows
# for faking a header file.
_unit_test_config = {}
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}


def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)


def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)


def sub(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].sub(replacement, s)


def subn(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    Returns a (new_string, number_of_substitutions) tuple like re.subn.
    """
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].subn(replacement, s)
def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    """Returns the string with every character of every match replaced.

    Due to the iterative nature, pattern should not match char_replacement
    or there will be an infinite loop.

    Example:
      pattern = r'<[^>]>' # template parameters
      char_replacement = '_'
      s = 'A<B<C, D>>'
      Returns 'A_________'

    Args:
      pattern: The regex to match.
      char_replacement: The character to put in place of every
                        character of the match.
      s: The string on which to do the replacements.

    Returns:
      The string with every matched span replaced by a same-length run of
      char_replacement.
    """
    while True:
        matched = search(pattern, s)
        if not matched:
            return s
        start_match_index = matched.start(0)
        end_match_index = matched.end(0)
        match_length = end_match_index - start_match_index
        s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
def _rfind_in_lines(regex, lines, start_position, not_found_position):
    """Searches backwards from start_position for the last match of regex.

    Returns a Position just past the end of the right-most match found on
    the closest preceding line, or not_found_position when no line from
    start_position back to row 0 contains a match.
    """
    # Wrap the regex in a group preceded by a greedy '.*' so the match
    # reported is the right-most one on the line.
    rightmost_regex = r'.*(' + regex + ')'
    row = start_position.row
    # The starting row is truncated so text at/past the start column is ignored.
    line = lines[row][:start_position.column]
    while True:
        found = match(rightmost_regex, line)
        if found:
            return Position(row, found.end(1))
        # No match on this line; keep walking backwards.
        row -= 1
        if row < 0:
            return not_found_position
        line = lines[row]
def _convert_to_lower_with_underscores(text):
    """Converts camelCase or PascalCase text to lower case with underscores."""
    # Insert an underscore before a capital that is followed by a lower case
    # letter and preceded by an alphanumeric: this puts one before 'Password'
    # in 'WPAPassword' but not before the 'P' or the 'A'.
    text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    # Insert an underscore before a capital at the end of a word when it
    # follows a lower case letter or digit ('isA' gains one, 'CBA' does not).
    text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    # Insert an underscore before a run of capitals that follows a lower case
    # letter or digit (before the 'A' in 'WordADay').
    text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    return text.lower()
def _create_acronym(text):
    """Creates an acronym for the given text."""
    # Drop every lower case letter that does not start a word, then
    # upper-case whatever is left.
    return sub(r'(?<!\b)[a-z]', '', text).upper()
def up_to_unmatched_closing_paren(s):
    """Splits a string at the first unmatched ')'.

    Args:
      s: a string which is a substring of line after '('
         (e.g., "a == (b + c))").

    Returns:
      A pair of strings: the text before the first unmatched ')' and the
      text after it, e.g. up_to_unmatched_closing_paren("a == (b + c)) { ")
      returns ("a == (b + c)", " { ").  Returns (None, None) if there is
      no unmatched ')'.
    """
    depth = 1  # One '(' has already been consumed by the caller.
    for index, character in enumerate(s):
        if character == '(':
            depth += 1
        elif character == ')':
            depth -= 1
            if depth == 0:
                return s[:index], s[index + 1:]
    return None, None
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which includes appear.

    As a dict, an _IncludeState object maps each include filename to the
    line number on which that file was included.

    Call check_next_include_order() once for each header in the file,
    passing in the type constants defined above.  Calls in an illegal
    order will raise an _IncludeError with an appropriate error message.
    """
    # self._section moves monotonically through these values; an attempt to
    # move backwards is reported by check_next_include_order.
    _INITIAL_SECTION = 0
    _CONFIG_SECTION = 1
    _PRIMARY_SECTION = 2
    _OTHER_SECTION = 3

    _TYPE_NAMES = {
        _CONFIG_HEADER: '',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        _MOC_HEADER: 'moc file',
    }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
    }

    def __init__(self):
        dict.__init__(self)
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        self.header_types = {}

    def visited_primary_section(self):
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is itself a header
          primary_header_exists: Whether the primary header file actually exists on disk

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        if file_is_header and header_type == _CONFIG_HEADER:
            return 'Header file should not contain WebCore config.h.'
        if file_is_header and header_type == _PRIMARY_HEADER:
            return 'Header file should not contain itself.'
        if header_type == _MOC_HEADER:
            return ''

        error_message = ''
        if self._section != self._OTHER_SECTION:
            before_error_message = ('Found %s before %s' % (
                self._TYPE_NAMES[header_type],
                self._SECTION_NAMES[self._section + 1]))
            after_error_message = ('Found %s after %s' % (
                self._TYPE_NAMES[header_type],
                self._SECTION_NAMES[self._section]))

        if header_type == _CONFIG_HEADER:
            if self._section >= self._CONFIG_SECTION:
                error_message = after_error_message
            self._section = self._CONFIG_SECTION
        elif header_type == _PRIMARY_HEADER:
            if self._section >= self._PRIMARY_SECTION:
                error_message = after_error_message
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
        else:
            assert header_type == _OTHER_HEADER
            if not file_is_header and self._section < self._PRIMARY_SECTION:
                # Only complain when we know a primary header exists to be
                # included first.
                if primary_header_exists:
                    error_message = before_error_message
            self._section = self._OTHER_SECTION

        return error_message
class Position(object):
    """Holds a (row, column) position within a file, ordered row-major."""

    def __init__(self, row, column):
        self.row = row
        self.column = column

    def __str__(self):
        return '(%s, %s)' % (self.row, self.column)

    def _compare(self, other):
        """Three-way comparison (negative/zero/positive), row then column.

        The original implementation called int.__cmp__, which does not exist
        on Python 3; this spelling preserves the same ordering on both.
        """
        return ((self.row > other.row) - (self.row < other.row)
                or (self.column > other.column) - (self.column < other.column))

    # Python 2 consults __cmp__; the rich comparisons below keep ordering
    # working on Python 3, where __cmp__ is ignored.
    def __cmp__(self, other):
        return self._compare(other)

    def __eq__(self, other):
        return self._compare(other) == 0

    def __ne__(self, other):
        return self._compare(other) != 0

    def __lt__(self, other):
        return self._compare(other) < 0

    def __le__(self, other):
        return self._compare(other) <= 0

    def __gt__(self, other):
        return self._compare(other) > 0

    def __ge__(self, other):
        return self._compare(other) >= 0

    def __hash__(self):
        # Defined alongside __eq__ so Positions stay usable in sets/dicts.
        return hash((self.row, self.column))
class Parameter(object):
    """Information about one function parameter."""

    def __init__(self, parameter, parameter_name_index, row):
        # Everything before the name index is the type; everything from the
        # name index on — minus any default initializer such as "= 5" — is
        # the parameter name.
        self.type = parameter[:parameter_name_index].strip()
        self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
        self.row = row

    @memoized
    def lower_with_underscores_name(self):
        """Returns the parameter name in the lower with underscores format."""
        return _convert_to_lower_with_underscores(self.name)
class SingleLineView(object):
    """Converts multiple lines into a single line (with line breaks replaced
    by a space) to allow for easier searching."""

    def __init__(self, lines, start_position, end_position):
        """Create a SingleLineView instance.

        Args:
          lines: a list of multiple lines to combine into a single line.
          start_position: offset within lines of where to start the single line.
          end_position: just after where to end (like a slice operation).
        """
        interesting_rows = lines[start_position.row:end_position.row + 1]
        # Trim the trailing columns of the last row and then the leading
        # columns of the first row that fall outside the requested range.
        interesting_rows[-1] = interesting_rows[-1][:end_position.column]
        interesting_rows[0] = interesting_rows[0][start_position.column:]
        # Join everything into one searchable line.
        self.single_line = ' '.join(interesting_rows)
        # Row lengths (+1 for the space added by the join) let us map a
        # column of the single line back to its original row number.
        self._row_lengths = [len(row_text) + 1 for row_text in interesting_rows]
        self._starting_row = start_position.row

    def convert_column_to_row(self, single_line_column_number):
        """Converts a column in the single line back to an original row number.

        Special cases:
        * Columns inside an added joining space count as the previous row.
        * Columns beyond the end of the line count as the last row in the view."""
        columns_seen = 0
        row_offset = 0
        while (row_offset < len(self._row_lengths) - 1
               and single_line_column_number >= columns_seen + self._row_lengths[row_offset]):
            columns_seen += self._row_lengths[row_offset]
            row_offset += 1
        return self._starting_row + row_offset
def create_skeleton_parameters(all_parameters):
    """Converts a parameter list to a skeleton version.

    The skeleton only has one word for the parameter name, one word for the
    type, and commas after each parameter and only there.  Everything in the
    skeleton remains in the same columns as the original."""
    simplifications = (
        # Remove template parameters, function declaration parameters, etc.
        r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
        # Remove all initializers.
        r'=[^,]*',
        # Remove :: and everything before it.
        r'[^,]*::',
        # Remove modifiers like &, *.
        r'[&*]',
        # Remove const modifiers.
        r'\bconst\s+(?=[A-Za-z])',
        # Remove numerical modifiers like long.
        r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')

    skeleton = all_parameters
    for simplification in simplifications:
        skeleton = iteratively_replace_matches_with_char(simplification, ' ', skeleton)

    # Add a ',' after the final parameter (when there is one) so every
    # parameter is uniformly followed by a comma.
    if skeleton.strip():
        skeleton += ','
    return skeleton
def find_parameter_name_index(skeleton_parameter):
    """Determines where the parameter name starts given the skeleton parameter."""
    # The name starts just after the right-most space — unless nothing but
    # whitespace precedes that space, in which case the string holds only a
    # type and there is no name.
    last_space_index = skeleton_parameter.rstrip().rfind(' ')
    if last_space_index == -1 or not skeleton_parameter[:last_space_index].strip():
        return len(skeleton_parameter)
    return last_space_index + 1
def parameter_list(elided_lines, start_position, end_position):
    """Generator for a function's parameters."""
    # Narrow the range so the outer parentheses are excluded.
    start_position = Position(row=start_position.row, column=start_position.column + 1)
    end_position = Position(row=end_position.row, column=end_position.column - 1)

    single_line_view = SingleLineView(elided_lines, start_position, end_position)
    skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)

    end_index = -1
    while True:
        start_index = end_index + 1
        # Every parameter in the skeleton is followed by a comma, so no
        # comma left means every parameter has been produced.
        end_index = skeleton_parameters.find(',', start_index)
        if end_index == -1:
            return
        row = single_line_view.convert_column_to_row(end_index)
        # Split the parameter into its type and its name.
        skeleton_parameter = skeleton_parameters[start_index:end_index]
        name_offset = find_parameter_name_index(skeleton_parameter)
        parameter_text = single_line_view.single_line[start_index:end_index]
        yield Parameter(parameter_text, name_offset, row)
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body.

    Attributes:
      min_confidence: The minimum confidence level to use while checking style.
    """

    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

    def __init__(self, min_confidence):
        self.min_confidence = min_confidence
        self.current_function = ''
        self.in_a_function = False
        self.lines_in_function = 0
        # Make sure these will not be mistaken for real positions (even when
        # a small amount is added to them).
        self.body_start_position = Position(-1000, 0)
        self.end_position = Position(-1000, 0)

    def begin(self, function_name, function_name_start_position, body_start_position, end_position,
              parameter_start_position, parameter_end_position, clean_lines):
        """Start analyzing function body.

        Args:
            function_name: The name of the function being tracked.
            function_name_start_position: Position in elided where the function name starts.
            body_start_position: Position in elided of the { or the ; for a prototype.
            end_position: Position in elided just after the final } (or ; is.
            parameter_start_position: Position in elided of the '(' for the parameters.
            parameter_end_position: Position in elided just after the ')' for the parameters.
            clean_lines: A CleansedLines instance containing the file.
        """
        self.in_a_function = True
        self.lines_in_function = -1  # Don't count the open brace line.
        self.current_function = function_name
        self.function_name_start_position = function_name_start_position
        self.body_start_position = body_start_position
        self.end_position = end_position
        self.parameter_start_position = parameter_start_position
        self.parameter_end_position = parameter_end_position
        # A ';' where the body would start marks a declaration (prototype).
        self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
        self.is_pure = False
        if self.is_declaration:
            # A declaration ending in "= 0" is a pure virtual function.
            trailing_text = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
            self.is_pure = bool(match(r'\s*=\s*0\s*', trailing_text))
        self._clean_lines = clean_lines
        self._parameter_list = None

    def modifiers_and_return_type(self):
        """Returns the modifiers and the return type."""
        # Scan backwards from the function name until hitting something that
        # cannot be part of the declaration — ';', '{', '}', an access
        # specifier, or a preprocessor directive — falling back to the start
        # of the file.
        elided = self._clean_lines.elided
        declaration_start = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
                                            elided, self.parameter_start_position, Position(0, 0))
        return SingleLineView(elided, declaration_start, self.function_name_start_position).single_line.strip()

    def parameter_list(self):
        if not self._parameter_list:
            # Store the final result as a tuple since that is immutable.
            self._parameter_list = tuple(parameter_list(self._clean_lines.elided,
                                                        self.parameter_start_position,
                                                        self.parameter_end_position))
        return self._parameter_list

    def count(self, line_number):
        """Count line in current function body."""
        if self.in_a_function and line_number >= self.body_start_position.row:
            self.lines_in_function += 1

    def check(self, error, line_number):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          line_number: The number of the line to check.
        """
        if match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2 ** self.min_confidence

        if self.lines_in_function > trigger:
            # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
            error_level = min(int(math.log(self.lines_in_function / base_trigger, 2)), 5)
            error(line_number, 'readability/fn_size', error_level,
                  'Small and focused functions are preferred:'
                  ' %s has %d non-comment lines'
                  ' (error triggered by exceeding %d lines).' % (
                      self.current_function, self.lines_in_function, trigger))

    def end(self):
        """Stop analyzing function body."""
        self.in_a_function = False
class _IncludeError(Exception):
    """Raised when the include order in a file is incorrect."""
    pass
class FileInfo:
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Make Windows paths like Unix."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        """Full name after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something
        smart: detecting the root of the checkout and truncating
        /path/to/checkout from the name so that header guards don't include
        things like "C:/Documents and Settings/..." or "/home/username/..."
        and people who have checked the source out to different locations
        won't see bogus errors.
        """
        full_path = self.full_name()
        if os.path.exists(full_path):
            containing_dir = os.path.dirname(full_path)

            if os.path.exists(os.path.join(containing_dir, ".svn")):
                # This is an SVN checkout; walk upwards to find the top-most
                # directory that still belongs to the same checkout.
                checkout_root = containing_dir
                parent_dir = os.path.dirname(checkout_root)
                while os.path.exists(os.path.join(parent_dir, ".svn")):
                    checkout_root = os.path.dirname(checkout_root)
                    parent_dir = os.path.dirname(parent_dir)
                prefix = os.path.commonprefix([checkout_root, containing_dir])
                return full_path[len(prefix) + 1:]

            # Not SVN?  Try to find a git top level directory by searching
            # up from the current path.
            checkout_root = os.path.dirname(full_path)
            while (checkout_root != os.path.dirname(checkout_root)
                   and not os.path.exists(os.path.join(checkout_root, ".git"))):
                checkout_root = os.path.dirname(checkout_root)
            if os.path.exists(os.path.join(checkout_root, ".git")):
                prefix = os.path.commonprefix([checkout_root, containing_dir])
                return full_path[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return full_path

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', Split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - text following the final period."""
        return self.split()[2]

    def no_extension(self):
        """File has no source file extension."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')

# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')

# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")

# Matches multi-line C++ comments.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of space removals tools so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
            \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
def is_cpp_string(line):
    """Does line terminate so, that the next symbol is in string constant.

    This function does not consider single-line nor multi-line comments.

    Args:
      line: is a partial line of code starting from the 0..n.

    Returns:
      True, if next character appended to 'line' is inside a
      string constant.
    """
    # Collapse '\\' first so an escaped backslash right before a quote is
    # not mistaken for an escaped quote.
    line = line.replace(r'\\', 'XX')
    # Count quotes that are neither escaped nor character literals; an odd
    # total means a string constant is still open.
    unescaped_quotes = line.count('"') - line.count(r'\"') - line.count("'\"'")
    return (unescaped_quotes & 1) == 1
def find_next_multi_line_comment_start(lines, line_index):
    """Find the beginning marker for a multiline comment."""
    while line_index < len(lines):
        stripped = lines[line_index].strip()
        # Only report a '/*' whose comment is not closed on the same line.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return line_index
        line_index += 1
    return len(lines)
def find_next_multi_line_comment_end(lines, line_index):
    """We are inside a comment, find the end marker."""
    for index in range(line_index, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
def remove_multi_line_comments_from_range(lines, begin, end):
    """Clears a range of lines for multi-line comments."""
    # Having // dummy comments makes the lines non-empty, so we will not get
    # unnecessary blank line warnings later in the code.
    for line_index in range(begin, end):
        lines[line_index] = '// dummy'
def remove_multi_line_comments(lines, error):
    """Removes multiline (c-style) comments from lines."""
    line_index = 0
    while line_index < len(lines):
        start = find_next_multi_line_comment_start(lines, line_index)
        if start >= len(lines):
            return
        end = find_next_multi_line_comment_end(lines, start)
        if end >= len(lines):
            # The comment is never terminated; report it and give up.
            error(start + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return
        remove_multi_line_comments_from_range(lines, start, end + 1)
        line_index = end + 1
def cleanse_comments(line):
    """Removes //-comments and single-line C-style /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    # Strip a // comment, but only when the // is not inside a string.
    comment_position = line.find('//')
    if comment_position != -1 and not is_cpp_string(line[:comment_position]):
        line = line[:comment_position]
    # Get rid of /* ... */ comments that are contained in this line.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
class CleansedLines(object):
    """Holds 3 copies of all lines with different preprocessing applied to them.

    1) elided member contains lines without strings and comments,
    2) lines member contains lines without comments, and
    3) raw_lines member contains all the lines without processing.
    All these three members are of <type 'list'>, and of the same length.
    """

    def __init__(self, lines):
        self.raw_lines = lines
        self._num_lines = len(lines)
        self.lines = []
        self.elided = []
        for raw_line in lines:
            self.lines.append(cleanse_comments(raw_line))
            self.elided.append(cleanse_comments(self.collapse_strings(raw_line)))

    def num_lines(self):
        """Returns the number of lines represented."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Collapses strings and chars on a line to simple "" or '' blocks.

        We nix strings first so we're not fooled by text like '"http://"'

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            # #include lines keep their paths intact.
            return elided
        # Remove escaped characters first to make quote/single quote
        # collapsing basic.  Things that look like escaped characters
        # shouldn't occur outside of strings and chars.
        elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
        elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
        return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
def close_expression(elided, position):
    """If input points to ( or { or [, finds the position that closes it.

    If elided[position.row][position.column] points to a '(' or '{' or '[',
    finds the line_number/pos that correspond to the closing of the expression.

    Args:
      elided: A CleansedLines.elided instance containing the file.
      position: The position of the opening item.

    Returns:
      The Position *past* the closing brace, or Position(len(elided), -1)
      if we never find a close.  Note we ignore strings and comments when
      matching.
    """
    opening_character = elided[position.row][position.column]
    regex_for_opening = {
        '(': r'[\(\)]',
        '[': r'[\[\]]',
        '{': r'[\{\}]',
    }
    if opening_character not in regex_for_opening:
        return Position(len(elided), -1)
    enclosing_character_regex = regex_for_opening[opening_character]

    current_column = position.column + 1
    line_number = position.row
    net_open = 1
    for line in elided[position.row:]:
        line = line[current_column:]
        # Consume every opening/closing character on the current line.
        while True:
            next_enclosing_character = search(enclosing_character_regex, line)
            if not next_enclosing_character:
                break  # No more on this line.
            current_column += next_enclosing_character.end(0)
            line = line[next_enclosing_character.end(0):]
            if next_enclosing_character.group(0) == opening_character:
                net_open += 1
            else:
                net_open -= 1
                if not net_open:
                    return Position(line_number, current_column)
        # Proceed to the next line.
        line_number += 1
        current_column = 0

    # The given item was not closed.
    return Position(len(elided), -1)
def check_for_copyright(lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      lines: An array of strings, each representing a line of the file,
             with a dummy entry at index 0.
      error: The function to call with any errors found.
    """
    # We'll say the notice should occur by line 10. Don't forget there's a
    # dummy line at the front, so scan indices 1..10.
    # Fix: use range() instead of the Python-2-only xrange(); the iteration
    # is tiny, so the behavior and cost are identical, and the code stays
    # portable to Python 3.
    for line in range(1, min(len(lines), 11)):
        if re.search(r'Copyright', lines[line], re.I):
            break
    else:                       # means no copyright line was found
        error(0, 'legal/copyright', 5,
              'No copyright message found. '
              'You should have a line: "Copyright [year] <Copyright Owner>"')
def get_header_guard_cpp_variable(filename):
    """Returns the CPP variable that should be used as a header guard.

    Args:
      filename: The name of a C++ header file.

    Returns:
      A (special_name, standard_name) pair of acceptable header guard
      variables for the named file; special_name is the preferred form.
    """
    # Restores original filename in case the style checker is invoked from
    # Emacs's flymake (which appends _flymake to the basename).
    filename = re.sub(r'_flymake\.h$', '.h', filename)

    # Fix: use re.sub consistently here instead of mixing it with the
    # file-local sub() wrapper — same result, one fewer indirection.
    standard_name = re.sub(r'[-.\s]', '_', os.path.basename(filename))

    # Files under WTF typically have header guards that start with WTF_.
    if '/wtf/' in filename:
        special_name = "WTF_" + standard_name
    else:
        special_name = standard_name
    return (special_name, standard_name)
def check_for_header_guard(filename, lines, error):
    """Checks that the file contains a header guard.

    Logs an error if no #ifndef header guard is present. For other
    headers, checks that the full pathname is used.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    guard_names = get_header_guard_cpp_variable(filename)

    ifndef_name = None
    ifndef_line = 0
    define_name = None
    for index, text in enumerate(lines):
        tokens = text.split()
        if len(tokens) < 2:
            continue
        # Record the argument of the first #ifndef and the first #define.
        if ifndef_name is None and tokens[0] == '#ifndef':
            ifndef_name = tokens[1]
            ifndef_line = index
        if define_name is None and tokens[0] == '#define':
            define_name = tokens[1]
        if ifndef_name and define_name:
            break

    # A proper guard needs matching #ifndef/#define arguments.
    if not ifndef_name or not define_name or ifndef_name != define_name:
        error(0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              guard_names[0])
        return

    # The guard should be File_h (or the WTF_-prefixed variant).
    if ifndef_name not in guard_names:
        error(ifndef_line, 'build/header_guard', 5,
              '#ifndef header guard has wrong style, please use: %s' % guard_names[0])
def check_for_unicode_replacement_characters(lines, error):
    """Logs an error for each line containing Unicode replacement characters.

    These indicate that either the file contained invalid UTF-8 (likely)
    or Unicode replacement characters (which it shouldn't). Note that
    it's possible for this to throw off line numbering if the invalid
    UTF-8 occurred adjacent to a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    for index, text in enumerate(lines):
        # U+FFFD is the replacement character substituted during decoding.
        if text.find(u'\ufffd') != -1:
            error(index, 'readability/utf8', 5,
                  'Line contains invalid UTF-8 (or Unicode replacement character).')
def check_for_new_line_at_eof(lines, error):
    """Logs an error if there is no newline char at the end of the file.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    # The lines array was created by appending two newlines to the original
    # file contents and splitting on '\n', so a file that ends in a newline
    # yields an empty last-but-one element. Anything else means the final
    # line was not terminated.
    ends_with_newline = len(lines) > 2 and not lines[-2]
    if not ends_with_newline:
        error(len(lines) - 2, 'whitespace/ending_newline', 5,
              'Could not find a newline character at the end of the file.')
def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    """Logs an error if we see /* ... */ or "..." that extend past one line.

    /* ... */ comments are legit inside macros, for one line.
    Otherwise, we prefer // comments, so it's ok to warn about the
    other. Likewise, it's ok for strings to extend across multiple
    lines, as long as a line continuation character (backslash)
    terminates each line. Although not currently prohibited by the C++
    style guide, it's ugly and unnecessary. We don't do well with either
    in this lint program, so we warn about both.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]

    # Strip escaped backslashes up front; the second (escaped) slash of a
    # '\\\\' pair could otherwise be mistaken for a '\"' escape below.
    line = line.replace('\\\\', '')

    opened_comments = line.count('/*')
    closed_comments = line.count('*/')
    if opened_comments > closed_comments:
        error(line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings. '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    unescaped_quotes = line.count('"') - line.count('\\"')
    if unescaped_quotes % 2 != 0:
        error(line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found. This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings. They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
# Pairs of (thread-unsafe function, reentrant "_r" replacement) used by
# check_posix_threading. Each name keeps its opening parenthesis so that a
# match is anchored to an actual call site rather than a bare identifier.
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
def check_posix_threading(clean_lines, line_number, error):
    """Checks for calls to thread-unsafe functions.

    Much code has been originally written without consideration of
    multi-threading. Also, engineers are relying on their old experience;
    they have learned posix before threading extensions were added. These
    tests guide the engineers to use thread-safe functions (when using
    posix directly).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    for unsafe_function, safe_function in _THREADING_LIST:
        position = line.find(unsafe_function)
        if position < 0:
            continue
        # Only flag a genuine call site: the match must sit at the start of
        # the line or follow a character that cannot extend an identifier
        # or a member access ('_', '.', '>').
        is_call_site = (position == 0
                        or (not line[position - 1].isalnum()
                            and line[position - 1] not in ('_', '.', '>')))
        if is_call_site:
            error(line_number, 'runtime/threadsafe_fn', 2,
                  'Consider using ' + safe_function +
                  '...) instead of ' + unsafe_function +
                  '...) for improved thread safety.')
# Matches invalid increment: *count++, which moves the pointer instead of
# incrementing the pointed-to value ('++' binds tighter than unary '*').
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
def check_invalid_increment(clean_lines, line_number, error):
    """Checks for invalid increment *count++.

    For example the following function:
    void increment_counter(int* count) {
        *count++;
    }
    is invalid, because it effectively does count++, moving pointer, and should
    be replaced with ++*count, (*count)++ or *count += 1.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    if _RE_PATTERN_INVALID_INCREMENT.match(line) is not None:
        error(line_number, 'runtime/invalid_increment', 5,
              'Changing pointer instead of value (or unused value of operator*).')
class _ClassInfo(object):
    """Stores information about a class."""

    def __init__(self, name, line_number):
        # The class's name and the line on which its declaration starts.
        self.name = name
        self.line_number = line_number
        # The fields below start at "nothing seen yet" and are updated by
        # the parsing code elsewhere in this file as the class body is
        # scanned (exact update rules live with that code).
        self.seen_open_brace = False
        self.is_derived = False
        self.virtual_method_line_number = None
        self.has_virtual_destructor = False
        self.brace_depth = 0
- class _ClassState(object):
- """Holds the current state of the parse relating to class declarations.
- It maintains a stack of _ClassInfos representing the parser's guess
- as to the current nesting of class declarations. The innermost class
- is at the top (back) of the stack. Typically, the stack will either
- be empty or have exactly one entry.
- """
- def __init__(self):
- self.classinfo_stack = []
- def check_finished(self, error):
- """Checks that all classes have been completely parsed.
- Call this when all lines in a file have been processed.
- Args:
- error: The function to call with any errors found.
- """
- if self.classinfo_stack:
- # Note: This test can result in false positives if #ifdef constructs
- # get in the way of brace matching. See the testBuildClass test in
- # cpp_style_unittest.py for an example of this.
- error(self.classinfo_stack[0].line_number, 'build/class', 5,
- 'Failed to find complete declaration of class %s' %
- self.classinfo_stack[0].name)
- class _FileState(object):
- def __init__(self, clean_lines, file_extension):
- self._did_inside_namespace_indent_warning = False
- self._clean_lines = clean_lines
- if file_extension in ['m', 'mm']:
- self._is_objective_c = True
- elif file_extension == 'h':
- # In the case of header files, it is unknown if the file
- # is objective c or not, so set this value to None and then
- # if it is requested, use heuristics to guess the value.
- self._is_objective_c = None
- else:
- self._is_objective_c = False
- self._is_c = file_extension == 'c'
- def set_did_inside_namespace_indent_warning(self):
- self._did_inside_namespace_indent_warning = True
- def did_inside_namespace_indent_warning(self):
- return self._did_inside_namespace_indent_warning
- def is_objective_c(self):
- if self._is_objective_c is None:
- for line in self._clean_lines.elided:
- # Starting with @ or #import seem like the best indications
- # that we have an Objective C file.
- if line.startswith("@") or line.startswith("#import"):
- self._is_objective_c = True
- break
- else:
- self._is_objective_c = False
- return self._is_objective_c
- def is_c_or_objective_c(self):
- """Return whether the file extension corresponds to C or Objective-C."""
- return self._is_c or self.is_objective_c()
- def check_for_non_standard_constructs(clean_lines, line_number,
- class_state, error):
- """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
- Complain about several constructs which gcc-2 accepts, but which are
- not standard C++. Warning about these in lint is one way to ease the
- transition to new compilers.
- - put storage class first (e.g. "static const" instead of "const static").
- - "%lld" instead of %qd" in printf-type functions.
- - "%1$d" is non-standard in printf-type functions.
- - "\%" is an undefined character escape sequence.
- - text after #endif is not allowed.
- - invalid inner-style forward declaration.
- - >? and <? operators, and their >?= and <?= cousins.
- - classes with virtual methods need virtual destructors (compiler warning
- available, but not turned on yet.)
- Additionally, check for constructor/destructor style violations as it
- is very convenient to do so while checking for gcc-2 compliance.
- Args:
- clean_lines: A CleansedLines instance containing the file.
- line_number: The number of the line to check.
- class_state: A _ClassState instance which maintains information about
- the current stack of nested class declarations being parsed.
- error: A callable to which errors are reported, which takes parameters:
- line number, error level, and message
- """
- # Remove comments from the line, but leave in strings for now.
- line = clean_lines.lines[line_number]
- if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
- error(line_number, 'runtime/printf_format', 3,
- '%q in format strings is deprecated. Use %ll instead.')
- if search(r'printf\s*\(.*".*%\d+\$', line):
- error(line_number, 'runtime/printf_format', 2,
- '%N$ formats are unconventional. Try rewriting to avoid them.')
- # Remove escaped backslashes before looking for undefined escapes.
- line = line.replace('\\\\', '')
- if search(r'("|\').*\\(%|\[|\(|{)', line):
- error(line_number, 'build/printf_format', 3,
- '%, [, (, and { are undefined character escapes. Unescape them.')
- # For the rest, work with both comments and strings removed.
- line = clean_lines.elided[line_number]
- i…
Large files files are truncated, but you can click here to view the full file