/hooks/webkitpy/style/checkers/cpp.py
Python | 3580 lines | 2922 code | 212 blank | 446 comment | 298 complexity | 265650f2e883796370c6cad485c6051a MD5 | raw file
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- #
- # Copyright (C) 2009, 2010 Google Inc. All rights reserved.
- # Copyright (C) 2009 Torch Mobile Inc.
- # Copyright (C) 2009 Apple Inc. All rights reserved.
- # Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met:
- #
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above
- # copyright notice, this list of conditions and the following disclaimer
- # in the documentation and/or other materials provided with the
- # distribution.
- # * Neither the name of Google Inc. nor the names of its
- # contributors may be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- # This is the modified version of Google's cpplint. The original code is
- # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
- """Support for check-webkit-style."""
- import codecs
- import math # for log
- import os
- import os.path
- import re
- import sre_compile
- import string
- import sys
- import unicodedata
- from webkitpy.common.memoized import memoized
# The key to use to provide a class to fake loading a header file.
INCLUDE_IO_INJECTION_KEY = 'include_header_io'

# Headers that we consider STL headers.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
])

# Non-STL C++ system headers.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
])

# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.  Each
# comparison operator has a direct suffix (used by the TRUE-style macros)
# and an inverse suffix (used by the FALSE-style macros).
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)
for _operator, _direct, _inverse in (('==', 'EQ', 'NE'), ('!=', 'NE', 'EQ'),
                                     ('>=', 'GE', 'LT'), ('>', 'GT', 'LE'),
                                     ('<=', 'LE', 'GT'), ('<', 'LT', 'GE')):
    _CHECK_REPLACEMENT['DCHECK'][_operator] = 'DCHECK_%s' % _direct
    _CHECK_REPLACEMENT['CHECK'][_operator] = 'CHECK_%s' % _direct
    _CHECK_REPLACEMENT['EXPECT_TRUE'][_operator] = 'EXPECT_%s' % _direct
    _CHECK_REPLACEMENT['ASSERT_TRUE'][_operator] = 'ASSERT_%s' % _direct
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][_operator] = 'EXPECT_%s_M' % _direct
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][_operator] = 'ASSERT_%s_M' % _direct
    _CHECK_REPLACEMENT['EXPECT_FALSE'][_operator] = 'EXPECT_%s' % _inverse
    _CHECK_REPLACEMENT['ASSERT_FALSE'][_operator] = 'ASSERT_%s' % _inverse
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][_operator] = 'EXPECT_%s_M' % _inverse
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][_operator] = 'ASSERT_%s_M' % _inverse

# These constants define types of headers for use with
# _IncludeState.check_next_include_order().
_CONFIG_HEADER = 0
_PRIMARY_HEADER = 1
_OTHER_HEADER = 2
_MOC_HEADER = 3

# A dictionary of items that customize behavior for unit tests.  For example,
# INCLUDE_IO_INJECTION_KEY allows providing a custom io class which allows
# for faking a header file.
_unit_test_config = {}
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}


def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)


def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)


def sub(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp."""
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].sub(replacement, s)


def subn(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    Returns a (new_string, number_of_substitutions) tuple like re.subn.
    """
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].subn(replacement, s)
def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    """Returns the string with every character of every match replaced.

    Due to the iterative nature, pattern should not match char_replacement
    or there will be an infinite loop.

    Example:
      pattern = r'<[^>]>' # template parameters
      char_replacement = '_'
      s = 'A<B<C, D>>'
      Returns 'A_________'

    Args:
      pattern: The regex to match.
      char_replacement: The character to put in place of every
                        character of the match.
      s: The string on which to do the replacements.

    Returns:
      The string with every matched span replaced by a same-length run of
      char_replacement.
    """
    while True:
        matched = search(pattern, s)
        if not matched:
            return s
        start_match_index = matched.start(0)
        end_match_index = matched.end(0)
        match_length = end_match_index - start_match_index
        s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
def _rfind_in_lines(regex, lines, start_position, not_found_position):
    """Searches backwards from start_position for the last match of regex.

    Returns a Position just past the end of the right-most match found on
    the closest preceding line, or not_found_position when no line from
    start_position back to row 0 contains a match.
    """
    # Wrap the regex in a group preceded by a greedy '.*' so the match
    # reported is the right-most one on the line.
    rightmost_regex = r'.*(' + regex + ')'
    row = start_position.row
    # The starting row is truncated so text at/past the start column is ignored.
    line = lines[row][:start_position.column]
    while True:
        found = match(rightmost_regex, line)
        if found:
            return Position(row, found.end(1))
        # No match on this line; keep walking backwards.
        row -= 1
        if row < 0:
            return not_found_position
        line = lines[row]
def _convert_to_lower_with_underscores(text):
    """Converts camelCase or PascalCase text to lower case with underscores."""
    # Insert an underscore before a capital that is followed by a lower case
    # letter and preceded by an alphanumeric: this puts one before 'Password'
    # in 'WPAPassword' but not before the 'P' or the 'A'.
    text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    # Insert an underscore before a capital at the end of a word when it
    # follows a lower case letter or digit ('isA' gains one, 'CBA' does not).
    text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    # Insert an underscore before a run of capitals that follows a lower case
    # letter or digit (before the 'A' in 'WordADay').
    text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    return text.lower()
def _create_acronym(text):
    """Creates an acronym for the given text."""
    # Drop every lower case letter that does not start a word, then
    # upper-case whatever is left.
    return sub(r'(?<!\b)[a-z]', '', text).upper()
def up_to_unmatched_closing_paren(s):
    """Splits a string at the first unmatched ')'.

    Args:
      s: a string which is a substring of line after '('
         (e.g., "a == (b + c))").

    Returns:
      A pair of strings: the text before the first unmatched ')' and the
      text after it, e.g. up_to_unmatched_closing_paren("a == (b + c)) { ")
      returns ("a == (b + c)", " { ").  Returns (None, None) if there is
      no unmatched ')'.
    """
    depth = 1  # One '(' has already been consumed by the caller.
    for index, character in enumerate(s):
        if character == '(':
            depth += 1
        elif character == ')':
            depth -= 1
            if depth == 0:
                return s[:index], s[index + 1:]
    return None, None
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which includes appear.

    As a dict, an _IncludeState object maps each include filename to the
    line number on which that file was included.

    Call check_next_include_order() once for each header in the file,
    passing in the type constants defined above.  Calls in an illegal
    order will raise an _IncludeError with an appropriate error message.
    """
    # self._section moves monotonically through these values; an attempt to
    # move backwards is reported by check_next_include_order.
    _INITIAL_SECTION = 0
    _CONFIG_SECTION = 1
    _PRIMARY_SECTION = 2
    _OTHER_SECTION = 3

    _TYPE_NAMES = {
        _CONFIG_HEADER: '',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        _MOC_HEADER: 'moc file',
    }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
    }

    def __init__(self):
        dict.__init__(self)
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        self.header_types = {}

    def visited_primary_section(self):
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is itself a header
          primary_header_exists: Whether the primary header file actually exists on disk

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        if file_is_header and header_type == _CONFIG_HEADER:
            return 'Header file should not contain WebCore config.h.'
        if file_is_header and header_type == _PRIMARY_HEADER:
            return 'Header file should not contain itself.'
        if header_type == _MOC_HEADER:
            return ''

        error_message = ''
        if self._section != self._OTHER_SECTION:
            before_error_message = ('Found %s before %s' % (
                self._TYPE_NAMES[header_type],
                self._SECTION_NAMES[self._section + 1]))
            after_error_message = ('Found %s after %s' % (
                self._TYPE_NAMES[header_type],
                self._SECTION_NAMES[self._section]))

        if header_type == _CONFIG_HEADER:
            if self._section >= self._CONFIG_SECTION:
                error_message = after_error_message
            self._section = self._CONFIG_SECTION
        elif header_type == _PRIMARY_HEADER:
            if self._section >= self._PRIMARY_SECTION:
                error_message = after_error_message
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
        else:
            assert header_type == _OTHER_HEADER
            if not file_is_header and self._section < self._PRIMARY_SECTION:
                # Only complain when we know a primary header exists to be
                # included first.
                if primary_header_exists:
                    error_message = before_error_message
            self._section = self._OTHER_SECTION

        return error_message
class Position(object):
    """Holds a (row, column) position within a file, ordered row-major."""

    def __init__(self, row, column):
        self.row = row
        self.column = column

    def __str__(self):
        return '(%s, %s)' % (self.row, self.column)

    def _compare(self, other):
        """Three-way comparison (negative/zero/positive), row then column.

        The original implementation called int.__cmp__, which does not exist
        on Python 3; this spelling preserves the same ordering on both.
        """
        return ((self.row > other.row) - (self.row < other.row)
                or (self.column > other.column) - (self.column < other.column))

    # Python 2 consults __cmp__; the rich comparisons below keep ordering
    # working on Python 3, where __cmp__ is ignored.
    def __cmp__(self, other):
        return self._compare(other)

    def __eq__(self, other):
        return self._compare(other) == 0

    def __ne__(self, other):
        return self._compare(other) != 0

    def __lt__(self, other):
        return self._compare(other) < 0

    def __le__(self, other):
        return self._compare(other) <= 0

    def __gt__(self, other):
        return self._compare(other) > 0

    def __ge__(self, other):
        return self._compare(other) >= 0

    def __hash__(self):
        # Defined alongside __eq__ so Positions stay usable in sets/dicts.
        return hash((self.row, self.column))
class Parameter(object):
    """Information about one function parameter."""

    def __init__(self, parameter, parameter_name_index, row):
        # Everything before the name index is the type; everything from the
        # name index on — minus any default initializer such as "= 5" — is
        # the parameter name.
        self.type = parameter[:parameter_name_index].strip()
        self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
        self.row = row

    @memoized
    def lower_with_underscores_name(self):
        """Returns the parameter name in the lower with underscores format."""
        return _convert_to_lower_with_underscores(self.name)
class SingleLineView(object):
    """Converts multiple lines into a single line (with line breaks replaced
    by a space) to allow for easier searching."""

    def __init__(self, lines, start_position, end_position):
        """Create a SingleLineView instance.

        Args:
          lines: a list of multiple lines to combine into a single line.
          start_position: offset within lines of where to start the single line.
          end_position: just after where to end (like a slice operation).
        """
        interesting_rows = lines[start_position.row:end_position.row + 1]
        # Trim the trailing columns of the last row and then the leading
        # columns of the first row that fall outside the requested range.
        interesting_rows[-1] = interesting_rows[-1][:end_position.column]
        interesting_rows[0] = interesting_rows[0][start_position.column:]
        # Join everything into one searchable line.
        self.single_line = ' '.join(interesting_rows)
        # Row lengths (+1 for the space added by the join) let us map a
        # column of the single line back to its original row number.
        self._row_lengths = [len(row_text) + 1 for row_text in interesting_rows]
        self._starting_row = start_position.row

    def convert_column_to_row(self, single_line_column_number):
        """Converts a column in the single line back to an original row number.

        Special cases:
        * Columns inside an added joining space count as the previous row.
        * Columns beyond the end of the line count as the last row in the view."""
        columns_seen = 0
        row_offset = 0
        while (row_offset < len(self._row_lengths) - 1
               and single_line_column_number >= columns_seen + self._row_lengths[row_offset]):
            columns_seen += self._row_lengths[row_offset]
            row_offset += 1
        return self._starting_row + row_offset
def create_skeleton_parameters(all_parameters):
    """Converts a parameter list to a skeleton version.

    The skeleton only has one word for the parameter name, one word for the
    type, and commas after each parameter and only there.  Everything in the
    skeleton remains in the same columns as the original."""
    simplifications = (
        # Remove template parameters, function declaration parameters, etc.
        r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
        # Remove all initializers.
        r'=[^,]*',
        # Remove :: and everything before it.
        r'[^,]*::',
        # Remove modifiers like &, *.
        r'[&*]',
        # Remove const modifiers.
        r'\bconst\s+(?=[A-Za-z])',
        # Remove numerical modifiers like long.
        r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')

    skeleton = all_parameters
    for simplification in simplifications:
        skeleton = iteratively_replace_matches_with_char(simplification, ' ', skeleton)

    # Add a ',' after the final parameter (when there is one) so every
    # parameter is uniformly followed by a comma.
    if skeleton.strip():
        skeleton += ','
    return skeleton
def find_parameter_name_index(skeleton_parameter):
    """Determines where the parameter name starts given the skeleton parameter."""
    # The name starts just after the right-most space — unless nothing but
    # whitespace precedes that space, in which case the string holds only a
    # type and there is no name.
    last_space_index = skeleton_parameter.rstrip().rfind(' ')
    if last_space_index == -1 or not skeleton_parameter[:last_space_index].strip():
        return len(skeleton_parameter)
    return last_space_index + 1
def parameter_list(elided_lines, start_position, end_position):
    """Generator for a function's parameters."""
    # Narrow the range so the outer parentheses are excluded.
    start_position = Position(row=start_position.row, column=start_position.column + 1)
    end_position = Position(row=end_position.row, column=end_position.column - 1)

    single_line_view = SingleLineView(elided_lines, start_position, end_position)
    skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)

    end_index = -1
    while True:
        start_index = end_index + 1
        # Every parameter in the skeleton is followed by a comma, so no
        # comma left means every parameter has been produced.
        end_index = skeleton_parameters.find(',', start_index)
        if end_index == -1:
            return
        row = single_line_view.convert_column_to_row(end_index)
        # Split the parameter into its type and its name.
        skeleton_parameter = skeleton_parameters[start_index:end_index]
        name_offset = find_parameter_name_index(skeleton_parameter)
        parameter_text = single_line_view.single_line[start_index:end_index]
        yield Parameter(parameter_text, name_offset, row)
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body.

    Attributes:
      min_confidence: The minimum confidence level to use while checking style.
    """

    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

    def __init__(self, min_confidence):
        self.min_confidence = min_confidence
        self.current_function = ''
        self.in_a_function = False
        self.lines_in_function = 0
        # Make sure these will not be mistaken for real positions (even when
        # a small amount is added to them).
        self.body_start_position = Position(-1000, 0)
        self.end_position = Position(-1000, 0)

    def begin(self, function_name, function_name_start_position, body_start_position, end_position,
              parameter_start_position, parameter_end_position, clean_lines):
        """Start analyzing function body.

        Args:
            function_name: The name of the function being tracked.
            function_name_start_position: Position in elided where the function name starts.
            body_start_position: Position in elided of the { or the ; for a prototype.
            end_position: Position in elided just after the final } (or ; is.
            parameter_start_position: Position in elided of the '(' for the parameters.
            parameter_end_position: Position in elided just after the ')' for the parameters.
            clean_lines: A CleansedLines instance containing the file.
        """
        self.in_a_function = True
        self.lines_in_function = -1  # Don't count the open brace line.
        self.current_function = function_name
        self.function_name_start_position = function_name_start_position
        self.body_start_position = body_start_position
        self.end_position = end_position
        self.parameter_start_position = parameter_start_position
        self.parameter_end_position = parameter_end_position
        # A ';' where the body would start marks a declaration (prototype).
        self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
        self.is_pure = False
        if self.is_declaration:
            # A declaration ending in "= 0" is a pure virtual function.
            trailing_text = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
            self.is_pure = bool(match(r'\s*=\s*0\s*', trailing_text))
        self._clean_lines = clean_lines
        self._parameter_list = None

    def modifiers_and_return_type(self):
        """Returns the modifiers and the return type."""
        # Scan backwards from the function name until hitting something that
        # cannot be part of the declaration — ';', '{', '}', an access
        # specifier, or a preprocessor directive — falling back to the start
        # of the file.
        elided = self._clean_lines.elided
        declaration_start = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
                                            elided, self.parameter_start_position, Position(0, 0))
        return SingleLineView(elided, declaration_start, self.function_name_start_position).single_line.strip()

    def parameter_list(self):
        if not self._parameter_list:
            # Store the final result as a tuple since that is immutable.
            self._parameter_list = tuple(parameter_list(self._clean_lines.elided,
                                                        self.parameter_start_position,
                                                        self.parameter_end_position))
        return self._parameter_list

    def count(self, line_number):
        """Count line in current function body."""
        if self.in_a_function and line_number >= self.body_start_position.row:
            self.lines_in_function += 1

    def check(self, error, line_number):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          line_number: The number of the line to check.
        """
        if match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2 ** self.min_confidence

        if self.lines_in_function > trigger:
            # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
            error_level = min(int(math.log(self.lines_in_function / base_trigger, 2)), 5)
            error(line_number, 'readability/fn_size', error_level,
                  'Small and focused functions are preferred:'
                  ' %s has %d non-comment lines'
                  ' (error triggered by exceeding %d lines).' % (
                      self.current_function, self.lines_in_function, trigger))

    def end(self):
        """Stop analyzing function body."""
        self.in_a_function = False
class _IncludeError(Exception):
    """Raised when the include order in a file is incorrect."""
    pass
class FileInfo:
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Make Windows paths like Unix."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        """Full name after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something
        smart: detecting the root of the checkout and truncating
        /path/to/checkout from the name so that header guards don't include
        things like "C:/Documents and Settings/..." or "/home/username/..."
        and people who have checked the source out to different locations
        won't see bogus errors.
        """
        full_path = self.full_name()
        if os.path.exists(full_path):
            containing_dir = os.path.dirname(full_path)

            if os.path.exists(os.path.join(containing_dir, ".svn")):
                # This is an SVN checkout; walk upwards to find the top-most
                # directory that still belongs to the same checkout.
                checkout_root = containing_dir
                parent_dir = os.path.dirname(checkout_root)
                while os.path.exists(os.path.join(parent_dir, ".svn")):
                    checkout_root = os.path.dirname(checkout_root)
                    parent_dir = os.path.dirname(parent_dir)
                prefix = os.path.commonprefix([checkout_root, containing_dir])
                return full_path[len(prefix) + 1:]

            # Not SVN?  Try to find a git top level directory by searching
            # up from the current path.
            checkout_root = os.path.dirname(full_path)
            while (checkout_root != os.path.dirname(checkout_root)
                   and not os.path.exists(os.path.join(checkout_root, ".git"))):
                checkout_root = os.path.dirname(checkout_root)
            if os.path.exists(os.path.join(checkout_root, ".git")):
                prefix = os.path.commonprefix([checkout_root, containing_dir])
                return full_path[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return full_path

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', Split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - text following the final period."""
        return self.split()[2]

    def no_extension(self):
        """File has no source file extension."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')

# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')

# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")

# Matches multi-line C++ comments.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of space removals tools so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
            \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
def is_cpp_string(line):
    """Does line terminate so, that the next symbol is in string constant.

    This function does not consider single-line nor multi-line comments.

    Args:
      line: is a partial line of code starting from the 0..n.

    Returns:
      True, if next character appended to 'line' is inside a
      string constant.
    """
    # Collapse '\\' first so an escaped backslash right before a quote is
    # not mistaken for an escaped quote.
    line = line.replace(r'\\', 'XX')
    # Count quotes that are neither escaped nor character literals; an odd
    # total means a string constant is still open.
    unescaped_quotes = line.count('"') - line.count(r'\"') - line.count("'\"'")
    return (unescaped_quotes & 1) == 1
def find_next_multi_line_comment_start(lines, line_index):
    """Find the beginning marker for a multiline comment."""
    while line_index < len(lines):
        stripped = lines[line_index].strip()
        # Only report a '/*' whose comment is not closed on the same line.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return line_index
        line_index += 1
    return len(lines)
def find_next_multi_line_comment_end(lines, line_index):
    """We are inside a comment, find the end marker."""
    for index in range(line_index, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
def remove_multi_line_comments_from_range(lines, begin, end):
    """Clears a range of lines for multi-line comments."""
    # Having // dummy comments makes the lines non-empty, so we will not get
    # unnecessary blank line warnings later in the code.
    for line_index in range(begin, end):
        lines[line_index] = '// dummy'
def remove_multi_line_comments(lines, error):
    """Removes multiline (c-style) comments from lines."""
    line_index = 0
    while line_index < len(lines):
        start = find_next_multi_line_comment_start(lines, line_index)
        if start >= len(lines):
            return
        end = find_next_multi_line_comment_end(lines, start)
        if end >= len(lines):
            # The comment is never terminated; report it and give up.
            error(start + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return
        remove_multi_line_comments_from_range(lines, start, end + 1)
        line_index = end + 1
def cleanse_comments(line):
    """Removes //-comments and single-line C-style /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    # Strip a // comment, but only when the // is not inside a string.
    comment_position = line.find('//')
    if comment_position != -1 and not is_cpp_string(line[:comment_position]):
        line = line[:comment_position]
    # Get rid of /* ... */ comments that are contained in this line.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
class CleansedLines(object):
    """Holds 3 copies of all lines with different preprocessing applied to them.

    1) elided member contains lines without strings and comments,
    2) lines member contains lines without comments, and
    3) raw_lines member contains all the lines without processing.
    All these three members are of <type 'list'>, and of the same length.
    """

    def __init__(self, lines):
        self.raw_lines = lines
        self._num_lines = len(lines)
        self.lines = []
        self.elided = []
        for raw_line in lines:
            self.lines.append(cleanse_comments(raw_line))
            self.elided.append(cleanse_comments(self.collapse_strings(raw_line)))

    def num_lines(self):
        """Returns the number of lines represented."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Collapses strings and chars on a line to simple "" or '' blocks.

        We nix strings first so we're not fooled by text like '"http://"'

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            # #include lines keep their paths intact.
            return elided
        # Remove escaped characters first to make quote/single quote
        # collapsing basic.  Things that look like escaped characters
        # shouldn't occur outside of strings and chars.
        elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
        elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
        return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
def close_expression(elided, position):
    """If input points to ( or { or [, finds the position that closes it.

    If elided[position.row][position.column] points to a '(' or '{' or '[',
    finds the line_number/pos that correspond to the closing of the expression.

    Args:
      elided: A CleansedLines.elided instance containing the file.
      position: The position of the opening item.

    Returns:
      The Position *past* the closing brace, or Position(len(elided), -1)
      if we never find a close.  Note we ignore strings and comments when
      matching.
    """
    opening_character = elided[position.row][position.column]
    regex_for_opening = {
        '(': r'[\(\)]',
        '[': r'[\[\]]',
        '{': r'[\{\}]',
    }
    if opening_character not in regex_for_opening:
        return Position(len(elided), -1)
    enclosing_character_regex = regex_for_opening[opening_character]

    current_column = position.column + 1
    line_number = position.row
    net_open = 1
    for line in elided[position.row:]:
        line = line[current_column:]
        # Consume every opening/closing character on the current line.
        while True:
            next_enclosing_character = search(enclosing_character_regex, line)
            if not next_enclosing_character:
                break  # No more on this line.
            current_column += next_enclosing_character.end(0)
            line = line[next_enclosing_character.end(0):]
            if next_enclosing_character.group(0) == opening_character:
                net_open += 1
            else:
                net_open -= 1
                if not net_open:
                    return Position(line_number, current_column)
        # Proceed to the next line.
        line_number += 1
        current_column = 0

    # The given item was not closed.
    return Position(len(elided), -1)
def check_for_copyright(lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      lines: An array of strings, each representing a line of the file,
             with a dummy entry at index 0.
      error: The function to call with any errors found.
    """
    # We'll say the notice should occur by line 10. Don't forget there's a
    # dummy line at the front, so scan indices 1..10.
    # Fix: use range() instead of the Python-2-only xrange(); the iteration
    # is tiny, so the behavior and cost are identical, and the code stays
    # portable to Python 3.
    for line in range(1, min(len(lines), 11)):
        if re.search(r'Copyright', lines[line], re.I):
            break
    else:                       # means no copyright line was found
        error(0, 'legal/copyright', 5,
              'No copyright message found. '
              'You should have a line: "Copyright [year] <Copyright Owner>"')
def get_header_guard_cpp_variable(filename):
    """Returns the CPP variable that should be used as a header guard.

    Args:
      filename: The name of a C++ header file.

    Returns:
      A (special_name, standard_name) pair of acceptable header guard
      variables for the named file; special_name is the preferred form.
    """
    # Restores original filename in case the style checker is invoked from
    # Emacs's flymake (which appends _flymake to the basename).
    filename = re.sub(r'_flymake\.h$', '.h', filename)

    # Fix: use re.sub consistently here instead of mixing it with the
    # file-local sub() wrapper — same result, one fewer indirection.
    standard_name = re.sub(r'[-.\s]', '_', os.path.basename(filename))

    # Files under WTF typically have header guards that start with WTF_.
    if '/wtf/' in filename:
        special_name = "WTF_" + standard_name
    else:
        special_name = standard_name
    return (special_name, standard_name)
def check_for_header_guard(filename, lines, error):
    """Checks that the file contains a header guard.

    Logs an error if no #ifndef header guard is present. For other
    headers, checks that the full pathname is used.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    guard_names = get_header_guard_cpp_variable(filename)

    ifndef_name = None
    ifndef_line = 0
    define_name = None
    for index, text in enumerate(lines):
        tokens = text.split()
        if len(tokens) < 2:
            continue
        # Record the argument of the first #ifndef and the first #define.
        if ifndef_name is None and tokens[0] == '#ifndef':
            ifndef_name = tokens[1]
            ifndef_line = index
        if define_name is None and tokens[0] == '#define':
            define_name = tokens[1]
        if ifndef_name and define_name:
            break

    # A proper guard needs matching #ifndef/#define arguments.
    if not ifndef_name or not define_name or ifndef_name != define_name:
        error(0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              guard_names[0])
        return

    # The guard should be File_h (or the WTF_-prefixed variant).
    if ifndef_name not in guard_names:
        error(ifndef_line, 'build/header_guard', 5,
              '#ifndef header guard has wrong style, please use: %s' % guard_names[0])
def check_for_unicode_replacement_characters(lines, error):
    """Logs an error for each line containing Unicode replacement characters.

    These indicate that either the file contained invalid UTF-8 (likely)
    or Unicode replacement characters (which it shouldn't). Note that
    it's possible for this to throw off line numbering if the invalid
    UTF-8 occurred adjacent to a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    for index, text in enumerate(lines):
        # U+FFFD is the replacement character substituted during decoding.
        if text.find(u'\ufffd') != -1:
            error(index, 'readability/utf8', 5,
                  'Line contains invalid UTF-8 (or Unicode replacement character).')
def check_for_new_line_at_eof(lines, error):
    """Logs an error if there is no newline char at the end of the file.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    # The lines array was created by appending two newlines to the original
    # file contents and splitting on '\n', so a file that ends in a newline
    # yields an empty last-but-one element. Anything else means the final
    # line was not terminated.
    ends_with_newline = len(lines) > 2 and not lines[-2]
    if not ends_with_newline:
        error(len(lines) - 2, 'whitespace/ending_newline', 5,
              'Could not find a newline character at the end of the file.')
def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    """Logs an error if we see /* ... */ or "..." that extend past one line.

    /* ... */ comments are legit inside macros, for one line.
    Otherwise, we prefer // comments, so it's ok to warn about the
    other. Likewise, it's ok for strings to extend across multiple
    lines, as long as a line continuation character (backslash)
    terminates each line. Although not currently prohibited by the C++
    style guide, it's ugly and unnecessary. We don't do well with either
    in this lint program, so we warn about both.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]

    # Strip escaped backslashes up front; the second (escaped) slash of a
    # '\\\\' pair could otherwise be mistaken for a '\"' escape below.
    line = line.replace('\\\\', '')

    opened_comments = line.count('/*')
    closed_comments = line.count('*/')
    if opened_comments > closed_comments:
        error(line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings. '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    unescaped_quotes = line.count('"') - line.count('\\"')
    if unescaped_quotes % 2 != 0:
        error(line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found. This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings. They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
# Pairs of (thread-unsafe function, reentrant "_r" replacement) used by
# check_posix_threading. Each name keeps its opening parenthesis so that a
# match is anchored to an actual call site rather than a bare identifier.
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
def check_posix_threading(clean_lines, line_number, error):
    """Checks for calls to thread-unsafe functions.

    Much code has been originally written without consideration of
    multi-threading. Also, engineers are relying on their old experience;
    they have learned posix before threading extensions were added. These
    tests guide the engineers to use thread-safe functions (when using
    posix directly).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    for unsafe_function, safe_function in _THREADING_LIST:
        position = line.find(unsafe_function)
        if position < 0:
            continue
        # Only flag a genuine call site: the match must sit at the start of
        # the line or follow a character that cannot extend an identifier
        # or a member access ('_', '.', '>').
        is_call_site = (position == 0
                        or (not line[position - 1].isalnum()
                            and line[position - 1] not in ('_', '.', '>')))
        if is_call_site:
            error(line_number, 'runtime/threadsafe_fn', 2,
                  'Consider using ' + safe_function +
                  '...) instead of ' + unsafe_function +
                  '...) for improved thread safety.')
# Matches invalid increment: *count++, which moves the pointer instead of
# incrementing the pointed-to value ('++' binds tighter than unary '*').
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
def check_invalid_increment(clean_lines, line_number, error):
    """Checks for invalid increment *count++.

    For example the following function:
    void increment_counter(int* count) {
        *count++;
    }
    is invalid, because it effectively does count++, moving pointer, and should
    be replaced with ++*count, (*count)++ or *count += 1.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    if _RE_PATTERN_INVALID_INCREMENT.match(line) is not None:
        error(line_number, 'runtime/invalid_increment', 5,
              'Changing pointer instead of value (or unused value of operator*).')
class _ClassInfo(object):
    """Stores information about a class."""

    def __init__(self, name, line_number):
        # The class's name and the line on which its declaration starts.
        self.name = name
        self.line_number = line_number
        # The fields below start at "nothing seen yet" and are updated by
        # the parsing code elsewhere in this file as the class body is
        # scanned (exact update rules live with that code).
        self.seen_open_brace = False
        self.is_derived = False
        self.virtual_method_line_number = None
        self.has_virtual_destructor = False
        self.brace_depth = 0
- class _ClassState(object):
- """Holds the current state of the parse relating to class declarations.
- It maintains a stack of _ClassInfos representing the parser's guess
- as to the current nesting of class declarations. The innermost class
- is at the top (back) of the stack. Typically, the stack will either
- be empty or have exactly one entry.
- """
- def __init__(self):
- self.classinfo_stack = []
- def check_finished(self, error):
- """Checks that all classes have been completely parsed.
- Call this when all lines in a file have been processed.
- Args:
- error: The function to call with any errors found.
- """
- if self.classinfo_stack:
- # Note: This test can result in false positives if #ifdef constructs
- # get in the way of brace matching. See the testBuildClass test in
- # cpp_style_unittest.py for an example of this.
- error(self.classinfo_stack[0].line_number, 'build/class', 5,
- 'Failed to find complete declaration of class %s' %
- self.classinfo_stack[0].name)
- class _FileState(object):
- def __init__(self, clean_lines, file_extension):
- self._did_inside_namespace_indent_warning = False
- self._clean_lines = clean_lines
- if file_extension in ['m', 'mm']:
- self._is_objective_c = True
- elif file_extension == 'h':
- # In the case of header files, it is unknown if the file
- # is objective c or not, so set this value to None and then
- # if it is requested, use heuristics to guess the value.
- self._is_objective_c = None
- else:
- self._is_objective_c = False
- self._is_c = file_extension == 'c'
- def set_did_inside_namespace_indent_warning(self):
- self._did_inside_namespace_indent_warning = True
- def did_inside_namespace_indent_warning(self):
- return self._did_inside_namespace_indent_warning
- def is_objective_c(self):
- if self._is_objective_c is None:
- for line in self._clean_lines.elided:
- # Starting with @ or #import seem like the best indications
- # that we have an Objective C file.
- if line.startswith("@") or line.startswith("#import"):
- self._is_objective_c = True
- break
- else:
- self._is_objective_c = False
- return self._is_objective_c
- def is_c_or_objective_c(self):
- """Return whether the file extension corresponds to C or Objective-C."""
- return self._is_c or self.is_objective_c()
- def check_for_non_standard_constructs(clean_lines, line_number,
- class_state, error):
- """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
- Complain about several constructs which gcc-2 accepts, but which are
- not standard C++. Warning about these in lint is one way to ease the
- transition to new compilers.
- - put storage class first (e.g. "static const" instead of "const static").
- - "%lld" instead of %qd" in printf-type functions.
- - "%1$d" is non-standard in printf-type functions.
- - "\%" is an undefined character escape sequence.
- - text after #endif is not allowed.
- - invalid inner-style forward declaration.
- - >? and <? operators, and their >?= and <?= cousins.
- - classes with virtual methods need virtual destructors (compiler warning
- available, but not turned on yet.)
- Additionally, check for constructor/destructor style violations as it
- is very convenient to do so while checking for gcc-2 compliance.
- Args:
- clean_lines: A CleansedLines instance containing the file.
- line_number: The number of the line to check.
- class_state: A _ClassState instance which maintains information about
- the current stack of nested class declarations being parsed.
- error: A callable to which errors are reported, which takes parameters:
- line number, error level, and message
- """
- # Remove comments from the line, but leave in strings for now.
- line = clean_lines.lines[line_number]
- if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
- error(line_number, 'runtime/printf_format', 3,
- '%q in format strings is deprecated. Use %ll instead.')
- if search(r'printf\s*\(.*".*%\d+\$', line):
- error(line_number, 'runtime/printf_format', 2,
- '%N$ formats are unconventional. Try rewriting to avoid them.')
- # Remove escaped backslashes before looking for undefined escapes.
- line = line.replace('\\\\', '')
- if search(r'("|\').*\\(%|\[|\(|{)', line):
- error(line_number, 'build/printf_format', 3,
- '%, [, (, and { are undefined character escapes. Unescape them.')
- # For the rest, work with both comments and strings removed.
- line = clean_lines.elided[line_number]
- i…
Large files files are truncated, but you can click here to view the full file