PageRenderTime 61ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/fix_includes.py

http://include-what-you-use.googlecode.com/
Python | 1561 lines | 1371 code | 44 blank | 146 comment | 34 complexity | f7f006b0350e365fa636d054a706846a MD5 | raw file
Possible License(s): JSON
  1. #!/usr/bin/python
  2. ##===--- fix_includes.py - rewrite source files based on iwyu output ------===##
  3. #
  4. # The LLVM Compiler Infrastructure
  5. #
  6. # This file is distributed under the University of Illinois Open Source
  7. # License. See LICENSE.TXT for details.
  8. #
  9. ##===----------------------------------------------------------------------===##
  10. """Update files with the 'correct' #include and forward-declare lines.
  11. Given the output of include_what_you_use on stdin -- when run at the
  12. (default) --v=1 verbosity level or higher -- modify the files
  13. mentioned in the output, removing their old #include lines and
  14. replacing them with the lines given by the include_what_you_use
  15. script.
  16. We only edit files that are writeable (presumably open for p4 edit),
  17. unless the user supplies a command to make files writeable via the
  18. --checkout_command flag (eg '--checkout_command="p4 edit"').
  19. This script runs in four stages. In the first, it groups physical
  20. lines together to form 'move spans'. A 'move span' is the atomic unit
  21. for moving or deleting code. A move span is either a) an #include
  22. line, along with any comment lines immediately preceding it; b) a
  23. forward-declare line -- or more if it's a multi-line forward declare
  24. -- along with preceding comments; c) any other single line. Example:
  25. // I really am glad I'm forward-declaring this class!
  26. // If I didn't, I'd have to #include the entire world.
  27. template<typename A, typename B, typename C, typename D>
  28. class MyClass;
  29. Then, it groups move spans together into 'reorder spans'. These are
  30. spans of code that consist entirely of #includes and forward-declares,
  31. maybe separated by blank lines and comments. We assume that we can
  32. arbitrarily reorder #includes and forward-declares within a reorder
  33. span, without affecting correctness. Things like #ifdefs, #defines,
  34. namespace declarations, static variable declarations, class
  35. definitions, etc -- just about anything -- break up reorder spans.
  36. In stage 3 it deletes all #include and forward-declare lines that iwyu
  37. says to delete. iwyu includes line numbers for deletion, making this
  38. part easy. If this step results in "empty" #ifdefs or namespaces
  39. (#ifdefs or namespaces with no code inside them), we delete those as
  40. well. We recalculate the reorder spans, which may have gotten bigger
  41. due to the deleted code.
  42. In stage 4 it adds new iwyu-dictated #includes and forward-declares
  43. after the last existing #includes and forward-declares. Then it
  44. reorders the #includes and forward-declares to match the order
  45. specified by iwyu. It follows iwyu's instructions as much as
  46. possible, modulo the constraint that an #include or forward-declare
  47. cannot leave its current reorder span.
  48. All this moving messes up the blank lines, which we then need to fix
  49. up. Then we're done!
  50. """
  51. __author__ = 'csilvers@google.com (Craig Silverstein)'
  52. import difflib
  53. import optparse
  54. import os
  55. import pipes # For (undocumented) pipes.quote
  56. import re
  57. import sys
  58. import subprocess
# Command-line usage text. '%prog' is the optparse placeholder for the
# program name; presumably this string is handed to optparse.OptionParser
# elsewhere in the file -- TODO confirm against the option-parsing code.
_USAGE = """\
%prog [options] [filename] ... < <output from include-what-you-use script>
OR %prog -s [other options] <filename> ...
%prog reads the output from the include-what-you-use
script on stdin -- run with --v=1 (default) verbose or above -- and,
unless --sort_only or --dry_run is specified,
modifies the files mentioned in the output, removing their old
#include lines and replacing them with the lines given by the
include_what_you_use script. It also sorts the #include and
forward-declare lines.
Only writable files (those opened for p4 edit) are modified (unless
--checkout_command is specified). All files mentioned in the
include-what-you-use script are modified, unless filenames are
specified on the commandline, in which case only those files are
modified.
The exit code is the number of files that were modified (or that would
be modified if --dry_run was specified) unless that number exceeds 100,
in which case 100 is returned.
"""
# Matches a '//' comment, together with any whitespace in front of it,
# through the end of the line. Used with sub() to strip comments from
# lines of iwyu output.
_COMMENT_RE = re.compile(r'\s*//.*')

# These are the types of lines a file can have. These are matched
# using re.match(), so don't need a leading ^.
_C_COMMENT_START_RE = re.compile(r'\s*/\*')
# Group 1 is whatever follows the closing '*/' on the line (possibly empty).
_C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$')
_COMMENT_LINE_RE = re.compile(r'\s*//')
_BLANK_LINE_RE = re.compile(r'\s*$')
_IF_RE = re.compile(r'\s*#\s*if')  # matches #if/ifdef/ifndef
_ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b')  # matches #else/elif
_ENDIF_RE = re.compile(r'\s*#\s*endif\b')

# This is used to delete 'empty' namespaces after fwd-decls are removed.
# Some third-party libraries use macros to start/end namespaces.
_NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|'
                                 r'\s*(U_NAMESPACE_BEGIN)|'
                                 r'\s*(HASH_NAMESPACE_DECLARATION_START)')
_NAMESPACE_END_RE = re.compile(r'\s*(})|'
                               r'\s*(U_NAMESPACE_END)|'
                               r'\s*(HASH_NAMESPACE_DECLARATION_END)')

# The group (in parens) holds the unique 'key' identifying this #include.
_INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^"">]+[>"])')

# We don't need this to actually match forward-declare lines (we get
# that information from the iwyu input), but we do need an RE here to
# serve as an index to _LINE_TYPES. So we use an RE that never matches
# ('$' followed by a required character can never succeed).
_FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE')

# Likewise, used to mark an '#ifdef' line of a header guard, or other
# #ifdef that covers an entire file. Also a never-matching RE.
_HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')

# Marks the '#define' line that comes after a header guard. Since we
# know the previous line was a header-guard line, we're not that picky
# about this one.
_HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')

# We annotate every line in the source file by the re it matches, or None.
# Note that not all of the above RE's are represented here; for instance,
# we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
# Order matters: a line is given the type of the first RE here that matches.
_LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE,
               _NAMESPACE_START_RE, _NAMESPACE_END_RE,
               _IF_RE, _ELSE_RE, _ENDIF_RE,
               _INCLUDE_RE, _FORWARD_DECLARE_RE,
               _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
              ]

# A regexp matching #include lines that should be a barrier for
# sorting -- that is, we should never reorganize the code so an
# #include that used to come before this line now comes after, or vice
# versa. This can be used for 'fragile' #includes that require other
# #includes to happen before them to function properly.
# (Note that the barrier has no effect on where new #includes are
# added; it just affects the reordering of existing #includes.)
_BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(<linux/)')
  126. def _MayBeHeaderFile(filename):
  127. """Tries to figure out if filename is a C++ header file. Defaults to yes."""
  128. # Header files have all sorts of extensions: .h, .hpp, .hxx, or no
  129. # extension at all. So we say everything is a header file unless it
  130. # has a known extension that's not.
  131. extension = os.path.splitext(filename)[1]
  132. return extension not in ('.c', '.cc', '.cxx', '.cpp', '.C', '.CC')
  133. class FixIncludesError(Exception):
  134. pass
  135. class IWYUOutputRecord(object):
  136. """Information that the iwyu output file has about one source file."""
  137. def __init__(self, filename):
  138. self.filename = filename
  139. # A set of integers.
  140. self.lines_to_delete = set()
  141. # A set of integer line-numbers, for each #include iwyu saw that
  142. # is marked with a line number. This is usually not an exhaustive
  143. # list of include-lines, but that's ok because we only use this
  144. # data structure for sanity checking: we double-check with our own
  145. # analysis that these lines are all # #include lines. If not, we
  146. # know the iwyu data is likely out of date, and we complain. So
  147. # more data here is always welcome, but not essential.
  148. self.some_include_lines = set()
  149. # A set of integer line-number spans [start_line, end_line), for
  150. # each forward-declare iwyu saw. iwyu reports line numbers for
  151. # every forward-declare it sees in the source code. (It won't
  152. # report, though, forward-declares inside '#if 0' or similar.)
  153. self.seen_forward_declare_lines = set()
  154. # A set of each line in the iwyu 'add' section.
  155. self.includes_and_forward_declares_to_add = set()
  156. # A map from the include filename (including ""s or <>s) to the
  157. # full line as given by iwyu, which includes comments that iwyu
  158. # has put next to the #include. This holds both 'to-add' and
  159. # 'to-keep' #includes. If flags.comments is False, the comments
  160. # are removed before adding to this list.
  161. self.full_include_lines = {}
  162. def Merge(self, other):
  163. """Merges other with this one. They must share a filename.
  164. This function is intended to be used when we see two iwyu records
  165. in the input, both for the same file. We can merge the two together.
  166. We are conservative: we union the lines to add, and intersect the
  167. lines to delete.
  168. Arguments:
  169. other: an IWYUOutputRecord to merge into this one.
  170. It must have the same value for filename that self does.
  171. """
  172. assert self.filename == other.filename, "Can't merge distinct files"
  173. self.lines_to_delete.intersection_update(other.lines_to_delete)
  174. self.some_include_lines.update(other.some_include_lines)
  175. self.seen_forward_declare_lines.update(other.seen_forward_declare_lines)
  176. self.includes_and_forward_declares_to_add.update(
  177. other.includes_and_forward_declares_to_add)
  178. self.full_include_lines.update(other.full_include_lines)
  179. def HasContentfulChanges(self):
  180. """Returns true iff this record has at least one add or delete."""
  181. return (self.includes_and_forward_declares_to_add or
  182. self.lines_to_delete)
  183. def __str__(self):
  184. return ('--- iwyu record ---\n FILENAME: %s\n LINES TO DELETE: %s\n'
  185. ' (SOME) INCLUDE LINES: %s\n (SOME) FWD-DECL LINES: %s\n'
  186. ' TO ADD: %s\n ALL INCLUDES: %s\n---\n'
  187. % (self.filename, self.lines_to_delete,
  188. self.some_include_lines, self.seen_forward_declare_lines,
  189. self.includes_and_forward_declares_to_add,
  190. self.full_include_lines))
  191. class IWYUOutputParser(object):
  192. """Parses the lines in iwyu output corresponding to one source file."""
  193. # iwyu adds this comment to some lines to map them to the source file.
  194. _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')
  195. # The output of include-what-you-use has sections that indicate what
  196. # #includes and forward-declares should be added to the output file,
  197. # what should be removed, and what the end result is. The first line
  198. # of each section also has the filename.
  199. _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
  200. _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
  201. _TOTAL_SECTION_RE = re.compile(r'^The full include-list for ([^:]*):$')
  202. _SECTION_END_RE = re.compile(r'^---$')
  203. # Alternately, if a file does not need any iwyu modifications (though
  204. # it still may need its #includes sorted), iwyu will emit this:
  205. _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')
  206. _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
  207. _REMOVE_SECTION_RE: 'remove',
  208. _TOTAL_SECTION_RE: 'total',
  209. _SECTION_END_RE: 'end',
  210. _NO_EDITS_RE: 'no_edits',
  211. }
  212. # A small state-transition machine. key==None indicates the start
  213. # state. value==None means that the key is an end state (that is,
  214. # its presence indicates the record is finished).
  215. _EXPECTED_NEXT_RE = {
  216. None: frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
  217. _ADD_SECTION_RE: frozenset([_REMOVE_SECTION_RE]),
  218. _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
  219. _TOTAL_SECTION_RE: frozenset([_SECTION_END_RE]),
  220. _SECTION_END_RE: None,
  221. _NO_EDITS_RE: None,
  222. }
  223. def __init__(self):
  224. # This is set to one of the 'section' REs above. None is the start-state.
  225. self.current_section = None
  226. self.filename = '<unknown file>'
  227. self.lines_by_section = {} # key is an RE, value is a list of lines
  228. def _ProcessOneLine(self, line):
  229. """Reads one line of input, updates self, and returns False at EORecord.
  230. If the line matches one of the hard-coded section names, updates
  231. self.filename and self.current_section. Otherwise, the line is
  232. taken to be a member of the currently active section, and is added
  233. to self.lines_by_section.
  234. Arguments:
  235. line: one line from the iwyu input file.
  236. Returns:
  237. False if the line is the end-of-section marker, True otherwise.
  238. Raises:
  239. FixIncludesError: if there is an out-of-order section or
  240. mismatched filename.
  241. """
  242. line = line.rstrip() # don't worry about line endings
  243. if not line: # just ignore blank lines
  244. return True
  245. for (section_re, section_name) in self._RE_TO_NAME.iteritems():
  246. m = section_re.search(line)
  247. if m:
  248. # Check or set the filename (if the re has a group, it's for filename).
  249. if section_re.groups >= 1:
  250. this_filename = m.group(1)
  251. if (self.current_section is not None and
  252. this_filename != self.filename):
  253. raise FixIncludesError('"%s" section for %s comes after "%s" for %s'
  254. % (section_name, this_filename,
  255. self._RE_TO_NAME[self.current_section],
  256. self.filename))
  257. self.filename = this_filename
  258. # Check and set the new section we're entering.
  259. if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
  260. if self.current_section is None:
  261. raise FixIncludesError('%s: "%s" section unexpectedly comes first'
  262. % (self.filename, section_name))
  263. else:
  264. raise FixIncludesError('%s: "%s" section unexpectedly follows "%s"'
  265. % (self.filename, section_name,
  266. self._RE_TO_NAME[self.current_section]))
  267. self.current_section = section_re
  268. # We're done parsing this record if this section has nothing after it.
  269. return self._EXPECTED_NEXT_RE[self.current_section] is not None
  270. # We're not starting a new section, so just add to the current section.
  271. # We ignore lines before section-start, they're probably things like
  272. # compiler messages ("Compiling file foo").
  273. if self.current_section is not None:
  274. self.lines_by_section.setdefault(self.current_section, []).append(line)
  275. return True
  276. def ParseOneRecord(self, iwyu_output, flags):
  277. """Given a file object with output from an iwyu run, return per file info.
  278. For each source file that iwyu_output mentions (because iwyu was run on
  279. it), we return a structure holding the information in IWYUOutputRecord:
  280. 1) What file these changes apply to
  281. 2) What line numbers hold includes/fwd-declares to remove
  282. 3) What includes/fwd-declares to add
  283. 4) Ordering information for includes and fwd-declares
  284. Arguments:
  285. iwyu_output: a File object returning lines from an iwyu run
  286. flags: commandline flags, as parsed by optparse. We use
  287. flags.comments, which controls whether we output comments
  288. generated by iwyu.
  289. Returns:
  290. An IWYUOutputRecord object, or None at EOF.
  291. Raises:
  292. FixIncludesError: for malformed-looking lines in the iwyu output.
  293. """
  294. for line in iwyu_output:
  295. if not self._ProcessOneLine(line): # returns False at end-of-record
  296. break
  297. else: # for/else
  298. return None # at EOF
  299. # Now set up all the fields in an IWYUOutputRecord.
  300. # IWYUOutputRecord.filename
  301. retval = IWYUOutputRecord(self.filename)
  302. # IWYUOutputRecord.lines_to_delete
  303. for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
  304. m = self._LINE_NUMBERS_COMMENT_RE.search(line)
  305. if not m:
  306. raise FixIncludesError('line "%s" (for %s) has no line number'
  307. % (line, self.filename))
  308. # The RE is of the form [start_line, end_line], inclusive.
  309. for line_number in xrange(int(m.group(1)), int(m.group(2)) + 1):
  310. retval.lines_to_delete.add(line_number)
  311. # IWYUOutputRecord.some_include_lines
  312. for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
  313. self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
  314. if not _INCLUDE_RE.match(line):
  315. continue
  316. m = self._LINE_NUMBERS_COMMENT_RE.search(line)
  317. if not m:
  318. continue # not all #include lines have line numbers, but some do
  319. for line_number in xrange(int(m.group(1)), int(m.group(2)) + 1):
  320. retval.some_include_lines.add(line_number)
  321. # IWYUOutputRecord.seen_forward_declare_lines
  322. for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
  323. self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
  324. # Everything that's not an #include is a forward-declare.
  325. if line.startswith('- '): # the 'remove' lines all start with '- '.
  326. line = line[len('- '):]
  327. if _INCLUDE_RE.match(line):
  328. continue
  329. m = self._LINE_NUMBERS_COMMENT_RE.search(line)
  330. if m:
  331. retval.seen_forward_declare_lines.add((int(m.group(1)),
  332. int(m.group(2))+1))
  333. # IWYUOutputRecord.includes_and_forward_declares_to_add
  334. for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
  335. line = _COMMENT_RE.sub('', line)
  336. retval.includes_and_forward_declares_to_add.add(line)
  337. # IWYUOutputRecord.full_include_lines
  338. for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
  339. m = _INCLUDE_RE.match(line)
  340. if m:
  341. if not flags.comments:
  342. line = _COMMENT_RE.sub('', line) # pretend there were no comments
  343. else:
  344. # Just remove '// line XX': that's iwyu metadata, not a real comment
  345. line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
  346. retval.full_include_lines[m.group(1)] = line
  347. return retval
  348. class LineInfo(object):
  349. """Information about a single line of a source file."""
  350. def __init__(self, line):
  351. """Initializes the content of the line, but no ancillary fields."""
  352. # The content of the line in the input file
  353. self.line = line
  354. # The 'type' of the line. The 'type' is one of the regular
  355. # expression objects in _LINE_TYPES, or None for any line that
  356. # does not match any regular expression in _LINE_TYPES.
  357. self.type = None
  358. # True if no lines processed before this one have the same type
  359. # as this line.
  360. self.is_first_line_of_this_type = False
  361. # Set to true if we want to delete/ignore this line in the output
  362. # (for instance, because iwyu says to delete this line). At the
  363. # start, the only line to delete is the 'dummy' line 0.
  364. self.deleted = self.line is None
  365. # If this line is an #include or a forward-declare, gives a
  366. # [begin,end) pair saying the 'span' this line is part of. We do
  367. # this for two types of span: the move span (an #include or
  368. # forward declare, along with any preceding comments) and the
  369. # reorder span (a continguous block of move-spans, connected only
  370. # by blank lines and comments). For lines that are not an
  371. # #include or forward-declare, these may have an arbitrary value.
  372. self.move_span = None
  373. self.reorder_span = None
  374. # If this line is an #include or a forward-declare, gives the
  375. # 'key' of the line. For #includes it is the filename included,
  376. # including the ""s or <>s. For a forward-declare it's the name
  377. # of the class/struct. For other types of lines, this is None.
  378. self.key = None
  379. def __str__(self):
  380. if self.deleted:
  381. line = 'XX-%s-XX' % self.line
  382. else:
  383. line = '>>>%s<<<' % self.line
  384. if self.type is None:
  385. type_id = None
  386. else:
  387. type_id = _LINE_TYPES.index(self.type)
  388. return ('%s\n -- type: %s (key: %s). move_span: %s. reorder_span: %s'
  389. % (line, type_id, self.key, self.move_span, self.reorder_span))
  390. def _ReadFile(filename):
  391. """Read from filename and return a list of file lines."""
  392. try:
  393. return open(filename).read().splitlines()
  394. except (IOError, OSError), why:
  395. print "Skipping '%s': %s" % (filename, why)
  396. return None
  397. def _ReadWriteableFile(filename, ignore_writeable):
  398. """Read from filename and return a list of file lines.
  399. Given a filename, if the file is found and is writable, read
  400. the file contents and return it as a list of lines (newlines
  401. removed). If the file is not found or is not writable, or if
  402. there is another IO error, return None.
  403. Arguments:
  404. filename: the name of the file to read.
  405. ignore_writeable: if True, don't check whether the file is writeable;
  406. return the contents anyway.
  407. Returns:
  408. A list of lines (without trailing newline) from filename, or None
  409. if the file is not writable, or cannot be read.
  410. """
  411. if os.access(filename, os.W_OK) or ignore_writeable:
  412. return _ReadFile(filename)
  413. return None
  414. def _WriteFileContentsToFileObject(f, file_lines):
  415. """Write the given file-lines to the file."""
  416. f.write('\n'.join(file_lines))
  417. f.write('\n')
  418. def _WriteFileContents(filename, file_lines):
  419. """Write the given file-lines to the file."""
  420. try:
  421. f = open(filename, 'w')
  422. try:
  423. _WriteFileContentsToFileObject(f, file_lines)
  424. finally:
  425. f.close()
  426. except (IOError, OSError), why:
  427. print "Error writing '%s': %s" % (filename, why)
  428. def _CreateCommandLine(command, args):
  429. """Join the command with the args in a shell-quoted way."""
  430. ret = '%s %s' % (command, ' '.join(map(pipes.quote, args)))
  431. print 'Running:', ret
  432. return ret
  433. def _GetCommandOutputLines(command, args):
  434. """Return an iterable over the output lines of the given shell command."""
  435. full_command = _CreateCommandLine(command, args)
  436. proc = subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE)
  437. return proc.stdout
def _RunCommand(command, args):
  """Run the given shell command, echoing its standard output to stdout.

  Arguments:
    command: the command string, passed on to _GetCommandOutputLines().
    args: a sequence of argument strings for the command.
  """
  for line in _GetCommandOutputLines(command, args):
    # The trailing comma (Python 2 print statement) suppresses print's
    # own newline; presumably each line read from the pipe already ends
    # with one.
    print line,
  442. def _GetCommandOutputWithInput(command, stdin_text):
  443. """Return the output of the given command fed the stdin_text."""
  444. print command
  445. proc = subprocess.Popen(command,
  446. stdin=subprocess.PIPE,
  447. stdout=subprocess.PIPE,
  448. shell=True)
  449. return proc.communicate(input=stdin_text)[0]
  450. def PrintFileDiff(old_file_contents, new_file_contents):
  451. """Print a unified diff between files, specified as lists of lines."""
  452. diff = difflib.unified_diff(old_file_contents, new_file_contents)
  453. # skip the '--- <filename>/+++ <filename>' lines at the start
  454. try:
  455. diff.next()
  456. diff.next()
  457. print '\n'.join(diff)
  458. except StopIteration:
  459. pass
  460. def _MarkHeaderGuardIfPresent(file_lines):
  461. """If any line in file_lines is a header-guard, mark it in file_lines.
  462. We define a header-guard as follows: an #ifdef where there is
  463. nothing contentful before or after the #ifdef. Also, the #ifdef
  464. should have no #elif in it (though we don't currently test that).
  465. This catches the common case of an 'ifdef guard' in .h file, such
  466. as '#ifndef FOO_H\n#define FOO_H\n...contents...\n#endif', but it
  467. can also catch other whole-program #ifdefs, such as
  468. '#ifdef __linux\n...\n#endif'. The issue here is that if an #ifdef
  469. encloses the entire file, then we are willing to put new
  470. #includes/fwd-declares inside the #ifdef (which normally we
  471. wouldn't do). So we want to mark such #ifdefs with a special label.
  472. If we find such an #ifdef line -- and a single file can have at most
  473. one -- we change its type to a special type for header guards.
  474. Arguments:
  475. file_lines: an array of LineInfo objects with .type filled in.
  476. """
  477. # Pass over blank lines or comments at the top of the file.
  478. i = 0
  479. for i in xrange(len(file_lines)):
  480. if (not file_lines[i].deleted and
  481. file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
  482. break
  483. else: # for/else: got to EOF without finding any non-blank/comment lines
  484. return
  485. # This next line is the candidate header guard-line.
  486. ifdef_start = i
  487. if file_lines[ifdef_start].type != _IF_RE:
  488. # Not a header guard, just return without doing anything.
  489. return
  490. # Find the end of this ifdef, to see if it's really a header guard..
  491. ifdef_depth = 0
  492. for ifdef_end in xrange(ifdef_start, len(file_lines)):
  493. if file_lines[ifdef_end].deleted:
  494. continue
  495. if file_lines[ifdef_end].type == _IF_RE:
  496. ifdef_depth += 1
  497. elif file_lines[ifdef_end].type == _ENDIF_RE:
  498. ifdef_depth -= 1
  499. if ifdef_depth == 0: # The end of our #ifdef!
  500. break
  501. else: # for/else
  502. return False # Weird: never found a close to this #ifdef
  503. # Finally, all the lines after the end of the ifdef must be blank or comments.
  504. for i in xrange(ifdef_end + 1, len(file_lines)):
  505. if (not file_lines[i].deleted and
  506. file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
  507. return
  508. # We passed the gauntlet!
  509. file_lines[ifdef_start].type = _HEADER_GUARD_RE
  510. # And the line after the header guard #ifdef is the '#define' (usually).
  511. if _HEADER_GUARD_DEFINE_RE.match(file_lines[ifdef_start + 1].line):
  512. file_lines[ifdef_start+1].type = _HEADER_GUARD_DEFINE_RE
  513. def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
  514. """Fills file_line's type and key fields, where the 'type' is a regexp object.
  515. We match each line (line_info.line) against every regexp in
  516. _LINE_TYPES, and assign the first that matches, or None if none
  517. does. We also use iwyu_record's some_include_lines and
  518. seen_forward_declare_lines to identify those lines. In fact,
  519. that's the only data source we use for forward-declare lines.
  520. Sets file_line.type and file_line.is_first_line_of_this_type for
  521. each file_line in file_lines.
  522. Arguments:
  523. file_lines: an array of LineInfo objects with .line fields filled in.
  524. iwyu_record: the IWYUOutputRecord struct for this source file.
  525. Raises:
  526. FixIncludesError: if iwyu_record's line-number information is
  527. is inconsistent with what we see in the file. (For instance,
  528. it says line 12 is an #include, but we say it's a blank line,
  529. or the file only has 11 lines.)
  530. """
  531. seen_types = set()
  532. in_c_style_comment = False
  533. for line_info in file_lines:
  534. if line_info.line is None:
  535. line_info.type = None
  536. elif _C_COMMENT_START_RE.match(line_info.line):
  537. # Note: _C_COMMENT_START_RE only matches a comment at the start
  538. # of a line. Comments in the middle of a line are ignored.
  539. # This can cause problems with multi-line comments that start
  540. # in the middle of the line, but that's hopefully quite rare.
  541. # TODO(csilvers): check for that case.
  542. m = _C_COMMENT_END_RE.match(line_info.line)
  543. if not m: # comment continues onto future lines
  544. line_info.type = _COMMENT_LINE_RE
  545. in_c_style_comment = True
  546. elif not m.group(1): # comment extends across entire line (only)
  547. line_info.type = _COMMENT_LINE_RE
  548. else: # comment takes only part of line, treat as content
  549. # TODO(csilvers): this mis-diagnoses lines like '/*comment*/class Foo;'
  550. line_info.type = None
  551. elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
  552. line_info.type = _COMMENT_LINE_RE
  553. in_c_style_comment = False
  554. elif in_c_style_comment:
  555. line_info.type = _COMMENT_LINE_RE
  556. else:
  557. for type_re in _LINE_TYPES:
  558. # header-guard-define-re has a two-part decision criterion: it
  559. # matches the RE, *and* it comes after a header guard line.
  560. # That's too complex to figure out now, so we skip over it now
  561. # and fix it up later in _MarkHeaderGuardIfPresent().
  562. if type_re in (_HEADER_GUARD_DEFINE_RE,):
  563. continue
  564. m = type_re.match(line_info.line)
  565. if m:
  566. line_info.type = type_re
  567. if type_re == _INCLUDE_RE:
  568. line_info.key = m.group(1) # get the 'key' for the #include.
  569. break
  570. else: # for/else
  571. line_info.type = None # means we didn't match any re
  572. line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
  573. seen_types.add(line_info.type)
  574. # Now double-check against iwyu that we got all the #include lines right.
  575. for line_number in iwyu_record.some_include_lines:
  576. if file_lines[line_number].type != _INCLUDE_RE:
  577. raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
  578. % (iwyu_record.filename, line_number,
  579. file_lines[line_number].line))
  580. # We depend entirely on the iwyu_record for the forward-declare lines.
  581. for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
  582. for line_number in xrange(start_line, end_line):
  583. if line_number >= len(file_lines):
  584. raise FixIncludesError('iwyu line number %s:%d is past file-end'
  585. % (iwyu_record.filename, line_number))
  586. file_lines[line_number].type = _FORWARD_DECLARE_RE
  587. # While we're at it, let's do a bit more sanity checking on iwyu_record.
  588. for line_number in iwyu_record.lines_to_delete:
  589. if line_number >= len(file_lines):
  590. raise FixIncludesError('iwyu line number %s:%d is past file-end'
  591. % (iwyu_record.filename, line_number))
  592. elif file_lines[line_number].type not in (_INCLUDE_RE,
  593. _FORWARD_DECLARE_RE):
  594. raise FixIncludesError('iwyu line number %s:%d (%s) is not'
  595. ' an #include or forward declare'
  596. % (iwyu_record.filename, line_number,
  597. file_lines[line_number].line))
  598. # Check if this file has a header guard, which for our purposes is
  599. # an #ifdef (or #if) that covers an entire source file. Usually
  600. # this will be a standard .h header-guard, but it could be something
  601. # like '#if __linux/#endif'. The point here is that if an #ifdef
  602. # encloses the entire file, then we are willing to put new
  603. # #includes/fwd-declares inside the #ifdef (which normally we
  604. # wouldn't do). So we mark such #ifdefs with a special label.
  605. _MarkHeaderGuardIfPresent(file_lines)
  606. def _PreviousNondeletedLine(file_lines, line_number):
  607. """Returns the line number of the previous not-deleted line, or None."""
  608. for line_number in xrange(line_number - 1, -1, -1):
  609. if not file_lines[line_number].deleted:
  610. return line_number
  611. return None
  612. def _NextNondeletedLine(file_lines, line_number):
  613. """Returns the line number of the next not-deleted line, or None."""
  614. for line_number in xrange(line_number + 1, len(file_lines)):
  615. if not file_lines[line_number].deleted:
  616. return line_number
  617. return None
  618. def _LineNumberStartingPrecedingComments(file_lines, line_number):
  619. """Returns the line-number for the comment-lines preceding the given linenum.
  620. Looking at file_lines, look at the lines immediately preceding the
  621. given line-number. If they're comment lines, return the first line
  622. of the comment lines preceding the given line. Otherwise, return
  623. the given line number.
  624. As a special case, if the comments go all the way up to the first
  625. line of the file (line 1), we assume they're comment lines, which
  626. are special -- they're not associated with any source code line --
  627. and we return line_number in that case.
  628. Arguments:
  629. file_lines: an array of LineInfo objects, with .type fields filled in.
  630. line_number: an index into file_lines.
  631. Returns:
  632. The first line number of the preceding comments, or line_number
  633. if there are no preceding comments or they appear to be a
  634. top-of-file copyright notice.
  635. """
  636. retval = line_number
  637. while retval > 0 and file_lines[retval - 1].type == _COMMENT_LINE_RE:
  638. retval -= 1
  639. if retval <= 1: # top-of-line comments
  640. retval = line_number # so ignore all the comment lines
  641. return retval
  642. def _CalculateMoveSpans(file_lines, forward_declare_spans):
  643. """Fills each input_line's move_span field.
  644. A 'move span' is a range of lines (from file_lines) that includes
  645. an #include or forward-declare, and all the comments preceding it.
  646. It is the unit we would move if we decided to move (or delete) this
  647. #include or forward-declare.
  648. For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the move span
  649. is set to the tuple [start_of_span, end_of_span). All other lines
  650. have the move span kept at None.
  651. Arguments:
  652. file_lines: an array of LineInfo objects, with .type fields filled in.
  653. forward_declare_spans: a set of line-number pairs
  654. [start_line, end_line), each representing a single namespace.
  655. In practice this comes from iwyu_record.seen_forward_declare_lines.
  656. """
  657. # First let's do #includes.
  658. for line_number in xrange(len(file_lines)):
  659. if file_lines[line_number].type == _INCLUDE_RE:
  660. span_begin = _LineNumberStartingPrecedingComments(file_lines, line_number)
  661. for i in xrange(span_begin, line_number + 1):
  662. file_lines[i].move_span = (span_begin, line_number + 1)
  663. # Now forward-declares. These spans come as input to this function.
  664. for (span_begin, span_end) in forward_declare_spans:
  665. span_begin = _LineNumberStartingPrecedingComments(file_lines, span_begin)
  666. for i in xrange(span_begin, span_end):
  667. file_lines[i].move_span = (span_begin, span_end)
  668. def _ContainsBarrierInclude(file_lines, line_range):
  669. """Returns true iff some line in [line_range[0], line_range[1]) is BARRIER."""
  670. for line_number in apply(xrange, line_range):
  671. if (not file_lines[line_number].deleted and
  672. _BARRIER_INCLUDES.search(file_lines[line_number].line)):
  673. return True
  674. return False
  675. def _LinesAreAllBlank(file_lines, start_line, end_line):
  676. """Returns true iff all lines in [start_line, end_line) are blank/deleted."""
  677. for line_number in xrange(start_line, end_line):
  678. if (not file_lines[line_number].deleted and
  679. file_lines[line_number].type != _BLANK_LINE_RE):
  680. return False
  681. return True
  682. def _CalculateReorderSpans(file_lines):
  683. """Fills each input_line's reorder_span field.
  684. A 'reorder span' is a range of lines (from file_lines) that only has
  685. #includes and forward-declares in it (and maybe blank lines, and
  686. comments associated with #includes or forward-declares). In
  687. particular, it does not include any "real code" besides #includes
  688. and forward-declares: no functions, no static variable assignment,
  689. no macro #defines, no nothing. We are willing to reorder #includes
  690. and namespaces freely inside a reorder span.
  691. Calculating reorder_span is easy: they're just the union of
  692. contiguous move-spans (with perhaps blank lines and comments
  693. thrown in), because move-spans share the 'no actual code'
  694. requirement.
  695. There's one exception: if any move-span matches the
  696. _BARRIER_INCLUDES regexp, it means that we should consider that
  697. move-span to be a 'barrier': nothing should get reordered from one
  698. side of that move-span to the other. (This is used for #includes
  699. that depend on other #includes being before them to function
  700. properly.) We do that by putting them into their own reorder span.
  701. For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the reorder
  702. span is set to the tuple [start_of_span, end_of_span). All other
  703. lines have an arbitrary value for the reorder span.
  704. Arguments:
  705. file_lines: an array of LineInfo objects with .type and .move_span
  706. fields filled in.
  707. """
  708. # Happily, move_spans are disjoint. Just make sure they're sorted and unique.
  709. move_spans = [s.move_span for s in file_lines if s.move_span is not None]
  710. sorted_move_spans = sorted(set(move_spans))
  711. i = 0
  712. while i < len(sorted_move_spans):
  713. reorder_span_start = sorted_move_spans[i][0]
  714. # If we're a 'nosort' include, we're always in a reorder span of
  715. # our own. Otherwise, add in the next move span if we're
  716. # connected to it only by blank lines.
  717. if not _ContainsBarrierInclude(file_lines, sorted_move_spans[i]):
  718. while i < len(sorted_move_spans) - 1:
  719. move_span_end = sorted_move_spans[i][1]
  720. next_move_span_start = sorted_move_spans[i+1][0]
  721. if (_LinesAreAllBlank(file_lines, move_span_end, next_move_span_start)
  722. and not _ContainsBarrierInclude(file_lines, sorted_move_spans[i+1])):
  723. i += 1
  724. else:
  725. break
  726. reorder_span_end = sorted_move_spans[i][1]
  727. # We'll map every line in the span to the span-extent.
  728. for line_number in xrange(reorder_span_start, reorder_span_end):
  729. file_lines[line_number].reorder_span = (reorder_span_start,
  730. reorder_span_end)
  731. i += 1
  732. def ParseOneFile(f, iwyu_record):
  733. """Given a file object, read and classify the lines of the file.
  734. For each file that iwyu_output mentions, we return a list of LineInfo
  735. objects, which is a parsed version of each line, including not only
  736. its content but its 'type', its 'key', etc.
  737. Arguments:
  738. f: an iterable object returning lines from a file.
  739. iwyu_record: the IWYUOutputRecord struct for this source file.
  740. Returns:
  741. An array of LineInfo objects. The first element is always a dummy
  742. element, so the first line of the file is at retval[1], matching
  743. the way iwyu counts line numbers.
  744. """
  745. file_lines = [LineInfo(None)]
  746. for line in f:
  747. file_lines.append(LineInfo(line))
  748. _CalculateLineTypesAndKeys(file_lines, iwyu_record)
  749. _CalculateMoveSpans(file_lines, iwyu_record.seen_forward_declare_lines)
  750. _CalculateReorderSpans(file_lines)
  751. return file_lines
  752. def _DeleteEmptyNamespaces(file_lines):
  753. """Delete namespaces with nothing in them.
  754. Empty namespaces could be caused by transformations that removed
  755. forward-declarations:
  756. namespace foo {
  757. class Myclass;
  758. }
  759. ->
  760. namespace foo {
  761. }
  762. We want to get rid of the 'empty' namespace in this case.
  763. This routine 'deletes' lines by setting their 'deleted' field to True.
  764. Arguments:
  765. file_lines: an array of LineInfo objects with .type fields filled in.
  766. Returns:
  767. The number of namespaces deleted.
  768. """
  769. num_namespaces_deleted = 0
  770. start_line = 0
  771. while start_line < len(file_lines):
  772. line_info = file_lines[start_line]
  773. if line_info.deleted or line_info.type != _NAMESPACE_START_RE:
  774. start_line += 1
  775. continue
  776. # Because multiple namespaces can be on one line
  777. # ("namespace foo { namespace bar { ..."), we need to count.
  778. # We use the max because line may have 0 '{'s if it's a macro.
  779. # TODO(csilvers): ignore { in comments.
  780. namespace_depth = max(line_info.line.count('{'), 1)
  781. end_line = start_line + 1
  782. while end_line < len(file_lines):
  783. line_info = file_lines[end_line]
  784. if line_info.deleted:
  785. end_line += 1
  786. elif line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE):
  787. end_line += 1 # ignore blank lines
  788. elif line_info.type == _NAMESPACE_START_RE: # nested namespace
  789. namespace_depth += max(line_info.line.count('{'), 1)
  790. end_line += 1
  791. elif line_info.type == _NAMESPACE_END_RE:
  792. namespace_depth -= max(line_info.line.count('}'), 1)
  793. end_line += 1
  794. if namespace_depth <= 0:
  795. # Delete any comments preceding this namespace as well.
  796. start_line = _LineNumberStartingPrecedingComments(file_lines,
  797. start_line)
  798. # And also blank lines.
  799. while (start_line > 0 and
  800. file_lines[start_line-1].type == _BLANK_LINE_RE):
  801. start_line -= 1
  802. for line_number in xrange(start_line, end_line):
  803. file_lines[line_number].deleted = True
  804. num_namespaces_deleted += 1
  805. break
  806. else: # bail: we're at a line indicating this isn't an empty namespace
  807. end_line = start_line + 1 # rewind to try again with nested namespaces
  808. break
  809. start_line = end_line
  810. return num_namespaces_deleted
  811. def _DeleteEmptyIfdefs(file_lines):
  812. """Deletes ifdefs with nothing in them.
  813. This could be caused by transformations that removed #includes:
  814. #ifdef OS_WINDOWS
  815. # include <windows.h>
  816. #endif
  817. ->
  818. #ifdef OS_WINDOWS
  819. #endif
  820. We want to get rid of the 'empty' #ifdef in this case.
  821. We also handle 'empty' #ifdefs with #else, if both sides of
  822. the #else are empty. We also handle #ifndef and #if.
  823. This routine 'deletes' lines by replacing their content with None.
  824. Arguments:
  825. file_lines: an array of LineInfo objects with .type fields filled in.
  826. Returns:
  827. The number of ifdefs deleted.
  828. """
  829. num_ifdefs_deleted = 0
  830. start_line = 0
  831. while start_line < len(file_lines):
  832. if file_lines[start_line].type not in (_IF_RE, _HEADER_GUARD_RE):
  833. start_line += 1
  834. continue
  835. end_line = start_line + 1
  836. while end_line < len(file_lines):
  837. line_info = file_lines[end_line]
  838. if line_info.deleted:
  839. end_line += 1
  840. elif line_info.type in (_ELSE_RE, _COMMENT_LINE_RE, _BLANK_LINE_RE):
  841. end_line += 1 # ignore blank lines
  842. elif line_info.type == _ENDIF_RE:
  843. end_line += 1
  844. # Delete any comments preceding this #ifdef as well.
  845. start_line = _LineNumberStartingPrecedingComments(file_lines,
  846. start_line)
  847. # And also blank lines.
  848. while (start_line > 0 and
  849. file_lines[start_line-1].type == _BLANK_LINE_RE):
  850. start_line -= 1
  851. for line_number in xrange(start_line, end_line):
  852. file_lines[line_number].deleted = True
  853. num_ifdefs_deleted += 1
  854. break
  855. else: # bail: we're at a line indicating this isn't an empty ifdef
  856. end_line = start_line + 1 # rewind to try again with nested #ifdefs
  857. break
  858. start_line = end_line
  859. return num_ifdefs_deleted
  860. def _DeleteDuplicateLines(file_lines, line_ranges):
  861. """Goes through all lines in line_ranges, and if any are dups, deletes them.
  862. For all lines in line_ranges, if any is the same as a previously
  863. seen line, set its deleted bit to True. The purpose of line_ranges
  864. is to avoid lines in #ifdefs and namespaces, that may be identical
  865. syntactically but have different semantics. Ideally, line_ranges
  866. should include only 'top-level' lines.
  867. We ignore lines that consist only of comments (or are blank). We
  868. ignore end-of-line comments when comparing lines for equality.
  869. NOTE: Because our comment-finding RE is primitive, it's best if
  870. line_ranges covers only #include and forward-declare lines. In
  871. particular, it should not cover lines that may have C literal
  872. strings in them.
  873. Arguments:
  874. file_lines: an array of LineInfo objects.
  875. line_ranges: a list of [start_line, end_line) pairs.
  876. """
  877. seen_lines = set()
  878. for line_range in line_ranges:
  879. for line_number in apply(xrange, line_range):
  880. if file_lines[line_number].type in (_BLANK_LINE_RE, _COMMENT_LINE_RE):
  881. continue
  882. uncommented_line = _COMMENT_RE.sub('', file_lines[line_number].line)
  883. if uncommented_line in seen_lines:
  884. file_lines[line_number].deleted = True
  885. elif not file_lines[line_number].deleted:
  886. seen_lines.add(uncommented_line)
  887. def _DeleteExtraneousBlankLines(file_lines, line_range):
  888. """Deletes extraneous blank lines caused by line deletion.
  889. Here's a example file:
  890. class Foo { ... };
  891. class Bar;
  892. class Baz { ... }
  893. If we delete the "class Bar;" line, we also want to delete one of
  894. the blank lines around it, otherwise we leave two blank lines
  895. between Foo and Baz which looks bad. The idea is that if we have
  896. whitespace on both sides of a deleted span of code, the whitespace
  897. on one of the sides is 'extraneous'. In this case, we should delete
  898. not only 'class Bar;' but also the whitespace line below it. That
  899. leaves one blank line between Foo and Bar, like people would expect.
  900. We're careful to only delete the minimum of the number of blank
  901. lines that show up on either side. If 'class Bar' had one blank
  902. line before it, and one hundred after it, we'd only delete one blank
  903. line when we delete 'class Bar'. This matches user's expecatations.
  904. The situation can get tricky when two deleted spans touch (we might
  905. think it's safe to delete the whitespace between them when it's
  906. not). To be safe, we only do this check when an entire reorder-span
  907. has been deleted. So we check the given line_range, and only do
  908. blank-line deletion if every line in the range is deleted.
  909. Arguments:
  910. file_lines: an array of LineInfo objects, with .type filled in.
  911. line_range: a range [start_line, end_line). It should correspond
  912. to a reorder-span.
  913. """
  914. # First make sure the entire span is deleted.
  915. for line_number in apply(xrange, line_range):
  916. if not file_lines[line_number].deleted:
  917. return
  918. before_line = _PreviousNondeletedLine(file_lines, line_range[0])
  919. after_line = _NextNondeletedLine(file_lines, line_range[1] - 1)
  920. while (before_line and file_lines[before_line].type == _BLANK_LINE_RE and
  921. after_line and file_lines[after_line].type == _BLANK_LINE_RE):
  922. # OK, we've got whitespace on both sides of a deleted span. We
  923. # only want to keep whitespace on one side, so delete on the other.
  924. file_lines[after_line].deleted = True
  925. before_line = _PreviousNondeletedLine(file_lines, before_line)
  926. after_line = _NextNondeletedLine(file_lines, after_line)
  927. def _ShouldInsertBlankLine(decorated_move_span, next_decorated_move_span,
  928. file_lines, flags):
  929. """Returns true iff we should insert a blank line between the two spans.
  930. Given two decorated move-spans, of the form
  931. (reorder_range, kind, noncomment_lines, all_lines)
  932. returns true if we should insert a blank line between them. We
  933. always put a blank line when transitioning from an #include to a
  934. forward-declare and back. When the appropriate commandline flag is
  935. set, we also put a blank line between the 'main' includes (foo.h)
  936. and the C/C++ system includes, and another between the system
  937. includes and the rest of the Google includes.
  938. If the two move spans are in different reorder_ranges, that means
  939. the first move_span is at the end of a reorder range. In that case,
  940. a different rule for blank lines applies: if the next line is
  941. contentful (eg 'static int x = 5;'), or a namespace start, we want
  942. to insert a blank line to separate the move-span from the next
  943. block. When figuring out if the next line is contentful, we skip
  944. over comments.
  945. Arguments:
  946. decorated_move_span: a decorated_move_span we may want to put a blank
  947. line after.
  948. next_decorated_move_span: the next decorated_move_span, which may
  949. be a sentinel decorated_move_span at end-of-file.
  950. file_lines: an array of LineInfo objects with .deleted filled in.
  951. flags: commandline flags, as parsed by optparse. We use
  952. flags.blank_lines, which controls whether we put blank
  953. lines between different 'kinds' of #includes.
  954. Returns:
  955. true if we should insert a blank line after decorated_move_span.
  956. """
  957. # First handle the 'at the end of a reorder range' case.
  958. if decorated_move_span[0] != next_decorated_move_span[0]:
  959. next_line = _NextNondeletedLine(file_lines, decorated_move_span[0][1] - 1)
  960. # Skip over comments to figure out if the next line is contentful.
  961. while (next_line and next_line < len(file_lines) and
  962. file_lines[next_line].type == _COMMENT_LINE_RE):
  963. next_line += 1
  964. return (next_line and next_line < len(file_lines) and
  965. file_lines[next_line].type in (_NAMESPACE_START_RE, None))
  966. # We never insert a blank line between two spans of the same kind.
  967. # Nor do we ever insert a blank line at EOF.
  968. (this_kind, next_kind) = (decorated_move_span[1], next_decorated_move_span[1])
  969. if this_kind == next_kind or next_kind == _EOF_KIND:
  970. return False
  971. # We also never insert a blank line between C and C++-style #includes,
  972. # no matter what the flag value.
  973. if (this_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND] and
  974. next_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND]):
  975. return False
  976. # Handle the case we're going from an include to fwd declare or
  977. # back. If we get here, we can't both be fwd-declares, so it
  978. # suffices to check if either of us is.
  979. if this_kind == _FORWARD_DECLARE_KIND or next_kind == _FORWARD_DECLARE_KIND:
  980. return True
  981. # Now, depending on the flag, we insert a blank line whenever the
  982. # kind changes (we handled the one case where a changing kind
  983. # doesn't introduce a blank line, above).
  984. if flags.blank_lines:
  985. return this_kind != next_kind
  986. return False
  987. def _GetToplevelReorderSpans(file_lines):
  988. """Returns a sorted list of all reorder_spans not inside an #ifdef/namespace.
  989. This routine looks at all the reorder_spans in file_lines, ignores
  990. reorder spans inside #ifdefs and namespaces -- except for the 'header
  991. guard' ifdef that encapsulates an entire .h file -- and returns the
  992. rest in sorted order.
  993. Arguments:
  994. file_lines: an array of LineInfo objects with .type and
  995. .reorder_span filled in.
  996. Returns:
  997. A list of [start_line, end_line) reorder_spans.
  998. """
  999. in_ifdef = [False] * len(file_lines) # lines inside an #if
  1000. ifdef_depth = 0
  1001. for line_number in xrange(len(file_lines)):
  1002. line_info = file_lines[line_number]
  1003. if line_info.deleted:
  1004. continue
  1005. if line_info.type == _IF_RE: # does not cover the header-guard ifdef
  1006. ifdef_depth += 1
  1007. elif line_info.type == _ENDIF_RE:
  1008. ifdef_depth -= 1
  1009. if ifdef_depth > 0:
  1010. in_ifdef[line_number] = True
  1011. # Figuring out whether a } ends a namespace or some other languague
  1012. # construct is hard, so as soon as we see any 'contentful' line
  1013. # inside a namespace, we assume the entire rest of the file is in
  1014. # the namespace.
  1015. in_namespace = [False] * len(file_lines)
  1016. namespace_depth = 0
  1017. for line_number in xrange(len(file_lines)):
  1018. line_info = file_lines[line_number]
  1019. if line_info.deleted:
  1020. continue
  1021. if line_info.type == _NAMESPACE_START_RE:
  1022. # The 'max' is because the namespace-re may be a macro.
  1023. namespace_depth += max(line_info.line.count('{'), 1)
  1024. elif line_info.type == _NAMESPACE_END_RE:
  1025. namespace_depth -= max(line_info.line.count('}'), 1)
  1026. if namespace_depth > 0:
  1027. in_namespace[line_number] = True
  1028. if line_info.type is None:
  1029. for i in xrange(line_number, len(file_lines)): # rest of file
  1030. in_namespace[i] = True
  1031. break
  1032. reorder_spans = sorted(set([fl.reorder_span for fl in file_lines]))
  1033. good_reorder_spans = []
  1034. for reorder_span in reorder_spans:
  1035. if reorder_span is None:
  1036. continue
  1037. for line_number in apply(xrange, reorder_span):
  1038. if in_ifdef[line_number] or in_namespace[line_number]:
  1039. break
  1040. else: # for/else
  1041. good_reorder_spans.append(reorder_span) # never in ifdef or namespace
  1042. return good_reorder_spans
  1043. def _GetFirstNamespaceLevelReorderSpan(file_lines):
  1044. """Returns the first reorder-span inside a namespace, if it's easy to do.
  1045. This routine is meant to handle the simple case where code consists
  1046. of includes and forward-declares, and then a 'namespace
  1047. my_namespace'. We return the reorder span of the inside-namespace
  1048. forward-declares, which is a good place to insert new
  1049. inside-namespace forward-declares (rather than putting these new
  1050. forward-declares at the top level).
  1051. So it goes through the top of the file, stopping at the first
  1052. 'contentful' line. If that line has the form 'namespace <foo> {',
  1053. it then continues until it finds a forward-declare line, or a
  1054. non-namespace contentful line. In the former case, it figures out
  1055. the reorder-span this forward-declare line is part of, while in the
  1056. latter case it creates a new reorder-span. It returns
  1057. (enclosing_namespaces, reorder_span).
  1058. Arguments:
  1059. file_lines: an array of LineInfo objects with .type and
  1060. .reorder_span filled in.
  1061. Returns:
  1062. (None, None) if we could not find a first namespace-level
  1063. reorder-span, or (enclosing_namespaces, reorder_span), where
  1064. enclosing_namespaces is a string that looks like (for instance)
  1065. 'namespace ns1 { namespace ns2 {', and reorder-span is a
  1066. [start_line, end_line) pair.
  1067. """
  1068. simple_namespace_re = re.compile(r'^\s*namespace\s+([^{\s]+)\s*\{\s*(//.*)?$')
  1069. namespace_prefix = ''
  1070. for line_number in xrange(len(file_lines)):
  1071. line_info = file_lines[line_number]
  1072. if line_info.deleted:
  1073. continue
  1074. # If we're an empty line, just ignore us. Likewise with #include
  1075. # lines, which aren't 'contentful' for our purposes, and the
  1076. # header guard, which is (by definition) the only kind of #ifdef
  1077. # that we can be inside and still considered at the "top level".
  1078. if line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE, _INCLUDE_RE,
  1079. _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE):
  1080. continue
  1081. # If we're a 'contentful' line such as a (non-header-guard) #ifdef, bail.
  1082. elif line_info.type in (_IF_RE, _NAMESPACE_END_RE, _ELSE_RE, _ENDIF_RE,
  1083. None): # None is a 'normal' contentful line
  1084. # TODO(csilvers): we could probably keep going if there are no
  1085. # braces on the line. We could also keep track of our #ifdef
  1086. # depth instead of bailing on #else and #endif, and only accept
  1087. # the fwd-decl-inside-namespace if it's at ifdef-depth 0.
  1088. break
  1089. elif line_info.type == _NAMESPACE_START_RE:
  1090. # Only handle the simple case of 'namespace <foo> {'
  1091. m = simple_namespace_re.match(line_info.line)
  1092. if not m:
  1093. break
  1094. namespace_prefix += ('namespace %s { ' % m.group(1).strip())
  1095. elif line_info.type == _FORWARD_DECLARE_RE:
  1096. # If we're not in a namespace, keep going. Otherwise, this is
  1097. # just the situation we're looking for!
  1098. if namespace_prefix:
  1099. return (namespace_prefix, line_info.reorder_span)
  1100. else:
  1101. # We should have handled all the cases above!
  1102. assert False, ('unknown line-info type',
  1103. _LINE_TYPES.index(line_info.type))
  1104. # We stopped because we hit a contentful line (or, possibly, a
  1105. # weird-looking namespace). If we're inside the first-namespace,
  1106. # return this position as a good place to insert forward-declares.
  1107. if namespace_prefix:
  1108. return (namespace_prefix, (line_number, line_number))
  1109. return (None, None)
  1110. # These are potential 'kind' arguments to _FirstReorderSpanWith.
  1111. # We also sort our output in this order, to the extent possible.
  1112. _MAIN_CU_INCLUDE_KIND = 1 # e.g. #include "foo.h" when editing foo.cc
  1113. _C_SYSTEM_INCLUDE_KIND = 2 # e.g. #include <stdio.h>
  1114. _CXX_SYSTEM_INCLUDE_KIND = 3 # e.g. #include <vector>
  1115. _NONSYSTEM_INCLUDE_KIND = 4 # e.g. #include "bar.h"
  1116. _PROJECT_INCLUDE_KIND = 5 # e.g. #include "myproject/quux.h"
  1117. _FORWARD_DECLARE_KIND = 6 # e.g. class Baz;
  1118. _EOF_KIND = 7 # used at eof
  1119. def _IsSystemInclude(line_info):
  1120. """Given a line-info, return true iff the line is a <>-style #include."""
  1121. # The key for #includes includes the <> or "", so this is easy. :-)
  1122. return line_info.type == _INCLUDE_RE and line_info.key[0] == '<'
  1123. def _IsMainCUInclude(line_info, filename):
  1124. """Given a line-info, return true iff the line is a 'main-CU' #include line.
  1125. A 'main-CU' #include line is one that is related to the file being edited.
  1126. For instance, if we are editing foo.cc, foo.h is a main-CU #include, as
  1127. is foo-inl.h. The same holds if we are editing foo_test.cc.
  1128. The algorithm is like so: first, remove the following extensions
  1129. from both the includer and includee to get the 'canonical' name:
  1130. -inl.h .h _unittest.cc _regtest.cc _test.cc .cc .c
  1131. Rule 1: If the canonical names (filenames after removal) match --
  1132. including all directories -- the .h file is a main-cu #include.
  1133. Rule 2: If the basenames of the canonnical names match -- that is,
  1134. ignoring all directories -- the .h file is a main-cu #include *if*
  1135. it is the first #include seen.
  1136. Arguments:
  1137. line_info: a LineInfo structure with .type,
  1138. .is_first_line_of_this_type, and .key filled in.
  1139. filename: the name of the file being edited.
  1140. Returns:
  1141. True if line_info is an #include of a main_CU file, False else.
  1142. """
  1143. if line_info.type != _INCLUDE_RE or _IsSystemInclude(line_info):
  1144. return False
  1145. # First, normalize the filenames by getting rid of -inl.h and .h
  1146. # suffixes (for the #include) and _test.cc and .cc extensions (for
  1147. # the filename). We also get rid of the "'s around the #include line.
  1148. canonical_include = re.sub(r'(-inl\.h|\.h)$',
  1149. '', line_info.key.replace('"', ''))
  1150. canonical_file = re.sub(r'(_unittest\.cc|_regtest\.cc|_test\.cc|\.cc|\.c)$',
  1151. '', filename)
  1152. # .h files in /public/ match .cc files in /internal/.
  1153. canonical_include2 = re.sub(r'/public/', '/internal/', canonical_include)
  1154. # Rule 1:
  1155. if canonical_file in (canonical_include, canonical_include2):
  1156. return True
  1157. # Rule 2:
  1158. if (line_info.is_first_line_of_this_type and
  1159. os.path.basename(canonical_file) == os.path.basename(canonical_include)):
  1160. return True
  1161. return False
  1162. def _IsSameProject(line_info, edited_file, project):
  1163. """Return true if included file and edited file are in the same project.
  1164. An included_file is in project 'project' if the project is a prefix of the
  1165. included_file. 'project' should end with /.
  1166. As a special case, if project is '<tld>', then the project is defined to
  1167. be the top-level directory of edited_file.
  1168. Arguments:
  1169. line_info: a LineInfo structure with .key containing the file that is
  1170. being included.
  1171. edited_file: the name of the file being edited.
  1172. project: if '<tld>', set the project path to be the top-level directory
  1173. name of the file being edited. If not '<tld>', this value is used to
  1174. specify the project directory.
  1175. Returns:
  1176. True if line_info and filename belong in the same project, False otherwise.
  1177. """
  1178. included_file = line_info.key[1:]
  1179. if project != '<tld>':
  1180. return included_file.startswith(project)
  1181. included_root = included_file.find(os.path.sep)
  1182. edited_root = edited_file.find(os.path.sep)
  1183. return (included_root > -1 and edited_root > -1 and
  1184. included_file[0:included_root] == edited_file[0:edited_root])
  1185. def _GetLineKind(file_line, filename, separate_project_includes):
  1186. """Given a file_line + file being edited, return best *_KIND value or None."""
  1187. line_without_coments = _COMMENT_RE.sub('', file_line.line)
  1188. if file_line.deleted:
  1189. return None
  1190. elif _IsMainCUInclude(file_line, filename):
  1191. return _MAIN_CU_INCLUDE_KIND
  1192. elif _IsSystemInclude(file_line) and '.' in line_without_coments:
  1193. return _C_SYSTEM_INCLUDE_KIND
  1194. elif _IsSystemInclude(file_line):
  1195. return _CXX_SYSTEM_INCLUDE_KIND
  1196. elif file_line.type == _INCLUDE_RE:
  1197. if (separate_project_includes and
  1198. _IsSameProject(file_line, filename, separate_project_includes)):
  1199. return _PROJECT_INCLUDE_KIND
  1200. return _NONSYSTEM_INCLUDE_KIND
  1201. elif file_line.type == _FORWARD_DECLARE_RE:
  1202. return _FORWARD_DECLARE_KIND
  1203. else:
  1204. return None
  1205. def _FirstReorderSpanWith(file_lines, good_reorder_spans, kind, filename,
  1206. flags):
  1207. """Returns [start_line,end_line) of 1st reorder_span with a line of kind kind.
  1208. This function iterates over all the reorder_spans in file_lines, and
  1209. calculates the first one that has a line of the given kind in it.
  1210. If no such reorder span is found, it takes the last span of 'lower'
  1211. kinds (main-cu kind is lowest, forward-declare is highest). If no
  1212. such reorder span is found, it takes the first span of 'higher'
  1213. kind, but not considering the forward-declare kind (we don't want to
  1214. put an #include with the first forward-declare, because it may be
  1215. inside a class or something weird). If there's *still* no match, we
  1216. return the first line past leading comments, whitespace, and #ifdef
  1217. guard lines. If there's *still* no match, we just insert at
  1218. end-of-file.
  1219. As a special case, we never return a span for forward-declares that is
  1220. after 'contentful' code, even if other forward-declares are there.
  1221. For instance:
  1222. using Foo::Bar;
  1223. class Bang;
  1224. We want to make sure to put 'namespace Foo { class Bar; }'
  1225. *before* the using line!
  1226. kind is one of the following enums, with examples:
  1227. _MAIN_CU_INCLUDE_KIND: #include "foo.h" when editing foo.cc
  1228. _C_SYSTEM_INCLUDE_KIND: #include <stdio.h>
  1229. _CXX_SYSTEM_INCLUDE_KIND: #include <vector>
  1230. _NONSYSTEM_INCLUDE_KIND: #include "bar.h"
  1231. _PROJECT_INCLUDE_KIND: #include "myproject/quux.h"
  1232. _FORWARD_DECLARE_KIND: class Baz;
  1233. Arguments:
  1234. file_lines: an array of LineInfo objects with .type and
  1235. .reorder_span filled in.
  1236. good_reorder_spans: a sorted list of reorder_spans to consider
  1237. (should not include reorder_spans inside #ifdefs or
  1238. namespaces).
  1239. kind: one of *_KIND values.
  1240. filename: the name of the file that file_lines comes from.
  1241. This is passed to _GetLineKind (are we a main-CU #include?)
  1242. flags: commandline flags, as parsed by optparse. We use
  1243. flags.separate_project_includes to sort the #includes for the
  1244. current project separately from other #includes.
  1245. Returns:
  1246. A pair of line numbers, [start_line, end_line), that is the 'best'
  1247. reorder_span in file_lines for the given kind.
  1248. """
  1249. assert kind in (_MAIN_CU_INCLUDE_KIND, _C_SYSTEM_INCLUDE_KIND,
  1250. _CXX_SYSTEM_INCLUDE_KIND, _NONSYSTEM_INCLUDE_KIND,
  1251. _PROJECT_INCLUDE_KIND, _FORWARD_DECLARE_KIND), kind
  1252. # Figure out where the first 'contentful' line is (after the first
  1253. # 'good' span, so we skip past header guards and the like). Basically,
  1254. # the first contentful line is a line not in any reorder span.
  1255. for i in xrange(len(good_reorder_spans) - 1):
  1256. if good_reorder_spans[i][1] != good_reorder_spans[i+1][0]:
  1257. first_contentful_line = good_reorder_spans[i][1]
  1258. break
  1259. else: # got to the end of the file without finding a break in the spans
  1260. if good_reorder_spans:
  1261. first_contentful_line = good_reorder_spans[-1][1]
  1262. else:
  1263. first_contentful_line = 0
  1264. # Let's just find the first and last span for each kind.
  1265. first_reorder_spans = {}
  1266. last_reorder_spans = {}
  1267. for reorder_span in good_reorder_spans:
  1268. for line_number in apply(xrange, reorder_span):
  1269. line_kind = _GetLineKind(file_lines[line_number], filename,
  1270. flags.separate_project_includes)
  1271. # Ignore forward-declares that come after 'contentful' code; we
  1272. # never want to insert new forward-declares there.
  1273. if (line_kind == _FORWARD_DECLARE_KIND and
  1274. line_number > first_contentful_line):
  1275. continue
  1276. if line_kind is not None:
  1277. first_reorder_spans.setdefault(line_kind, reorder_span)
  1278. last_reorder_spans[line_kind] = reorder_span
  1279. # Find the first span of our kind.
  1280. if kind in first_reorder_spans:
  1281. return first_reorder_spans[kind]
  1282. # Second choice: the last span of the nearest 'lower' kind (one sorted before us).
  1283. for backup_kind in xrange(kind - 1, _MAIN_CU_INCLUDE_KIND - 1, -1):
  1284. if backup_kind in last_reorder_spans:
  1285. return last_reorder_spans[backup_kind]
  1286. # Third choice: the first span of the nearest 'higher' kind (one sorted
  1287. # after us), but not counting _FORWARD_DECLARE_KIND.
  1288. for backup_kind in xrange(kind + 1, _FORWARD_DECLARE_KIND):
  1289. if backup_kind in first_reorder_spans:
  1290. return first_reorder_spans[backup_kind]
  1291. # There are no reorder-spans at all, or they are only
  1292. # _FORWARD_DECLARE spans. Return the first line past the leading
  1293. # comments, whitespace, and #ifdef guard lines, or the beginning
  1294. # of the _FORWARD_DECLARE span, whichever is smaller.
  1295. line_number = 0
  1296. seen_header_guard = False
  1297. while line_number < len(file_lines):
  1298. if file_lines[line_number].deleted:
  1299. line_number += 1
  1300. elif file_lines[line_number].type == _HEADER_GUARD_RE:
  1301. seen_header_guard = True
  1302. line_number += 2 # skip over the header guard
  1303. elif file_lines[line_number].type == _BLANK_LINE_RE:
  1304. line_number += 1
  1305. elif (file_lines[line_number].type == _COMMENT_LINE_RE
  1306. and not seen_header_guard):
  1307. # We put #includes after top-of-file comments. But comments
  1308. # inside the header guard are no longer top-of-file comments;
  1309. # #includes go before them.
  1310. line_number += 1
  1311. else:
  1312. # If the "first line" we would return is inside the forward-declare
  1313. # reorder span, just return that span, rather than creating a new
  1314. # span inside the existing one.
  1315. if first_reorder_spans:
  1316. assert first