
/pymode/autopep8.py

https://gitlab.com/vim-IDE/python-mode
#!/usr/bin/env python

# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2015 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need be done once can be added by adding a function of the form
"fix_<code>(source)" to this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().

Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
documentation for more information.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import keyword
import locale
import os
import re
import signal
import sys
import textwrap
import token
import tokenize

import pep8


try:
    unicode
except NameError:
    unicode = str


__version__ = '1.2.1a0'


CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')


# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E24'
DEFAULT_INDENT_SIZE = 4


# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E231': ['ws_comma'],
    'E721': ['idioms'],
    'W601': ['has_key'],
    'W603': ['ne'],
    'W604': ['repr'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}


if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')


def open_with_encoding(filename, encoding=None, mode='r'):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding
        with open_with_encoding(filename, encoding) as test_file:
            test_file.read()

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()


def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith('class '):
        if logical_line.startswith(('def ', 'class ', '@')):
            if indent_level and not blank_lines and not blank_before:
                yield (0, 'E309 expected 1 blank line after class declaration')
    elif previous_logical.startswith('def '):
        if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
    elif pep8.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            not blank_before and
            logical_line.startswith(('def ')) and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')
pep8.register_check(extended_blank_lines)


def continued_indentation(logical_line, tokens, indent_level, indent_char,
                          noqa):
    """Override pep8's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pep8.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {0}'.format(indent[depth]))
            elif close_bracket and not hang:
                pass
            elif indent[depth] and start[1] < indent[depth]:
                # Visual indent is broken.
                yield (start, 'E128 {0}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket:
                    yield (start, 'E123 {0}'.format(indent_level +
                                                    rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, unicode):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{0} {1}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = unicode
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        yield (pos, 'E125 {0}'.format(indent_level +
                                      2 * DEFAULT_INDENT_SIZE))

del pep8._checks['logical_line'][pep8.continued_indentation]
pep8.register_check(continued_indentation)


class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for these
    automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information from
    pep8. The second argument, "logical", is required only for logical-line
    fixes.

    The fixer method can return the list of modified lines or None. An empty
    list would mean that no changes were made. None would mean that only the
    line reported in the pep8 error was modified. Note that the modified line
    numbers that are returned are indexed at 1. This typically would correspond
    with the line number reported in the pep8 error information.

    [fixed method list]
        - e121,e122,e123,e124,e125,e126,e127,e128,e129
        - e201,e202,e203
        - e211
        - e221,e222,e223,e224,e225
        - e231
        - e251
        - e261,e262
        - e271,e272,e273,e274
        - e301,e302,e303
        - e401
        - e502
        - e701,e702
        - e711
        - w291

    """

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pep8 categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e116 = self.fix_e113
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e309 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702
        self.fix_w293 = self.fix_w291

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(inspect.getargspec(fix).args) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                                logical[0][0] + 1,
                                logical[1][0] + 1)).intersection(
                                    completed_lines):
                            continue

                    modified_lines = fix(result, logical)
                else:
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '---> Not fixing {f} on line {l}'.format(
                                f=result['id'], l=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "---> '{0}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('---> {0}:{1}:{2}:{3}'.format(self.filename,
                                                        result['line'],
                                                        result['column'],
                                                        info),
                          file=sys.stderr)

    def fix(self):
        """Return a version of the source code with PEP 8 violations fixed."""
        pep8_options = {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
        }
        results = _execute_pep8(pep8_options, self.source)

        if self.options.verbose:
            progress = {}
            for r in results:
                if r['id'] not in progress:
                    progress[r['id']] = set()
                progress[r['id']].add(r['line'])
            print('---> {n} issue(s) to fix {progress}'.format(
                n=len(results), progress=progress), file=sys.stderr)

        if self.options.line_range:
            start, end = self.options.line_range
            results = [r for r in results
                       if start <= r['line'] <= end]

        self._fix_source(filter_results(source=''.join(self.source),
                                        results=results,
                                        aggressive=self.options.aggressive))

        if self.options.line_range:
            # If number of lines has changed then change line_range.
            count = sum(sline.count('\n')
                        for sline in self.source[start - 1:end])
            self.options.line_range[1] = start + count - 1

        return ''.join(self.source)

    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if is_probably_part_of_multiline(target):
            return []

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
        else:
            return []

    def fix_e231(self, result):
        """Add missing whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset] + ' ' + target[offset:]
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pep8 sometimes reports columns that go
        # past the end of the physical line. This happens in cases like,
        # foo(bar\n=None)
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if is_probably_part_of_multiline(target):
            return []

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        cr = '\n' * add_linenum
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pep8 reports an offset line number if there
        # are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)
        else:
            return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]
        else:
            return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            # Wrap commented lines.
            return shorten_comment(
                line=target,
                max_line_length=self.options.max_line_length,
                last_comment=not next_line.lstrip().startswith('#'))

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)
        if fixed and not code_almost_equal(original, fixed):
            return fixed
        else:
            self.long_line_ignore_cache.add(cache_entry)
            return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]

    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        # Find inline comment.
        inline_comment = None
        if '# ' == target[offset:].lstrip(';').lstrip()[:2]:
            inline_comment = target[offset:].lstrip(';')

        if inline_comment:
            self.source[line_index] = first + inline_comment
        else:
            self.source[line_index] = first + '\n' + second
        return [line_index + 1]

    def fix_e711(self, result):
        """Fix comparison with None."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if not right.startswith('None'):
            return []

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])

    def fix_e712(self, result):
        """Fix (trivial case of) comparison with boolean."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if [\w.]+ == False:$', target):
            self.source[line_index] = re.sub(r'if ([\w.]+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if [\w.]+ != True:$', target):
            self.source[line_index] = re.sub(r'if ([\w.]+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right

    def fix_e713(self, result):
        """Fix (trivial case of) non-membership check."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)

        # Handle very easy case only.
        if re.match(r'^\s*if not [\w.]+ in [\w.]+:$', target):
            self.source[line_index] = re.sub(r'if not ([\w.]+) in ([\w.]+):',
                                             r'if \1 not in \2:',
                                             target,
                                             count=1)

    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'

    def fix_w391(self, _):
        """Remove trailing blank lines."""
        blank_count = 0
        for line in reversed(self.source):
            line = line.rstrip()
            if line:
                break
            else:
                blank_count += 1

        original_length = len(self.source)
        self.source = self.source[:original_length - blank_count]
        return range(1, 1 + original_length)


def get_index_offset_contents(result, source):
    """Return (line_index, column_offset, line_contents)."""
    line_index = result['line'] - 1
    return (line_index,
            result['column'] - 1,
            source[line_index])


def get_fixed_long_line(target, previous_line, original,
                        indent_word='    ', max_line_length=79,
                        aggressive=False, experimental=False, verbose=False):
    """Break up long line and return result.

    Do this by generating multiple reformatted candidates and then
    ranking the candidates to heuristically select the best option.

    """
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    candidates = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    # Also sort alphabetically as a tie breaker (for determinism).
    candidates = sorted(
        sorted(set(candidates).union([target, original])),
        key=lambda x: line_shortening_rank(
            x,
            indent_word,
            max_line_length,
            experimental=experimental))

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=wrap_output(sys.stderr, 'utf-8'))

    if candidates:
        best_candidate = candidates[0]

        # Don't allow things to get longer.
        if longest_line_length(best_candidate) > longest_line_length(original):
            return None
        else:
            return best_candidate


def longest_line_length(code):
    """Return length of longest line."""
    return max(len(line) for line in code.splitlines())


def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'


def untokenize_without_newlines(tokens):
    """Return source code based on tokens."""
    text = ''
    last_row = 0
    last_column = -1

    for t in tokens:
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        if start_row > last_row:
            last_column = 0
        if (
            (start_column > last_column or token_string == '\n') and
            not text.endswith(' ')
        ):
            text += ' '

        if token_string != '\n':
            text += token_string

        last_row = end_row
        last_column = end_column

    return text.rstrip()


def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    for i in range(0, len(logical_start), 1):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    if 0 <= index < len(items):
        return items[index]
    else:
        return default


def reindent(source, indent_size):
    """Reindent all lines."""
    reindenter = Reindenter(source)
    return reindenter.run(indent_size)


def code_almost_equal(a, b):
    """Return True if code is similar.

    Ignore whitespace when comparing specific line.

    """
    split_a = split_and_strip_non_empty_lines(a)
    split_b = split_and_strip_non_empty_lines(b)

    if len(split_a) != len(split_b):
        return False

    for index in range(len(split_a)):
        if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
            return False

    return True
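
# For example, code_almost_equal('x = (1,\n       2)', 'x = (1,\n  2)')
# is True, since only intra-line whitespace differs, while comparing
# against 'x = (1, 2)' is False because the line counts differ.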


def split_and_strip_non_empty_lines(text):
    """Return lines split by newline.

    Ignore empty lines.

    """
    return [line.strip() for line in text.splitlines() if line.strip()]


def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
    """Format block comments."""
    if '#' not in source:
        # Optimization.
        return source

    ignored_line_numbers = multiline_string_lines(
        source,
        include_docstrings=True) | set(commented_out_code_lines(source))

    fixed_lines = []
    sio = io.StringIO(source)
    for (line_number, line) in enumerate(sio.readlines(), start=1):
        if (
            line.lstrip().startswith('#') and
            line_number not in ignored_line_numbers
        ):
            indentation = _get_indentation(line)
            line = line.lstrip()

            # Normalize beginning if not a shebang.
            if len(line) > 1:
                pos = next((index for index, c in enumerate(line)
                            if c != '#'))
                if (
                    # Leave multiple spaces like '#    ' alone.
                    (line[:pos].count('#') > 1 or line[1].isalnum()) and
                    # Leave stylistic outlined blocks alone.
                    not line.rstrip().endswith('#')
                ):
                    line = '# ' + line.lstrip('# \t')

            fixed_lines.append(indentation + line)
        else:
            fixed_lines.append(line)

    return ''.join(fixed_lines)


def refactor(source, fixer_names, ignore=None, filename=''):
    """Return refactored code using lib2to3.

    Skip if ignore string is produced in the refactored code.

    """
    from lib2to3 import pgen2
    try:
        new_text = refactor_with_2to3(source,
                                      fixer_names=fixer_names,
                                      filename=filename)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        return source

    if ignore:
        if ignore in new_text and ignore not in source:
            return source

    return new_text


def code_to_2to3(select, ignore):
    fixes = set()
    for code, fix in CODE_TO_2TO3.items():
        if code_match(code, select=select, ignore=ignore):
            fixes |= set(fix)
    return fixes


def fix_2to3(source,
             aggressive=True, select=None, ignore=None, filename=''):
    """Fix various deprecated code (via lib2to3)."""
    if not aggressive:
        return source

    select = select or []
    ignore = ignore or []

    return refactor(source,
                    code_to_2to3(select=select,
                                 ignore=ignore),
                    filename=filename)


def fix_w602(source, aggressive=True):
    """Fix deprecated form of raising exception."""
    if not aggressive:
        return source

    return refactor(source, ['raise'],
                    ignore='with_traceback')


def find_newline(source):
    """Return type of newline used in source.

    Input is a list of lines.

    """
    assert not isinstance(source, unicode)

    counter = collections.defaultdict(int)
    for line in source:
        if line.endswith(CRLF):
            counter[CRLF] += 1
        elif line.endswith(CR):
            counter[CR] += 1
        elif line.endswith(LF):
            counter[LF] += 1

    return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
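
# For example, find_newline(['a\r\n', 'b\r\n', 'c\n']) returns '\r\n',
# the most common line ending; a source with no recognized endings
# falls back to LF.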


def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word


def _get_indentation(line):
    """Return leading whitespace."""
    if line.strip():
        non_whitespace_index = len(line) - len(line.lstrip())
        return line[:non_whitespace_index]
    else:
        return ''


def get_diff_text(old, new, filename):
    """Return text of unified diff between old and new."""
    newline = '\n'
    diff = difflib.unified_diff(
        old, new,
        'original/' + filename,
        'fixed/' + filename,
        lineterm=newline)

    text = ''
    for line in diff:
        text += line

        # Work around missing newline (http://bugs.python.org/issue2142).
        if text and not line.endswith(newline):
            text += newline + r'\ No newline at end of file' + newline

    return text


def _priority_key(pep8_result):
    """Key for sorting PEP8 results.

    Global fixes should be done first. This is important for things like
    indentation.

    """
    priority = [
        # Fix multiline colon-based before semicolon based.
        'e701',
        # Break multiline statements early.
        'e702',
        # Things that make lines longer.
        'e225', 'e231',
        # Remove extraneous whitespace before breaking lines.
        'e201',
        # Shorten whitespace in comment before resorting to wrapping.
        'e262'
    ]
    middle_index = 10000
    lowest_priority = [
        # We need to shorten lines last since the logical fixer can get in a
        # loop, which causes us to exit early.
        'e501'
    ]
    key = pep8_result['id'].lower()
    try:
        return priority.index(key)
    except ValueError:
        try:
            return middle_index + lowest_priority.index(key) + 1
        except ValueError:
            return middle_index
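
# Thus sorted(results, key=_priority_key) runs e701/e702 first, then
# whitespace fixes like e225/e231/e201/e262, then everything else at
# key 10000, with e501 line shortening last at key 10001.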


def shorten_line(tokens, source, indentation, indent_word, max_line_length,
                 aggressive=False, experimental=False, previous_line=''):
    """Separate line at OPERATOR.

    Multiple candidates will be yielded.

    """
    for candidate in _shorten_line(tokens=tokens,
                                   source=source,
                                   indentation=indentation,
                                   indent_word=indent_word,
                                   aggressive=aggressive,
                                   previous_line=previous_line):
        yield candidate

    if aggressive:
        for key_token_strings in SHORTEN_OPERATOR_GROUPS:
            shortened = _shorten_line_at_tokens(
                tokens=tokens,
                source=source,
                indentation=indentation,
                indent_word=indent_word,
                key_token_strings=key_token_strings,
                aggressive=aggressive)

            if shortened is not None and shortened != source:
                yield shortened

    if experimental:
        for shortened in _shorten_line_at_tokens_new(
                tokens=tokens,
                source=source,
                indentation=indentation,
                max_line_length=max_line_length):
            yield shortened


def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            second_indent = indentation
            if first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma.
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot.
            if first.rstrip().endswith('.'):
                continue
            if token_string in '+-*/':
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second