PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/find_deprecated.py

https://github.com/BrucePHill/nltk
Python | 226 lines | 172 code | 16 blank | 38 comment | 25 complexity | d36d548282ec51d8714a5443b4920d08 MD5 | raw file
Possible License(s): Apache-2.0
  1. #!/usr/bin/env python
  2. #
  3. ## Natural Language Toolkit: Deprecated Function & Class Finder
  4. #
  5. # Copyright (C) 2001-2013 NLTK Project
  6. # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
  7. # URL: <http://www.nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. from __future__ import print_statement
  10. """
  11. This command-line tool takes a list of python files or directories,
  12. and searches them for calls to deprecated NLTK functions, or uses of
  13. deprecated NLTK classes. For each use of a deprecated object it
  14. finds, it will print out a warning containing the offending line, as
  15. well as its line number and containing file name. If the terminal has
  16. color support (and if epydoc is installed), then the offending
  17. identifier will be highlighted in red.
  18. """
  19. ######################################################################
  20. # Imports
  21. ######################################################################
  22. import os, re, sys, tokenize, textwrap
  23. import nltk, nltk.corpus
  24. from doctest import DocTestParser, register_optionflag
  25. from cStringIO import StringIO
  26. from nltk import defaultdict
  27. ######################################################################
  28. # Regexps
  29. ######################################################################
  30. #: A little over-simplified, but it'll do.
  31. STRING_PAT = (r'\s*[ur]{0,2}(?:'
  32. '"""[\s\S]*?"""|' '"[^"\n]+?"|'
  33. "'''[\s\S]*?'''|" "'[^'\n]+?'" ")\s*")
  34. STRING_RE = re.compile(STRING_PAT)
  35. STRINGS_PAT = '%s(?:[+]?%s)*' % (STRING_PAT, STRING_PAT)
  36. STRINGS_RE = re.compile(STRINGS_PAT)
  37. # Define a regexp to search for deprecated definitions.
  38. DEPRECATED_DEF_PAT = (
  39. r'^\s*@deprecated\s*\(\s*(%s)\s*\)\s*\n+' % STRINGS_PAT +
  40. r'\s*def\s*(\w+).*' +
  41. r'|' +
  42. r'^\s*class\s+(\w+)\s*\(.*Deprecated.*\):\s*')
  43. DEPRECATED_DEF_RE = re.compile(DEPRECATED_DEF_PAT, re.MULTILINE)
  44. CORPUS_READ_METHOD_RE = re.compile(
  45. '(%s)\.read\(' % ('|'.join(re.escape(n) for n in dir(nltk.corpus))))
  46. CLASS_DEF_RE = re.compile('^\s*class\s+(\w+)\s*[:\(]')
  47. ######################################################################
  48. # Globals
  49. ######################################################################
  50. # Yes, it's bad programming practice, but this is a little hack
  51. # script. :) These get initialized by find_deprecated_defs.
  52. deprecated_funcs = defaultdict(set)
  53. deprecated_classes = defaultdict(set)
  54. deprecated_methods = defaultdict(set)
  55. try:
  56. from epydoc.cli import TerminalController
  57. except ImportError:
  58. class TerminalController:
  59. def __getattr__(self, attr): return ''
  60. term = TerminalController()
  61. ######################################################################
  62. # Code
  63. ######################################################################
  64. # If we're using py24, then ignore the +SKIP directive.
  65. if sys.version_info[:2] < (2,5): register_optionflag('SKIP')
  66. def strip_quotes(s):
  67. s = s.strip()
  68. while (s and (s[0] in "ur") and (s[-1] in "'\"")):
  69. s = s[1:]
  70. while (s and (s[0] in "'\"" and (s[0] == s[-1]))):
  71. s = s[1:-1]
  72. s = s.strip()
  73. return s
  74. def find_class(s, index):
  75. lines = s[:index].split('\n')
  76. while lines:
  77. m = CLASS_DEF_RE.match(lines[-1])
  78. if m: return m.group(1)+'.'
  79. lines.pop()
  80. return '?.'
  81. def find_deprecated_defs(pkg_dir):
  82. """
  83. Return a list of all functions marked with the @deprecated
  84. decorator, and classes with an immediate Deprecated base class, in
  85. all Python files in the given directory.
  86. """
  87. # Walk through the directory, finding python files.
  88. for root, dirs, files in os.walk(pkg_dir):
  89. for filename in files:
  90. if filename.endswith('.py'):
  91. # Search the file for any deprecated definitions.
  92. s = open(os.path.join(root, filename)).read()
  93. for m in DEPRECATED_DEF_RE.finditer(s):
  94. if m.group(2):
  95. name = m.group(2)
  96. msg = ' '.join(strip_quotes(s) for s in
  97. STRING_RE.findall(m.group(1)))
  98. msg = ' '.join(msg.split())
  99. if m.group()[0] in ' \t':
  100. cls = find_class(s, m.start())
  101. deprecated_methods[name].add( (msg, cls, '()') )
  102. else:
  103. deprecated_funcs[name].add( (msg, '', '()') )
  104. else:
  105. name = m.group(3)
  106. m2 = STRING_RE.match(s, m.end())
  107. if m2: msg = strip_quotes(m2.group())
  108. else: msg = ''
  109. msg = ' '.join(msg.split())
  110. deprecated_classes[name].add( (msg, '', ''))
  111. def print_deprecated_uses(paths):
  112. dep_names = set()
  113. dep_files = set()
  114. for path in sorted(paths):
  115. if os.path.isdir(path):
  116. dep_names.update(print_deprecated_uses(
  117. [os.path.join(path,f) for f in os.listdir(path)]))
  118. elif path.endswith('.py'):
  119. print_deprecated_uses_in(open(path).readline, path,
  120. dep_files, dep_names, 0)
  121. elif path.endswith('.doctest') or path.endswith('.txt'):
  122. for example in DocTestParser().get_examples(open(path).read()):
  123. ex = StringIO(example.source)
  124. try:
  125. print_deprecated_uses_in(ex.readline, path, dep_files,
  126. dep_names, example.lineno)
  127. except tokenize.TokenError:
  128. print(term.RED + 'Caught TokenError -- '
  129. 'malformatted doctest?' + term.NORMAL)
  130. return dep_names
  131. def print_deprecated_uses_in(readline, path, dep_files, dep_names,
  132. lineno_offset):
  133. tokiter = tokenize.generate_tokens(readline)
  134. context = ['']
  135. for (typ, tok, start, end, line) in tokiter:
  136. # Remember the previous line -- it might contain
  137. # the @deprecated decorator.
  138. if line is not context[-1]:
  139. context.append(line)
  140. if len(context) > 10: del context[0]
  141. esctok = re.escape(tok)
  142. # Ignore all tokens except deprecated names.
  143. if not (tok in deprecated_classes or
  144. (tok in deprecated_funcs and
  145. re.search(r'\b%s\s*\(' % esctok, line)) or
  146. (tok in deprecated_methods and
  147. re.search(r'(?!<\bself)[.]\s*%s\s*\(' % esctok, line))):
  148. continue
  149. # Hack: only complain about read if it's used after a corpus.
  150. if tok == 'read' and not CORPUS_READ_METHOD_RE.search(line):
  151. continue
  152. # Ignore deprecated definitions:
  153. if DEPRECATED_DEF_RE.search(''.join(context)):
  154. continue
  155. # Print a header for the first use in a file:
  156. if path not in dep_files:
  157. print('\n'+term.BOLD + path + term.NORMAL)
  158. print(' %slinenum%s' % (term.YELLOW, term.NORMAL))
  159. dep_files.add(path)
  160. # Mark the offending token.
  161. dep_names.add(tok)
  162. if term.RED: sub = term.RED+tok+term.NORMAL
  163. elif term.BOLD: sub = term.BOLD+tok+term.NORMAL
  164. else: sub = '<<'+tok+'>>'
  165. line = re.sub(r'\b%s\b' % esctok, sub, line)
  166. # Print the offending line.
  167. print(' %s[%5d]%s %s' % (term.YELLOW, start[0]+lineno_offset,
  168. term.NORMAL, line.rstrip()))
  169. def main():
  170. paths = sys.argv[1:] or ['.']
  171. print('Importing nltk...')
  172. try:
  173. import nltk
  174. except ImportError:
  175. print('Unable to import nltk -- check your PYTHONPATH.')
  176. sys.exit(-1)
  177. print('Finding definitions of deprecated funtions & classes in nltk...')
  178. find_deprecated_defs(nltk.__path__[0])
  179. print('Looking for possible uses of deprecated funcs & classes...')
  180. dep_names = print_deprecated_uses(paths)
  181. if not dep_names:
  182. print('No deprecated funcs or classes found!')
  183. else:
  184. print("\n"+term.BOLD+"What you should use instead:"+term.NORMAL)
  185. for name in sorted(dep_names):
  186. msgs = deprecated_funcs[name].union(
  187. deprecated_classes[name]).union(
  188. deprecated_methods[name])
  189. for msg, prefix, suffix in msgs:
  190. print(textwrap.fill(term.RED+prefix+name+suffix+)
  191. term.NORMAL+': '+msg,
  192. width=75, initial_indent=' '*2,
  193. subsequent_indent=' '*6)
  194. if __name__ == '__main__':
  195. main()