PageRenderTime 73ms CodeModel.GetById 24ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/find_deprecated.py

https://github.com/haewoon/nltk
Python | 224 lines | 171 code | 15 blank | 38 comment | 25 complexity | 9cfd50f4b1250be1a2bfcf853f1e3a31 MD5 | raw file
Possible License(s): Apache-2.0
  1. #!/usr/bin/env python
  2. #
  3. ## Natural Language Toolkit: Deprecated Function & Class Finder
  4. #
  5. # Copyright (C) 2001-2012 NLTK Project
  6. # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
  7. # URL: <http://www.nltk.org/>
  8. # For license information, see LICENSE.TXT
  9. """
  10. This command-line tool takes a list of python files or directories,
  11. and searches them for calls to deprecated NLTK functions, or uses of
  12. deprecated NLTK classes. For each use of a deprecated object it
  13. finds, it will print out a warning containing the offending line, as
  14. well as its line number and containing file name. If the terminal has
  15. color support (and if epydoc is installed), then the offending
  16. identifier will be highlighted in red.
  17. """
  18. ######################################################################
  19. # Imports
  20. ######################################################################
  21. import os, re, sys, tokenize, textwrap
  22. import nltk, nltk.corpus
  23. from doctest import DocTestParser, register_optionflag
  24. from cStringIO import StringIO
  25. from nltk import defaultdict
  26. ######################################################################
  27. # Regexps
  28. ######################################################################
  29. #: A little over-simplified, but it'll do.
  30. STRING_PAT = (r'\s*[ur]{0,2}(?:'
  31. '"""[\s\S]*?"""|' '"[^"\n]+?"|'
  32. "'''[\s\S]*?'''|" "'[^'\n]+?'" ")\s*")
  33. STRING_RE = re.compile(STRING_PAT)
  34. STRINGS_PAT = '%s(?:[+]?%s)*' % (STRING_PAT, STRING_PAT)
  35. STRINGS_RE = re.compile(STRINGS_PAT)
  36. # Define a regexp to search for deprecated definitions.
  37. DEPRECATED_DEF_PAT = (
  38. r'^\s*@deprecated\s*\(\s*(%s)\s*\)\s*\n+' % STRINGS_PAT +
  39. r'\s*def\s*(\w+).*' +
  40. r'|' +
  41. r'^\s*class\s+(\w+)\s*\(.*Deprecated.*\):\s*')
  42. DEPRECATED_DEF_RE = re.compile(DEPRECATED_DEF_PAT, re.MULTILINE)
  43. CORPUS_READ_METHOD_RE = re.compile(
  44. '(%s)\.read\(' % ('|'.join(re.escape(n) for n in dir(nltk.corpus))))
  45. CLASS_DEF_RE = re.compile('^\s*class\s+(\w+)\s*[:\(]')
  46. ######################################################################
  47. # Globals
  48. ######################################################################
  49. # Yes, it's bad programming practice, but this is a little hack
  50. # script. :) These get initialized by find_deprecated_defs.
  51. deprecated_funcs = defaultdict(set)
  52. deprecated_classes = defaultdict(set)
  53. deprecated_methods = defaultdict(set)
  54. try:
  55. from epydoc.cli import TerminalController
  56. except ImportError:
  57. class TerminalController:
  58. def __getattr__(self, attr): return ''
  59. term = TerminalController()
  60. ######################################################################
  61. # Code
  62. ######################################################################
  63. # If we're using py24, then ignore the +SKIP directive.
  64. if sys.version_info[:2] < (2,5): register_optionflag('SKIP')
  65. def strip_quotes(s):
  66. s = s.strip()
  67. while (s and (s[0] in "ur") and (s[-1] in "'\"")):
  68. s = s[1:]
  69. while (s and (s[0] in "'\"" and (s[0] == s[-1]))):
  70. s = s[1:-1]
  71. s = s.strip()
  72. return s
  73. def find_class(s, index):
  74. lines = s[:index].split('\n')
  75. while lines:
  76. m = CLASS_DEF_RE.match(lines[-1])
  77. if m: return m.group(1)+'.'
  78. lines.pop()
  79. return '?.'
  80. def find_deprecated_defs(pkg_dir):
  81. """
  82. Return a list of all functions marked with the @deprecated
  83. decorator, and classes with an immediate Deprecated base class, in
  84. all Python files in the given directory.
  85. """
  86. # Walk through the directory, finding python files.
  87. for root, dirs, files in os.walk(pkg_dir):
  88. for filename in files:
  89. if filename.endswith('.py'):
  90. # Search the file for any deprecated definitions.
  91. s = open(os.path.join(root, filename)).read()
  92. for m in DEPRECATED_DEF_RE.finditer(s):
  93. if m.group(2):
  94. name = m.group(2)
  95. msg = ' '.join(strip_quotes(s) for s in
  96. STRING_RE.findall(m.group(1)))
  97. msg = ' '.join(msg.split())
  98. if m.group()[0] in ' \t':
  99. cls = find_class(s, m.start())
  100. deprecated_methods[name].add( (msg, cls, '()') )
  101. else:
  102. deprecated_funcs[name].add( (msg, '', '()') )
  103. else:
  104. name = m.group(3)
  105. m2 = STRING_RE.match(s, m.end())
  106. if m2: msg = strip_quotes(m2.group())
  107. else: msg = ''
  108. msg = ' '.join(msg.split())
  109. deprecated_classes[name].add( (msg, '', ''))
  110. def print_deprecated_uses(paths):
  111. dep_names = set()
  112. dep_files = set()
  113. for path in sorted(paths):
  114. if os.path.isdir(path):
  115. dep_names.update(print_deprecated_uses(
  116. [os.path.join(path,f) for f in os.listdir(path)]))
  117. elif path.endswith('.py'):
  118. print_deprecated_uses_in(open(path).readline, path,
  119. dep_files, dep_names, 0)
  120. elif path.endswith('.doctest') or path.endswith('.txt'):
  121. for example in DocTestParser().get_examples(open(path).read()):
  122. ex = StringIO(example.source)
  123. try:
  124. print_deprecated_uses_in(ex.readline, path, dep_files,
  125. dep_names, example.lineno)
  126. except tokenize.TokenError:
  127. print (term.RED + 'Caught TokenError -- '
  128. 'malformatted doctest?' + term.NORMAL)
  129. return dep_names
  130. def print_deprecated_uses_in(readline, path, dep_files, dep_names,
  131. lineno_offset):
  132. tokiter = tokenize.generate_tokens(readline)
  133. context = ['']
  134. for (typ, tok, start, end, line) in tokiter:
  135. # Remember the previous line -- it might contain
  136. # the @deprecated decorator.
  137. if line is not context[-1]:
  138. context.append(line)
  139. if len(context) > 10: del context[0]
  140. esctok = re.escape(tok)
  141. # Ignore all tokens except deprecated names.
  142. if not (tok in deprecated_classes or
  143. (tok in deprecated_funcs and
  144. re.search(r'\b%s\s*\(' % esctok, line)) or
  145. (tok in deprecated_methods and
  146. re.search(r'(?!<\bself)[.]\s*%s\s*\(' % esctok, line))):
  147. continue
  148. # Hack: only complain about read if it's used after a corpus.
  149. if tok == 'read' and not CORPUS_READ_METHOD_RE.search(line):
  150. continue
  151. # Ignore deprecated definitions:
  152. if DEPRECATED_DEF_RE.search(''.join(context)):
  153. continue
  154. # Print a header for the first use in a file:
  155. if path not in dep_files:
  156. print '\n'+term.BOLD + path + term.NORMAL
  157. print ' %slinenum%s' % (term.YELLOW, term.NORMAL)
  158. dep_files.add(path)
  159. # Mark the offending token.
  160. dep_names.add(tok)
  161. if term.RED: sub = term.RED+tok+term.NORMAL
  162. elif term.BOLD: sub = term.BOLD+tok+term.NORMAL
  163. else: sub = '<<'+tok+'>>'
  164. line = re.sub(r'\b%s\b' % esctok, sub, line)
  165. # Print the offending line.
  166. print ' %s[%5d]%s %s' % (term.YELLOW, start[0]+lineno_offset,
  167. term.NORMAL, line.rstrip())
  168. def main():
  169. paths = sys.argv[1:] or ['.']
  170. print 'Importing nltk...'
  171. try:
  172. import nltk
  173. except ImportError:
  174. print 'Unable to import nltk -- check your PYTHONPATH.'
  175. sys.exit(-1)
  176. print 'Finding definitions of deprecated funtions & classes in nltk...'
  177. find_deprecated_defs(nltk.__path__[0])
  178. print 'Looking for possible uses of deprecated funcs & classes...'
  179. dep_names = print_deprecated_uses(paths)
  180. if not dep_names:
  181. print 'No deprecated funcs or classes found!'
  182. else:
  183. print "\n"+term.BOLD+"What you should use instead:"+term.NORMAL
  184. for name in sorted(dep_names):
  185. msgs = deprecated_funcs[name].union(
  186. deprecated_classes[name]).union(
  187. deprecated_methods[name])
  188. for msg, prefix, suffix in msgs:
  189. print textwrap.fill(term.RED+prefix+name+suffix+
  190. term.NORMAL+': '+msg,
  191. width=75, initial_indent=' '*2,
  192. subsequent_indent=' '*6)
  193. if __name__ == '__main__':
  194. main()