PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/nltk/help.py

https://github.com/BrucePHill/nltk
Python | 56 lines | 34 code | 10 blank | 12 comment | 9 complexity | 1f355a7c8c9a347e5ae7c87bdf74d1cd MD5 | raw file
Possible License(s): Apache-2.0
  1. # Natural Language Toolkit (NLTK) Help
  2. #
  3. # Copyright (C) 2001-2013 NLTK Project
  4. # Authors: Steven Bird <stevenbird1@gmail.com>
  5. # URL: <http://www.nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. """
  8. Provide structured access to documentation.
  9. """
  10. from __future__ import print_function
  11. import re
  12. from textwrap import wrap
  13. from nltk.data import load
  14. def brown_tagset(tagpattern=None):
  15. _format_tagset("brown_tagset", tagpattern)
  16. def claws5_tagset(tagpattern=None):
  17. _format_tagset("claws5_tagset", tagpattern)
  18. def upenn_tagset(tagpattern=None):
  19. _format_tagset("upenn_tagset", tagpattern)
  20. #####################################################################
  21. # UTILITIES
  22. #####################################################################
  23. def _print_entries(tags, tagdict):
  24. for tag in tags:
  25. entry = tagdict[tag]
  26. defn = [tag + ": " + entry[0]]
  27. examples = wrap(entry[1], width=75, initial_indent=' ', subsequent_indent=' ')
  28. print("\n".join(defn + examples))
  29. def _format_tagset(tagset, tagpattern=None):
  30. tagdict = load("help/tagsets/" + tagset + ".pickle")
  31. if not tagpattern:
  32. _print_entries(sorted(tagdict), tagdict)
  33. elif tagpattern in tagdict:
  34. _print_entries([tagpattern], tagdict)
  35. else:
  36. tagpattern = re.compile(tagpattern)
  37. tags = [tag for tag in sorted(tagdict) if tagpattern.match(tag)]
  38. if tags:
  39. _print_entries(tags, tagdict)
  40. else:
  41. print("No matching tags found.")
  42. if __name__ == '__main__':
  43. brown_tagset(r'NN.*')
  44. upenn_tagset(r'.*\$')
  45. claws5_tagset('UNDEFINED')
  46. brown_tagset(r'NN')