/howie/aiml/WordSub.py

https://github.com/ghedsouza/Enigma · Python · 95 lines · 64 code · 11 blank · 20 comment · 10 complexity · 81de602520248ecbc1f3b8f8d40300a1 MD5 · raw file

  1. """This module implements the WordSub class, modelled after a recipe
  2. in "Python Cookbook" (Recipe 3.14, "Replacing Multiple Patterns in a
  3. Single Pass" by Xavier Defrang).
  4. Usage:
  5. Use this class like a dictionary to add before/after pairs:
  6. > subber = WordSub()
  7. > subber["before"] = "after"
  8. > subber["begin"] = "end"
  9. Use the sub() method to perform the substitution:
  10. > print subber.sub("before we begin")
  11. after we end
  12. All matching is intelligently case-insensitive:
  13. > print subber.sub("Before we BEGIN")
  14. After we END
  15. The 'before' words must be complete words -- no prefixes.
  16. The following example illustrates this point:
  17. > subber["he"] = "she"
  18. > print subber.sub("he says he'd like to help her")
  19. she says she'd like to help her
  20. Note that "he" and "he'd" were replaced, but "help" and "her" were
  21. not.
  22. """
  # 'dict' objects weren't available to subclass from until version 2.2.
  # Get around this by importing UserDict.UserDict if the built-in dict
  # object isn't available.
  try:
      # Looking up the bare name raises NameError when no such builtin exists.
      dict
  except NameError:
      # Catch only the missing-name case; a bare "except:" would also hide
      # unrelated problems such as KeyboardInterrupt.
      from UserDict import UserDict as dict
  28. import ConfigParser
  29. import re
  30. import string
  class WordSub(dict):
      """All-in-one multiple-string-substitution class.

      The object is a dictionary of 'before' -> 'after' words.  Every pair
      that is assigned is stored in three case variants (lower case,
      capitalised words and upper case), and sub() replaces every whole-word
      occurrence of a stored key with its value in a single regex pass.
      """

      def _wordToRegex(self, word):
          """Return a regex pattern string which matches `word` as a whole word."""
          # re.escape keeps punctuation in the key (e.g. "I'd") literal.
          return r"\b%s\b" % re.escape(word)

      def _update_regex(self):
          """Rebuild the combined regex from the keys of the current dictionary."""
          # Regex alternation picks the first alternative that matches, so put
          # the longest keys first: otherwise "he" could win over "he'd"
          # depending on dictionary ordering.
          ordered = sorted(self, key=len, reverse=True)
          self._regex = re.compile("|".join(map(self._wordToRegex, ordered)))
          self._regexIsDirty = False

      def __init__(self, defaults=None):
          """Initialize the object, and populate it with the entries in
          the defaults dictionary.

          defaults -- optional mapping of before/after pairs; None (the
                      default) creates an empty substitution table.
          """
          super(WordSub, self).__init__()
          self._regex = None
          self._regexIsDirty = True
          if defaults is not None:
              # Go through __setitem__ so that all case variants are stored.
              for k, v in defaults.items():
                  self[k] = v

      def __call__(self, match):
          """Handler invoked for each regex match; returns the replacement text."""
          return self[match.group(0)]

      def __setitem__(self, i, y):
          """Store the pair i -> y in lower-case, Capitalised and UPPER-CASE form."""
          self._regexIsDirty = True
          # Name the class explicitly: super(type(self), self) recurses forever
          # when this method is reached through a subclass instance.
          store = super(WordSub, self).__setitem__
          # for each entry the user adds, we actually add three entries:
          store(i.lower(), y.lower())                    # key = value
          store(string.capwords(i), string.capwords(y))  # Key = Value
          store(i.upper(), y.upper())                    # KEY = VALUE

      def sub(self, text):
          """Translate text, returns the modified text."""
          if not self:
              # With no keys the joined pattern is empty and would match at
              # every position, making __call__ fail with KeyError('').
              return text
          if self._regexIsDirty:
              self._update_regex()
          return self._regex.sub(self, text)
  # self-test
  if __name__ == "__main__":
      subber = WordSub()
      subber["apple"] = "banana"
      subber["orange"] = "pear"
      subber["banana"] = "apple"
      subber["he"] = "she"
      subber["I'd"] = "I would"

      # test case insensitivity
      inStr = "I'd like one apple, one Orange and one BANANA."
      outStr = "I Would like one banana, one Pear and one APPLE."
      # Run the substitution once and reuse the result for the report.
      result = subber.sub(inStr)
      # print(...) with one parenthesised argument behaves the same on
      # Python 2 and Python 3, unlike the bare print statement.
      if result == outStr:
          print("Test #1 PASSED")
      else:
          print("Test #1 FAILED: '%s'" % result)

      # test whole-word matching: "he" inside "he'd" is replaced as well
      inStr = "He said he'd like to go with me"
      outStr = "She said she'd like to go with me"
      result = subber.sub(inStr)
      if result == outStr:
          print("Test #2 PASSED")
      else:
          print("Test #2 FAILED: '%s'" % result)