PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/python/dict.py

http://github.com/offby1/anagrams
Python | 88 lines | 71 code | 16 blank | 1 comment | 11 complexity | 19b4fe7fdce3ea87808092d5efdbe939 MD5 | raw file
  1. #!/usr/bin/env python
  2. import StringIO
  3. import string
  4. import re
  5. import sys
  6. import cPickle
  7. import os
  8. import unittest
  9. from stat import *
  10. from bag import bag, bag_empty, bags_equal, subtract_bags
  11. has_a_vowel_re = re.compile (r'[aeiouy]')
  12. long_enough_re = re.compile (r'^i$|^a$|^..')
  13. non_letter_re = re.compile (r'[^a-z]')
  14. def word_acceptable(w):
  15. if non_letter_re.search (w):
  16. return False
  17. if (not long_enough_re.match (w)):
  18. return False
  19. if (not has_a_vowel_re.search (w)):
  20. return False
  21. return True
  22. default_dict_name =os.path.join(os.path.dirname(__file__), "../words.utf8")
  23. def snarf_dictionary_from_IO (I):
  24. print >> sys.stderr, "Snarfing", I
  25. hash_table = {}
  26. for w in re.findall (r'.+', I.read ()):
  27. w = string.lower (w)
  28. if not word_acceptable(w):
  29. continue
  30. key = bag(w)
  31. if hash_table.has_key (key):
  32. if (0 == hash_table[key].count (w)): # avoid duplicates
  33. hash_table[key].append (w)
  34. else:
  35. hash_table[key] = [w]
  36. print >> sys.stderr, "done"
  37. return hash_table
  38. hash_cache = os.path.join(os.path.dirname(__file__), "hash.cache")
  39. def snarf_dictionary (fn):
  40. try:
  41. fh = open (hash_cache, "rb")
  42. rv= cPickle.load (fh)
  43. print >> sys.stderr, "Reading cache", hash_cache, "instead of dictionary", fn
  44. except:
  45. fh = open (fn, "r")
  46. rv = snarf_dictionary_from_IO (fh)
  47. fh.close ()
  48. fh = open (hash_cache, "wb")
  49. cPickle.dump (rv, fh, 2)
  50. fh.close ()
  51. return rv
  52. if __name__ == "__main__":
  53. class TestStuff(unittest.TestCase):
  54. def setUp(self):
  55. self.fake_input = "cat\ntac\nfred\n"
  56. self.fake_dict = snarf_dictionary_from_IO (StringIO.StringIO (self.fake_input))
  57. def test_word_acceptable(self):
  58. self.assert_(word_acceptable("dog"))
  59. self.assertFalse (word_acceptable("C3PO"))
  60. d = snarf_dictionary(os.path.join(default_dict_name))
  61. self.assertEqual(66965, len(d))
  62. self.assertEqual(72794, sum(len(words) for words in d.values()))
  63. def test_this_and_that(self):
  64. self.assert_ (2 == len (self.fake_dict.keys ()))
  65. cat_hits = self.fake_dict[bag ("cat")]
  66. self.assert_ (2 == len (cat_hits))
  67. self.assert_ (cat_hits[0] == "cat")
  68. self.assert_ (cat_hits[1] == "tac")
  69. self.assert_ (1 == len (self.fake_dict[bag ("fred")]))
  70. self.assert_ (self.fake_dict[bag ("fred")][0] == "fred")
  71. unittest.main()