PageRenderTime 60ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/tests.py

https://bitbucket.org/zozo123/fuzzywuzzy
Python | 285 lines | 215 code | 56 blank | 14 comment | 3 complexity | 82746b465d8df57a5d0a16bba3c5cfe0 MD5 | raw file
  1. # -*- coding: utf8 -*-
  2. from fuzzywuzzy import fuzz
  3. from fuzzywuzzy import process
  4. from fuzzywuzzy import utils
  5. import itertools
  6. import unittest
  7. class UtilsTest(unittest.TestCase):
  8. def setUp(self):
  9. self.s1 = "new york mets"
  10. self.s1a = "new york mets"
  11. self.s2 = "new YORK mets"
  12. self.s3 = "the wonderful new york mets"
  13. self.s4 = "new york mets vs atlanta braves"
  14. self.s5 = "atlanta braves vs new york mets"
  15. self.s6 = "new york mets - atlanta braves"
  16. self.mixed_strings = [
  17. "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
  18. "C'est la vie",
  19. "Ça va?",
  20. "Cães danados",
  21. u"\xacCamarões assados",
  22. u"a\xac\u1234\u20ac\U00008000",
  23. u"\u00C1"
  24. ]
  25. def tearDown(self):
  26. pass
  27. def test_asciidammit(self):
  28. for s in self.mixed_strings:
  29. utils.asciidammit(s)
  30. def test_asciionly(self):
  31. for s in self.mixed_strings:
  32. # ascii only only runs on strings
  33. s = utils.asciidammit(s)
  34. utils.asciionly(s)
  35. def test_fullProcess(self):
  36. for s in self.mixed_strings:
  37. utils.full_process(s)
  38. class RatioTest(unittest.TestCase):
  39. def setUp(self):
  40. self.s1 = "new york mets"
  41. self.s1a = "new york mets"
  42. self.s2 = "new YORK mets"
  43. self.s3 = "the wonderful new york mets"
  44. self.s4 = "new york mets vs atlanta braves"
  45. self.s5 = "atlanta braves vs new york mets"
  46. self.s6 = "new york mets - atlanta braves"
  47. self.cirque_strings = [
  48. "cirque du soleil - zarkana - las vegas",
  49. "cirque du soleil ",
  50. "cirque du soleil las vegas",
  51. "zarkana las vegas",
  52. "las vegas cirque du soleil at the bellagio",
  53. "zarakana - cirque du soleil - bellagio"
  54. ]
  55. self.baseball_strings = [
  56. "new york mets vs chicago cubs",
  57. "chicago cubs vs chicago white sox",
  58. "philladelphia phillies vs atlanta braves",
  59. "braves vs mets",
  60. ]
  61. def tearDown(self):
  62. pass
  63. def testEqual(self):
  64. self.assertEqual(fuzz.ratio(self.s1, self.s1a),100)
  65. def testCaseInsensitive(self):
  66. self.assertNotEqual(fuzz.ratio(self.s1, self.s2),100)
  67. self.assertEqual(fuzz.ratio(utils.full_process(self.s1), utils.full_process(self.s2)),100)
  68. def testPartialRatio(self):
  69. self.assertEqual(fuzz.partial_ratio(self.s1, self.s3),100)
  70. def testTokenSortRatio(self):
  71. self.assertEqual(fuzz.token_sort_ratio(self.s1, self.s1a),100)
  72. def testPartialTokenSortRatio(self):
  73. self.assertEqual(fuzz.partial_token_sort_ratio(self.s1, self.s1a),100)
  74. self.assertEqual(fuzz.partial_token_sort_ratio(self.s4, self.s5),100)
  75. def testTokenSetRatio(self):
  76. self.assertEqual(fuzz.token_set_ratio(self.s4, self.s5),100)
  77. def testPartialTokenSetRatio(self):
  78. self.assertEqual(fuzz.token_set_ratio(self.s4, self.s5),100)
  79. def testQuickRatioEqual(self):
  80. self.assertEqual(fuzz.QRatio(self.s1, self.s1a), 100)
  81. def testQuickRatioCaseInsensitive(self):
  82. self.assertEqual(fuzz.QRatio(self.s1, self.s2), 100)
  83. def testQuickRatioNotEqual(self):
  84. self.assertNotEqual(fuzz.QRatio(self.s1, self.s3), 100)
  85. def testWRatioEqual(self):
  86. self.assertEqual(fuzz.WRatio(self.s1, self.s1a), 100)
  87. def testWRatioCaseInsensitive(self):
  88. self.assertEqual(fuzz.WRatio(self.s1, self.s2), 100)
  89. def testWRatioPartialMatch(self):
  90. # a partial match is scaled by .9
  91. self.assertEqual(fuzz.WRatio(self.s1, self.s3), 90)
  92. def testWRatioMisorderedMatch(self):
  93. # misordered full matches are scaled by .95
  94. self.assertEqual(fuzz.WRatio(self.s4, self.s5), 95)
  95. def testWRatioUnicode(self):
  96. self.assertEqual(fuzz.WRatio(unicode(self.s1), unicode(self.s1a)), 100)
  97. def testQRatioUnicode(self):
  98. self.assertEqual(fuzz.WRatio(unicode(self.s1), unicode(self.s1a)), 100)
  99. def testIssueSeven(self):
  100. s1 = "HSINCHUANG"
  101. s2 = "SINJHUAN"
  102. s3 = "LSINJHUANG DISTRIC"
  103. s4 = "SINJHUANG DISTRICT"
  104. self.assertTrue(fuzz.partial_ratio(s1, s2) > 75)
  105. self.assertTrue(fuzz.partial_ratio(s1, s3) > 75)
  106. self.assertTrue(fuzz.partial_ratio(s1, s4) > 75)
  107. def testWRatioUnicodeString(self):
  108. s1 = u"\u00C1"
  109. s2 = "ABCD"
  110. score = fuzz.WRatio(s1, s2)
  111. self.assertEqual(0, score)
  112. def testQRatioUnicodeString(self):
  113. s1 = u"\u00C1"
  114. s2 = "ABCD"
  115. score = fuzz.QRatio(s1, s2)
  116. self.assertEqual(0, score)
  117. # test processing methods
  118. def testGetBestChoice1(self):
  119. query = "new york mets at atlanta braves"
  120. best = process.extractOne(query, self.baseball_strings)
  121. self.assertEqual(best[0], "braves vs mets")
  122. def testGetBestChoice2(self):
  123. query = "philadelphia phillies at atlanta braves"
  124. best = process.extractOne(query, self.baseball_strings)
  125. self.assertEqual(best[0], self.baseball_strings[2])
  126. def testGetBestChoice3(self):
  127. query = "atlanta braves at philadelphia phillies"
  128. best = process.extractOne(query, self.baseball_strings)
  129. self.assertEqual(best[0], self.baseball_strings[2])
  130. def testGetBestChoice4(self):
  131. query = "chicago cubs vs new york mets"
  132. best = process.extractOne(query, self.baseball_strings)
  133. self.assertEqual(best[0], self.baseball_strings[0])
  134. class ProcessTest(unittest.TestCase):
  135. def setUp(self):
  136. self.s1 = "new york mets"
  137. self.s1a = "new york mets"
  138. self.s2 = "new YORK mets"
  139. self.s3 = "the wonderful new york mets"
  140. self.s4 = "new york mets vs atlanta braves"
  141. self.s5 = "atlanta braves vs new york mets"
  142. self.s6 = "new york mets - atlanta braves"
  143. self.cirque_strings = [
  144. "cirque du soleil - zarkana - las vegas",
  145. "cirque du soleil ",
  146. "cirque du soleil las vegas",
  147. "zarkana las vegas",
  148. "las vegas cirque du soleil at the bellagio",
  149. "zarakana - cirque du soleil - bellagio"
  150. ]
  151. self.baseball_strings = [
  152. "new york mets vs chicago cubs",
  153. "chicago cubs vs chicago white sox",
  154. "philladelphia phillies vs atlanta braves",
  155. "braves vs mets",
  156. ]
  157. def testWithProcessor(self):
  158. events = [
  159. ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
  160. ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
  161. ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"],
  162. ]
  163. query = "new york mets vs chicago cubs"
  164. processor = lambda event: event[0]
  165. best = process.extractOne(query, events, processor=processor)
  166. self.assertEqual(best[0], events[0])
  167. def testWithScorer(self):
  168. choices = [
  169. "new york mets vs chicago cubs",
  170. "chicago cubs at new york mets",
  171. "atlanta braves vs pittsbugh pirates",
  172. "new york yankees vs boston red sox"
  173. ]
  174. # in this hypothetical example we care about ordering, so we use quick ratio
  175. query = "new york mets at chicago cubs"
  176. scorer = fuzz.QRatio
  177. # first, as an example, the normal way would select the "more 'complete' match of choices[1]"
  178. best = process.extractOne(query, choices)
  179. self.assertEqual(best[0], choices[1])
  180. # now, use the custom scorer
  181. best = process.extractOne(query, choices, scorer=scorer)
  182. self.assertEqual(best[0], choices[0])
  183. def testWithCutoff(self):
  184. choices = [
  185. "new york mets vs chicago cubs",
  186. "chicago cubs at new york mets",
  187. "atlanta braves vs pittsbugh pirates",
  188. "new york yankees vs boston red sox"
  189. ]
  190. query = "los angeles dodgers vs san francisco giants"
  191. # in this situation, this is an event that does not exist in the list
  192. # we don't want to randomly match to something, so we use a reasonable cutoff
  193. best = process.extractOne(query, choices, score_cutoff=50)
  194. self.assertTrue(best is None)
  195. #self.assertIsNone(best) # unittest.TestCase did not have assertIsNone until Python 2.7
  196. # however if we had no cutoff, something would get returned
  197. #best = process.extractOne(query, choices)
  198. #self.assertIsNotNone(best)
  199. def testEmptyStrings(self):
  200. choices = [
  201. "",
  202. "new york mets vs chicago cubs",
  203. "new york yankees vs boston red sox",
  204. "",
  205. ""
  206. ]
  207. query = "new york mets at chicago cubs"
  208. best = process.extractOne(query, choices)
  209. self.assertEqual(best[0], choices[1])
  210. def testNullStrings(self):
  211. choices = [
  212. None,
  213. "new york mets vs chicago cubs",
  214. "new york yankees vs boston red sox",
  215. None,
  216. None
  217. ]
  218. query = "new york mets at chicago cubs"
  219. best = process.extractOne(query, choices)
  220. self.assertEqual(best[0], choices[1])
  221. if __name__ == '__main__':
  222. unittest.main() # run all tests