PageRenderTime 29ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/pe1/src/cs276/pe1/spell/KGramWithEditDistanceSpellingCorrector.java

https://github.com/sarnoff/CS276
Java | 77 lines | 64 code | 12 blank | 1 comment | 13 complexity | e87a285ac1e31924aa30ce2e5625f813 MD5 | raw file
  1. package cs276.pe1.spell;
  2. import java.util.List;
  3. import java.util.Set;
  4. import cs276.util.StringUtils;
  5. import cs276.util.Counter;
  6. import cs276.util.Counters;
  7. public class KGramWithEditDistanceSpellingCorrector extends KGramSpellingCorrector {
  8. public enum ListCompare {
  9. BASIC, //from the jaccard top 10, finds the one with the smallest edit distance (and highest jaccard score)
  10. TIEBREAKING, //uses word frequency to break ties
  11. NORMALIZE //Normalizes then multiplies score and edit distance
  12. };
  13. //Change this to change the type of corrections returned
  14. protected static ListCompare correctionsType = ListCompare.NORMALIZE;
  15. public KGramWithEditDistanceSpellingCorrector() {
  16. super();
  17. }
  18. protected Counter<String> editDistance(String word, List<String> potentialWords)
  19. {
  20. Counter<String> editDistance = new Counter<String>();
  21. for(String s:potentialWords)
  22. editDistance.incrementCount(s,StringUtils.levenshtein(word,s));
  23. return editDistance;
  24. }
  25. protected void reciprocal(Counter<String> target) {
  26. for (String key : target.keySet()) {
  27. target.setCount(key, 1.0/((double)target.getCount(key)));
  28. }
  29. }
  30. public List<String> corrections(String word) {
  31. Counter<String> scoredCounter = jaccardScore(word);
  32. List<String> firstPass = scoredCounter.topK(WL);
  33. if(correctionsType == ListCompare.BASIC||correctionsType == ListCompare.TIEBREAKING)
  34. {
  35. Counter<String> editDistance = editDistance(word,firstPass);
  36. Set<String> minKeys = Counters.keysAt(editDistance,Counters.min(editDistance));
  37. Counter<String> smallestEDJaccard = new Counter<String>();
  38. for(String s:minKeys)
  39. smallestEDJaccard.incrementCount(s,scoredCounter.getCount(s));
  40. if(correctionsType == ListCompare.TIEBREAKING)
  41. {
  42. Set<String> minKeys2 = Counters.keysAt(smallestEDJaccard,Counters.min(smallestEDJaccard));
  43. if(minKeys2.size()>1)
  44. {
  45. Counter<String> tie = new Counter<String>();
  46. for(String s:minKeys2)
  47. tie.incrementCount(s,freq.getCount(s));
  48. return tie.topK(WL);
  49. }
  50. }
  51. return smallestEDJaccard.topK(WL);
  52. }
  53. else if(correctionsType == ListCompare.NORMALIZE)
  54. {
  55. Counter<String>editDistance = editDistance(word,firstPass);
  56. Counters.retainTop(scoredCounter,WL);
  57. Counters.normalize(scoredCounter);
  58. Counters.normalize(editDistance);
  59. reciprocal(editDistance);
  60. Counters.multiplyInPlace(scoredCounter,editDistance);
  61. return scoredCounter.topK(WL);
  62. }
  63. return firstPass;
  64. }
  65. }