PageRenderTime 310ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/starter/src/cs276/pe1/spell/KGramWithSoundexSpellingCorrector.java

https://github.com/rioleo/huangenius
Java | 66 lines | 46 code | 17 blank | 3 comment | 18 complexity | 0af9710f3658e03f6e6f8513e387e6d5 MD5 | raw file
  1. package cs276.pe1.spell;
  2. import java.io.File;
  3. import java.util.List;
  4. import cs276.util.IOUtils;
  5. import cs276.util.StringUtils;
  6. import java.util.*;
  7. import cs276.util.Counter;
  8. public class KGramWithSoundexSpellingCorrector implements SpellingCorrector {
  9. /** Initializes spelling corrector by indexing kgrams in words from a file */
  10. KGramWithEditDistanceSpellingCorrector KGram;
  11. KGramSpellingCorrector simpleKGram;
  12. public KGramWithSoundexSpellingCorrector() {
  13. KGram = new KGramWithEditDistanceSpellingCorrector();
  14. simpleKGram = new KGramSpellingCorrector();
  15. }
  16. public List<String> corrections(String word) {
  17. List<String> guesses = KGram.corrections(word);
  18. List<String> tiedGuesses = new ArrayList<String>();
  19. List<String> newGuesses = new ArrayList<String>();
  20. Counter<String> frequencies = new Counter<String>();
  21. if (guesses != null && guesses.size() > 0) {
  22. double closestEditDistance = KGram.getEditDistance(word, guesses.get(0));
  23. for (String guess : guesses) {
  24. if (KGram.getEditDistance(word, guess) == closestEditDistance) tiedGuesses.add(guess);
  25. }
  26. }
  27. if (tiedGuesses != null && tiedGuesses.size() > 0) {
  28. for (String guess : tiedGuesses) {
  29. if (Soundex.soundex(word).equals(Soundex.soundex(guess))) {
  30. newGuesses.add(guess);
  31. return newGuesses;
  32. }
  33. }
  34. }
  35. // didn't find soundex match
  36. if (guesses != null && guesses.size() > 0) {
  37. double closestEditDistance = KGram.getEditDistance(word, guesses.get(0));
  38. for (String guess : guesses) {
  39. if (KGram.getEditDistance(word, guess) == closestEditDistance) {
  40. frequencies.setCount(guess, simpleKGram.getOccurrences(guess));
  41. } else {
  42. break;
  43. }
  44. }
  45. }
  46. return frequencies.topK(1);
  47. // Use guess frequency as tie-break between guesses with same edit distance;
  48. }
  49. }