PageRenderTime 47ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/src/spell/SpellingCorrector.java

https://gitlab.com/mountain01/spelling-corrector
Java | 165 lines | 100 code | 21 blank | 44 comment | 21 complexity | 1d83010388b5c3d53508f97f98b356ce MD5 | raw file
  1. /**
  2. * Created by Matt on 9/4/2014.
  3. */
  4. package spell;
  5. //import org.junit.Assert;
  6. //import org.junit.Before;
  7. //import org.junit.Test;
  8. import java.io.File;
  9. import java.io.IOException;
  10. import java.util.*;
  11. public class SpellingCorrector implements SpellCorrector {
  12. // @Test
  13. // public void test1() throws IOException, NoSimilarWordFoundException {
  14. // SpellingCorrector testSpell = new SpellingCorrector();
  15. // testSpell.useDictionary("dictionary.txt");
  16. // System.out.println(test.toString());
  17. //
  18. // Assert.assertEquals(test.getNodeCount(), 19);
  19. // Assert.assertEquals(test.getWordCount(), 6);
  20. // Assert.assertNotNull(test.find("apple"));
  21. // Assert.assertNull(test.find("bob"));
  22. // test.add("bob");
  23. // Assert.assertEquals(test.getNodeCount(), 21);
  24. // Assert.assertEquals(test.getWordCount(), 7);
  25. // Assert.assertNotNull(test.find("bob"));
  26. // test.add("bob");
  27. // Assert.assertEquals(test.getNodeCount(), 21);
  28. // Assert.assertEquals(test.getWordCount(), 7);
  29. // Assert.assertNotNull(test.find("bob"));
  30. // System.out.println(testSpell.dictionary.toString());
  31. //
  32. //
  33. // Assert.assertFalse(false);
  34. // }
  35. Words dictionary = new Words();
  36. public Words test;
  37. // @Before
  38. // public void init(){
  39. // test = new Words();
  40. // test.add("kick");
  41. // test.add("kicks");
  42. // test.add("kicker");
  43. // test.add("apple");
  44. // test.add("ape");
  45. // test.add("brick");
  46. // }
  47. @Override
  48. public void useDictionary(String dictionaryFileName) throws IOException {
  49. Scanner in = new Scanner(new File(dictionaryFileName));
  50. while(in.hasNext()){
  51. dictionary.add(in.next());
  52. }
  53. in.close();
  54. }
  55. @Override
  56. public String suggestSimilarWord(String inputWord) throws NoSimilarWordFoundException {
  57. ArrayList<String> possibles = new ArrayList<String>();
  58. // if word is in the Trie
  59. if(dictionary.find(inputWord) != null){
  60. return inputWord.toLowerCase();
  61. } else {
  62. possibles = getEditDistances(inputWord.toLowerCase());
  63. Map<String,Integer> words = getValidWords(possibles);
  64. // no similar words found
  65. if(words.size() == 0){
  66. possibles = getEditDistances(possibles);
  67. words = getValidWords(possibles);
  68. if(words.size() == 0){
  69. throw new NoSimilarWordFoundException();
  70. }
  71. }
  72. // get max occurances
  73. int max = 0;
  74. for(String word:words.keySet()){
  75. max = words.get(word) > max ? words.get(word):max;
  76. }
  77. // get list of words with max count
  78. ArrayList<String> validWords = new ArrayList<String>();
  79. for(String word:words.keySet()){
  80. if(words.get(word) == max){
  81. validWords.add(word);
  82. }
  83. }
  84. // if only 1 return it, else return first alphebetically
  85. Collections.sort(validWords);
  86. return validWords.get(0);
  87. }
  88. }
  89. private Map<String,Integer> getValidWords(ArrayList<String> list){
  90. Map<String,Integer> words = new HashMap<String, Integer>();
  91. for(String word:list){
  92. Words.WordNode node = dictionary.find(word);
  93. if(node != null){
  94. words.put(word,node.getValue());
  95. }
  96. }
  97. return words;
  98. }
  99. private ArrayList<String> getEditDistances(String input){
  100. ArrayList<String> possibilities = new ArrayList<String>();
  101. possibilities.addAll(deleteDistance(input));
  102. possibilities.addAll(insAltDistance(input, 0));
  103. possibilities.addAll(insAltDistance(input, 1));
  104. possibilities.addAll(transpositionDistance(input));
  105. return possibilities;
  106. }
  107. private ArrayList<String> getEditDistances(ArrayList<String> list){
  108. ArrayList<String> possiblities = new ArrayList<String>();
  109. for(String word:list){
  110. possiblities.addAll(getEditDistances(word));
  111. }
  112. return possiblities;
  113. }
  114. private ArrayList<String> deleteDistance(String input){
  115. ArrayList<String> deleteList = new ArrayList<String>();
  116. for(int i = 0; i < input.length();i++){
  117. String newWord = input.substring(0,i).concat(input.substring(i+1));
  118. deleteList.add(newWord);
  119. }
  120. return deleteList;
  121. }
  122. private ArrayList<String> insAltDistance(String input,int distance){
  123. ArrayList<String> insertList = new ArrayList<String>();
  124. for(int i = 0;i<26;i++){
  125. char c = (char) ('a'+i);
  126. for(int k = 0;k<input.length();k++){
  127. String newWord = input.substring(0,k)+ c + input.substring(k+distance);
  128. insertList.add(newWord);
  129. }
  130. if(distance == 0){
  131. insertList.add(input+c);
  132. }
  133. }
  134. return insertList;
  135. }
  136. private ArrayList<String> transpositionDistance(String input){
  137. ArrayList<String> transList = new ArrayList<String>();
  138. for(int i = 0; i < input.length()-1;i++){
  139. String newWord = input.substring(0,i)+input.charAt(i+1)+input.charAt(i)+input.substring(i+2);
  140. transList.add(newWord);
  141. }
  142. return transList;
  143. }
  144. }