PageRenderTime 52ms CodeModel.GetById 31ms app.highlight 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/spell/SpellingCorrector.java

https://gitlab.com/mountain01/spelling-corrector
Java | 165 lines | 100 code | 21 blank | 44 comment | 21 complexity | 1d83010388b5c3d53508f97f98b356ce MD5 | raw file
  1/**
  2 * Created by Matt on 9/4/2014.
  3 */
  4package spell;
  5
  6//import org.junit.Assert;
  7//import org.junit.Before;
  8//import org.junit.Test;
  9
 10import java.io.File;
 11import java.io.IOException;
 12import java.util.*;
 13
 14public class SpellingCorrector implements SpellCorrector {
 15
 16//    @Test
 17//    public void test1() throws IOException, NoSimilarWordFoundException {
 18//        SpellingCorrector testSpell = new SpellingCorrector();
 19//        testSpell.useDictionary("dictionary.txt");
 20//        System.out.println(test.toString());
 21//
 22//        Assert.assertEquals(test.getNodeCount(), 19);
 23//        Assert.assertEquals(test.getWordCount(), 6);
 24//        Assert.assertNotNull(test.find("apple"));
 25//        Assert.assertNull(test.find("bob"));
 26//        test.add("bob");
 27//        Assert.assertEquals(test.getNodeCount(), 21);
 28//        Assert.assertEquals(test.getWordCount(), 7);
 29//        Assert.assertNotNull(test.find("bob"));
 30//        test.add("bob");
 31//        Assert.assertEquals(test.getNodeCount(), 21);
 32//        Assert.assertEquals(test.getWordCount(), 7);
 33//        Assert.assertNotNull(test.find("bob"));
 34//        System.out.println(testSpell.dictionary.toString());
 35//
 36//
 37//        Assert.assertFalse(false);
 38//    }
 39
 40    Words dictionary = new Words();
 41    public Words test;
 42
 43//    @Before
 44//    public void init(){
 45//        test = new Words();
 46//        test.add("kick");
 47//        test.add("kicks");
 48//        test.add("kicker");
 49//        test.add("apple");
 50//        test.add("ape");
 51//        test.add("brick");
 52//    }
 53
 54    @Override
 55    public void useDictionary(String dictionaryFileName) throws IOException {
 56        Scanner in = new Scanner(new File(dictionaryFileName));
 57        while(in.hasNext()){
 58            dictionary.add(in.next());
 59        }
 60        in.close();
 61    }
 62
 63    @Override
 64    public String suggestSimilarWord(String inputWord) throws NoSimilarWordFoundException {
 65        ArrayList<String> possibles = new ArrayList<String>();
 66        // if word is in the Trie
 67        if(dictionary.find(inputWord) != null){
 68            return inputWord.toLowerCase();
 69        } else {
 70            possibles = getEditDistances(inputWord.toLowerCase());
 71            Map<String,Integer> words = getValidWords(possibles);
 72
 73            // no similar words found
 74            if(words.size() == 0){
 75                possibles = getEditDistances(possibles);
 76                words = getValidWords(possibles);
 77                if(words.size() == 0){
 78                    throw new NoSimilarWordFoundException();
 79                }
 80            }
 81
 82            // get max occurances
 83            int max = 0;
 84            for(String word:words.keySet()){
 85                max = words.get(word) > max ? words.get(word):max;
 86            }
 87
 88            // get list of words with max count
 89            ArrayList<String> validWords = new ArrayList<String>();
 90            for(String word:words.keySet()){
 91                if(words.get(word) == max){
 92                    validWords.add(word);
 93                }
 94            }
 95
 96            // if only 1 return it, else return first alphebetically
 97            Collections.sort(validWords);
 98            return validWords.get(0);
 99
100        }
101    }
102
103    private Map<String,Integer> getValidWords(ArrayList<String> list){
104        Map<String,Integer> words = new HashMap<String, Integer>();
105        for(String word:list){
106            Words.WordNode node = dictionary.find(word);
107            if(node != null){
108                words.put(word,node.getValue());
109            }
110        }
111        return words;
112    }
113
114    private ArrayList<String> getEditDistances(String input){
115        ArrayList<String> possibilities = new ArrayList<String>();
116        possibilities.addAll(deleteDistance(input));
117        possibilities.addAll(insAltDistance(input, 0));
118        possibilities.addAll(insAltDistance(input, 1));
119        possibilities.addAll(transpositionDistance(input));
120        return possibilities;
121    }
122
123    private ArrayList<String> getEditDistances(ArrayList<String> list){
124        ArrayList<String> possiblities = new ArrayList<String>();
125        for(String word:list){
126            possiblities.addAll(getEditDistances(word));
127        }
128        return possiblities;
129    }
130
131    private ArrayList<String> deleteDistance(String input){
132        ArrayList<String> deleteList = new ArrayList<String>();
133       for(int i = 0; i < input.length();i++){
134           String newWord = input.substring(0,i).concat(input.substring(i+1));
135           deleteList.add(newWord);
136       }
137        return deleteList;
138    }
139
140    private ArrayList<String> insAltDistance(String input,int distance){
141        ArrayList<String> insertList = new ArrayList<String>();
142        for(int i = 0;i<26;i++){
143            char c = (char) ('a'+i);
144            for(int k = 0;k<input.length();k++){
145                String newWord = input.substring(0,k)+ c + input.substring(k+distance);
146                insertList.add(newWord);
147            }
148            if(distance == 0){
149                insertList.add(input+c);
150            }
151        }
152        return insertList;
153    }
154
155    private ArrayList<String> transpositionDistance(String input){
156        ArrayList<String> transList = new ArrayList<String>();
157        for(int i = 0; i < input.length()-1;i++){
158            String newWord = input.substring(0,i)+input.charAt(i+1)+input.charAt(i)+input.substring(i+2);
159            transList.add(newWord);
160        }
161        return transList;
162    }
163
164
165}