PageRenderTime 0ms CodeModel.GetById 23ms app.highlight 6ms RepoModel.GetById 0ms app.codeStats 0ms

/examples/duplicates/1.java

https://github.com/boyter/scc
Java | 186 lines | 117 code | 38 blank | 31 comment | 26 complexity | 4245ddd182a560b9389ad852701d1aac MD5 | raw file
Possible License(s): MIT, Unlicense
  1package com.boyter.SpellingCorrector;
  2
  3import java.util.*;
  4import java.util.stream.Stream;
  5
  6/**
  7 * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and
  8 * the like. Attempts to be highly performing by never changing the first character since we can assume that the
  9 * user got that correct.
 10 */
 11public class SpellingCorrector implements ISpellingCorrector {
 12
 13    // word to count map - how may times a word is present - or a weight attached to a word
 14    private Map<String, Integer> dictionary = null;
 15
 16    public SpellingCorrector(int lruCount) {
 17        this.dictionary = Collections.synchronizedMap(new LruCache<>(lruCount));
 18    }
 19
 20    @Override
 21    public void putWord(String word) {
 22        word = word.toLowerCase();
 23        if (dictionary.containsKey(word)) {
 24            dictionary.put(word, (dictionary.get(word) + 1));
 25        }
 26        else {
 27            dictionary.put(word, 1);
 28        }
 29    }
 30
 31    @Override
 32    public String correct(String word) {
 33        if (word == null || word.trim().isEmpty()) {
 34            return word;
 35        }
 36
 37        word = word.toLowerCase();
 38
 39        // If the word exists in our dictionary then return
 40        if (dictionary.containsKey(word)) {
 41            return word;
 42        }
 43
 44        Map<String, Integer> possibleMatches = new HashMap<>();
 45
 46        List<String> closeEdits = wordEdits(word);
 47        for (String closeEdit: closeEdits) {
 48            if (dictionary.containsKey(closeEdit)) {
 49                possibleMatches.put(closeEdit, this.dictionary.get(closeEdit));
 50            }
 51        }
 52
 53        if (!possibleMatches.isEmpty()) {
 54            // Sorted least likely first
 55            Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
 56
 57            // Try to match anything of the same length first
 58            String bestMatch = "";
 59            for(Object o: matches) {
 60                if (o.toString().length() == word.length()) {
 61                    bestMatch = o.toString();
 62                }
 63            }
 64
 65            if (!bestMatch.trim().isEmpty()) {
 66                return bestMatch;
 67            }
 68
 69            // Just return whatever is the best match
 70            return matches[matches.length - 1].toString();
 71        }
 72
 73        // Ok we did't find anything, so lets run the edits function on the previous results and use those
 74        // this gives us results which are 2 characters away from whatever was entered
 75        List<String> furtherEdits = new ArrayList<>();
 76        for(String closeEdit: closeEdits) {
 77            furtherEdits.addAll(this.wordEdits(closeEdit));
 78        }
 79
 80        for (String futherEdit: furtherEdits) {
 81            if (dictionary.containsKey(futherEdit)) {
 82                possibleMatches.put(futherEdit, this.dictionary.get(futherEdit));
 83            }
 84        }
 85
 86        if (!possibleMatches.isEmpty()) {
 87            // Sorted least likely first
 88            Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
 89
 90            // Try to match anything of the same length first
 91            String bestMatch = "";
 92            for(Object o: matches) {
 93                if (o.toString().length() == word.length()) {
 94                    bestMatch = o.toString();
 95                }
 96            }
 97
 98            if (!bestMatch.trim().isEmpty()) {
 99                return bestMatch;
100            }
101
102            // Just return whatever is the best match
103            return matches[matches.length - 1].toString();
104        }
105
106
107        // If unable to find something better return the same string
108        return word;
109    }
110
111    @Override
112    public boolean containsWord(String word) {
113        if (dictionary.containsKey(word)) {
114            return true;
115        }
116
117        return false;
118    }
119
120
121    /**
122     * Return a list of strings which are words similar to our one which could potentially be misspellings
123     * Abuse the fact that a char can be used as an integer
124     * Assume that they got the first letter correct for all edits to cut on CPU burn time
125     */
126    private List<String> wordEdits(String word) {
127        List<String> closeWords = new ArrayList<String>();
128
129        for (int i = 1; i < word.length() + 1; i++) {
130            for (char character = 'a'; character <= 'z'; character++) {
131                // Maybe they forgot to type a letter? Try adding one
132                StringBuilder sb = new StringBuilder(word);
133                sb.insert(i, character);
134                closeWords.add(sb.toString());
135            }
136        }
137
138        for (int i = 1; i < word.length(); i++) {
139            for (char character = 'a'; character <= 'z'; character++) {
140                // Maybe they mistyped a single letter? Try replacing them all
141                StringBuilder sb = new StringBuilder(word);
142                sb.setCharAt(i, character);
143                closeWords.add(sb.toString());
144
145                // Maybe they added an extra letter? Try deleting one
146                sb = new StringBuilder(word);
147                sb.deleteCharAt(i);
148                closeWords.add(sb.toString());
149            }
150        }
151
152        return closeWords;
153    }
154
155
156    /**
157     * Sorts a map by value taken from
158     * http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
159     */
160    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue( Map<K, V> map ) {
161        Map<K, V> result = new LinkedHashMap<>();
162        Stream<Map.Entry<K, V>> st = map.entrySet().stream();
163
164        st.sorted( Map.Entry.comparingByValue() ).forEachOrdered( e -> result.put(e.getKey(), e.getValue()) );
165
166        return result;
167    }
168
169    /**
170     * A very simple LRU cache implementation that can be used for random data types.
171     */
172    public class LruCache<A, B> extends LinkedHashMap<A, B> {
173        private final int maxEntries;
174
175        public LruCache(final int maxEntries) {
176            super(maxEntries + 1, 1.0f, true);
177            this.maxEntries = maxEntries;
178        }
179
180        @Override
181        protected boolean removeEldestEntry(final Map.Entry<A, B> eldest) {
182            return super.size() > maxEntries;
183        }
184    }
185
186}