/examples/duplicates/11.java
Java | 186 lines | 117 code | 38 blank | 31 comment | 26 complexity | 4245ddd182a560b9389ad852701d1aac MD5 | raw file
Possible License(s): MIT, Unlicense
1package com.boyter.SpellingCorrector;
2
3import java.util.*;
4import java.util.stream.Stream;
5
6/**
7 * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and
8 * the like. Attempts to be highly performing by never changing the first character since we can assume that the
9 * user got that correct.
10 */
11public class SpellingCorrector implements ISpellingCorrector {
12
13 // word to count map - how may times a word is present - or a weight attached to a word
14 private Map<String, Integer> dictionary = null;
15
16 public SpellingCorrector(int lruCount) {
17 this.dictionary = Collections.synchronizedMap(new LruCache<>(lruCount));
18 }
19
20 @Override
21 public void putWord(String word) {
22 word = word.toLowerCase();
23 if (dictionary.containsKey(word)) {
24 dictionary.put(word, (dictionary.get(word) + 1));
25 }
26 else {
27 dictionary.put(word, 1);
28 }
29 }
30
31 @Override
32 public String correct(String word) {
33 if (word == null || word.trim().isEmpty()) {
34 return word;
35 }
36
37 word = word.toLowerCase();
38
39 // If the word exists in our dictionary then return
40 if (dictionary.containsKey(word)) {
41 return word;
42 }
43
44 Map<String, Integer> possibleMatches = new HashMap<>();
45
46 List<String> closeEdits = wordEdits(word);
47 for (String closeEdit: closeEdits) {
48 if (dictionary.containsKey(closeEdit)) {
49 possibleMatches.put(closeEdit, this.dictionary.get(closeEdit));
50 }
51 }
52
53 if (!possibleMatches.isEmpty()) {
54 // Sorted least likely first
55 Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
56
57 // Try to match anything of the same length first
58 String bestMatch = "";
59 for(Object o: matches) {
60 if (o.toString().length() == word.length()) {
61 bestMatch = o.toString();
62 }
63 }
64
65 if (!bestMatch.trim().isEmpty()) {
66 return bestMatch;
67 }
68
69 // Just return whatever is the best match
70 return matches[matches.length - 1].toString();
71 }
72
73 // Ok we did't find anything, so lets run the edits function on the previous results and use those
74 // this gives us results which are 2 characters away from whatever was entered
75 List<String> furtherEdits = new ArrayList<>();
76 for(String closeEdit: closeEdits) {
77 furtherEdits.addAll(this.wordEdits(closeEdit));
78 }
79
80 for (String futherEdit: furtherEdits) {
81 if (dictionary.containsKey(futherEdit)) {
82 possibleMatches.put(futherEdit, this.dictionary.get(futherEdit));
83 }
84 }
85
86 if (!possibleMatches.isEmpty()) {
87 // Sorted least likely first
88 Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
89
90 // Try to match anything of the same length first
91 String bestMatch = "";
92 for(Object o: matches) {
93 if (o.toString().length() == word.length()) {
94 bestMatch = o.toString();
95 }
96 }
97
98 if (!bestMatch.trim().isEmpty()) {
99 return bestMatch;
100 }
101
102 // Just return whatever is the best match
103 return matches[matches.length - 1].toString();
104 }
105
106
107 // If unable to find something better return the same string
108 return word;
109 }
110
111 @Override
112 public boolean containsWord(String word) {
113 if (dictionary.containsKey(word)) {
114 return true;
115 }
116
117 return false;
118 }
119
120
121 /**
122 * Return a list of strings which are words similar to our one which could potentially be misspellings
123 * Abuse the fact that a char can be used as an integer
124 * Assume that they got the first letter correct for all edits to cut on CPU burn time
125 */
126 private List<String> wordEdits(String word) {
127 List<String> closeWords = new ArrayList<String>();
128
129 for (int i = 1; i < word.length() + 1; i++) {
130 for (char character = 'a'; character <= 'z'; character++) {
131 // Maybe they forgot to type a letter? Try adding one
132 StringBuilder sb = new StringBuilder(word);
133 sb.insert(i, character);
134 closeWords.add(sb.toString());
135 }
136 }
137
138 for (int i = 1; i < word.length(); i++) {
139 for (char character = 'a'; character <= 'z'; character++) {
140 // Maybe they mistyped a single letter? Try replacing them all
141 StringBuilder sb = new StringBuilder(word);
142 sb.setCharAt(i, character);
143 closeWords.add(sb.toString());
144
145 // Maybe they added an extra letter? Try deleting one
146 sb = new StringBuilder(word);
147 sb.deleteCharAt(i);
148 closeWords.add(sb.toString());
149 }
150 }
151
152 return closeWords;
153 }
154
155
156 /**
157 * Sorts a map by value taken from
158 * http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
159 */
160 public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue( Map<K, V> map ) {
161 Map<K, V> result = new LinkedHashMap<>();
162 Stream<Map.Entry<K, V>> st = map.entrySet().stream();
163
164 st.sorted( Map.Entry.comparingByValue() ).forEachOrdered( e -> result.put(e.getKey(), e.getValue()) );
165
166 return result;
167 }
168
169 /**
170 * A very simple LRU cache implementation that can be used for random data types.
171 */
172 public class LruCache<A, B> extends LinkedHashMap<A, B> {
173 private final int maxEntries;
174
175 public LruCache(final int maxEntries) {
176 super(maxEntries + 1, 1.0f, true);
177 this.maxEntries = maxEntries;
178 }
179
180 @Override
181 protected boolean removeEldestEntry(final Map.Entry<A, B> eldest) {
182 return super.size() > maxEntries;
183 }
184 }
185
186}