PageRenderTime 1547ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/examples/duplicates/3.java

https://github.com/boyter/scc
Java | 186 lines | 117 code | 38 blank | 31 comment | 26 complexity | 4245ddd182a560b9389ad852701d1aac MD5 | raw file
Possible License(s): MIT, Unlicense
  1. package com.boyter.SpellingCorrector;
  2. import java.util.*;
  3. import java.util.stream.Stream;
  4. /**
  5. * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and
  6. * the like. Attempts to be highly performing by never changing the first character since we can assume that the
  7. * user got that correct.
  8. */
  9. public class SpellingCorrector implements ISpellingCorrector {
  10. // word to count map - how may times a word is present - or a weight attached to a word
  11. private Map<String, Integer> dictionary = null;
  12. public SpellingCorrector(int lruCount) {
  13. this.dictionary = Collections.synchronizedMap(new LruCache<>(lruCount));
  14. }
  15. @Override
  16. public void putWord(String word) {
  17. word = word.toLowerCase();
  18. if (dictionary.containsKey(word)) {
  19. dictionary.put(word, (dictionary.get(word) + 1));
  20. }
  21. else {
  22. dictionary.put(word, 1);
  23. }
  24. }
  25. @Override
  26. public String correct(String word) {
  27. if (word == null || word.trim().isEmpty()) {
  28. return word;
  29. }
  30. word = word.toLowerCase();
  31. // If the word exists in our dictionary then return
  32. if (dictionary.containsKey(word)) {
  33. return word;
  34. }
  35. Map<String, Integer> possibleMatches = new HashMap<>();
  36. List<String> closeEdits = wordEdits(word);
  37. for (String closeEdit: closeEdits) {
  38. if (dictionary.containsKey(closeEdit)) {
  39. possibleMatches.put(closeEdit, this.dictionary.get(closeEdit));
  40. }
  41. }
  42. if (!possibleMatches.isEmpty()) {
  43. // Sorted least likely first
  44. Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
  45. // Try to match anything of the same length first
  46. String bestMatch = "";
  47. for(Object o: matches) {
  48. if (o.toString().length() == word.length()) {
  49. bestMatch = o.toString();
  50. }
  51. }
  52. if (!bestMatch.trim().isEmpty()) {
  53. return bestMatch;
  54. }
  55. // Just return whatever is the best match
  56. return matches[matches.length - 1].toString();
  57. }
  58. // Ok we did't find anything, so lets run the edits function on the previous results and use those
  59. // this gives us results which are 2 characters away from whatever was entered
  60. List<String> furtherEdits = new ArrayList<>();
  61. for(String closeEdit: closeEdits) {
  62. furtherEdits.addAll(this.wordEdits(closeEdit));
  63. }
  64. for (String futherEdit: furtherEdits) {
  65. if (dictionary.containsKey(futherEdit)) {
  66. possibleMatches.put(futherEdit, this.dictionary.get(futherEdit));
  67. }
  68. }
  69. if (!possibleMatches.isEmpty()) {
  70. // Sorted least likely first
  71. Object[] matches = this.sortByValue(possibleMatches).keySet().toArray();
  72. // Try to match anything of the same length first
  73. String bestMatch = "";
  74. for(Object o: matches) {
  75. if (o.toString().length() == word.length()) {
  76. bestMatch = o.toString();
  77. }
  78. }
  79. if (!bestMatch.trim().isEmpty()) {
  80. return bestMatch;
  81. }
  82. // Just return whatever is the best match
  83. return matches[matches.length - 1].toString();
  84. }
  85. // If unable to find something better return the same string
  86. return word;
  87. }
  88. @Override
  89. public boolean containsWord(String word) {
  90. if (dictionary.containsKey(word)) {
  91. return true;
  92. }
  93. return false;
  94. }
  95. /**
  96. * Return a list of strings which are words similar to our one which could potentially be misspellings
  97. * Abuse the fact that a char can be used as an integer
  98. * Assume that they got the first letter correct for all edits to cut on CPU burn time
  99. */
  100. private List<String> wordEdits(String word) {
  101. List<String> closeWords = new ArrayList<String>();
  102. for (int i = 1; i < word.length() + 1; i++) {
  103. for (char character = 'a'; character <= 'z'; character++) {
  104. // Maybe they forgot to type a letter? Try adding one
  105. StringBuilder sb = new StringBuilder(word);
  106. sb.insert(i, character);
  107. closeWords.add(sb.toString());
  108. }
  109. }
  110. for (int i = 1; i < word.length(); i++) {
  111. for (char character = 'a'; character <= 'z'; character++) {
  112. // Maybe they mistyped a single letter? Try replacing them all
  113. StringBuilder sb = new StringBuilder(word);
  114. sb.setCharAt(i, character);
  115. closeWords.add(sb.toString());
  116. // Maybe they added an extra letter? Try deleting one
  117. sb = new StringBuilder(word);
  118. sb.deleteCharAt(i);
  119. closeWords.add(sb.toString());
  120. }
  121. }
  122. return closeWords;
  123. }
  124. /**
  125. * Sorts a map by value taken from
  126. * http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
  127. */
  128. public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue( Map<K, V> map ) {
  129. Map<K, V> result = new LinkedHashMap<>();
  130. Stream<Map.Entry<K, V>> st = map.entrySet().stream();
  131. st.sorted( Map.Entry.comparingByValue() ).forEachOrdered( e -> result.put(e.getKey(), e.getValue()) );
  132. return result;
  133. }
  134. /**
  135. * A very simple LRU cache implementation that can be used for random data types.
  136. */
  137. public class LruCache<A, B> extends LinkedHashMap<A, B> {
  138. private final int maxEntries;
  139. public LruCache(final int maxEntries) {
  140. super(maxEntries + 1, 1.0f, true);
  141. this.maxEntries = maxEntries;
  142. }
  143. @Override
  144. protected boolean removeEldestEntry(final Map.Entry<A, B> eldest) {
  145. return super.size() > maxEntries;
  146. }
  147. }
  148. }