PageRenderTime 62ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/SpellingCorrector.java

https://bitbucket.org/skyline0623/spellingcorrector
Java | 93 lines | 91 code | 2 blank | 0 comment | 24 complexity | 10968daff1c0c2d2f0546dc8fe24bb82 MD5 | raw file
  1. import java.io.*;
  2. import java.util.*;
  3. import java.util.Map.Entry;
  4. import java.util.AbstractMap.SimpleEntry;
  5. import java.util.regex.Matcher;
  6. import java.util.regex.Pattern;
  /**
   * Frequency-based spelling corrector.
   *
   * <p>A dictionary is built from a plain-text corpus: every maximal run of word
   * characters ({@code \w+}) is lower-cased and counted. {@link #correct(String)}
   * then proposes the most frequent dictionary word that is within one edit of the
   * input (deletion, replacement, transposition of adjacent characters, or
   * insertion of a letter {@code a-z}), falling back to words within two edits
   * when no one-edit neighbour is known.
   *
   * <p>The dictionary is filled once in the constructor and only read afterwards.
   */
  public class SpellingCorrector {

      /** Letters tried when generating replacement and insertion edits. */
      private static final String ALPHABET = "abcdefghijklmnopqrstuvwxyz";

      /** Corpus tokenizer: one match per maximal run of word characters. */
      private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

      /** Lower-cased corpus word mapped to (number of occurrences + 1). */
      private final Map<String, Integer> nWords = new HashMap<String, Integer>();

      /**
       * Builds the dictionary from the text file at {@code path}.
       *
       * @param path location of the corpus file, read with the platform default charset
       * @throws IOException if the file cannot be opened or read
       */
      public SpellingCorrector(String path) throws IOException {
          BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
          try {
              for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                  // Locale.ROOT keeps the ASCII mapping stable (e.g. 'I' -> 'i') no matter
                  // which default locale the JVM runs under.
                  Matcher matcher = WORD_PATTERN.matcher(line.toLowerCase(Locale.ROOT));
                  while (matcher.find()) {
                      String token = matcher.group();
                      Integer seen = nWords.get(token);
                      // The first sighting is stored as 2, i.e. occurrences + 1.
                      nWords.put(token, seen == null ? 2 : seen + 1);
                  }
              }
          } finally {
              // Release the file handle even when reading fails part-way through.
              reader.close();
          }
      }

      /**
       * Adds to {@code out} the strings exactly one edit away from {@code word}.
       *
       * @param word      the string to edit
       * @param knownOnly when {@code true}, only strings present in the dictionary are kept
       * @param out       set receiving the generated strings
       */
      private void collectEdits(String word, boolean knownOnly, Set<String> out) {
          // Every split point of the word, including before the first and after the last char.
          for (int cut = 0; cut <= word.length(); cut++) {
              String head = word.substring(0, cut);
              String tail = word.substring(cut);
              if (tail.length() > 0) {
                  String rest = tail.substring(1);
                  offer(head + rest, knownOnly, out); // delete tail's first char
                  for (int i = 0; i < ALPHABET.length(); i++) {
                      offer(head + ALPHABET.charAt(i) + rest, knownOnly, out); // replace it
                  }
                  if (tail.length() > 1) {
                      // swap the two chars right after the split point
                      offer(head + tail.charAt(1) + tail.charAt(0) + tail.substring(2), knownOnly, out);
                  }
              }
              for (int i = 0; i < ALPHABET.length(); i++) {
                  offer(head + ALPHABET.charAt(i) + tail, knownOnly, out); // insert a letter
              }
          }
      }

      /** Adds {@code candidate} to {@code out}, unless filtering is on and it is not a dictionary word. */
      private void offer(String candidate, boolean knownOnly, Set<String> out) {
          if (!knownOnly || nWords.containsKey(candidate)) {
              out.add(candidate);
          }
      }

      /** Returns the dictionary words that are one edit away from {@code word}. */
      private Set<String> knownEdit1(String word) {
          Set<String> res = new HashSet<String>();
          collectEdits(word, true, res);
          return res;
      }

      /**
       * Returns the dictionary words reachable from {@code word} by two successive edits.
       *
       * <p>The intermediate step must NOT be restricted to dictionary words: this
       * method is consulted precisely when no dictionary word is one edit away, so
       * filtering the first step would always leave nothing to expand.
       */
      private Set<String> knownEdits2(String word) {
          Set<String> firstStep = new HashSet<String>();
          collectEdits(word, false, firstStep);
          Set<String> res = new HashSet<String>();
          for (String intermediate : firstStep) {
              collectEdits(intermediate, true, res);
          }
          return res;
      }

      /**
       * Suggests a correction for {@code word}.
       *
       * <p>The word is compared as given; the dictionary itself holds lower-cased words.
       *
       * @param word the word to check; must not be {@code null}
       * @return {@code word} itself if it is in the dictionary; otherwise the most
       *         frequent dictionary word one edit away, or failing that two edits
       *         away (equal counts resolve to the lexicographically smallest
       *         word); or {@code word} unchanged when no candidate exists
       */
      public String correct(String word) {
          if (nWords.containsKey(word)) {
              return word;
          }
          Set<String> candidates = knownEdit1(word);
          if (candidates.isEmpty()) {
              candidates = knownEdits2(word);
          }
          String best = word;
          int bestCount = 0; // stored counts are always >= 2, so any candidate beats this
          for (String candidate : candidates) {
              int count = nWords.get(candidate);
              if (count > bestCount || (count == bestCount && candidate.compareTo(best) < 0)) {
                  best = candidate;
                  bestCount = count;
              }
          }
          return best;
      }

      /**
       * Interactive loop: reads one word per line from standard input and prints a
       * suggestion, until the line {@code !q} is entered or input ends.
       *
       * @param args optional; {@code args[0]} is the corpus path (defaults to {@code "big.txt"})
       * @throws IOException if the corpus or standard input cannot be read
       */
      public static void main(String[] args) throws IOException {
          String corpusPath = args.length > 0 ? args[0] : "big.txt";
          SpellingCorrector sc = new SpellingCorrector(corpusPath);
          // readLine() strips "\n" and "\r\n" alike, so no platform-specific trimming is needed.
          BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
          while (true) {
              System.out.println("Please input your word:");
              String line = stdin.readLine();
              if (line == null) {
                  break; // end of input
              }
              String temp = line.trim();
              if (temp.equals("!q")) {
                  break;
              }
              System.out.println("Do you mean " + sc.correct(temp) + "?");
          }
      }
  }