PageRenderTime 1847ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 0ms

/starter/src/cs276/pe1/lucene/IMDBReader.java

https://github.com/rioleo/huangenius
Java | 162 lines | 59 code | 47 blank | 56 comment | 7 complexity | 128cac438697f6d455c871daf9dda02d MD5 | raw file
  1. package cs276.pe1.lucene;
  2. import java.io.File;
  3. import org.apache.lucene.search.*;
  4. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  5. import org.apache.lucene.index.IndexWriter;
  6. import org.apache.lucene.util.Version;
  7. import cs276.pe1.lucene.IMDBParser.MoviePlotRecord;
  8. import org.apache.lucene.document.*;
  9. import org.apache.lucene.queryParser.QueryParser;
  10. import org.apache.lucene.index.IndexReader;
  11. import org.apache.lucene.index.Term;
  12. import org.apache.lucene.search.spell.SpellChecker;
  13. import org.apache.lucene.search.spell.LuceneDictionary;
  14. import java.util.List;
  15. import org.apache.lucene.store.FSDirectory;
  16. import cs276.util.Counter;
  17. import cs276.pe1.spell.KGramWithEditDistanceSpellingCorrector;
  18. import cs276.util.StringUtils;
  19. public class IMDBReader {
  20. static KGramWithEditDistanceSpellingCorrector spellChecker = new KGramWithEditDistanceSpellingCorrector();
  21. public static String runQueryForTitle(String rawQuery, String field) throws Exception {
  22. File indexPath = new File(new File(System.getProperty("user.home")),"cs276-index");
  23. IndexReader ireader = IndexReader.open(indexPath);
  24. IndexSearcher indexsearcher = new IndexSearcher(ireader);
  25. //Our spellchecker
  26. QueryParser queryParser = new QueryParser(field,new StandardAnalyzer());
  27. List<String> corrections = spellChecker.corrections(rawQuery);
  28. //Lucene spellchecker
  29. System.out.println("******Lucene spellchecker suggestions");
  30. FSDirectory fs = FSDirectory.getDirectory(indexPath);
  31. SpellChecker spell = new SpellChecker(fs);
  32. //Implementation 1.
  33. String[] similar = spell.suggestSimilar(rawQuery, 1);
  34. for (String word : similar) {
  35. System.out.println(word);
  36. }
  37. //Implementation 2.
  38. //System.out.println("----> with morePopular = true <--");
  39. //String[] similar = spell.suggestSimilar(rawQuery, 1, ireader, field, true);
  40. //for (String word : similar) {
  41. // System.out.println(word);
  42. // }
  43. //System.out.println("----> with morePopular = false <--");
  44. //similar = spell.suggestSimilar(rawQuery, 1, ireader, field, false);
  45. //for (String word : similar) {
  46. // System.out.println(word);
  47. //}
  48. //Implementation 3. Uncomment this section
  49. //Counter<String> editDistances = new Counter<String>();
  50. //for (String guess : similar) {
  51. // editDistances.setCount(guess, -1*StringUtils.levenshtein(rawQuery, guess));
  52. //}
  53. //System.out.println("---->Edit distance on Lucene<----");
  54. //System.out.println(editDistances.topK(1));
  55. if (corrections != null && corrections.size() > 0 && !corrections.get(0).equals(rawQuery)) {
  56. rawQuery = corrections.get(0);
  57. System.out.println("******Spellchecker: searching for " + rawQuery);
  58. }
  59. Query query = queryParser.parse(rawQuery);
  60. TopDocs results = indexsearcher.search(query, null, 20);
  61. for (ScoreDoc doc : results.scoreDocs) {
  62. System.out.println(ireader.document(doc.doc).get("title"));
  63. }
  64. // if (corrections != null && corrections.size() > 0 && !corrections.get(0).equals(rawQuery)) {
  65. // rawQuery = corrections.get(0);
  66. // System.out.println("******Spellchecker: searching for " + rawQuery);
  67. // }
  68. // Query query = queryParser.parse(rawQuery);
  69. //
  70. // TopDocs results = indexsearcher.search(query, null, 20);
  71. // for (ScoreDoc doc : results.scoreDocs) {
  72. // System.out.println(ireader.document(doc.doc).get("title"));
  73. // }
  74. //
  75. //
  76. // System.out.println("Query: " + query);
  77. // System.out.println("Results: showing " + results.scoreDocs.length + " out of " + results.totalHits);
  78. return "";
  79. }
  80. public static void main(String[] argv) throws Exception {
  81. File indexPath = new File(new File(System.getProperty("user.home")),"cs276-index");
  82. IndexReader ireader = IndexReader.open(indexPath);
  83. // Is there stuff in it?
  84. System.out.println("Total docs: " + ireader.numDocs());
  85. // System.out.println(ireader.document(100));
  86. IndexSearcher indexsearcher = new IndexSearcher(ireader);
  87. // WORKS
  88. QueryParser queryParser = new QueryParser("title",new StandardAnalyzer());
  89. Query query = queryParser.parse("\"10 items or less\"~1");
  90. // This WORKS
  91. // PhraseQuery query = new PhraseQuery();
  92. // query.setSlop(5);
  93. // query.add(new Term("plots","eighteen"));
  94. // query.add(new Term("plots","murdered"));
  95. // Rob query WORKS
  96. //QueryParser queryParser = new QueryParser("authors",new StandardAnalyzer());
  97. //Query query = queryParser.parse("Rob");
  98. // Search
  99. //FSDirectory fs = FSDirectory.getDirectory(indexPath);
  100. //SpellChecker spell = new SpellChecker(fs);
  101. //spell.indexDictionary(new LuceneDictionary(ireader,"title"));
  102. //String[] similar = spell.suggestSimilar("Trmmy", 10);
  103. //for (String word : similar) {
  104. // System.out.println(word);
  105. // }
  106. // QueryParser queryParser = new QueryParser("title",new StandardAnalyzer());
  107. //
  108. TopDocs results = indexsearcher.search(query, null, 20);
  109. for (ScoreDoc doc : results.scoreDocs) {
  110. System.out.println(ireader.document(doc.doc));
  111. }
  112. System.out.println("Query: " + query);
  113. System.out.println("Results: " + results.totalHits);
  114. }
  115. }