IMDBReader.java - Our spellchecker Lucene spellchecker Impl…

/starter/src/cs276/pe1/lucene/IMDBReader.java

https://github.com/rioleo/huangenius · Java · 162 lines · 59 code · 47 blank · 56 comment · 7 complexity · 128cac438697f6d455c871daf9dda02d MD5 · raw file


package cs276.pe1.lucene;

import java.io.File;

import org.apache.lucene.search.*;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Version;

import cs276.pe1.lucene.IMDBParser.MoviePlotRecord;
import org.apache.lucene.document.*;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;

import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.LuceneDictionary;
import java.util.List;
import org.apache.lucene.store.FSDirectory;
import cs276.util.Counter;
import cs276.pe1.spell.KGramWithEditDistanceSpellingCorrector;
import cs276.util.StringUtils;

/**
 * Command-line helpers for querying the Lucene index stored in the
 * {@code cs276-index} directory under the user's home directory.
 *
 * <p>{@link #runQueryForTitle(String, String)} spell-corrects a raw query with the
 * project's k-gram/edit-distance corrector, prints the Lucene spell checker's
 * suggestion for comparison, and then prints the titles of the top hits.
 * {@link #main(String[])} runs a fixed sample phrase query against the index.
 */
public class IMDBReader {
	
	/** Name of the index directory, resolved relative to the {@code user.home} system property. */
	private static final String INDEX_DIR_NAME = "cs276-index";
	
	/** Maximum number of hits requested from the searcher per query. */
	private static final int MAX_HITS = 20;
	
	static KGramWithEditDistanceSpellingCorrector spellChecker = new KGramWithEditDistanceSpellingCorrector();
	
	/** Returns the location of the index: {@code <user.home>/cs276-index}. */
	private static File indexPath() {
		return new File(new File(System.getProperty("user.home")), INDEX_DIR_NAME);
	}
	
	/**
	 * Spell-corrects {@code rawQuery}, searches the index and prints the {@code title}
	 * field of up to {@value #MAX_HITS} matching documents to standard output.
	 *
	 * <p>The Lucene spell checker's top suggestion is printed for comparison only; the
	 * query that is actually executed uses the first correction from {@link #spellChecker}
	 * when it differs from the raw query.
	 *
	 * @param rawQuery the query text as typed by the user (Lucene query syntax)
	 * @param field    the default field handed to the {@link QueryParser}
	 * @return always the empty string; results are only printed
	 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
	 */
	public static String runQueryForTitle(String rawQuery, String field) throws Exception {
		
		File indexPath = indexPath();
		IndexReader ireader = IndexReader.open(indexPath);
		try {
			//Our spellchecker
			QueryParser queryParser = new QueryParser(field, new StandardAnalyzer());
			List<String> corrections = spellChecker.corrections(rawQuery);
			
			//Lucene spellchecker
			System.out.println("******Lucene spellchecker suggestions");
			// NOTE(review): the spell checker is opened on the main index directory rather
			// than on a dedicated spell index -- confirm this is intended.
			FSDirectory fs = FSDirectory.getDirectory(indexPath);
			try {
				SpellChecker spell = new SpellChecker(fs);
				
				//Implementation 1.
				String[] similar = spell.suggestSimilar(rawQuery, 1);
				for (String word : similar) {
					System.out.println(word);
				}
				
				//Implementation 2.
				//System.out.println("----> with morePopular = true <--");
				//String[] similar = spell.suggestSimilar(rawQuery, 1, ireader, field, true);
				//for (String word : similar) {
				//	System.out.println(word);
				//}
				//System.out.println("----> with morePopular = false <--");
				//similar = spell.suggestSimilar(rawQuery, 1, ireader, field, false);
				//for (String word : similar) {
				//	System.out.println(word);
				//}
				
				//Implementation 3. Uncomment this section
				
				//Counter<String> editDistances = new Counter<String>();
				//for (String guess : similar) {
				//	editDistances.setCount(guess, -1*StringUtils.levenshtein(rawQuery, guess));
				//}
				//System.out.println("---->Edit distance on Lucene<----");
				//System.out.println(editDistances.topK(1));
			} finally {
				// Release the directory handle acquired above; it was previously never released.
				fs.close();
			}
			
			// Search for our own corrector's best guess when it disagrees with the input.
			if (corrections != null && corrections.size() > 0 && !corrections.get(0).equals(rawQuery)) {
				rawQuery = corrections.get(0);
				System.out.println("******Spellchecker: searching for " + rawQuery);
			}
			
			Query query = queryParser.parse(rawQuery);
			
			IndexSearcher indexsearcher = new IndexSearcher(ireader);
			try {
				TopDocs results = indexsearcher.search(query, null, MAX_HITS);
				for (ScoreDoc doc : results.scoreDocs) {
					System.out.println(ireader.document(doc.doc).get("title"));
				}
			} finally {
				indexsearcher.close();
			}
		} finally {
			// Always release the index files, even when parsing or searching fails.
			ireader.close();
		}
		
		return "";
		
	}
	
	
	/**
	 * Prints the number of documents in the index, then runs a fixed sample phrase
	 * query against the {@code title} field and prints the matching documents, the
	 * parsed query and the total hit count.
	 *
	 * @param argv ignored
	 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
	 */
	public static void main(String[] argv) throws Exception {
		
		IndexReader ireader = IndexReader.open(indexPath());
		try {
			// Is there stuff in it?
			System.out.println("Total docs: " + ireader.numDocs());
			
			//		System.out.println(ireader.document(100));
			
			// WORKS
			QueryParser queryParser = new QueryParser("title", new StandardAnalyzer());
			Query query = queryParser.parse("\"10 items or less\"~1");
			
			// This WORKS
//			PhraseQuery query = new PhraseQuery();
//			query.setSlop(5);
//			query.add(new Term("plots","eighteen"));
//			query.add(new Term("plots","murdered"));
			
			// Rob query WORKS
			//QueryParser queryParser = new QueryParser("authors",new StandardAnalyzer());
			//Query query = queryParser.parse("Rob");
			
			// Search
			//FSDirectory fs = FSDirectory.getDirectory(indexPath);
			//SpellChecker spell = new SpellChecker(fs);
			//spell.indexDictionary(new LuceneDictionary(ireader,"title"));
			//String[] similar = spell.suggestSimilar("Trmmy", 10);
			
			//for (String word : similar) {
			//	System.out.println(word);
			//}
			
			IndexSearcher indexsearcher = new IndexSearcher(ireader);
			try {
				TopDocs results = indexsearcher.search(query, null, MAX_HITS);
				
				for (ScoreDoc doc : results.scoreDocs) {
					System.out.println(ireader.document(doc.doc));
				}
				
				System.out.println("Query: " + query);
				System.out.println("Results: " + results.totalHits);
			} finally {
				indexsearcher.close();
			}
		} finally {
			// Always release the index files, even when parsing or searching fails.
			ireader.close();
		}
		
	}
	
	
}

Tech Fingerprint

Alerts (17)

'import' Maintainability Info: Wildcard imports (e.g., `import java.util.*;`) can obscure the origin of classes and lead to namespace collisions. Prefer importing specific classes explicitly.
5 11
'throws Exception' Declaring 'throws Exception' is too broad. Declare specific checked exceptions that the method might throw, allowing callers to handle them appropriately.
28
'System.out.println(' Use a logging framework (e.g., SLF4J, Log4j) for better control and configurability
40 47 73 80 110 152 156 157
Complexity hotspot; line 71 (total complexity: 4)
71
'+' Performance Info: Using string concatenation ('+' or '+=') inside loops can be inefficient due to repeated String object creation. Use StringBuilder (or StringBuffer for thread-safety) instead.
73 156 157
'=' Maintainability Info: Avoid using unnamed 'magic' numbers directly in comparisons or assignments. Use named constants (static final variables) instead to improve readability and maintainability.
78 148