/src/main/java/com/tamingtext/fuzzy/SpellCorrector.java
https://github.com/aman-tandon/book · Java · 86 lines · 50 code · 7 blank · 29 comment · 3 complexity · 39b8ee90279d4e28318ce2c70ea441ff MD5 · raw file
- /*
- * Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * -------------------
- * To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
- * http://www.manning.com/ingersoll
- */
- package com.tamingtext.fuzzy;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.util.Iterator;
- import org.apache.lucene.search.spell.StringDistance;
- import org.apache.solr.client.solrj.SolrQuery;
- import org.apache.solr.client.solrj.SolrServer;
- import org.apache.solr.client.solrj.SolrServerException;
- import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
- import org.apache.solr.client.solrj.response.QueryResponse;
- import org.apache.solr.common.SolrDocument;
- import org.apache.solr.common.SolrDocumentList;
- //<start id="did-you-mean.corrector"/>
- public class SpellCorrector {
- private SolrServer solr;
- private SolrQuery query;
- private StringDistance sd;
- private float threshold;
-
- public SpellCorrector(StringDistance sd, float threshold)
- throws MalformedURLException {
- solr = new CommonsHttpSolrServer(
- new URL("http://localhost:8983/solr"));
- query = new SolrQuery();
- query.setFields("word");
- query.setRows(50); //<co id="co.dym.num"/>
- this.sd = sd;
- this.threshold = threshold;
- }
-
- public String topSuggestion(String spelling)
- throws SolrServerException {
- query.setQuery("wordNGram:"+spelling); //<co id="co.dym.field"/>
- QueryResponse response = solr.query(query);
- SolrDocumentList dl = response.getResults();
- Iterator<SolrDocument> di = dl.iterator();
- float maxDistance = 0;
- String suggestion = null;
- while (di.hasNext()) {
- SolrDocument doc = di.next();
- String word = (String) doc.getFieldValue("word");
- float distance = sd.getDistance(word, spelling); //<co id="co.dym.edit"/>
- if (distance > maxDistance) {
- maxDistance = distance;
- suggestion = word; //<co id="co.dym.max"/>
- }
- }
- if (maxDistance > threshold) { //<co id="co.dym.threshold"/>
- return suggestion;
- }
- return null;
- }
- }
- /*
- <calloutlist>
- <callout arearefs="co.dym.num"><para>The number of n-gram matches to consider.</para></callout>
- <callout arearefs="co.dym.field"><para>Query the field which contains the n-gram.</para></callout>
- <callout arearefs="co.dym.edit"><para>Compute the edit distance.</para></callout>
- <callout arearefs="co.dym.max"><para>Keep best suggestion.</para></callout>
- <callout arearefs="co.dym.threshold"><para>Check threshold otherwise return no suggestion.</para></callout>
- </calloutlist>
- */
- //<end id="did-you-mean.corrector"/>