/src/main/java/com/tamingtext/fuzzy/SpellCorrector.java

https://github.com/aman-tandon/book · Java · 86 lines · 50 code · 7 blank · 29 comment · 3 complexity · 39b8ee90279d4e28318ce2c70ea441ff MD5 · raw file

  1. /*
  2. * Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. * -------------------
  16. * To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
  17. * http://www.manning.com/ingersoll
  18. */
  19. package com.tamingtext.fuzzy;
  20. import java.net.MalformedURLException;
  21. import java.net.URL;
  22. import java.util.Iterator;
  23. import org.apache.lucene.search.spell.StringDistance;
  24. import org.apache.solr.client.solrj.SolrQuery;
  25. import org.apache.solr.client.solrj.SolrServer;
  26. import org.apache.solr.client.solrj.SolrServerException;
  27. import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
  28. import org.apache.solr.client.solrj.response.QueryResponse;
  29. import org.apache.solr.common.SolrDocument;
  30. import org.apache.solr.common.SolrDocumentList;
  31. //<start id="did-you-mean.corrector"/>
  32. public class SpellCorrector {
  33. private SolrServer solr;
  34. private SolrQuery query;
  35. private StringDistance sd;
  36. private float threshold;
  37. public SpellCorrector(StringDistance sd, float threshold)
  38. throws MalformedURLException {
  39. solr = new CommonsHttpSolrServer(
  40. new URL("http://localhost:8983/solr"));
  41. query = new SolrQuery();
  42. query.setFields("word");
  43. query.setRows(50); //<co id="co.dym.num"/>
  44. this.sd = sd;
  45. this.threshold = threshold;
  46. }
  47. public String topSuggestion(String spelling)
  48. throws SolrServerException {
  49. query.setQuery("wordNGram:"+spelling); //<co id="co.dym.field"/>
  50. QueryResponse response = solr.query(query);
  51. SolrDocumentList dl = response.getResults();
  52. Iterator<SolrDocument> di = dl.iterator();
  53. float maxDistance = 0;
  54. String suggestion = null;
  55. while (di.hasNext()) {
  56. SolrDocument doc = di.next();
  57. String word = (String) doc.getFieldValue("word");
  58. float distance = sd.getDistance(word, spelling); //<co id="co.dym.edit"/>
  59. if (distance > maxDistance) {
  60. maxDistance = distance;
  61. suggestion = word; //<co id="co.dym.max"/>
  62. }
  63. }
  64. if (maxDistance > threshold) { //<co id="co.dym.threshold"/>
  65. return suggestion;
  66. }
  67. return null;
  68. }
  69. }
  70. /*
  71. <calloutlist>
  72. <callout arearefs="co.dym.num"><para>The number of n-gram matches to consider.</para></callout>
  73. <callout arearefs="co.dym.field"><para>Query the field which contains the n-gram.</para></callout>
  74. <callout arearefs="co.dym.edit"><para>Compute the edit distance.</para></callout>
  75. <callout arearefs="co.dym.max"><para>Keep best suggestion.</para></callout>
  76. <callout arearefs="co.dym.threshold"><para>Check the threshold; otherwise return no suggestion.</para></callout>
  77. </calloutlist>
  78. */
  79. //<end id="did-you-mean.corrector"/>