PageRenderTime 40ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 1ms

/arqoid/src/com/hp/hpl/jena/query/larq/LuceneSearch.java

http://androjena.googlecode.com/
Java | 304 lines | 223 code | 46 blank | 35 comment | 50 complexity | e083d761da5a686b8d27ecb4bfd5f5b6 MD5 | raw file
Possible License(s): Apache-2.0, GPL-3.0
  1. /*
  2. * (c) Copyright 2006, 2007, 2008, 2009 Hewlett-Packard Development Company, LP
  3. * All rights reserved.
  4. * [See end of file]
  5. */
  6. package com.hp.hpl.jena.query.larq;
  7. import java.util.Iterator;
  8. import com.hp.hpl.jena.util.iterator.Map1;
  9. import com.hp.hpl.jena.util.iterator.Map1Iterator;
  10. import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
  11. import com.hp.hpl.jena.graph.Node;
  12. import com.hp.hpl.jena.sparql.core.Var;
  13. import com.hp.hpl.jena.sparql.engine.ExecutionContext;
  14. import com.hp.hpl.jena.sparql.engine.QueryIterator;
  15. import com.hp.hpl.jena.sparql.engine.binding.Binding;
  16. import com.hp.hpl.jena.sparql.engine.binding.BindingMap;
  17. import com.hp.hpl.jena.sparql.engine.iterator.QueryIterNullIterator;
  18. import com.hp.hpl.jena.sparql.engine.iterator.QueryIterPlainWrapper;
  19. import com.hp.hpl.jena.sparql.engine.iterator.QueryIterSingleton;
  20. import com.hp.hpl.jena.sparql.engine.iterator.QueryIterSlice;
  21. import com.hp.hpl.jena.sparql.expr.ExprEvalException;
  22. import com.hp.hpl.jena.sparql.expr.NodeValue;
  23. import com.hp.hpl.jena.sparql.lib.iterator.IteratorTruncate ;
  24. import com.hp.hpl.jena.sparql.pfunction.PropFuncArg;
  25. import com.hp.hpl.jena.sparql.pfunction.PropFuncArgType;
  26. import com.hp.hpl.jena.sparql.pfunction.PropertyFunctionEval;
  27. import com.hp.hpl.jena.sparql.util.ALog;
  28. import com.hp.hpl.jena.sparql.util.IterLib;
  29. import com.hp.hpl.jena.sparql.util.NodeFactory;
  30. import com.hp.hpl.jena.query.Query;
  31. import com.hp.hpl.jena.query.QueryBuildException;
  32. import com.hp.hpl.jena.query.QueryExecException;
  33. /** Base class for searching a IndexLARQ */
  34. public abstract class LuceneSearch extends PropertyFunctionEval
  35. {
  36. protected LuceneSearch()
  37. {
  38. super(PropFuncArgType.PF_ARG_EITHER,
  39. PropFuncArgType.PF_ARG_EITHER) ;
  40. }
  41. protected abstract IndexLARQ getIndex(ExecutionContext execCxt) ;
  42. @Override
  43. public void build(PropFuncArg argSubject, Node predicate, PropFuncArg argObject, ExecutionContext execCxt)
  44. {
  45. super.build(argSubject, predicate, argObject, execCxt) ;
  46. if ( getIndex(execCxt) == null )
  47. throw new QueryBuildException("Index not found") ;
  48. if ( argSubject.isList() && argSubject.getArgListSize() != 2 )
  49. throw new QueryBuildException("Subject has "+argSubject.getArgList().size()+" elements, not 2: "+argSubject) ;
  50. if ( argObject.isList() && (argObject.getArgListSize() != 2 && argObject.getArgListSize() != 3) )
  51. throw new QueryBuildException("Object has "+argObject.getArgList().size()+" elements, not 2 or 3: "+argObject) ;
  52. }
  53. @Override
  54. public QueryIterator execEvaluated(Binding binding, PropFuncArg argSubject, Node predicate, PropFuncArg argObject, ExecutionContext execCxt)
  55. {
  56. try {
  57. return execEvaluatedProtected(binding, argSubject, predicate, argObject, execCxt) ;
  58. } catch (RuntimeException ex)
  59. {
  60. ALog.fatal(this, "Exception from Lucene search", ex) ;
  61. throw ex ;
  62. }
  63. }
  64. private QueryIterator execEvaluatedProtected(Binding binding, PropFuncArg argSubject, Node predicate, PropFuncArg argObject, ExecutionContext execCxt)
  65. {
  66. Node match = null ;
  67. Node score = null ;
  68. Node searchString = null ;
  69. long limit = Query.NOLIMIT ;
  70. float scoreLimit = -1.0f ;
  71. if ( argSubject.isList() )
  72. {
  73. // Length checked in build
  74. match = argSubject.getArg(0) ;
  75. score = argSubject.getArg(1) ;
  76. if ( ! score.isVariable() )
  77. throw new QueryExecException("Hit score is not a variable: "+argSubject) ;
  78. }
  79. else
  80. {
  81. match = argSubject.getArg() ;
  82. //score = null ;
  83. }
  84. if ( argObject.isList() )
  85. {
  86. // Length checked in build
  87. searchString = argObject.getArg(0) ;
  88. for ( int i = 1 ; i < argObject.getArgListSize() ; i++ )
  89. {
  90. Node n = argObject.getArg(i) ;
  91. int nInt = asInteger(n) ;
  92. if ( isInteger(nInt) )
  93. {
  94. if ( limit > 0 )
  95. throw new ExprEvalException("2 potential limits to Lucene search: "+argObject) ;
  96. limit = nInt ;
  97. if ( limit < 0 )
  98. limit = Query.NOLIMIT ;
  99. continue ;
  100. }
  101. float nFloat = asFloat(n) ;
  102. if ( isFloat(nFloat) )
  103. {
  104. if ( scoreLimit > 0 )
  105. throw new ExprEvalException("2 potential score limits to Lucene search: "+argObject) ;
  106. if ( nFloat < 0 )
  107. throw new ExprEvalException("Negative score limit to Lucene search: "+argObject) ;
  108. scoreLimit = nFloat ;
  109. continue ;
  110. }
  111. throw new ExprEvalException("Bad argument to Lucene search: "+argObject) ;
  112. }
  113. if ( scoreLimit < 0 )
  114. scoreLimit = 0.0f ;
  115. if ( ! isValidSearchString(searchString) )
  116. return new QueryIterNullIterator(execCxt) ;
  117. }
  118. else
  119. {
  120. searchString = argObject.getArg() ;
  121. limit = Query.NOLIMIT ;
  122. scoreLimit = 0.0f ;
  123. }
  124. if ( !isValidSearchString(searchString) )
  125. return IterLib.noResults(execCxt) ;
  126. String qs = asString(searchString) ;
  127. if ( qs == null )
  128. {
  129. ALog.warn(this, "Not a string (it was a moment ago!): "+searchString) ;
  130. return new QueryIterNullIterator(execCxt) ;
  131. }
  132. Var scoreVar = (score==null)?null:Var.alloc(score) ;
  133. if ( match.isVariable() )
  134. return varSubject(binding,
  135. Var.alloc(match), scoreVar,
  136. qs, limit, scoreLimit,
  137. execCxt) ;
  138. else
  139. return boundSubject(binding,
  140. match, scoreVar,
  141. qs, limit, scoreLimit,
  142. execCxt) ;
  143. }
  144. private static boolean isValidSearchString(Node searchString)
  145. {
  146. if ( !searchString.isLiteral() )
  147. {
  148. ALog.warn(LuceneSearch.class, "Not a string: "+searchString) ;
  149. return false ;
  150. }
  151. if ( searchString.getLiteralDatatypeURI() != null )
  152. {
  153. ALog.warn(LuceneSearch.class, "Not a plain string: "+searchString) ;
  154. return false ;
  155. }
  156. if ( searchString.getLiteralLanguage() != null && ! searchString.getLiteralLanguage().equals("") )
  157. {
  158. ALog.warn(LuceneSearch.class, "Not a plain string (has lang tag): "+searchString) ;
  159. return false ;
  160. }
  161. return true ;
  162. }
  163. public QueryIterator varSubject(Binding binding,
  164. Var match, Var score,
  165. String searchString, long limit, float scoreLimit,
  166. ExecutionContext execCxt)
  167. {
  168. Iterator<HitLARQ> iter = getIndex(execCxt).search(searchString) ;
  169. if ( scoreLimit > 0 )
  170. iter = new IteratorTruncate<HitLARQ>(new ScoreTest(scoreLimit), iter) ;
  171. HitConverter converter = new HitConverter(binding, match, score) ;
  172. Iterator<Binding> iter2 = new Map1Iterator<HitLARQ, Binding>(converter, iter) ;
  173. QueryIterator qIter = new QueryIterPlainWrapper(iter2, execCxt) ;
  174. if ( limit >= 0 )
  175. qIter = new QueryIterSlice(qIter, 0, limit, execCxt) ;
  176. return qIter ;
  177. }
  178. static class HitConverter implements Map1<HitLARQ, Binding>
  179. {
  180. private Binding binding ;
  181. private Var match ;
  182. private Var score ;
  183. HitConverter(Binding binding, Var matchVar, Var score)
  184. {
  185. this.binding = binding ;
  186. this.match = matchVar ;
  187. this.score = score ;
  188. }
  189. public Binding map1(HitLARQ hit)
  190. {
  191. Binding b = new BindingMap(binding) ;
  192. b.add(match, hit.getNode()) ;
  193. if ( score != null )
  194. b.add(score, NodeFactory.floatToNode(hit.getScore())) ;
  195. return b ;
  196. }
  197. }
  198. public QueryIterator boundSubject(Binding binding,
  199. Node match, Var score,
  200. String searchString, long limit, float scoreLimit,
  201. ExecutionContext execCxt)
  202. {
  203. HitLARQ hit = getIndex(execCxt).contains(match, searchString) ;
  204. if ( hit == null )
  205. return new QueryIterNullIterator(execCxt) ;
  206. if ( score == null )
  207. return QueryIterSingleton.create(binding, execCxt) ;
  208. return IterLib.oneResult(binding, score, NodeFactory.floatToNode(hit.getScore()), execCxt) ;
  209. }
  210. static private String asString(Node node)
  211. {
  212. if ( node.getLiteralDatatype() != null
  213. && ! node.getLiteralDatatype().equals(XSDDatatype.XSDstring) )
  214. return null ;
  215. return node.getLiteralLexicalForm() ;
  216. }
  217. static private float asFloat(Node n)
  218. {
  219. if ( n == null ) return Float.MIN_VALUE ;
  220. NodeValue nv = NodeValue.makeNode(n) ;
  221. if ( nv.isFloat() )
  222. return nv.getFloat() ;
  223. return Float.MIN_VALUE ;
  224. }
  225. static private int asInteger(Node n)
  226. {
  227. if ( n == null ) return Integer.MIN_VALUE ;
  228. return NodeFactory.nodeToInt(n) ;
  229. }
  230. static private boolean isInteger(int i) { return i != Integer.MIN_VALUE ; }
  231. static private boolean isFloat(float f) { return f != Float.MIN_VALUE ; }
  232. }
  233. /*
  234. * (c) Copyright 2006, 2007, 2008, 2009 Hewlett-Packard Development Company, LP
  235. * All rights reserved.
  236. *
  237. * Redistribution and use in source and binary forms, with or without
  238. * modification, are permitted provided that the following conditions
  239. * are met:
  240. * 1. Redistributions of source code must retain the above copyright
  241. * notice, this list of conditions and the following disclaimer.
  242. * 2. Redistributions in binary form must reproduce the above copyright
  243. * notice, this list of conditions and the following disclaimer in the
  244. * documentation and/or other materials provided with the distribution.
  245. * 3. The name of the author may not be used to endorse or promote products
  246. * derived from this software without specific prior written permission.
  247. *
  248. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  249. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  250. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  251. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  252. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  253. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  254. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  255. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  256. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  257. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  258. */