PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java

https://bitbucket.org/gasol/lucene3
Java | 351 lines | 271 code | 30 blank | 50 comment | 37 complexity | 398e1e62d3d3b256f630282c56ff6300 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. package org.apache.lucene.search.payloads;
  2. /**
  3. * Licensed to the Apache Software Foundation (ASF) under one or more
  4. * contributor license agreements. See the NOTICE file distributed with
  5. * this work for additional information regarding copyright ownership.
  6. * The ASF licenses this file to You under the Apache License, Version 2.0
  7. * (the "License"); you may not use this file except in compliance with
  8. * the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. import java.io.IOException;
  19. import java.io.Reader;
  20. import java.util.Collection;
  21. import org.apache.lucene.analysis.Analyzer;
  22. import org.apache.lucene.analysis.LowerCaseTokenizer;
  23. import org.apache.lucene.analysis.TokenFilter;
  24. import org.apache.lucene.analysis.TokenStream;
  25. import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
  26. import org.apache.lucene.document.Document;
  27. import org.apache.lucene.document.Field;
  28. import org.apache.lucene.index.FieldInvertState;
  29. import org.apache.lucene.index.IndexReader;
  30. import org.apache.lucene.index.Payload;
  31. import org.apache.lucene.index.RandomIndexWriter;
  32. import org.apache.lucene.index.Term;
  33. import org.apache.lucene.search.DefaultSimilarity;
  34. import org.apache.lucene.search.Explanation;
  35. import org.apache.lucene.search.IndexSearcher;
  36. import org.apache.lucene.search.QueryUtils;
  37. import org.apache.lucene.search.ScoreDoc;
  38. import org.apache.lucene.search.Searcher;
  39. import org.apache.lucene.search.TopDocs;
  40. import org.apache.lucene.search.spans.SpanQuery;
  41. import org.apache.lucene.search.spans.SpanNearQuery;
  42. import org.apache.lucene.search.spans.SpanTermQuery;
  43. import org.apache.lucene.store.Directory;
  44. import org.apache.lucene.util.English;
  45. import org.apache.lucene.util.LuceneTestCase;
  46. import org.apache.lucene.search.Explanation.IDFExplanation;
  47. import org.junit.AfterClass;
  48. import org.junit.BeforeClass;
  49. public class TestPayloadNearQuery extends LuceneTestCase {
  50. private static IndexSearcher searcher;
  51. private static IndexReader reader;
  52. private static Directory directory;
  53. private static BoostingSimilarity similarity = new BoostingSimilarity();
  54. private static byte[] payload2 = new byte[]{2};
  55. private static byte[] payload4 = new byte[]{4};
  56. private static class PayloadAnalyzer extends Analyzer {
  57. @Override
  58. public TokenStream tokenStream(String fieldName, Reader reader) {
  59. TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
  60. result = new PayloadFilter(result, fieldName);
  61. return result;
  62. }
  63. }
  64. private static class PayloadFilter extends TokenFilter {
  65. private final String fieldName;
  66. private int numSeen = 0;
  67. private final PayloadAttribute payAtt;
  68. public PayloadFilter(TokenStream input, String fieldName) {
  69. super(input);
  70. this.fieldName = fieldName;
  71. payAtt = addAttribute(PayloadAttribute.class);
  72. }
  73. @Override
  74. public boolean incrementToken() throws IOException {
  75. boolean result = false;
  76. if (input.incrementToken()) {
  77. if (numSeen % 2 == 0) {
  78. payAtt.setPayload(new Payload(payload2));
  79. } else {
  80. payAtt.setPayload(new Payload(payload4));
  81. }
  82. numSeen++;
  83. result = true;
  84. }
  85. return result;
  86. }
  87. @Override
  88. public void reset() throws IOException {
  89. super.reset();
  90. this.numSeen = 0;
  91. }
  92. }
  93. private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
  94. String[] words = phrase.split("[\\s]+");
  95. SpanQuery clauses[] = new SpanQuery[words.length];
  96. for (int i=0;i<clauses.length;i++) {
  97. clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
  98. }
  99. return new PayloadNearQuery(clauses, 0, inOrder, function);
  100. }
  101. @BeforeClass
  102. public static void beforeClass() throws Exception {
  103. directory = newDirectory();
  104. RandomIndexWriter writer = new RandomIndexWriter(random, directory,
  105. newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
  106. .setSimilarity(similarity));
  107. //writer.infoStream = System.out;
  108. for (int i = 0; i < 1000; i++) {
  109. Document doc = new Document();
  110. doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
  111. String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1);
  112. doc.add(newField("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
  113. writer.addDocument(doc);
  114. }
  115. reader = writer.getReader();
  116. writer.close();
  117. searcher = newSearcher(reader);
  118. searcher.setSimilarity(similarity);
  119. }
  120. @AfterClass
  121. public static void afterClass() throws Exception {
  122. searcher.close();
  123. searcher = null;
  124. reader.close();
  125. reader = null;
  126. directory.close();
  127. directory = null;
  128. }
  129. public void test() throws IOException {
  130. PayloadNearQuery query;
  131. TopDocs hits;
  132. query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
  133. QueryUtils.check(query);
  134. // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
  135. // and all the similarity factors are set to 1
  136. hits = searcher.search(query, null, 100);
  137. assertTrue("hits is null and it shouldn't be", hits != null);
  138. assertTrue("should be 10 hits", hits.totalHits == 10);
  139. for (int j = 0; j < hits.scoreDocs.length; j++) {
  140. ScoreDoc doc = hits.scoreDocs[j];
  141. assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  142. }
  143. for (int i=1;i<10;i++) {
  144. query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
  145. // all should have score = 3 because adjacent terms have payloads of 2,4
  146. // and all the similarity factors are set to 1
  147. hits = searcher.search(query, null, 100);
  148. assertTrue("hits is null and it shouldn't be", hits != null);
  149. assertTrue("should be 100 hits", hits.totalHits == 100);
  150. for (int j = 0; j < hits.scoreDocs.length; j++) {
  151. ScoreDoc doc = hits.scoreDocs[j];
  152. // System.out.println("Doc: " + doc.toString());
  153. // System.out.println("Explain: " + searcher.explain(query, doc.doc));
  154. assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  155. }
  156. }
  157. }
  158. public void testPayloadNear() throws IOException {
  159. SpanNearQuery q1, q2;
  160. PayloadNearQuery query;
  161. //SpanNearQuery(clauses, 10000, false)
  162. q1 = spanNearQuery("field2", "twenty two");
  163. q2 = spanNearQuery("field2", "twenty three");
  164. SpanQuery[] clauses = new SpanQuery[2];
  165. clauses[0] = q1;
  166. clauses[1] = q2;
  167. query = new PayloadNearQuery(clauses, 10, false);
  168. //System.out.println(query.toString());
  169. assertEquals(12, searcher.search(query, null, 100).totalHits);
  170. /*
  171. System.out.println(hits.totalHits);
  172. for (int j = 0; j < hits.scoreDocs.length; j++) {
  173. ScoreDoc doc = hits.scoreDocs[j];
  174. System.out.println("doc: "+doc.doc+", score: "+doc.score);
  175. }
  176. */
  177. }
  178. public void testAverageFunction() throws IOException {
  179. PayloadNearQuery query;
  180. TopDocs hits;
  181. query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
  182. QueryUtils.check(query);
  183. // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
  184. // and all the similarity factors are set to 1
  185. hits = searcher.search(query, null, 100);
  186. assertTrue("hits is null and it shouldn't be", hits != null);
  187. assertTrue("should be 10 hits", hits.totalHits == 10);
  188. for (int j = 0; j < hits.scoreDocs.length; j++) {
  189. ScoreDoc doc = hits.scoreDocs[j];
  190. assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  191. Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
  192. String exp = explain.toString();
  193. assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
  194. assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
  195. }
  196. }
  197. public void testMaxFunction() throws IOException {
  198. PayloadNearQuery query;
  199. TopDocs hits;
  200. query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
  201. QueryUtils.check(query);
  202. // all 10 hits should have score = 4 (max payload value)
  203. hits = searcher.search(query, null, 100);
  204. assertTrue("hits is null and it shouldn't be", hits != null);
  205. assertTrue("should be 10 hits", hits.totalHits == 10);
  206. for (int j = 0; j < hits.scoreDocs.length; j++) {
  207. ScoreDoc doc = hits.scoreDocs[j];
  208. assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
  209. Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
  210. String exp = explain.toString();
  211. assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
  212. assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
  213. }
  214. }
  215. public void testMinFunction() throws IOException {
  216. PayloadNearQuery query;
  217. TopDocs hits;
  218. query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
  219. QueryUtils.check(query);
  220. // all 10 hits should have score = 2 (min payload value)
  221. hits = searcher.search(query, null, 100);
  222. assertTrue("hits is null and it shouldn't be", hits != null);
  223. assertTrue("should be 10 hits", hits.totalHits == 10);
  224. for (int j = 0; j < hits.scoreDocs.length; j++) {
  225. ScoreDoc doc = hits.scoreDocs[j];
  226. assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
  227. Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
  228. String exp = explain.toString();
  229. assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
  230. assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
  231. }
  232. }
  233. private SpanQuery[] getClauses() {
  234. SpanNearQuery q1, q2;
  235. q1 = spanNearQuery("field2", "twenty two");
  236. q2 = spanNearQuery("field2", "twenty three");
  237. SpanQuery[] clauses = new SpanQuery[2];
  238. clauses[0] = q1;
  239. clauses[1] = q2;
  240. return clauses;
  241. }
  242. private SpanNearQuery spanNearQuery(String fieldName, String words) {
  243. String[] wordList = words.split("[\\s]+");
  244. SpanQuery clauses[] = new SpanQuery[wordList.length];
  245. for (int i=0;i<clauses.length;i++) {
  246. clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction());
  247. }
  248. return new SpanNearQuery(clauses, 10000, false);
  249. }
  250. public void testLongerSpan() throws IOException {
  251. PayloadNearQuery query;
  252. TopDocs hits;
  253. query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
  254. hits = searcher.search(query, null, 100);
  255. assertTrue("hits is null and it shouldn't be", hits != null);
  256. ScoreDoc doc = hits.scoreDocs[0];
  257. // System.out.println("Doc: " + doc.toString());
  258. // System.out.println("Explain: " + searcher.explain(query, doc.doc));
  259. assertTrue("there should only be one hit", hits.totalHits == 1);
  260. // should have score = 3 because adjacent terms have payloads of 2,4
  261. assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  262. }
  263. public void testComplexNested() throws IOException {
  264. PayloadNearQuery query;
  265. TopDocs hits;
  266. // combine ordered and unordered spans with some nesting to make sure all payloads are counted
  267. SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
  268. SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
  269. SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
  270. SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
  271. SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
  272. query = new PayloadNearQuery(clauses, 0, false);
  273. hits = searcher.search(query, null, 100);
  274. assertTrue("hits is null and it shouldn't be", hits != null);
  275. // should be only 1 hit - doc 999
  276. assertTrue("should only be one hit", hits.scoreDocs.length == 1);
  277. // the score should be 3 - the average of all the underlying payloads
  278. ScoreDoc doc = hits.scoreDocs[0];
  279. // System.out.println("Doc: " + doc.toString());
  280. // System.out.println("Explain: " + searcher.explain(query, doc.doc));
  281. assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
  282. }
  283. // must be static for weight serialization tests
  284. static class BoostingSimilarity extends DefaultSimilarity {
  285. @Override public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
  286. //we know it is size 4 here, so ignore the offset/length
  287. return payload[0];
  288. }
  289. //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  290. //Make everything else 1 so we see the effect of the payload
  291. //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  292. @Override public float computeNorm(String fieldName, FieldInvertState state) {
  293. return state.getBoost();
  294. }
  295. @Override public float queryNorm(float sumOfSquaredWeights) {
  296. return 1.0f;
  297. }
  298. @Override public float sloppyFreq(int distance) {
  299. return 1.0f;
  300. }
  301. @Override public float coord(int overlap, int maxOverlap) {
  302. return 1.0f;
  303. }
  304. @Override public float tf(float freq) {
  305. return 1.0f;
  306. }
  307. // idf used for phrase queries
  308. @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
  309. return new IDFExplanation() {
  310. @Override
  311. public float getIdf() {
  312. return 1.0f;
  313. }
  314. @Override
  315. public String explain() {
  316. return "Inexplicable";
  317. }
  318. };
  319. }
  320. }
  321. }