PageRenderTime 74ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/lucene/core/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java

http://github.com/apache/lucene-solr
Java | 254 lines | 205 code | 29 blank | 20 comment | 22 complexity | 6ba59d2bee03b50457fe8dd58676d17c MD5 | raw file
Possible License(s): LGPL-2.1, CPL-1.0, MPL-2.0-no-copyleft-exception, JSON, Apache-2.0, AGPL-1.0, GPL-2.0, GPL-3.0, MIT, BSD-3-Clause
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.lucene.search;
  18. import java.io.IOException;
  19. import java.util.ArrayList;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Random;
  23. import java.util.Set;
  24. import org.apache.lucene.analysis.MockAnalyzer;
  25. import org.apache.lucene.document.Document;
  26. import org.apache.lucene.document.Field.Store;
  27. import org.apache.lucene.document.NumericDocValuesField;
  28. import org.apache.lucene.document.StringField;
  29. import org.apache.lucene.index.IndexReader;
  30. import org.apache.lucene.index.IndexWriterConfig;
  31. import org.apache.lucene.index.MockRandomMergePolicy;
  32. import org.apache.lucene.index.QueryTimeout;
  33. import org.apache.lucene.index.RandomIndexWriter;
  34. import org.apache.lucene.index.SerialMergeScheduler;
  35. import org.apache.lucene.index.Term;
  36. import org.apache.lucene.search.IndexSearcher;
  37. import org.apache.lucene.search.MatchAllDocsQuery;
  38. import org.apache.lucene.search.Query;
  39. import org.apache.lucene.search.ScoreDoc;
  40. import org.apache.lucene.search.Sort;
  41. import org.apache.lucene.search.SortField;
  42. import org.apache.lucene.search.TermQuery;
  43. import org.apache.lucene.search.TopFieldCollector;
  44. import org.apache.lucene.store.Directory;
  45. import org.apache.lucene.util.LuceneTestCase;
  46. import org.apache.lucene.util.TestUtil;
  47. import com.carrotsearch.randomizedtesting.generators.RandomPicks;
  48. public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
  49. private int numDocs;
  50. private List<String> terms;
  51. private Directory dir;
  52. private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG));
  53. private RandomIndexWriter iw;
  54. private IndexReader reader;
  55. private final int forceMergeMaxSegmentCount = 5;
  56. private Document randomDocument() {
  57. final Document doc = new Document();
  58. doc.add(new NumericDocValuesField("ndv1", random().nextInt(10)));
  59. doc.add(new NumericDocValuesField("ndv2", random().nextInt(10)));
  60. doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
  61. return doc;
  62. }
  63. private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  64. dir = newDirectory();
  65. numDocs = atLeast(150);
  66. final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
  67. Set<String> randomTerms = new HashSet<>();
  68. while (randomTerms.size() < numTerms) {
  69. randomTerms.add(TestUtil.randomSimpleString(random()));
  70. }
  71. terms = new ArrayList<>(randomTerms);
  72. final long seed = random().nextLong();
  73. final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  74. if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
  75. // MockRandomMP randomly wraps the leaf readers which makes merging angry
  76. iwc.setMergePolicy(newTieredMergePolicy());
  77. }
  78. iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
  79. iwc.setIndexSort(sort);
  80. iw = new RandomIndexWriter(new Random(seed), dir, iwc);
  81. iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
  82. for (int i = 0; i < numDocs; ++i) {
  83. final Document doc = randomDocument();
  84. iw.addDocument(doc);
  85. if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
  86. iw.commit();
  87. }
  88. if (random().nextInt(15) == 0) {
  89. final String term = RandomPicks.randomFrom(random(), terms);
  90. iw.deleteDocuments(new Term("s", term));
  91. }
  92. }
  93. if (singleSortedSegment) {
  94. // because of deletions, there might still be a single flush segment in
  95. // the index, although want want a sorted segment so it needs to be merged
  96. iw.getReader().close(); // refresh
  97. iw.addDocument(new Document());
  98. iw.commit();
  99. iw.addDocument(new Document());
  100. iw.forceMerge(1);
  101. }
  102. else if (random().nextBoolean()) {
  103. iw.forceMerge(forceMergeMaxSegmentCount);
  104. }
  105. reader = iw.getReader();
  106. }
  107. private void closeIndex() throws IOException {
  108. reader.close();
  109. iw.close();
  110. dir.close();
  111. }
  112. public void testEarlyTermination() throws IOException {
  113. final int iters = atLeast(8);
  114. for (int i = 0; i < iters; ++i) {
  115. createRandomIndex(false);
  116. for (int j = 0; j < iters; ++j) {
  117. final IndexSearcher searcher = newSearcher(reader);
  118. final int numHits = TestUtil.nextInt(random(), 1, numDocs);
  119. final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false));
  120. final boolean fillFields = random().nextBoolean();
  121. final boolean trackDocScores = random().nextBoolean();
  122. final boolean trackMaxScore = random().nextBoolean();
  123. final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
  124. final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
  125. final Query query;
  126. if (random().nextBoolean()) {
  127. query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms)));
  128. } else {
  129. query = new MatchAllDocsQuery();
  130. }
  131. searcher.search(query, collector1);
  132. searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
  133. assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
  134. assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
  135. }
  136. closeIndex();
  137. }
  138. }
  139. public void testCanEarlyTerminate() {
  140. assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
  141. new Sort(new SortField("a", SortField.Type.LONG)),
  142. new Sort(new SortField("a", SortField.Type.LONG))));
  143. assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
  144. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
  145. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
  146. assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
  147. new Sort(new SortField("a", SortField.Type.LONG)),
  148. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
  149. assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
  150. new Sort(new SortField("a", SortField.Type.LONG, true)),
  151. new Sort(new SortField("a", SortField.Type.LONG, false))));
  152. assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
  153. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
  154. new Sort(new SortField("a", SortField.Type.LONG))));
  155. assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
  156. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
  157. new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
  158. assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
  159. new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
  160. new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
  161. }
  162. public void testEarlyTerminationDifferentSorter() throws IOException {
  163. createRandomIndex(true);
  164. Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
  165. Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10);
  166. IndexSearcher searcher = newSearcher(reader);
  167. Exception e = expectThrows(IllegalStateException.class,
  168. () -> {
  169. searcher.search(new MatchAllDocsQuery(), c);
  170. });
  171. assertEquals("Cannot early terminate with sort order <long: \"ndv2\"> if segments are sorted with <long: \"ndv1\">", e.getMessage());
  172. closeIndex();
  173. }
  174. private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) {
  175. assertEquals(scoreDocs1.length, scoreDocs2.length);
  176. for (int i = 0; i < scoreDocs1.length; ++i) {
  177. final ScoreDoc scoreDoc1 = scoreDocs1[i];
  178. final ScoreDoc scoreDoc2 = scoreDocs2[i];
  179. assertEquals(scoreDoc1.doc, scoreDoc2.doc);
  180. assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f);
  181. }
  182. }
  183. private class TestTerminatedEarlySimpleCollector extends SimpleCollector {
  184. private boolean collectedSomething;
  185. public boolean collectedSomething() {
  186. return collectedSomething;
  187. }
  188. @Override
  189. public void collect(int doc) throws IOException {
  190. collectedSomething = true;
  191. }
  192. @Override
  193. public boolean needsScores() {
  194. return false;
  195. }
  196. }
  197. private class TestEarlyTerminatingSortingcollectorQueryTimeout implements QueryTimeout {
  198. final private boolean shouldExit;
  199. public TestEarlyTerminatingSortingcollectorQueryTimeout(boolean shouldExit) {
  200. this.shouldExit = shouldExit;
  201. }
  202. public boolean shouldExit() {
  203. return shouldExit;
  204. }
  205. }
  206. public void testTerminatedEarly() throws IOException {
  207. final int iters = atLeast(8);
  208. for (int i = 0; i < iters; ++i) {
  209. createRandomIndex(true);
  210. final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader);
  211. final Query query = new MatchAllDocsQuery(); // search for everything/anything
  212. final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector();
  213. searcher.search(query, collector1);
  214. final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector();
  215. final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1);
  216. searcher.search(query, etsCollector);
  217. assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething());
  218. if (collector1.collectedSomething()) {
  219. // we collected something and since we modestly asked for just one document we should have terminated early
  220. assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly());
  221. }
  222. closeIndex();
  223. }
  224. }
  225. }