PageRenderTime 27ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java

http://github.com/apache/lucene-solr
Java | 331 lines | 263 code | 39 blank | 29 comment | 62 complexity | 9259403dedfe370a2fa5b2aba972c8cc MD5 | raw file
Possible License(s): LGPL-2.1, CPL-1.0, MPL-2.0-no-copyleft-exception, JSON, Apache-2.0, AGPL-1.0, GPL-2.0, GPL-3.0, MIT, BSD-3-Clause
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.solr.uninverting;
  18. import java.io.IOException;
  19. import java.util.ArrayList;
  20. import java.util.Collections;
  21. import java.util.Comparator;
  22. import java.util.HashMap;
  23. import java.util.HashSet;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.Objects;
  27. import java.util.Random;
  28. import java.util.Set;
  29. import org.apache.lucene.document.Document;
  30. import org.apache.lucene.document.Field;
  31. import org.apache.lucene.document.IntPoint;
  32. import org.apache.lucene.document.StoredField;
  33. import org.apache.lucene.document.StringField;
  34. import org.apache.lucene.index.DocValues;
  35. import org.apache.lucene.index.IndexReader;
  36. import org.apache.lucene.index.LeafReaderContext;
  37. import org.apache.lucene.index.NumericDocValues;
  38. import org.apache.lucene.index.RandomIndexWriter;
  39. import org.apache.lucene.search.ConstantScoreQuery;
  40. import org.apache.lucene.search.ConstantScoreScorer;
  41. import org.apache.lucene.search.ConstantScoreWeight;
  42. import org.apache.lucene.search.FieldDoc;
  43. import org.apache.lucene.search.IndexSearcher;
  44. import org.apache.lucene.search.Query;
  45. import org.apache.lucene.search.QueryVisitor;
  46. import org.apache.lucene.search.ScoreMode;
  47. import org.apache.lucene.search.Scorer;
  48. import org.apache.lucene.search.Sort;
  49. import org.apache.lucene.search.SortField;
  50. import org.apache.lucene.search.TopFieldDocs;
  51. import org.apache.lucene.search.Weight;
  52. import org.apache.lucene.store.Directory;
  53. import org.apache.lucene.util.BitSetIterator;
  54. import org.apache.lucene.util.BytesRef;
  55. import org.apache.lucene.util.FixedBitSet;
  56. import org.apache.lucene.util.TestUtil;
  57. import org.apache.solr.SolrTestCase;
  58. import org.apache.solr.uninverting.UninvertingReader.Type;
  59. /** random sorting tests with uninversion */
  60. public class TestFieldCacheSortRandom extends SolrTestCase {
  61. public void testRandomStringSort() throws Exception {
  62. testRandomStringSort(SortField.Type.STRING);
  63. }
  64. public void testRandomStringValSort() throws Exception {
  65. testRandomStringSort(SortField.Type.STRING_VAL);
  66. }
  67. private void testRandomStringSort(SortField.Type type) throws Exception {
  68. Random random = new Random(random().nextLong());
  69. final int NUM_DOCS = atLeast(100);
  70. final Directory dir = newDirectory();
  71. final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  72. final boolean allowDups = random.nextBoolean();
  73. final Set<String> seen = new HashSet<>();
  74. final int maxLength = TestUtil.nextInt(random, 5, 100);
  75. if (VERBOSE) {
  76. System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
  77. }
  78. int numDocs = 0;
  79. final List<BytesRef> docValues = new ArrayList<>();
  80. // TODO: deletions
  81. while (numDocs < NUM_DOCS) {
  82. final Document doc = new Document();
  83. // 10% of the time, the document is missing the value:
  84. final BytesRef br;
  85. if (random().nextInt(10) != 7) {
  86. final String s;
  87. if (random.nextBoolean()) {
  88. s = TestUtil.randomSimpleString(random, maxLength);
  89. } else {
  90. s = TestUtil.randomUnicodeString(random, maxLength);
  91. }
  92. if (!allowDups) {
  93. if (seen.contains(s)) {
  94. continue;
  95. }
  96. seen.add(s);
  97. }
  98. if (VERBOSE) {
  99. System.out.println(" " + numDocs + ": s=" + s);
  100. }
  101. doc.add(new StringField("stringdv", s, Field.Store.NO));
  102. docValues.add(new BytesRef(s));
  103. } else {
  104. br = null;
  105. if (VERBOSE) {
  106. System.out.println(" " + numDocs + ": <missing>");
  107. }
  108. docValues.add(null);
  109. }
  110. doc.add(new IntPoint("id", numDocs));
  111. doc.add(new StoredField("id", numDocs));
  112. writer.addDocument(doc);
  113. numDocs++;
  114. if (random.nextInt(40) == 17) {
  115. // force flush
  116. writer.getReader().close();
  117. }
  118. }
  119. Map<String,UninvertingReader.Type> mapping = new HashMap<>();
  120. mapping.put("stringdv", Type.SORTED);
  121. mapping.put("id", Type.INTEGER_POINT);
  122. final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
  123. writer.close();
  124. if (VERBOSE) {
  125. System.out.println(" reader=" + r);
  126. }
  127. final IndexSearcher s = newSearcher(r, false);
  128. final int ITERS = atLeast(100);
  129. for(int iter=0;iter<ITERS;iter++) {
  130. final boolean reverse = random.nextBoolean();
  131. final TopFieldDocs hits;
  132. final SortField sf;
  133. final boolean sortMissingLast;
  134. final boolean missingIsNull;
  135. sf = new SortField("stringdv", type, reverse);
  136. sortMissingLast = random().nextBoolean();
  137. missingIsNull = true;
  138. if (sortMissingLast) {
  139. sf.setMissingValue(SortField.STRING_LAST);
  140. }
  141. final Sort sort;
  142. if (random.nextBoolean()) {
  143. sort = new Sort(sf);
  144. } else {
  145. sort = new Sort(sf, SortField.FIELD_DOC);
  146. }
  147. final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
  148. final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
  149. int queryType = random.nextInt(2);
  150. if (queryType == 0) {
  151. hits = s.search(new ConstantScoreQuery(f),
  152. hitCount, sort, false);
  153. } else {
  154. hits = s.search(f, hitCount, sort, false);
  155. }
  156. if (VERBOSE) {
  157. System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " ; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
  158. }
  159. // Compute expected results:
  160. Collections.sort(f.matchValues, new Comparator<BytesRef>() {
  161. @Override
  162. public int compare(BytesRef a, BytesRef b) {
  163. if (a == null) {
  164. if (b == null) {
  165. return 0;
  166. }
  167. if (sortMissingLast) {
  168. return 1;
  169. } else {
  170. return -1;
  171. }
  172. } else if (b == null) {
  173. if (sortMissingLast) {
  174. return -1;
  175. } else {
  176. return 1;
  177. }
  178. } else {
  179. return a.compareTo(b);
  180. }
  181. }
  182. });
  183. if (reverse) {
  184. Collections.reverse(f.matchValues);
  185. }
  186. final List<BytesRef> expected = f.matchValues;
  187. if (VERBOSE) {
  188. System.out.println(" expected:");
  189. for(int idx=0;idx<expected.size();idx++) {
  190. BytesRef br = expected.get(idx);
  191. if (br == null && missingIsNull == false) {
  192. br = new BytesRef();
  193. }
  194. System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
  195. if (idx == hitCount-1) {
  196. break;
  197. }
  198. }
  199. }
  200. if (VERBOSE) {
  201. System.out.println(" actual:");
  202. for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
  203. final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
  204. BytesRef br = (BytesRef) fd.fields[0];
  205. System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
  206. }
  207. }
  208. for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
  209. final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
  210. BytesRef br = expected.get(hitIDX);
  211. if (br == null && missingIsNull == false) {
  212. br = new BytesRef();
  213. }
  214. // Normally, the old codecs (that don't support
  215. // docsWithField via doc values) will always return
  216. // an empty BytesRef for the missing case; however,
  217. // if all docs in a given segment were missing, in
  218. // that case it will return null! So we must map
  219. // null here, too:
  220. BytesRef br2 = (BytesRef) fd.fields[0];
  221. if (br2 == null && missingIsNull == false) {
  222. br2 = new BytesRef();
  223. }
  224. assertEquals(br, br2);
  225. }
  226. }
  227. r.close();
  228. dir.close();
  229. }
  230. private static class RandomQuery extends Query {
  231. private final long seed;
  232. private float density;
  233. private final List<BytesRef> docValues;
  234. public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
  235. // density should be 0.0 ... 1.0
  236. public RandomQuery(long seed, float density, List<BytesRef> docValues) {
  237. this.seed = seed;
  238. this.density = density;
  239. this.docValues = docValues;
  240. }
  241. @Override
  242. public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  243. return new ConstantScoreWeight(this, boost) {
  244. @Override
  245. public Scorer scorer(LeafReaderContext context) throws IOException {
  246. Random random = new Random(seed ^ context.docBase);
  247. final int maxDoc = context.reader().maxDoc();
  248. final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
  249. assertNotNull(idSource);
  250. final FixedBitSet bits = new FixedBitSet(maxDoc);
  251. for(int docID=0;docID<maxDoc;docID++) {
  252. if (random.nextFloat() <= density) {
  253. bits.set(docID);
  254. //System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
  255. assertEquals(docID, idSource.advance(docID));
  256. matchValues.add(docValues.get((int) idSource.longValue()));
  257. }
  258. }
  259. return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bits, bits.approximateCardinality()));
  260. }
  261. @Override
  262. public boolean isCacheable(LeafReaderContext ctx) {
  263. return true;
  264. }
  265. };
  266. }
  267. @Override
  268. public void visit(QueryVisitor visitor) {
  269. }
  270. @Override
  271. public String toString(String field) {
  272. return "RandomFilter(density=" + density + ")";
  273. }
  274. @Override
  275. public boolean equals(Object other) {
  276. return sameClassAs(other) &&
  277. equalsTo(getClass().cast(other));
  278. }
  279. private boolean equalsTo(RandomQuery other) {
  280. return seed == other.seed &&
  281. docValues == other.docValues &&
  282. density == other.density;
  283. }
  284. @Override
  285. public int hashCode() {
  286. int h = classHash();
  287. h = 31 * h + Objects.hash(seed, density);
  288. h = 31 * h + System.identityHashCode(docValues);
  289. return h;
  290. }
  291. }
  292. }