/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java
Java | 331 lines | 263 code | 39 blank | 29 comment | 62 complexity | 9259403dedfe370a2fa5b2aba972c8cc MD5 | raw file
Possible License(s): LGPL-2.1, CPL-1.0, MPL-2.0-no-copyleft-exception, JSON, Apache-2.0, AGPL-1.0, GPL-2.0, GPL-3.0, MIT, BSD-3-Clause
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.solr.uninverting;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Objects;
- import java.util.Random;
- import java.util.Set;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.IntPoint;
- import org.apache.lucene.document.StoredField;
- import org.apache.lucene.document.StringField;
- import org.apache.lucene.index.DocValues;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.LeafReaderContext;
- import org.apache.lucene.index.NumericDocValues;
- import org.apache.lucene.index.RandomIndexWriter;
- import org.apache.lucene.search.ConstantScoreQuery;
- import org.apache.lucene.search.ConstantScoreScorer;
- import org.apache.lucene.search.ConstantScoreWeight;
- import org.apache.lucene.search.FieldDoc;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.QueryVisitor;
- import org.apache.lucene.search.ScoreMode;
- import org.apache.lucene.search.Scorer;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.search.Weight;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.util.BitSetIterator;
- import org.apache.lucene.util.BytesRef;
- import org.apache.lucene.util.FixedBitSet;
- import org.apache.lucene.util.TestUtil;
- import org.apache.solr.SolrTestCase;
- import org.apache.solr.uninverting.UninvertingReader.Type;
- /** random sorting tests with uninversion */
- public class TestFieldCacheSortRandom extends SolrTestCase {
- public void testRandomStringSort() throws Exception {
- testRandomStringSort(SortField.Type.STRING);
- }
- public void testRandomStringValSort() throws Exception {
- testRandomStringSort(SortField.Type.STRING_VAL);
- }
- private void testRandomStringSort(SortField.Type type) throws Exception {
- Random random = new Random(random().nextLong());
- final int NUM_DOCS = atLeast(100);
- final Directory dir = newDirectory();
- final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
- final boolean allowDups = random.nextBoolean();
- final Set<String> seen = new HashSet<>();
- final int maxLength = TestUtil.nextInt(random, 5, 100);
- if (VERBOSE) {
- System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
- }
- int numDocs = 0;
- final List<BytesRef> docValues = new ArrayList<>();
- // TODO: deletions
- while (numDocs < NUM_DOCS) {
- final Document doc = new Document();
- // 10% of the time, the document is missing the value:
- final BytesRef br;
- if (random().nextInt(10) != 7) {
- final String s;
- if (random.nextBoolean()) {
- s = TestUtil.randomSimpleString(random, maxLength);
- } else {
- s = TestUtil.randomUnicodeString(random, maxLength);
- }
- if (!allowDups) {
- if (seen.contains(s)) {
- continue;
- }
- seen.add(s);
- }
- if (VERBOSE) {
- System.out.println(" " + numDocs + ": s=" + s);
- }
- doc.add(new StringField("stringdv", s, Field.Store.NO));
- docValues.add(new BytesRef(s));
- } else {
- br = null;
- if (VERBOSE) {
- System.out.println(" " + numDocs + ": <missing>");
- }
- docValues.add(null);
- }
- doc.add(new IntPoint("id", numDocs));
- doc.add(new StoredField("id", numDocs));
- writer.addDocument(doc);
- numDocs++;
- if (random.nextInt(40) == 17) {
- // force flush
- writer.getReader().close();
- }
- }
- Map<String,UninvertingReader.Type> mapping = new HashMap<>();
- mapping.put("stringdv", Type.SORTED);
- mapping.put("id", Type.INTEGER_POINT);
- final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
- writer.close();
- if (VERBOSE) {
- System.out.println(" reader=" + r);
- }
-
- final IndexSearcher s = newSearcher(r, false);
- final int ITERS = atLeast(100);
- for(int iter=0;iter<ITERS;iter++) {
- final boolean reverse = random.nextBoolean();
- final TopFieldDocs hits;
- final SortField sf;
- final boolean sortMissingLast;
- final boolean missingIsNull;
- sf = new SortField("stringdv", type, reverse);
- sortMissingLast = random().nextBoolean();
- missingIsNull = true;
- if (sortMissingLast) {
- sf.setMissingValue(SortField.STRING_LAST);
- }
-
- final Sort sort;
- if (random.nextBoolean()) {
- sort = new Sort(sf);
- } else {
- sort = new Sort(sf, SortField.FIELD_DOC);
- }
- final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
- final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
- int queryType = random.nextInt(2);
- if (queryType == 0) {
- hits = s.search(new ConstantScoreQuery(f),
- hitCount, sort, false);
- } else {
- hits = s.search(f, hitCount, sort, false);
- }
- if (VERBOSE) {
- System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " ; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
- }
- // Compute expected results:
- Collections.sort(f.matchValues, new Comparator<BytesRef>() {
- @Override
- public int compare(BytesRef a, BytesRef b) {
- if (a == null) {
- if (b == null) {
- return 0;
- }
- if (sortMissingLast) {
- return 1;
- } else {
- return -1;
- }
- } else if (b == null) {
- if (sortMissingLast) {
- return -1;
- } else {
- return 1;
- }
- } else {
- return a.compareTo(b);
- }
- }
- });
- if (reverse) {
- Collections.reverse(f.matchValues);
- }
- final List<BytesRef> expected = f.matchValues;
- if (VERBOSE) {
- System.out.println(" expected:");
- for(int idx=0;idx<expected.size();idx++) {
- BytesRef br = expected.get(idx);
- if (br == null && missingIsNull == false) {
- br = new BytesRef();
- }
- System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
- if (idx == hitCount-1) {
- break;
- }
- }
- }
-
- if (VERBOSE) {
- System.out.println(" actual:");
- for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
- final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
- BytesRef br = (BytesRef) fd.fields[0];
- System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
- }
- }
- for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
- final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
- BytesRef br = expected.get(hitIDX);
- if (br == null && missingIsNull == false) {
- br = new BytesRef();
- }
- // Normally, the old codecs (that don't support
- // docsWithField via doc values) will always return
- // an empty BytesRef for the missing case; however,
- // if all docs in a given segment were missing, in
- // that case it will return null! So we must map
- // null here, too:
- BytesRef br2 = (BytesRef) fd.fields[0];
- if (br2 == null && missingIsNull == false) {
- br2 = new BytesRef();
- }
-
- assertEquals(br, br2);
- }
- }
- r.close();
- dir.close();
- }
-
- private static class RandomQuery extends Query {
- private final long seed;
- private float density;
- private final List<BytesRef> docValues;
- public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
- // density should be 0.0 ... 1.0
- public RandomQuery(long seed, float density, List<BytesRef> docValues) {
- this.seed = seed;
- this.density = density;
- this.docValues = docValues;
- }
- @Override
- public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
- return new ConstantScoreWeight(this, boost) {
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- Random random = new Random(seed ^ context.docBase);
- final int maxDoc = context.reader().maxDoc();
- final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
- assertNotNull(idSource);
- final FixedBitSet bits = new FixedBitSet(maxDoc);
- for(int docID=0;docID<maxDoc;docID++) {
- if (random.nextFloat() <= density) {
- bits.set(docID);
- //System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
- assertEquals(docID, idSource.advance(docID));
- matchValues.add(docValues.get((int) idSource.longValue()));
- }
- }
- return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bits, bits.approximateCardinality()));
- }
- @Override
- public boolean isCacheable(LeafReaderContext ctx) {
- return true;
- }
- };
- }
- @Override
- public void visit(QueryVisitor visitor) {
- }
- @Override
- public String toString(String field) {
- return "RandomFilter(density=" + density + ")";
- }
- @Override
- public boolean equals(Object other) {
- return sameClassAs(other) &&
- equalsTo(getClass().cast(other));
- }
- private boolean equalsTo(RandomQuery other) {
- return seed == other.seed &&
- docValues == other.docValues &&
- density == other.density;
- }
- @Override
- public int hashCode() {
- int h = classHash();
- h = 31 * h + Objects.hash(seed, density);
- h = 31 * h + System.identityHashCode(docValues);
- return h;
- }
- }
- }