/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java
http://github.com/apache/lucene-solr · Java · 142 lines · 100 code · 20 blank · 22 comment · 4 complexity · debc0d3b1e05b7c19e5fe0aa7c30c0b5 MD5 · raw file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.lucene.search;
- import java.text.DecimalFormat;
- import java.text.DecimalFormatSymbols;
- import java.text.NumberFormat;
- import java.util.Locale;
- import org.apache.lucene.analysis.MockAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FieldType;
- import org.apache.lucene.document.TextField;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.RandomIndexWriter;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.util.LuceneTestCase;
- import org.apache.lucene.util.TestUtil;
- /**
- * Create an index with terms from 000-999.
- * Generates random regexps according to simple patterns,
- * and validates the correct number of hits are returned.
- */
- public class TestRegexpRandom extends LuceneTestCase {
- private IndexSearcher searcher;
- private IndexReader reader;
- private Directory dir;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- dir = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
- newIndexWriterConfig(new MockAnalyzer(random()))
- .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
-
- Document doc = new Document();
- FieldType customType = new FieldType(TextField.TYPE_STORED);
- customType.setOmitNorms(true);
- Field field = newField("field", "", customType);
- doc.add(field);
-
- NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
- for (int i = 0; i < 1000; i++) {
- field.setStringValue(df.format(i));
- writer.addDocument(doc);
- }
-
- reader = writer.getReader();
- writer.close();
- searcher = newSearcher(reader);
- }
-
- private char N() {
- return (char) (0x30 + random().nextInt(10));
- }
-
- private String fillPattern(String wildcardPattern) {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < wildcardPattern.length(); i++) {
- switch(wildcardPattern.charAt(i)) {
- case 'N':
- sb.append(N());
- break;
- default:
- sb.append(wildcardPattern.charAt(i));
- }
- }
- return sb.toString();
- }
-
- private void assertPatternHits(String pattern, int numHits) throws Exception {
- Query wq = new RegexpQuery(new Term("field", fillPattern(pattern)));
- TopDocs docs = searcher.search(wq, 25);
- assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits.value);
- }
- @Override
- public void tearDown() throws Exception {
- reader.close();
- dir.close();
- super.tearDown();
- }
-
- public void testRegexps() throws Exception {
- int num = atLeast(1);
- for (int i = 0; i < num; i++) {
- assertPatternHits("NNN", 1);
- assertPatternHits(".NN", 10);
- assertPatternHits("N.N", 10);
- assertPatternHits("NN.", 10);
- }
-
- for (int i = 0; i < num; i++) {
- assertPatternHits(".{1,2}N", 100);
- assertPatternHits("N.{1,2}", 100);
- assertPatternHits(".{1,3}", 1000);
-
- assertPatternHits("NN[3-7]", 5);
- assertPatternHits("N[2-6][3-7]", 25);
- assertPatternHits("[1-5][2-6][3-7]", 125);
- assertPatternHits("[0-4][3-7][4-8]", 125);
- assertPatternHits("[2-6][0-4]N", 25);
- assertPatternHits("[2-6]NN", 5);
-
- assertPatternHits("NN.*", 10);
- assertPatternHits("N.*", 100);
- assertPatternHits(".*", 1000);
-
- assertPatternHits(".*NN", 10);
- assertPatternHits(".*N", 100);
-
- assertPatternHits("N.*N", 10);
-
- // combo of ? and * operators
- assertPatternHits(".N.*", 100);
- assertPatternHits("N..*", 100);
-
- assertPatternHits(".*N.", 100);
- assertPatternHits(".*..", 1000);
- assertPatternHits(".*.N", 100);
- }
- }
- }