/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java

http://github.com/apache/lucene-solr · Java · 142 lines · 100 code · 20 blank · 22 comment · 4 complexity · debc0d3b1e05b7c19e5fe0aa7c30c0b5 MD5 · raw file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.lucene.search;
  18. import java.text.DecimalFormat;
  19. import java.text.DecimalFormatSymbols;
  20. import java.text.NumberFormat;
  21. import java.util.Locale;
  22. import org.apache.lucene.analysis.MockAnalyzer;
  23. import org.apache.lucene.document.Document;
  24. import org.apache.lucene.document.Field;
  25. import org.apache.lucene.document.FieldType;
  26. import org.apache.lucene.document.TextField;
  27. import org.apache.lucene.index.IndexReader;
  28. import org.apache.lucene.index.RandomIndexWriter;
  29. import org.apache.lucene.index.Term;
  30. import org.apache.lucene.store.Directory;
  31. import org.apache.lucene.util.LuceneTestCase;
  32. import org.apache.lucene.util.TestUtil;
  33. /**
  34. * Create an index with terms from 000-999.
  35. * Generates random regexps according to simple patterns,
  36. * and validates the correct number of hits are returned.
  37. */
  38. public class TestRegexpRandom extends LuceneTestCase {
  39. private IndexSearcher searcher;
  40. private IndexReader reader;
  41. private Directory dir;
  42. @Override
  43. public void setUp() throws Exception {
  44. super.setUp();
  45. dir = newDirectory();
  46. RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
  47. newIndexWriterConfig(new MockAnalyzer(random()))
  48. .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
  49. Document doc = new Document();
  50. FieldType customType = new FieldType(TextField.TYPE_STORED);
  51. customType.setOmitNorms(true);
  52. Field field = newField("field", "", customType);
  53. doc.add(field);
  54. NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
  55. for (int i = 0; i < 1000; i++) {
  56. field.setStringValue(df.format(i));
  57. writer.addDocument(doc);
  58. }
  59. reader = writer.getReader();
  60. writer.close();
  61. searcher = newSearcher(reader);
  62. }
  63. private char N() {
  64. return (char) (0x30 + random().nextInt(10));
  65. }
  66. private String fillPattern(String wildcardPattern) {
  67. StringBuilder sb = new StringBuilder();
  68. for (int i = 0; i < wildcardPattern.length(); i++) {
  69. switch(wildcardPattern.charAt(i)) {
  70. case 'N':
  71. sb.append(N());
  72. break;
  73. default:
  74. sb.append(wildcardPattern.charAt(i));
  75. }
  76. }
  77. return sb.toString();
  78. }
  79. private void assertPatternHits(String pattern, int numHits) throws Exception {
  80. Query wq = new RegexpQuery(new Term("field", fillPattern(pattern)));
  81. TopDocs docs = searcher.search(wq, 25);
  82. assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits.value);
  83. }
  84. @Override
  85. public void tearDown() throws Exception {
  86. reader.close();
  87. dir.close();
  88. super.tearDown();
  89. }
  90. public void testRegexps() throws Exception {
  91. int num = atLeast(1);
  92. for (int i = 0; i < num; i++) {
  93. assertPatternHits("NNN", 1);
  94. assertPatternHits(".NN", 10);
  95. assertPatternHits("N.N", 10);
  96. assertPatternHits("NN.", 10);
  97. }
  98. for (int i = 0; i < num; i++) {
  99. assertPatternHits(".{1,2}N", 100);
  100. assertPatternHits("N.{1,2}", 100);
  101. assertPatternHits(".{1,3}", 1000);
  102. assertPatternHits("NN[3-7]", 5);
  103. assertPatternHits("N[2-6][3-7]", 25);
  104. assertPatternHits("[1-5][2-6][3-7]", 125);
  105. assertPatternHits("[0-4][3-7][4-8]", 125);
  106. assertPatternHits("[2-6][0-4]N", 25);
  107. assertPatternHits("[2-6]NN", 5);
  108. assertPatternHits("NN.*", 10);
  109. assertPatternHits("N.*", 100);
  110. assertPatternHits(".*", 1000);
  111. assertPatternHits(".*NN", 10);
  112. assertPatternHits(".*N", 100);
  113. assertPatternHits("N.*N", 10);
  114. // combo of ? and * operators
  115. assertPatternHits(".N.*", 100);
  116. assertPatternHits("N..*", 100);
  117. assertPatternHits(".*N.", 100);
  118. assertPatternHits(".*..", 1000);
  119. assertPatternHits(".*.N", 100);
  120. }
  121. }
  122. }