PageRenderTime 133ms CodeModel.GetById 23ms app.highlight 85ms RepoModel.GetById 1ms app.codeStats 4ms

/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

http://github.com/apache/hive
Java | 5602 lines | 4473 code | 771 blank | 358 comment | 886 complexity | 0df930dff1787e4b360a9aa7e904337d MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1/*
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.exec.vector.expressions;
 20
 21import static org.junit.Assert.assertEquals;
 22
 23import java.nio.charset.StandardCharsets;
 24import java.util.Arrays;
 25import java.util.Random;
 26import java.util.StringTokenizer;
 27
 28import org.apache.hadoop.hive.conf.HiveConf;
 29import org.junit.Assert;
 30
 31import org.apache.commons.codec.binary.Hex;
 32import org.apache.hadoop.hive.common.type.HiveChar;
 33import org.apache.hadoop.hive.common.type.HiveVarchar;
 34import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 35import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 36import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 37import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharScalarEqualStringGroupColumn;
 38import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarEqualStringGroupColumn;
 39import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarGreaterStringGroupColumn;
 40import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarLessEqualStringGroupColumn;
 41import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualCharScalar;
 42import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualStringScalar;
 43import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar;
 44import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualCharScalar;
 45import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualStringScalar;
 46import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar;
 47import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessCharScalar;
 48import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringGroupColumn;
 49import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringScalar;
 50import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar;
 51import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringGroupColumn;
 52import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringGroupColumn;
 53import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringGroupColumn;
 54import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarEqualStringGroupColumn;
 55import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarGreaterStringGroupColumn;
 56import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarLessEqualStringGroupColumn;
 57import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualCharScalar;
 58import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringScalar;
 59import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar;
 60import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColLessStringGroupColumn;
 61import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringGroupColumn;
 62import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharScalarEqualStringGroupColumn;
 63import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
 64import org.apache.hadoop.hive.ql.metadata.HiveException;
 65import org.apache.hadoop.hive.ql.udf.UDFLike;
 66import org.apache.hadoop.io.BooleanWritable;
 67import org.apache.hadoop.io.Text;
 68import org.junit.Test;
 69import org.slf4j.Logger;
 70import org.slf4j.LoggerFactory;
 71
 72/**
 73 * Test vectorized expression and filter evaluation for strings.
 74 */
 75public class TestVectorStringExpressions {
 76
 77  private static final Logger LOG = LoggerFactory
 78      .getLogger(TestVectorStringExpressions.class);
 79
 80  private static byte[] red;
 81  private static byte[] redred;
 82  private static byte[] red2; // second copy of red, different object
 83  private static byte[] green;
 84  private static byte[] greenred;
 85  private static byte[] redgreen;
 86  private static byte[] greengreen;
 87  private static byte[] blue;
 88  private static byte[] emptyString;
 89  private static byte[] mixedUp;
 90  private static byte[] mixedUpLower;
 91  private static byte[] mixedUpUpper;
 92  private static byte[] multiByte;
 93  private static byte[] mixPercentPattern;
 94  private static byte[] blanksLeft;
 95  private static byte[] blanksRight;
 96  private static byte[] blanksBoth;
 97  private static byte[] blankString;
 98  private static byte[] blankRanges;
 99  private static byte[] ascii_sentence;
100
101  static {
102    blue = "blue".getBytes(StandardCharsets.UTF_8);
103    red = "red".getBytes(StandardCharsets.UTF_8);
104    redred = "redred".getBytes(StandardCharsets.UTF_8);
105    green = "green".getBytes(StandardCharsets.UTF_8);
106    greenred = "greenred".getBytes(StandardCharsets.UTF_8);
107    redgreen = "redgreen".getBytes(StandardCharsets.UTF_8);
108    greengreen = "greengreen".getBytes(StandardCharsets.UTF_8);
109    emptyString = "".getBytes(StandardCharsets.UTF_8);
110    mixedUp = "mixedUp".getBytes(StandardCharsets.UTF_8);
111    mixedUpLower = "mixedup".getBytes(StandardCharsets.UTF_8);
112    mixedUpUpper = "MIXEDUP".getBytes(StandardCharsets.UTF_8);
113
114    // for use as wildcard pattern to test LIKE
115    mixPercentPattern = "mix%".getBytes(StandardCharsets.UTF_8); 
116
117    multiByte = new byte[10];
118    addMultiByteChars(multiByte);
119    blanksLeft = "  foo".getBytes(StandardCharsets.UTF_8);
120    blanksRight = "foo  ".getBytes(StandardCharsets.UTF_8);
121    blanksBoth = "  foo  ".getBytes(StandardCharsets.UTF_8);
122    blankString = "  ".getBytes(StandardCharsets.UTF_8);
123    blankRanges =
124        "   more  than a    bargain    ".getBytes(StandardCharsets.UTF_8);
125    // 012345678901234567890123456789
126    ascii_sentence =
127        "The fox trotted over the fence.".getBytes(StandardCharsets.UTF_8);
128    // 0123456789012345678901234567890
129    red2 = new byte[red.length];
130    System.arraycopy(red, 0, red2, 0, red.length);
131  }
132
133  // add some multi-byte characters to test length routine later.
134  // total characters = 4; byte length = 10
135  static void addMultiByteChars(byte[] b) {
136    int i = 0;
137    b[i++] = (byte) 0x41; // letter "A" (1 byte)
138    b[i++] = (byte) 0xC3; // Latin capital A with grave (2 bytes)
139    b[i++] = (byte) 0x80;
140    b[i++] = (byte) 0xE2; // Euro sign (3 bytes)
141    b[i++] = (byte) 0x82;
142    b[i++] = (byte) 0xAC;
143    b[i++] = (byte) 0xF0; // Asian character U+24B62 (4 bytes)
144    b[i++] = (byte) 0xA4;
145    b[i++] = (byte) 0xAD;
146    b[i++] = (byte) 0xA2;
147  }
148
149  //-------------------------------------------------------------
150  
151  // total characters = 2; byte length = 3
152  static void addMultiByteCharLeftPadded1_1(byte[] b) {
153    int i = 0;
154    b[i++] = (byte) 0x20; // blank " " (1 byte)
155    b[i++] = (byte) 0xD0; // Cyrillic Capital DJE U+402 (2 bytes)
156    b[i++] = (byte) 0x82;
157  }
158
159  // total characters = 3; byte length = 9
160  static void addMultiByteCharLeftPadded1_2(byte[] b) {
161    int i = 0;
162    b[i++] = (byte) 0x20; // blank " " (1 byte)
163    b[i++] = (byte) 0xF0; // Smiling Face with Open Mouth and Smiling Eyes U+1F604 (4 bytes)
164    b[i++] = (byte) 0x9F;
165    b[i++] = (byte) 0x98;
166    b[i++] = (byte) 0x84;
167    b[i++] = (byte) 0xF0; // Grimacing Face U+1F62C (4 bytes)
168    b[i++] = (byte) 0x9F;
169    b[i++] = (byte) 0x98;
170    b[i++] = (byte) 0xAC;
171  }
172
173  // total characters = 4; byte length = 6
174  static void addMultiByteCharLeftPadded3_1(byte[] b) {
175    int i = 0;
176    b[i++] = (byte) 0x20; // blank " " (1 byte)
177    b[i++] = (byte) 0x20; // blank " " (1 byte)
178    b[i++] = (byte) 0x20; // blank " " (1 byte)
179    b[i++] = (byte) 0xE4; // Asian character U+4824 (3 bytes)
180    b[i++] = (byte) 0xA0;
181    b[i++] = (byte) 0xA4;
182  }
183
184  //-------------------------------------------------------------
185  
186  // total characters = 2; byte length = 4
187  static void addMultiByteCharRightPadded1_1(byte[] b) {
188    int i = 0;
189    b[i++] = (byte) 0xE0; // Tamil Om U+0BD0 (3 bytes)
190    b[i++] = (byte) 0xAF;
191    b[i++] = (byte) 0x90;
192    b[i++] = (byte) 0x20; // blank " " (1 byte)
193  }
194
195  // total characters = 3; byte length = 5
196  static void addMultiByteCharRightPadded1_2(byte[] b) {
197    int i = 0;
198    b[i++] = (byte) 0xEA; // Va Syllable MEE U+A521 (3 bytes)
199    b[i++] = (byte) 0x94;
200    b[i++] = (byte) 0xA1;
201    b[i++] = (byte) 0x5A; // Latin Capital Letter Z U+005A (1 bytes)
202    b[i++] = (byte) 0x20; // blank " " (1 byte)
203  }
204
205  // total characters = 4; byte length = 9
206  static void addMultiByteCharRightPadded1_3(byte[] b) {
207    int i = 0;
208    b[i++] = (byte) 0xCC; // COMBINING ACUTE ACENT U+0301 (2 bytes)
209    b[i++] = (byte) 0x81;
210    b[i++] = (byte) 0xE0; // DEVENAGARI LETTER KA U+0915 (3 bytes)
211    b[i++] = (byte) 0xA4;
212    b[i++] = (byte) 0x95;
213    b[i++] = (byte) 0xE0; // DEVENAGARI SIGN VIRAMA U+094D (3 bytes)
214    b[i++] = (byte) 0xA5;
215    b[i++] = (byte) 0x8D;
216    b[i++] = (byte) 0x20; // blank " " (1 byte)
217  }
218
219  // total characters = 10; byte length = 26
220  static int addMultiByteCharSentenceOne(byte[] b, int start) {
221    int i = start;
222    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER THA U+1992 (3 bytes)
223    b[i++] = (byte) 0xA6;
224    b[i++] = (byte) 0x92;
225    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW XA U+1986 (3 bytes)
226    b[i++] = (byte) 0xA6;
227    b[i++] = (byte) 0x86;
228    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH MA U+1996 (3 bytes)
229    b[i++] = (byte) 0xA6;
230    b[i++] = (byte) 0x96;
231    b[i++] = (byte) 0x20; // blank " " (1 byte)
232    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW QA U+1981 (3 bytes)
233    b[i++] = (byte) 0xA6;
234    b[i++] = (byte) 0x81;
235    b[i++] = (byte) 0x20; // blank " " (1 byte)
236    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW BA U+19A5 (3 bytes)
237    b[i++] = (byte) 0xA6;
238    b[i++] = (byte) 0xA5;
239    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH LA U+199C (3 bytes)
240    b[i++] = (byte) 0xA6;
241    b[i++] = (byte) 0x9C;
242    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW KVA U+19A8 (3 bytes)
243    b[i++] = (byte) 0xA6;
244    b[i++] = (byte) 0xA8;
245    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
246    b[i++] = (byte) 0xA6;
247    b[i++] = (byte) 0x9D;
248    return i;
249  }
250
  // total characters = 13; byte length = 23
252  static int addMultiByteCharSentenceTwo(byte[] b, int start) {
253    int i = start;
254    b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED A U+0250 (2 bytes)
255    b[i++] = (byte) 0x90;
256    b[i++] = (byte) 0xC9; // LATIN SMALL LETTER GAMMA U+0263 (2 bytes)
257    b[i++] = (byte) 0xA3;
258    b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED M U+026F (2 bytes)
259    b[i++] = (byte) 0xAF;
260    b[i++] = (byte) 0xCA; // LATIN SMALL LETTER S WITH HOOK U+0282 (2 bytes)
261    b[i++] = (byte) 0x82;
262    b[i++] = (byte) 0x20; // blank " " (1 byte)
263    b[i++] = (byte) 0xCA; // LATIN LETTER SMALL CAPITAL L U+029F (2 bytes)
264    b[i++] = (byte) 0x9F;
265    b[i++] = (byte) 0xCB; // MODIFIER LETTER TRIANGULAR COLON U+02D0 (2 bytes)
266    b[i++] = (byte) 0x90;
267    b[i++] = (byte) 0x20; // blank " " (1 byte)
268    b[i++] = (byte) 0xCB; // RING ABOVE U+02DA (2 bytes)
269    b[i++] = (byte) 0x9A;
270    b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL L U+02E1 (2 bytes)
271    b[i++] = (byte) 0xA1;
272    b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL X U+02E3 (2 bytes)
273    b[i++] = (byte) 0xA3;
274    b[i++] = (byte) 0xCB; // MODIFIER LETTER UP ARROWHEAD U+02C4 (2 bytes)
275    b[i++] = (byte) 0x84;
276    b[i++] = (byte) 0x2E; // FULL STOP "." (1 byte)
277    return i;
278  }
279
280  // total characters = 17; byte length = 30
281  static int addMultiByteCharSentenceBlankRanges(byte[] b, int start) {
282    int i = start;
283    b[i++] = (byte) 0xF0; // INSCRIPTIONAL YODH U+10B49 (4 bytes)
284    b[i++] = (byte) 0x90;
285    b[i++] = (byte) 0xAD;
286    b[i++] = (byte) 0x89;
287    b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
288    b[i++] = (byte) 0xA6;
289    b[i++] = (byte) 0x9D;
290    b[i++] = (byte) 0x20; // blank " " (1 byte)
291    b[i++] = (byte) 0x20; // blank " " (1 byte)
292    b[i++] = (byte) 0x20; // blank " " (1 byte)
293    b[i++] = (byte) 0x2D; // hyphen-minus "-" U-002D (1 byte)
294    b[i++] = (byte) 0x20; // blank " " (1 byte)
295    b[i++] = (byte) 0x60; // grave accent "-" U-0060 (1 byte)
296    b[i++] = (byte) 0xE2; // BLACK SUN WITH RAYS U+2600 (3 bytes)
297    b[i++] = (byte) 0x98;
298    b[i++] = (byte) 0x80;
299    b[i++] = (byte) 0xE2; // BALLOT BOX WITH X U+2612 (3 bytes)
300    b[i++] = (byte) 0x98;
301    b[i++] = (byte) 0x92;
302    b[i++] = (byte) 0x20; // blank " " (1 byte)
303    b[i++] = (byte) 0x20; // blank " " (1 byte)
304    b[i++] = (byte) 0x20; // blank " " (1 byte)
305    b[i++] = (byte) 0x20; // blank " " (1 byte)
306    b[i++] = (byte) 0x20; // blank " " (1 byte)
307    b[i++] = (byte) 0xE2; // WHITE START U+2606 (3 bytes)
308    b[i++] = (byte) 0x98;
309    b[i++] = (byte) 0x86;
310    b[i++] = (byte) 0xE2; // WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE U+26FF (3 bytes)
311    b[i++] = (byte) 0x9B;
312    b[i++] = (byte) 0xBF;
313    return i;
314  }
315
316
317  static int addPads(byte[] b, int start, int count) {
318    int i = start;
319    int end = start + count;
320    for ( ; i < end; i++) {
321      b[i] = (byte) 0x20; // blank " " (1 byte)
322    }
323    return i;
324  }
325
326  private HiveConf hiveConf = new HiveConf();
327
328  private boolean vectorEqual(BytesColumnVector vector, int i, byte[] bytes, int offset, int length) {
329    byte[] bytesSlice = new byte[length];
330    System.arraycopy(bytes, offset, bytesSlice, 0, length);
331    int vectorLength = vector.length[i];
332    byte[] vectorSlice = new byte[vectorLength];
333    System.arraycopy(vector.vector[i], vector.start[i], vectorSlice, 0, vectorLength);
334    boolean equals = Arrays.equals(bytesSlice, vectorSlice);
335    if (!equals) {
336      System.out.println("vectorEqual offset " + offset + " length " + length + " vectorSlice.length " + vectorSlice.length);
337      System.out.println("vectorEqual bytesSlice " + Hex.encodeHexString(bytesSlice));
338      System.out.println("vectorEqual vectorSlice " + Hex.encodeHexString(vectorSlice));
339    }
340    return equals;
341  }
342
343  private int vectorCharacterCount(BytesColumnVector vector, int i) {
344    return StringExpr.characterCount(vector.vector[i], vector.start[i], vector.length[i]);
345  }
346
347  @Test
348  // Test basic assign to vector.
349  public void testAssignBytesColumnVector()  {
350      BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
351      outV.initBuffer(35); // initialize with estimated element size 35
352
353      int i = 0;
354
355      int expectedResultLen;
356
357      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
358      StringExpr.assign(outV, i, blue, 0, blue.length);
359      expectedResultLen = blue.length;
360      Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
361      i++;
362      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
363      StringExpr.assign(outV, i, redgreen, 0, redgreen.length);
364      expectedResultLen =  redgreen.length;
365      Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
366      i++;
367      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
368      StringExpr.assign(outV, i, ascii_sentence, 0, ascii_sentence.length);
369      expectedResultLen =  ascii_sentence.length;
370      Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
371      i++;
372      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
373      StringExpr.assign(outV, i, blanksLeft, 0, blanksLeft.length);
374      expectedResultLen =  blanksLeft.length;
375      Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
376      i++;
377
378      // Multi-byte characters with blank ranges.
379      byte[] sentenceBlankRanges = new byte[100];
380      int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
381
382      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
383      StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
384      expectedResultLen = sentenceBlankRangesLen;
385      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
386      i++;
387      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
388      StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
389      expectedResultLen = sentenceBlankRangesLen - 3;
390      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
391      i++;
392
393      // Some non-zero offsets.
394      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, sentenceBlankRangesLen - 4) == 16);
395      StringExpr.assign(outV, i, sentenceBlankRanges, 4, sentenceBlankRangesLen - 4);
396      expectedResultLen = sentenceBlankRangesLen - 4;
397      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen));
398      Assert.assertTrue(vectorCharacterCount(outV, i) == 16);
399      i++;
400      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
401      StringExpr.assign(outV, i, sentenceBlankRanges, 7, 17);
402      expectedResultLen = 17;
403      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
404      Assert.assertTrue(vectorCharacterCount(outV, i) == 13);
405      i++;
406  }
407
408  @Test
409  // Test basic right trim of bytes slice.
410  public void testRightTrimBytesSlice()  {
411      int resultLen;
412      // Nothing to trim (ASCII).
413      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
414      resultLen = StringExpr.rightTrim(blue, 0, blue.length);
415      Assert.assertTrue(resultLen == blue.length);
416      Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
417
418      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
419      resultLen = StringExpr.rightTrim(redgreen, 0, redgreen.length);
420      Assert.assertTrue(resultLen == redgreen.length);
421
422      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
423      resultLen = StringExpr.rightTrim(ascii_sentence, 0, ascii_sentence.length);
424      Assert.assertTrue(resultLen == ascii_sentence.length);
425
426      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
427      resultLen = StringExpr.rightTrim(blanksLeft, 0, blanksLeft.length);
428      Assert.assertTrue(resultLen == blanksLeft.length);
429
430      // Simple trims.
431      Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
432      resultLen = StringExpr.rightTrim(blanksRight, 0, blanksRight.length);
433      Assert.assertTrue(resultLen == 3);
434      Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
435
436      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
437      resultLen = StringExpr.rightTrim(blanksBoth, 0, blanksBoth.length);
438      Assert.assertTrue(resultLen == 5);
439      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
440     
441      Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
442      resultLen = StringExpr.rightTrim(blankString, 0, blankString.length);
443      Assert.assertTrue(resultLen == 0);
444      Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
445
446      Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
447      resultLen = StringExpr.rightTrim(blankRanges, 0, blankRanges.length);
448      Assert.assertTrue(resultLen == blankRanges.length - 4);
449      Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
450
451      // Offset trims.
452      Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
453      resultLen = StringExpr.rightTrim(blanksRight, 1, blanksRight.length - 1);
454      Assert.assertTrue(resultLen == 2);
455      Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
456
457      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
458      resultLen = StringExpr.rightTrim(blanksBoth, 4, blanksBoth.length - 4);
459      Assert.assertTrue(resultLen == 1);
460      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
461
462      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
463      resultLen = StringExpr.rightTrim(blanksBoth, 5, blanksBoth.length -5 );
464      Assert.assertTrue(resultLen == 0);
465      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
466
467      Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
468      resultLen = StringExpr.rightTrim(blankString, 1, blankString.length - 1);
469      Assert.assertTrue(resultLen == 0);
470      Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
471
472      Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
473      resultLen = StringExpr.rightTrim(blankRanges, 4, blankRanges.length - 4);
474      Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
475      Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
476
477      Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
478      resultLen = StringExpr.rightTrim(blankRanges, 6, blankRanges.length- 6);
479      Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
480      Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
481
482      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
483      resultLen = StringExpr.rightTrim(blankRanges, 7, blankRanges.length - 7);
484      Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
485      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
486
487      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
488      resultLen = StringExpr.rightTrim(blankRanges, 7, 8 - 7);
489      Assert.assertTrue(resultLen == 0);
490      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
491
492      // Multi-byte trims.
493      byte[] multiByte = new byte[100];
494
495      addMultiByteCharRightPadded1_1(multiByte);
496      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
497      resultLen = StringExpr.rightTrim(multiByte, 0, 4);
498      Assert.assertTrue(resultLen == 3);
499      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
500
501      addMultiByteCharRightPadded1_2(multiByte);
502      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
503      resultLen = StringExpr.rightTrim(multiByte, 0, 5);
504      Assert.assertTrue(resultLen == 4);
505      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
506
507      addMultiByteCharRightPadded1_3(multiByte);
508      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
509      resultLen = StringExpr.rightTrim(multiByte, 0, 9);
510      Assert.assertTrue(resultLen == 8);
511      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
512
513      addMultiByteCharRightPadded1_1(multiByte);
514      Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
515      resultLen = StringExpr.rightTrim(multiByte, 3, 1);
516      Assert.assertTrue(resultLen == 0);
517      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
518
519      addMultiByteCharRightPadded1_2(multiByte);
520      Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
521      resultLen = StringExpr.rightTrim(multiByte, 3, 2);
522      Assert.assertTrue(resultLen == 1);
523      Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
524
525      byte[] sentenceOne = new byte[100];
526      int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
527
528      Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
529      resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen);
530      Assert.assertTrue(resultLen == sentenceOneLen);
531
532      Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
533      resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen - 3);
534      Assert.assertTrue(resultLen == sentenceOneLen - 3);
535
536      byte[] sentenceTwo = new byte[100];
537      int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
538
539      Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
540      resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen);
541      Assert.assertTrue(resultLen == sentenceTwoLen);
542
543      Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
544      resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen - 5);
545      Assert.assertTrue(resultLen == sentenceTwoLen - 5);
546
547      int start;
548
549      // Left pad longer strings with multi-byte characters.
550      byte[] sentenceOnePaddedLeft = new byte[100];
551      start = addPads(sentenceOnePaddedLeft, 0, 3);
552      int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
553
554      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
555      resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
556      Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
557
558      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
559      resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
560      Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
561
562      byte[] sentenceTwoPaddedLeft = new byte[100];
563      start = addPads(sentenceTwoPaddedLeft, 0, 2);
564      int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
565
566      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
567      resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
568      Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
569
570      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
571      resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
572      Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
573
574      // Right pad longer strings with multi-byte characters.
575      byte[] sentenceOnePaddedRight = new byte[100];
576      start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
577      int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
578
579      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
580      resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
581      Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
582
583      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
584      resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
585      Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
586
587      byte[] sentenceTwoPaddedRight = new byte[100];
588      start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
589      int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
590
591      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
592      resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
593      Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
594
595      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
596      resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
597      Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
598
599      // Multi-byte characters with blank ranges.
600      byte[] sentenceBlankRanges = new byte[100];
601      int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
602
603      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
604      resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen);
605      Assert.assertTrue(resultLen == sentenceBlankRangesLen);
606
607      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
608      resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
609      Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
610
611      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
612      resultLen = StringExpr.rightTrim(sentenceBlankRanges, 7, 17);
613      Assert.assertTrue(resultLen == 12);
614      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
615  }
616
617  @Test
618  // Test basic right trim to vector.
619  public void testRightTrimBytesColumnVector()  {
620      BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
621      outV.initBuffer(30); // initialize with estimated element size 35
622
623      int i = 0;
624      int expectedResultLen;
625
626      // Nothing to trim (ASCII).
627      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
628      StringExpr.rightTrim(outV, i, blue, 0, blue.length);
629      expectedResultLen = blue.length;
630      Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
631      Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
632      i++;
633      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
634      StringExpr.rightTrim(outV, i, redgreen, 0, redgreen.length);
635      expectedResultLen = redgreen.length;
636      Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
637      i++;
638      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
639      StringExpr.rightTrim(outV, i, ascii_sentence, 0, ascii_sentence.length);
640      expectedResultLen = ascii_sentence.length;
641      Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
642      i++;
643      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
644      StringExpr.rightTrim(outV, i, blanksLeft, 0, blanksLeft.length);
645      expectedResultLen = blanksLeft.length;
646      Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
647      i++;
648
649      // Simple trims.
650      Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
651      StringExpr.rightTrim(outV, i, blanksRight, 0, blanksRight.length);
652      expectedResultLen = 3;
653      Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
654      Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
655      i++;
656      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
657      StringExpr.rightTrim(outV, i, blanksBoth, 0, blanksBoth.length);
658      expectedResultLen = 5;
659      Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
660      Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
661      i++;
662      Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
663      StringExpr.rightTrim(outV, i, blankString, 0, blankString.length);
664      expectedResultLen = 0;
665      Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
666      Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
667      i++;
668      Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
669      StringExpr.rightTrim(outV, i, blankRanges, 0, blankRanges.length);
670      expectedResultLen = blankRanges.length - 4;
671      Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
672      Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
673      i++;
674
675      // Offset trims.
676      Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
677      StringExpr.rightTrim(outV, i, blanksRight, 1, blanksRight.length - 1);
678      expectedResultLen = 2;
679      Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
680      Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
681      i++;
682      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
683      StringExpr.rightTrim(outV, i, blanksBoth, 4, blanksBoth.length - 4);
684      expectedResultLen = 1;
685      Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
686      Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
687      i++;
688      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
689      StringExpr.rightTrim(outV, i, blanksBoth, 5, blanksBoth.length -5 );
690      expectedResultLen = 0;
691      Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
692      Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
693      i++;
694      Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
695      StringExpr.rightTrim(outV, i, blankString, 1, blankString.length - 1);
696      expectedResultLen = 0;
697      Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
698      Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
699      i++;
700      Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
701      StringExpr.rightTrim(outV, i, blankRanges, 4, blankRanges.length - 4);
702      expectedResultLen = blankRanges.length - 4 -4;
703      Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
704      Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
705      i++;
706      Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
707      StringExpr.rightTrim(outV, i, blankRanges, 6, blankRanges.length- 6);
708      expectedResultLen = blankRanges.length - 6 - 4;
709      Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
710      Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
711      i++;
712      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
713      StringExpr.rightTrim(outV, i, blankRanges, 7, blankRanges.length - 7);
714      expectedResultLen = blankRanges.length - 7 - 4;
715      Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
716      Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
717      i++;
718      Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
719      StringExpr.rightTrim(outV, i, blankRanges, 7, 8 - 7);
720      expectedResultLen = 0;
721      Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
722      Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
723      i++;
724
725      // Multi-byte trims.
726      byte[] multiByte = new byte[100];
727
728      addMultiByteCharRightPadded1_1(multiByte);
729      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
730      StringExpr.rightTrim(outV, i, multiByte, 0, 4);
731      expectedResultLen = 3;
732      Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
733      Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
734      i++;
735      addMultiByteCharRightPadded1_2(multiByte);
736      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
737      StringExpr.rightTrim(outV, i, multiByte, 0, 5);
738      expectedResultLen = 4;
739      Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
740      Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
741      i++;
742      addMultiByteCharRightPadded1_3(multiByte);
743      Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
744      StringExpr.rightTrim(outV, i, multiByte, 0, 9);
745      expectedResultLen = 8;
746      Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
747      Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
748      i++;
749      addMultiByteCharRightPadded1_1(multiByte);
750      Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
751      StringExpr.rightTrim(outV, i, multiByte, 3, 1);
752      expectedResultLen = 0;
753      Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
754      Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
755      i++;
756      addMultiByteCharRightPadded1_2(multiByte);
757      Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
758      StringExpr.rightTrim(outV, i, multiByte, 3, 2);
759      expectedResultLen = 1;
760      Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
761      Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
762      i++;
763
764      byte[] sentenceOne = new byte[100];
765      int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
766
767      Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
768      StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen);
769      expectedResultLen = sentenceOneLen;
770      Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
771      i++;
772      Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
773      StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen - 3);
774      expectedResultLen = sentenceOneLen - 3;
775      Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
776      i++;
777
778      byte[] sentenceTwo = new byte[100];
779      int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
780
781      Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
782      StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen);
783      expectedResultLen = sentenceTwoLen;
784      Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
785      i++;
786      Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
787      StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen - 5);
788      expectedResultLen = sentenceTwoLen - 5;
789      Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
790      i++;
791
792      int start;
793
794      // Left pad longer strings with multi-byte characters.
795      byte[] sentenceOnePaddedLeft = new byte[100];
796      start = addPads(sentenceOnePaddedLeft, 0, 3);
797      int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
798
799      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
800      StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
801      expectedResultLen = sentenceOnePaddedLeftLen;
802      Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
803      i++;
804      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
805      StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
806      expectedResultLen = sentenceOnePaddedLeftLen - 3;
807      Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
808      i++;
809
810      byte[] sentenceTwoPaddedLeft = new byte[100];
811      start = addPads(sentenceTwoPaddedLeft, 0, 2);
812      int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
813
814      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
815      StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
816      expectedResultLen = sentenceTwoPaddedLeftLen;
817      Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
818      i++;
819      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
820      StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
821      expectedResultLen = sentenceTwoPaddedLeftLen - 5;
822      Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
823      i++;
824
825      // Right pad longer strings with multi-byte characters.
826      byte[] sentenceOnePaddedRight = new byte[100];
827      start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
828      int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
829
830      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
831      StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
832      expectedResultLen = sentenceOnePaddedRightLen - 4;
833      Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
834      i++;
835      Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
836      StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
837      expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
838      Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
839      i++;
840
841      byte[] sentenceTwoPaddedRight = new byte[100];
842      start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
843      int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
844
845      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
846      StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
847      expectedResultLen = sentenceTwoPaddedRightLen - 1;
848      Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
849      i++;
850      Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
851      StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
852      expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
853      Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
854      i++;
855
856      // Multi-byte characters with blank ranges.
857      byte[] sentenceBlankRanges = new byte[100];
858      int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
859
860      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
861      StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
862      expectedResultLen = sentenceBlankRangesLen;
863      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
864      i++;
865      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
866      StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
867      expectedResultLen = sentenceBlankRangesLen - 3;
868      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
869      i++;
870      Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
871      StringExpr.rightTrim(outV, i, sentenceBlankRanges, 7, 17);
872      expectedResultLen = 12;
873      Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
874      Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
875  }
876
877  @Test
878  // Test basic truncate of bytes slice.
879  public void testTruncateBytesSlice()  {
880      int largeMaxLength = 100;
881      int resultLen;
882
883      // No truncate (ASCII) -- maximum length large.
884      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
885      resultLen = StringExpr.truncate(blue, 0, blue.length, largeMaxLength);
886      Assert.assertTrue(resultLen == blue.length);
887      Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
888
889      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
890      resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, largeMaxLength);
891      Assert.assertTrue(resultLen == redgreen.length);
892
893      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
894      resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
895      Assert.assertTrue(resultLen == ascii_sentence.length);
896
897      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
898      resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, largeMaxLength);
899      Assert.assertTrue(resultLen == blanksLeft.length);
900
901      // No truncate (ASCII) -- same maximum length.
902      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
903      resultLen = StringExpr.truncate(blue, 0, blue.length, 4);
904      Assert.assertTrue(resultLen == blue.length);
905      Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
906
907      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
908      resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 8);
909      Assert.assertTrue(resultLen == redgreen.length);
910
911      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
912      resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 31);
913      Assert.assertTrue(resultLen == ascii_sentence.length);
914
915      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
916      resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 5);
917      Assert.assertTrue(resultLen == blanksLeft.length);
918
919      // Simple truncation.
920      Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
921      resultLen = StringExpr.truncate(blue, 0, blue.length, 3);
922      Assert.assertTrue(resultLen == 3);
923      Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3);
924
925      Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
926      resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 6);
927      Assert.assertTrue(resultLen == 6);
928
929      Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
930      resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 14);
931      Assert.assertTrue(resultLen == 14);
932
933      Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
934      resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 2);
935      Assert.assertTrue(resultLen == 2);
936
937      Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
938      resultLen = StringExpr.truncate(blanksRight, 0, blanksRight.length, 4);
939      Assert.assertTrue(resultLen == 4);
940      Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 4);
941
942      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
943      resultLen = StringExpr.truncate(blanksBoth, 0, blanksBoth.length, 2);
944      Assert.assertTrue(resultLen == 2);
945      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 2);
946     
947      Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
948      resultLen = StringExpr.truncate(blankString, 0, blankString.length, 1);
949      Assert.assertTrue(resultLen == 1);
950      Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 1);
951
952      Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
953      resultLen = StringExpr.truncate(blankRanges, 0, blankRanges.length, 29);
954      Assert.assertTrue(resultLen == 29);
955      Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 29);
956
957      // Offset truncation.
958      Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
959      resultLen = StringExpr.truncate(blanksRight, 1, blanksRight.length - 1, 3);
960      Assert.assertTrue(resultLen == 3);
961      Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 3);
962
963      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
964      resultLen = StringExpr.truncate(blanksBoth, 4, blanksBoth.length - 4, 2);
965      Assert.assertTrue(resultLen == 2);
966      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 2);
967
968      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
969      resultLen = StringExpr.truncate(blanksBoth, 5, blanksBoth.length -5, 1);
970      Assert.assertTrue(resultLen == 1);
971      Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 1);
972
973      Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
974      resultLen = StringExpr.truncate(blankRanges, 4, blankRanges.length - 4, 22);
975      Assert.assertTrue(resultLen == 22);
976      Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
977
978      Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
979      resultLen = StringExpr.truncate(blankRanges, 6, blankRanges.length- 6, 7);
980      Assert.assertTrue(resultLen == 7);
981      Assert.assert

Large files are truncated, but you can click here to view the full file