/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
Java | 5602 lines | 4473 code | 771 blank | 358 comment | 886 complexity | f975af9609e8ad5e31d7471c9db930f3 MD5 | raw file
Possible License(s): Apache-2.0
Large files are truncated, but you can click here to view the full file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.hive.ql.exec.vector.expressions;
- import static org.junit.Assert.assertEquals;
- import java.nio.charset.StandardCharsets;
- import java.util.Arrays;
- import java.util.Random;
- import java.util.StringTokenizer;
- import org.apache.hadoop.hive.conf.HiveConf;
- import org.junit.Assert;
- import org.apache.commons.codec.binary.Hex;
- import org.apache.hadoop.hive.common.type.HiveChar;
- import org.apache.hadoop.hive.common.type.HiveVarchar;
- import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
- import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
- import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarGreaterStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarLessEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualStringScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualStringScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarGreaterStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarLessEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColLessStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharScalarEqualStringGroupColumn;
- import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
- import org.apache.hadoop.hive.ql.metadata.HiveException;
- import org.apache.hadoop.hive.ql.udf.UDFLike;
- import org.apache.hadoop.io.BooleanWritable;
- import org.apache.hadoop.io.Text;
- import org.junit.Test;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- /**
- * Test vectorized expression and filter evaluation for strings.
- */
- public class TestVectorStringExpressions {
private static final Logger LOG = LoggerFactory
    .getLogger(TestVectorStringExpressions.class);

// Shared UTF-8 encoded fixtures; all of them are populated once by the static
// initializer below.
private static byte[] red;
private static byte[] redred;
private static byte[] red2; // second copy of red, different object
private static byte[] green;
private static byte[] greenred;
private static byte[] redgreen;
private static byte[] greengreen;
private static byte[] blue;
private static byte[] emptyString;
private static byte[] mixedUp;
private static byte[] mixedUpLower;
private static byte[] mixedUpUpper;
private static byte[] multiByte;
private static byte[] mixPercentPattern;
private static byte[] blanksLeft;
private static byte[] blanksRight;
private static byte[] blanksBoth;
private static byte[] blankString;
private static byte[] blankRanges;
private static byte[] ascii_sentence;

static {
  blue = "blue".getBytes(StandardCharsets.UTF_8);
  red = "red".getBytes(StandardCharsets.UTF_8);
  redred = "redred".getBytes(StandardCharsets.UTF_8);
  green = "green".getBytes(StandardCharsets.UTF_8);
  greenred = "greenred".getBytes(StandardCharsets.UTF_8);
  redgreen = "redgreen".getBytes(StandardCharsets.UTF_8);
  greengreen = "greengreen".getBytes(StandardCharsets.UTF_8);
  emptyString = "".getBytes(StandardCharsets.UTF_8);
  mixedUp = "mixedUp".getBytes(StandardCharsets.UTF_8);
  mixedUpLower = "mixedup".getBytes(StandardCharsets.UTF_8);
  mixedUpUpper = "MIXEDUP".getBytes(StandardCharsets.UTF_8);

  // for use as wildcard pattern to test LIKE
  mixPercentPattern = "mix%".getBytes(StandardCharsets.UTF_8);

  // 4 characters in 10 bytes, see addMultiByteChars
  multiByte = new byte[10];
  addMultiByteChars(multiByte);

  // The blank counts below are significant: the trim tests in this class assert
  // the exact character counts noted on each line.
  blanksLeft = "  foo".getBytes(StandardCharsets.UTF_8);    // 2 leading blanks, 5 characters
  blanksRight = "foo  ".getBytes(StandardCharsets.UTF_8);   // 2 trailing blanks, 5 characters
  blanksBoth = "  foo  ".getBytes(StandardCharsets.UTF_8);  // 2 blanks on each side, 7 characters
  blankString = "  ".getBytes(StandardCharsets.UTF_8);      // 2 blanks

  // 30 characters ending in 4 blanks, with a blank at index 7 (all asserted by
  // testRightTrimBytesSlice).
  // NOTE(review): the remaining interior spacing (3 leading blanks, 2 after "more",
  // 1 after "than", 4 after "a") is reconstructed -- confirm against upstream.
  blankRanges = "   more  than a    bargain    ".getBytes(StandardCharsets.UTF_8);
  //             012345678901234567890123456789

  ascii_sentence = "The fox trotted over the fence.".getBytes(StandardCharsets.UTF_8);
  //                0123456789012345678901234567890

  // Same content as red but a distinct array object.
  red2 = Arrays.copyOf(red, red.length);
}
- // add some multi-byte characters to test length routine later.
- // total characters = 4; byte length = 10
- static void addMultiByteChars(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0x41; // letter "A" (1 byte)
- b[i++] = (byte) 0xC3; // Latin capital A with grave (2 bytes)
- b[i++] = (byte) 0x80;
- b[i++] = (byte) 0xE2; // Euro sign (3 bytes)
- b[i++] = (byte) 0x82;
- b[i++] = (byte) 0xAC;
- b[i++] = (byte) 0xF0; // Asian character U+24B62 (4 bytes)
- b[i++] = (byte) 0xA4;
- b[i++] = (byte) 0xAD;
- b[i++] = (byte) 0xA2;
- }
- //-------------------------------------------------------------
-
- // total characters = 2; byte length = 3
- static void addMultiByteCharLeftPadded1_1(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xD0; // Cyrillic Capital DJE U+402 (2 bytes)
- b[i++] = (byte) 0x82;
- }
- // total characters = 3; byte length = 9
- static void addMultiByteCharLeftPadded1_2(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xF0; // Smiling Face with Open Mouth and Smiling Eyes U+1F604 (4 bytes)
- b[i++] = (byte) 0x9F;
- b[i++] = (byte) 0x98;
- b[i++] = (byte) 0x84;
- b[i++] = (byte) 0xF0; // Grimacing Face U+1F62C (4 bytes)
- b[i++] = (byte) 0x9F;
- b[i++] = (byte) 0x98;
- b[i++] = (byte) 0xAC;
- }
- // total characters = 4; byte length = 6
- static void addMultiByteCharLeftPadded3_1(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xE4; // Asian character U+4824 (3 bytes)
- b[i++] = (byte) 0xA0;
- b[i++] = (byte) 0xA4;
- }
- //-------------------------------------------------------------
-
- // total characters = 2; byte length = 4
- static void addMultiByteCharRightPadded1_1(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0xE0; // Tamil Om U+0BD0 (3 bytes)
- b[i++] = (byte) 0xAF;
- b[i++] = (byte) 0x90;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- }
- // total characters = 3; byte length = 5
- static void addMultiByteCharRightPadded1_2(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0xEA; // Va Syllable MEE U+A521 (3 bytes)
- b[i++] = (byte) 0x94;
- b[i++] = (byte) 0xA1;
- b[i++] = (byte) 0x5A; // Latin Capital Letter Z U+005A (1 bytes)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- }
- // total characters = 4; byte length = 9
- static void addMultiByteCharRightPadded1_3(byte[] b) {
- int i = 0;
- b[i++] = (byte) 0xCC; // COMBINING ACUTE ACENT U+0301 (2 bytes)
- b[i++] = (byte) 0x81;
- b[i++] = (byte) 0xE0; // DEVENAGARI LETTER KA U+0915 (3 bytes)
- b[i++] = (byte) 0xA4;
- b[i++] = (byte) 0x95;
- b[i++] = (byte) 0xE0; // DEVENAGARI SIGN VIRAMA U+094D (3 bytes)
- b[i++] = (byte) 0xA5;
- b[i++] = (byte) 0x8D;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- }
- // total characters = 10; byte length = 26
- static int addMultiByteCharSentenceOne(byte[] b, int start) {
- int i = start;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER THA U+1992 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x92;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW XA U+1986 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x86;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH MA U+1996 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x96;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW QA U+1981 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x81;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW BA U+19A5 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0xA5;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH LA U+199C (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x9C;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW KVA U+19A8 (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0xA8;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x9D;
- return i;
- }
- // total characters = 13; byte length = 24
- static int addMultiByteCharSentenceTwo(byte[] b, int start) {
- int i = start;
- b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED A U+0250 (2 bytes)
- b[i++] = (byte) 0x90;
- b[i++] = (byte) 0xC9; // LATIN SMALL LETTER GAMMA U+0263 (2 bytes)
- b[i++] = (byte) 0xA3;
- b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED M U+026F (2 bytes)
- b[i++] = (byte) 0xAF;
- b[i++] = (byte) 0xCA; // LATIN SMALL LETTER S WITH HOOK U+0282 (2 bytes)
- b[i++] = (byte) 0x82;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xCA; // LATIN LETTER SMALL CAPITAL L U+029F (2 bytes)
- b[i++] = (byte) 0x9F;
- b[i++] = (byte) 0xCB; // MODIFIER LETTER TRIANGULAR COLON U+02D0 (2 bytes)
- b[i++] = (byte) 0x90;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xCB; // RING ABOVE U+02DA (2 bytes)
- b[i++] = (byte) 0x9A;
- b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL L U+02E1 (2 bytes)
- b[i++] = (byte) 0xA1;
- b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL X U+02E3 (2 bytes)
- b[i++] = (byte) 0xA3;
- b[i++] = (byte) 0xCB; // MODIFIER LETTER UP ARROWHEAD U+02C4 (2 bytes)
- b[i++] = (byte) 0x84;
- b[i++] = (byte) 0x2E; // FULL STOP "." (1 byte)
- return i;
- }
- // total characters = 17; byte length = 30
- static int addMultiByteCharSentenceBlankRanges(byte[] b, int start) {
- int i = start;
- b[i++] = (byte) 0xF0; // INSCRIPTIONAL YODH U+10B49 (4 bytes)
- b[i++] = (byte) 0x90;
- b[i++] = (byte) 0xAD;
- b[i++] = (byte) 0x89;
- b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
- b[i++] = (byte) 0xA6;
- b[i++] = (byte) 0x9D;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x2D; // hyphen-minus "-" U-002D (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x60; // grave accent "-" U-0060 (1 byte)
- b[i++] = (byte) 0xE2; // BLACK SUN WITH RAYS U+2600 (3 bytes)
- b[i++] = (byte) 0x98;
- b[i++] = (byte) 0x80;
- b[i++] = (byte) 0xE2; // BALLOT BOX WITH X U+2612 (3 bytes)
- b[i++] = (byte) 0x98;
- b[i++] = (byte) 0x92;
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0x20; // blank " " (1 byte)
- b[i++] = (byte) 0xE2; // WHITE START U+2606 (3 bytes)
- b[i++] = (byte) 0x98;
- b[i++] = (byte) 0x86;
- b[i++] = (byte) 0xE2; // WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE U+26FF (3 bytes)
- b[i++] = (byte) 0x9B;
- b[i++] = (byte) 0xBF;
- return i;
- }
- static int addPads(byte[] b, int start, int count) {
- int i = start;
- int end = start + count;
- for ( ; i < end; i++) {
- b[i] = (byte) 0x20; // blank " " (1 byte)
- }
- return i;
- }
// Per-instance Hive configuration built with default settings.
// NOTE(review): not referenced in the visible part of this file; presumably consumed by
// expression tests further down -- confirm.
private HiveConf hiveConf = new HiveConf();
- private boolean vectorEqual(BytesColumnVector vector, int i, byte[] bytes, int offset, int length) {
- byte[] bytesSlice = new byte[length];
- System.arraycopy(bytes, offset, bytesSlice, 0, length);
- int vectorLength = vector.length[i];
- byte[] vectorSlice = new byte[vectorLength];
- System.arraycopy(vector.vector[i], vector.start[i], vectorSlice, 0, vectorLength);
- boolean equals = Arrays.equals(bytesSlice, vectorSlice);
- if (!equals) {
- System.out.println("vectorEqual offset " + offset + " length " + length + " vectorSlice.length " + vectorSlice.length);
- System.out.println("vectorEqual bytesSlice " + Hex.encodeHexString(bytesSlice));
- System.out.println("vectorEqual vectorSlice " + Hex.encodeHexString(vectorSlice));
- }
- return equals;
- }
- private int vectorCharacterCount(BytesColumnVector vector, int i) {
- return StringExpr.characterCount(vector.vector[i], vector.start[i], vector.length[i]);
- }
- @Test
- // Test basic assign to vector.
- public void testAssignBytesColumnVector() {
- BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
- outV.initBuffer(35); // initialize with estimated element size 35
- int i = 0;
- int expectedResultLen;
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- StringExpr.assign(outV, i, blue, 0, blue.length);
- expectedResultLen = blue.length;
- Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- StringExpr.assign(outV, i, redgreen, 0, redgreen.length);
- expectedResultLen = redgreen.length;
- Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- StringExpr.assign(outV, i, ascii_sentence, 0, ascii_sentence.length);
- expectedResultLen = ascii_sentence.length;
- Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- StringExpr.assign(outV, i, blanksLeft, 0, blanksLeft.length);
- expectedResultLen = blanksLeft.length;
- Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
- i++;
- // Multi-byte characters with blank ranges.
- byte[] sentenceBlankRanges = new byte[100];
- int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
- StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
- expectedResultLen = sentenceBlankRangesLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
- StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
- expectedResultLen = sentenceBlankRangesLen - 3;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
- i++;
- // Some non-zero offsets.
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, sentenceBlankRangesLen - 4) == 16);
- StringExpr.assign(outV, i, sentenceBlankRanges, 4, sentenceBlankRangesLen - 4);
- expectedResultLen = sentenceBlankRangesLen - 4;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 16);
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
- StringExpr.assign(outV, i, sentenceBlankRanges, 7, 17);
- expectedResultLen = 17;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 13);
- i++;
- }
- @Test
- // Test basic right trim of bytes slice.
- public void testRightTrimBytesSlice() {
- int resultLen;
- // Nothing to trim (ASCII).
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- resultLen = StringExpr.rightTrim(blue, 0, blue.length);
- Assert.assertTrue(resultLen == blue.length);
- Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- resultLen = StringExpr.rightTrim(redgreen, 0, redgreen.length);
- Assert.assertTrue(resultLen == redgreen.length);
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- resultLen = StringExpr.rightTrim(ascii_sentence, 0, ascii_sentence.length);
- Assert.assertTrue(resultLen == ascii_sentence.length);
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- resultLen = StringExpr.rightTrim(blanksLeft, 0, blanksLeft.length);
- Assert.assertTrue(resultLen == blanksLeft.length);
- // Simple trims.
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
- resultLen = StringExpr.rightTrim(blanksRight, 0, blanksRight.length);
- Assert.assertTrue(resultLen == 3);
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
- resultLen = StringExpr.rightTrim(blanksBoth, 0, blanksBoth.length);
- Assert.assertTrue(resultLen == 5);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
-
- Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
- resultLen = StringExpr.rightTrim(blankString, 0, blankString.length);
- Assert.assertTrue(resultLen == 0);
- Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
- resultLen = StringExpr.rightTrim(blankRanges, 0, blankRanges.length);
- Assert.assertTrue(resultLen == blankRanges.length - 4);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
- // Offset trims.
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
- resultLen = StringExpr.rightTrim(blanksRight, 1, blanksRight.length - 1);
- Assert.assertTrue(resultLen == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
- resultLen = StringExpr.rightTrim(blanksBoth, 4, blanksBoth.length - 4);
- Assert.assertTrue(resultLen == 1);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
- resultLen = StringExpr.rightTrim(blanksBoth, 5, blanksBoth.length -5 );
- Assert.assertTrue(resultLen == 0);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
- Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
- resultLen = StringExpr.rightTrim(blankString, 1, blankString.length - 1);
- Assert.assertTrue(resultLen == 0);
- Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
- resultLen = StringExpr.rightTrim(blankRanges, 4, blankRanges.length - 4);
- Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
- resultLen = StringExpr.rightTrim(blankRanges, 6, blankRanges.length- 6);
- Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
- resultLen = StringExpr.rightTrim(blankRanges, 7, blankRanges.length - 7);
- Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
- resultLen = StringExpr.rightTrim(blankRanges, 7, 8 - 7);
- Assert.assertTrue(resultLen == 0);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
- // Multi-byte trims.
- byte[] multiByte = new byte[100];
- addMultiByteCharRightPadded1_1(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
- resultLen = StringExpr.rightTrim(multiByte, 0, 4);
- Assert.assertTrue(resultLen == 3);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
- addMultiByteCharRightPadded1_2(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
- resultLen = StringExpr.rightTrim(multiByte, 0, 5);
- Assert.assertTrue(resultLen == 4);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
- addMultiByteCharRightPadded1_3(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
- resultLen = StringExpr.rightTrim(multiByte, 0, 9);
- Assert.assertTrue(resultLen == 8);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
- addMultiByteCharRightPadded1_1(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
- resultLen = StringExpr.rightTrim(multiByte, 3, 1);
- Assert.assertTrue(resultLen == 0);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
- addMultiByteCharRightPadded1_2(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
- resultLen = StringExpr.rightTrim(multiByte, 3, 2);
- Assert.assertTrue(resultLen == 1);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
- byte[] sentenceOne = new byte[100];
- int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
- resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen);
- Assert.assertTrue(resultLen == sentenceOneLen);
- Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
- resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen - 3);
- Assert.assertTrue(resultLen == sentenceOneLen - 3);
- byte[] sentenceTwo = new byte[100];
- int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
- resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen);
- Assert.assertTrue(resultLen == sentenceTwoLen);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
- resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen - 5);
- Assert.assertTrue(resultLen == sentenceTwoLen - 5);
- int start;
- // Left pad longer strings with multi-byte characters.
- byte[] sentenceOnePaddedLeft = new byte[100];
- start = addPads(sentenceOnePaddedLeft, 0, 3);
- int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
- resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
- Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
- resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
- Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
- byte[] sentenceTwoPaddedLeft = new byte[100];
- start = addPads(sentenceTwoPaddedLeft, 0, 2);
- int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
- resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
- Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
- resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
- Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
- // Right pad longer strings with multi-byte characters.
- byte[] sentenceOnePaddedRight = new byte[100];
- start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
- int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
- resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
- Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
- resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
- Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
- byte[] sentenceTwoPaddedRight = new byte[100];
- start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
- int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
- resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
- Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
- resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
- Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
- // Multi-byte characters with blank ranges.
- byte[] sentenceBlankRanges = new byte[100];
- int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
- resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen);
- Assert.assertTrue(resultLen == sentenceBlankRangesLen);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
- resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
- Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
- resultLen = StringExpr.rightTrim(sentenceBlankRanges, 7, 17);
- Assert.assertTrue(resultLen == 12);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
- }
- @Test
- // Test basic right trim to vector.
- public void testRightTrimBytesColumnVector() {
- BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
- outV.initBuffer(30); // initialize with estimated element size 35
- int i = 0;
- int expectedResultLen;
- // Nothing to trim (ASCII).
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- StringExpr.rightTrim(outV, i, blue, 0, blue.length);
- expectedResultLen = blue.length;
- Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
- i++;
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- StringExpr.rightTrim(outV, i, redgreen, 0, redgreen.length);
- expectedResultLen = redgreen.length;
- Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- StringExpr.rightTrim(outV, i, ascii_sentence, 0, ascii_sentence.length);
- expectedResultLen = ascii_sentence.length;
- Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- StringExpr.rightTrim(outV, i, blanksLeft, 0, blanksLeft.length);
- expectedResultLen = blanksLeft.length;
- Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
- i++;
- // Simple trims.
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
- StringExpr.rightTrim(outV, i, blanksRight, 0, blanksRight.length);
- expectedResultLen = 3;
- Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
- StringExpr.rightTrim(outV, i, blanksBoth, 0, blanksBoth.length);
- expectedResultLen = 5;
- Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
- StringExpr.rightTrim(outV, i, blankString, 0, blankString.length);
- expectedResultLen = 0;
- Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
- StringExpr.rightTrim(outV, i, blankRanges, 0, blankRanges.length);
- expectedResultLen = blankRanges.length - 4;
- Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
- i++;
- // Offset trims.
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
- StringExpr.rightTrim(outV, i, blanksRight, 1, blanksRight.length - 1);
- expectedResultLen = 2;
- Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
- StringExpr.rightTrim(outV, i, blanksBoth, 4, blanksBoth.length - 4);
- expectedResultLen = 1;
- Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
- StringExpr.rightTrim(outV, i, blanksBoth, 5, blanksBoth.length -5 );
- expectedResultLen = 0;
- Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
- StringExpr.rightTrim(outV, i, blankString, 1, blankString.length - 1);
- expectedResultLen = 0;
- Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
- StringExpr.rightTrim(outV, i, blankRanges, 4, blankRanges.length - 4);
- expectedResultLen = blankRanges.length - 4 -4;
- Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
- StringExpr.rightTrim(outV, i, blankRanges, 6, blankRanges.length- 6);
- expectedResultLen = blankRanges.length - 6 - 4;
- Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
- StringExpr.rightTrim(outV, i, blankRanges, 7, blankRanges.length - 7);
- expectedResultLen = blankRanges.length - 7 - 4;
- Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
- i++;
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
- StringExpr.rightTrim(outV, i, blankRanges, 7, 8 - 7);
- expectedResultLen = 0;
- Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
- i++;
- // Multi-byte trims.
- byte[] multiByte = new byte[100];
- addMultiByteCharRightPadded1_1(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
- StringExpr.rightTrim(outV, i, multiByte, 0, 4);
- expectedResultLen = 3;
- Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
- i++;
- addMultiByteCharRightPadded1_2(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
- StringExpr.rightTrim(outV, i, multiByte, 0, 5);
- expectedResultLen = 4;
- Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
- i++;
- addMultiByteCharRightPadded1_3(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
- StringExpr.rightTrim(outV, i, multiByte, 0, 9);
- expectedResultLen = 8;
- Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
- i++;
- addMultiByteCharRightPadded1_1(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
- StringExpr.rightTrim(outV, i, multiByte, 3, 1);
- expectedResultLen = 0;
- Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
- i++;
- addMultiByteCharRightPadded1_2(multiByte);
- Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
- StringExpr.rightTrim(outV, i, multiByte, 3, 2);
- expectedResultLen = 1;
- Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
- i++;
- byte[] sentenceOne = new byte[100];
- int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
- StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen);
- expectedResultLen = sentenceOneLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
- StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen - 3);
- expectedResultLen = sentenceOneLen - 3;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
- i++;
- byte[] sentenceTwo = new byte[100];
- int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
- StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen);
- expectedResultLen = sentenceTwoLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
- StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen - 5);
- expectedResultLen = sentenceTwoLen - 5;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
- i++;
- int start;
- // Left pad longer strings with multi-byte characters.
- byte[] sentenceOnePaddedLeft = new byte[100];
- start = addPads(sentenceOnePaddedLeft, 0, 3);
- int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
- StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
- expectedResultLen = sentenceOnePaddedLeftLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
- StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
- expectedResultLen = sentenceOnePaddedLeftLen - 3;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
- i++;
- byte[] sentenceTwoPaddedLeft = new byte[100];
- start = addPads(sentenceTwoPaddedLeft, 0, 2);
- int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
- StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
- expectedResultLen = sentenceTwoPaddedLeftLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
- StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
- expectedResultLen = sentenceTwoPaddedLeftLen - 5;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
- i++;
- // Right pad longer strings with multi-byte characters.
- byte[] sentenceOnePaddedRight = new byte[100];
- start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
- int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
- StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
- expectedResultLen = sentenceOnePaddedRightLen - 4;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
- StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
- expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
- Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
- i++;
- byte[] sentenceTwoPaddedRight = new byte[100];
- start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
- int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
- StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
- expectedResultLen = sentenceTwoPaddedRightLen - 1;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
- StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
- expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
- Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
- i++;
- // Multi-byte characters with blank ranges.
- byte[] sentenceBlankRanges = new byte[100];
- int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
- StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
- expectedResultLen = sentenceBlankRangesLen;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
- StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
- expectedResultLen = sentenceBlankRangesLen - 3;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
- i++;
- Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
- StringExpr.rightTrim(outV, i, sentenceBlankRanges, 7, 17);
- expectedResultLen = 12;
- Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
- Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
- }
- @Test
- // Test basic truncate of bytes slice.
- public void testTruncateBytesSlice() {
- int largeMaxLength = 100;
- int resultLen;
- // No truncate (ASCII) -- maximum length large.
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- resultLen = StringExpr.truncate(blue, 0, blue.length, largeMaxLength);
- Assert.assertTrue(resultLen == blue.length);
- Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, largeMaxLength);
- Assert.assertTrue(resultLen == redgreen.length);
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
- Assert.assertTrue(resultLen == ascii_sentence.length);
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, largeMaxLength);
- Assert.assertTrue(resultLen == blanksLeft.length);
- // No truncate (ASCII) -- same maximum length.
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- resultLen = StringExpr.truncate(blue, 0, blue.length, 4);
- Assert.assertTrue(resultLen == blue.length);
- Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 8);
- Assert.assertTrue(resultLen == redgreen.length);
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 31);
- Assert.assertTrue(resultLen == ascii_sentence.length);
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 5);
- Assert.assertTrue(resultLen == blanksLeft.length);
- // Simple truncation.
- Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
- resultLen = StringExpr.truncate(blue, 0, blue.length, 3);
- Assert.assertTrue(resultLen == 3);
- Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3);
- Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
- resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 6);
- Assert.assertTrue(resultLen == 6);
- Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
- resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 14);
- Assert.assertTrue(resultLen == 14);
- Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
- resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 2);
- Assert.assertTrue(resultLen == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
- resultLen = StringExpr.truncate(blanksRight, 0, blanksRight.length, 4);
- Assert.assertTrue(resultLen == 4);
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 4);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
- resultLen = StringExpr.truncate(blanksBoth, 0, blanksBoth.length, 2);
- Assert.assertTrue(resultLen == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 2);
-
- Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
- resultLen = StringExpr.truncate(blankString, 0, blankString.length, 1);
- Assert.assertTrue(resultLen == 1);
- Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 1);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
- resultLen = StringExpr.truncate(blankRanges, 0, blankRanges.length, 29);
- Assert.assertTrue(resultLen == 29);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 29);
- // Offset truncation.
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
- resultLen = StringExpr.truncate(blanksRight, 1, blanksRight.length - 1, 3);
- Assert.assertTrue(resultLen == 3);
- Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 3);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
- resultLen = StringExpr.truncate(blanksBoth, 4, blanksBoth.length - 4, 2);
- Assert.assertTrue(resultLen == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 2);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
- resultLen = StringExpr.truncate(blanksBoth, 5, blanksBoth.length -5, 1);
- Assert.assertTrue(resultLen == 1);
- Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 1);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
- resultLen = StringExpr.truncate(blankRanges, 4, blankRanges.length - 4, 22);
- Assert.assertTrue(resultLen == 22);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
- Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
- resultLen = StringExpr.truncate(blankRanges, 6, blankRanges.length- 6, 7);
- Assert.assertTrue(resultLen == 7);
- Assert.assert…
Large files are truncated, but you can click here to view the full file