PageRenderTime 89ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

http://github.com/apache/hive
Java | 5602 lines | 4473 code | 771 blank | 358 comment | 886 complexity | f975af9609e8ad5e31d7471c9db930f3 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec.vector.expressions;
  19. import static org.junit.Assert.assertEquals;
  20. import java.nio.charset.StandardCharsets;
  21. import java.util.Arrays;
  22. import java.util.Random;
  23. import java.util.StringTokenizer;
  24. import org.apache.hadoop.hive.conf.HiveConf;
  25. import org.junit.Assert;
  26. import org.apache.commons.codec.binary.Hex;
  27. import org.apache.hadoop.hive.common.type.HiveChar;
  28. import org.apache.hadoop.hive.common.type.HiveVarchar;
  29. import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
  30. import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  31. import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  32. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharScalarEqualStringGroupColumn;
  33. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarEqualStringGroupColumn;
  34. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarGreaterStringGroupColumn;
  35. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarLessEqualStringGroupColumn;
  36. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualCharScalar;
  37. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualStringScalar;
  38. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar;
  39. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualCharScalar;
  40. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualStringScalar;
  41. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar;
  42. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessCharScalar;
  43. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringGroupColumn;
  44. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringScalar;
  45. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar;
  46. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringGroupColumn;
  47. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringGroupColumn;
  48. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringGroupColumn;
  49. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarEqualStringGroupColumn;
  50. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarGreaterStringGroupColumn;
  51. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarLessEqualStringGroupColumn;
  52. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualCharScalar;
  53. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringScalar;
  54. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar;
  55. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColLessStringGroupColumn;
  56. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringGroupColumn;
  57. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharScalarEqualStringGroupColumn;
  58. import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
  59. import org.apache.hadoop.hive.ql.metadata.HiveException;
  60. import org.apache.hadoop.hive.ql.udf.UDFLike;
  61. import org.apache.hadoop.io.BooleanWritable;
  62. import org.apache.hadoop.io.Text;
  63. import org.junit.Test;
  64. import org.slf4j.Logger;
  65. import org.slf4j.LoggerFactory;
  66. /**
  67. * Test vectorized expression and filter evaluation for strings.
  68. */
  69. public class TestVectorStringExpressions {
  70. private static final Logger LOG = LoggerFactory
  71. .getLogger(TestVectorStringExpressions.class);
  72. private static byte[] red;
  73. private static byte[] redred;
  74. private static byte[] red2; // second copy of red, different object
  75. private static byte[] green;
  76. private static byte[] greenred;
  77. private static byte[] redgreen;
  78. private static byte[] greengreen;
  79. private static byte[] blue;
  80. private static byte[] emptyString;
  81. private static byte[] mixedUp;
  82. private static byte[] mixedUpLower;
  83. private static byte[] mixedUpUpper;
  84. private static byte[] multiByte;
  85. private static byte[] mixPercentPattern;
  86. private static byte[] blanksLeft;
  87. private static byte[] blanksRight;
  88. private static byte[] blanksBoth;
  89. private static byte[] blankString;
  90. private static byte[] blankRanges;
  91. private static byte[] ascii_sentence;
  92. static {
  93. blue = "blue".getBytes(StandardCharsets.UTF_8);
  94. red = "red".getBytes(StandardCharsets.UTF_8);
  95. redred = "redred".getBytes(StandardCharsets.UTF_8);
  96. green = "green".getBytes(StandardCharsets.UTF_8);
  97. greenred = "greenred".getBytes(StandardCharsets.UTF_8);
  98. redgreen = "redgreen".getBytes(StandardCharsets.UTF_8);
  99. greengreen = "greengreen".getBytes(StandardCharsets.UTF_8);
  100. emptyString = "".getBytes(StandardCharsets.UTF_8);
  101. mixedUp = "mixedUp".getBytes(StandardCharsets.UTF_8);
  102. mixedUpLower = "mixedup".getBytes(StandardCharsets.UTF_8);
  103. mixedUpUpper = "MIXEDUP".getBytes(StandardCharsets.UTF_8);
  104. // for use as wildcard pattern to test LIKE
  105. mixPercentPattern = "mix%".getBytes(StandardCharsets.UTF_8);
  106. multiByte = new byte[10];
  107. addMultiByteChars(multiByte);
  108. blanksLeft = " foo".getBytes(StandardCharsets.UTF_8);
  109. blanksRight = "foo ".getBytes(StandardCharsets.UTF_8);
  110. blanksBoth = " foo ".getBytes(StandardCharsets.UTF_8);
  111. blankString = " ".getBytes(StandardCharsets.UTF_8);
  112. blankRanges =
  113. " more than a bargain ".getBytes(StandardCharsets.UTF_8);
  114. // 012345678901234567890123456789
  115. ascii_sentence =
  116. "The fox trotted over the fence.".getBytes(StandardCharsets.UTF_8);
  117. // 0123456789012345678901234567890
  118. red2 = new byte[red.length];
  119. System.arraycopy(red, 0, red2, 0, red.length);
  120. }
  121. // add some multi-byte characters to test length routine later.
  122. // total characters = 4; byte length = 10
  123. static void addMultiByteChars(byte[] b) {
  124. int i = 0;
  125. b[i++] = (byte) 0x41; // letter "A" (1 byte)
  126. b[i++] = (byte) 0xC3; // Latin capital A with grave (2 bytes)
  127. b[i++] = (byte) 0x80;
  128. b[i++] = (byte) 0xE2; // Euro sign (3 bytes)
  129. b[i++] = (byte) 0x82;
  130. b[i++] = (byte) 0xAC;
  131. b[i++] = (byte) 0xF0; // Asian character U+24B62 (4 bytes)
  132. b[i++] = (byte) 0xA4;
  133. b[i++] = (byte) 0xAD;
  134. b[i++] = (byte) 0xA2;
  135. }
  136. //-------------------------------------------------------------
  137. // total characters = 2; byte length = 3
  138. static void addMultiByteCharLeftPadded1_1(byte[] b) {
  139. int i = 0;
  140. b[i++] = (byte) 0x20; // blank " " (1 byte)
  141. b[i++] = (byte) 0xD0; // Cyrillic Capital DJE U+402 (2 bytes)
  142. b[i++] = (byte) 0x82;
  143. }
  144. // total characters = 3; byte length = 9
  145. static void addMultiByteCharLeftPadded1_2(byte[] b) {
  146. int i = 0;
  147. b[i++] = (byte) 0x20; // blank " " (1 byte)
  148. b[i++] = (byte) 0xF0; // Smiling Face with Open Mouth and Smiling Eyes U+1F604 (4 bytes)
  149. b[i++] = (byte) 0x9F;
  150. b[i++] = (byte) 0x98;
  151. b[i++] = (byte) 0x84;
  152. b[i++] = (byte) 0xF0; // Grimacing Face U+1F62C (4 bytes)
  153. b[i++] = (byte) 0x9F;
  154. b[i++] = (byte) 0x98;
  155. b[i++] = (byte) 0xAC;
  156. }
  157. // total characters = 4; byte length = 6
  158. static void addMultiByteCharLeftPadded3_1(byte[] b) {
  159. int i = 0;
  160. b[i++] = (byte) 0x20; // blank " " (1 byte)
  161. b[i++] = (byte) 0x20; // blank " " (1 byte)
  162. b[i++] = (byte) 0x20; // blank " " (1 byte)
  163. b[i++] = (byte) 0xE4; // Asian character U+4824 (3 bytes)
  164. b[i++] = (byte) 0xA0;
  165. b[i++] = (byte) 0xA4;
  166. }
  167. //-------------------------------------------------------------
  168. // total characters = 2; byte length = 4
  169. static void addMultiByteCharRightPadded1_1(byte[] b) {
  170. int i = 0;
  171. b[i++] = (byte) 0xE0; // Tamil Om U+0BD0 (3 bytes)
  172. b[i++] = (byte) 0xAF;
  173. b[i++] = (byte) 0x90;
  174. b[i++] = (byte) 0x20; // blank " " (1 byte)
  175. }
  176. // total characters = 3; byte length = 5
  177. static void addMultiByteCharRightPadded1_2(byte[] b) {
  178. int i = 0;
  179. b[i++] = (byte) 0xEA; // Va Syllable MEE U+A521 (3 bytes)
  180. b[i++] = (byte) 0x94;
  181. b[i++] = (byte) 0xA1;
  182. b[i++] = (byte) 0x5A; // Latin Capital Letter Z U+005A (1 bytes)
  183. b[i++] = (byte) 0x20; // blank " " (1 byte)
  184. }
  185. // total characters = 4; byte length = 9
  186. static void addMultiByteCharRightPadded1_3(byte[] b) {
  187. int i = 0;
  188. b[i++] = (byte) 0xCC; // COMBINING ACUTE ACENT U+0301 (2 bytes)
  189. b[i++] = (byte) 0x81;
  190. b[i++] = (byte) 0xE0; // DEVENAGARI LETTER KA U+0915 (3 bytes)
  191. b[i++] = (byte) 0xA4;
  192. b[i++] = (byte) 0x95;
  193. b[i++] = (byte) 0xE0; // DEVENAGARI SIGN VIRAMA U+094D (3 bytes)
  194. b[i++] = (byte) 0xA5;
  195. b[i++] = (byte) 0x8D;
  196. b[i++] = (byte) 0x20; // blank " " (1 byte)
  197. }
  198. // total characters = 10; byte length = 26
  199. static int addMultiByteCharSentenceOne(byte[] b, int start) {
  200. int i = start;
  201. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER THA U+1992 (3 bytes)
  202. b[i++] = (byte) 0xA6;
  203. b[i++] = (byte) 0x92;
  204. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW XA U+1986 (3 bytes)
  205. b[i++] = (byte) 0xA6;
  206. b[i++] = (byte) 0x86;
  207. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH MA U+1996 (3 bytes)
  208. b[i++] = (byte) 0xA6;
  209. b[i++] = (byte) 0x96;
  210. b[i++] = (byte) 0x20; // blank " " (1 byte)
  211. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW QA U+1981 (3 bytes)
  212. b[i++] = (byte) 0xA6;
  213. b[i++] = (byte) 0x81;
  214. b[i++] = (byte) 0x20; // blank " " (1 byte)
  215. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW BA U+19A5 (3 bytes)
  216. b[i++] = (byte) 0xA6;
  217. b[i++] = (byte) 0xA5;
  218. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH LA U+199C (3 bytes)
  219. b[i++] = (byte) 0xA6;
  220. b[i++] = (byte) 0x9C;
  221. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW KVA U+19A8 (3 bytes)
  222. b[i++] = (byte) 0xA6;
  223. b[i++] = (byte) 0xA8;
  224. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
  225. b[i++] = (byte) 0xA6;
  226. b[i++] = (byte) 0x9D;
  227. return i;
  228. }
  229. // total characters = 13; byte length = 24
  230. static int addMultiByteCharSentenceTwo(byte[] b, int start) {
  231. int i = start;
  232. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED A U+0250 (2 bytes)
  233. b[i++] = (byte) 0x90;
  234. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER GAMMA U+0263 (2 bytes)
  235. b[i++] = (byte) 0xA3;
  236. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED M U+026F (2 bytes)
  237. b[i++] = (byte) 0xAF;
  238. b[i++] = (byte) 0xCA; // LATIN SMALL LETTER S WITH HOOK U+0282 (2 bytes)
  239. b[i++] = (byte) 0x82;
  240. b[i++] = (byte) 0x20; // blank " " (1 byte)
  241. b[i++] = (byte) 0xCA; // LATIN LETTER SMALL CAPITAL L U+029F (2 bytes)
  242. b[i++] = (byte) 0x9F;
  243. b[i++] = (byte) 0xCB; // MODIFIER LETTER TRIANGULAR COLON U+02D0 (2 bytes)
  244. b[i++] = (byte) 0x90;
  245. b[i++] = (byte) 0x20; // blank " " (1 byte)
  246. b[i++] = (byte) 0xCB; // RING ABOVE U+02DA (2 bytes)
  247. b[i++] = (byte) 0x9A;
  248. b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL L U+02E1 (2 bytes)
  249. b[i++] = (byte) 0xA1;
  250. b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL X U+02E3 (2 bytes)
  251. b[i++] = (byte) 0xA3;
  252. b[i++] = (byte) 0xCB; // MODIFIER LETTER UP ARROWHEAD U+02C4 (2 bytes)
  253. b[i++] = (byte) 0x84;
  254. b[i++] = (byte) 0x2E; // FULL STOP "." (1 byte)
  255. return i;
  256. }
  257. // total characters = 17; byte length = 30
  258. static int addMultiByteCharSentenceBlankRanges(byte[] b, int start) {
  259. int i = start;
  260. b[i++] = (byte) 0xF0; // INSCRIPTIONAL YODH U+10B49 (4 bytes)
  261. b[i++] = (byte) 0x90;
  262. b[i++] = (byte) 0xAD;
  263. b[i++] = (byte) 0x89;
  264. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
  265. b[i++] = (byte) 0xA6;
  266. b[i++] = (byte) 0x9D;
  267. b[i++] = (byte) 0x20; // blank " " (1 byte)
  268. b[i++] = (byte) 0x20; // blank " " (1 byte)
  269. b[i++] = (byte) 0x20; // blank " " (1 byte)
  270. b[i++] = (byte) 0x2D; // hyphen-minus "-" U-002D (1 byte)
  271. b[i++] = (byte) 0x20; // blank " " (1 byte)
  272. b[i++] = (byte) 0x60; // grave accent "-" U-0060 (1 byte)
  273. b[i++] = (byte) 0xE2; // BLACK SUN WITH RAYS U+2600 (3 bytes)
  274. b[i++] = (byte) 0x98;
  275. b[i++] = (byte) 0x80;
  276. b[i++] = (byte) 0xE2; // BALLOT BOX WITH X U+2612 (3 bytes)
  277. b[i++] = (byte) 0x98;
  278. b[i++] = (byte) 0x92;
  279. b[i++] = (byte) 0x20; // blank " " (1 byte)
  280. b[i++] = (byte) 0x20; // blank " " (1 byte)
  281. b[i++] = (byte) 0x20; // blank " " (1 byte)
  282. b[i++] = (byte) 0x20; // blank " " (1 byte)
  283. b[i++] = (byte) 0x20; // blank " " (1 byte)
  284. b[i++] = (byte) 0xE2; // WHITE START U+2606 (3 bytes)
  285. b[i++] = (byte) 0x98;
  286. b[i++] = (byte) 0x86;
  287. b[i++] = (byte) 0xE2; // WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE U+26FF (3 bytes)
  288. b[i++] = (byte) 0x9B;
  289. b[i++] = (byte) 0xBF;
  290. return i;
  291. }
  292. static int addPads(byte[] b, int start, int count) {
  293. int i = start;
  294. int end = start + count;
  295. for ( ; i < end; i++) {
  296. b[i] = (byte) 0x20; // blank " " (1 byte)
  297. }
  298. return i;
  299. }
  // Per-instance Hive configuration built with the no-argument constructor.
  // NOTE(review): not referenced in the visible part of this file; presumably
  // used by tests further down - confirm.
  private HiveConf hiveConf = new HiveConf();
  301. private boolean vectorEqual(BytesColumnVector vector, int i, byte[] bytes, int offset, int length) {
  302. byte[] bytesSlice = new byte[length];
  303. System.arraycopy(bytes, offset, bytesSlice, 0, length);
  304. int vectorLength = vector.length[i];
  305. byte[] vectorSlice = new byte[vectorLength];
  306. System.arraycopy(vector.vector[i], vector.start[i], vectorSlice, 0, vectorLength);
  307. boolean equals = Arrays.equals(bytesSlice, vectorSlice);
  308. if (!equals) {
  309. System.out.println("vectorEqual offset " + offset + " length " + length + " vectorSlice.length " + vectorSlice.length);
  310. System.out.println("vectorEqual bytesSlice " + Hex.encodeHexString(bytesSlice));
  311. System.out.println("vectorEqual vectorSlice " + Hex.encodeHexString(vectorSlice));
  312. }
  313. return equals;
  314. }
  315. private int vectorCharacterCount(BytesColumnVector vector, int i) {
  316. return StringExpr.characterCount(vector.vector[i], vector.start[i], vector.length[i]);
  317. }
  318. @Test
  319. // Test basic assign to vector.
  320. public void testAssignBytesColumnVector() {
  321. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  322. outV.initBuffer(35); // initialize with estimated element size 35
  323. int i = 0;
  324. int expectedResultLen;
  325. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  326. StringExpr.assign(outV, i, blue, 0, blue.length);
  327. expectedResultLen = blue.length;
  328. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  329. i++;
  330. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  331. StringExpr.assign(outV, i, redgreen, 0, redgreen.length);
  332. expectedResultLen = redgreen.length;
  333. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  334. i++;
  335. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  336. StringExpr.assign(outV, i, ascii_sentence, 0, ascii_sentence.length);
  337. expectedResultLen = ascii_sentence.length;
  338. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  339. i++;
  340. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  341. StringExpr.assign(outV, i, blanksLeft, 0, blanksLeft.length);
  342. expectedResultLen = blanksLeft.length;
  343. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  344. i++;
  345. // Multi-byte characters with blank ranges.
  346. byte[] sentenceBlankRanges = new byte[100];
  347. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  348. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  349. StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
  350. expectedResultLen = sentenceBlankRangesLen;
  351. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  352. i++;
  353. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  354. StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  355. expectedResultLen = sentenceBlankRangesLen - 3;
  356. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  357. i++;
  358. // Some non-zero offsets.
  359. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, sentenceBlankRangesLen - 4) == 16);
  360. StringExpr.assign(outV, i, sentenceBlankRanges, 4, sentenceBlankRangesLen - 4);
  361. expectedResultLen = sentenceBlankRangesLen - 4;
  362. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen));
  363. Assert.assertTrue(vectorCharacterCount(outV, i) == 16);
  364. i++;
  365. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  366. StringExpr.assign(outV, i, sentenceBlankRanges, 7, 17);
  367. expectedResultLen = 17;
  368. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  369. Assert.assertTrue(vectorCharacterCount(outV, i) == 13);
  370. i++;
  371. }
  372. @Test
  373. // Test basic right trim of bytes slice.
  374. public void testRightTrimBytesSlice() {
  375. int resultLen;
  376. // Nothing to trim (ASCII).
  377. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  378. resultLen = StringExpr.rightTrim(blue, 0, blue.length);
  379. Assert.assertTrue(resultLen == blue.length);
  380. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  381. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  382. resultLen = StringExpr.rightTrim(redgreen, 0, redgreen.length);
  383. Assert.assertTrue(resultLen == redgreen.length);
  384. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  385. resultLen = StringExpr.rightTrim(ascii_sentence, 0, ascii_sentence.length);
  386. Assert.assertTrue(resultLen == ascii_sentence.length);
  387. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  388. resultLen = StringExpr.rightTrim(blanksLeft, 0, blanksLeft.length);
  389. Assert.assertTrue(resultLen == blanksLeft.length);
  390. // Simple trims.
  391. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  392. resultLen = StringExpr.rightTrim(blanksRight, 0, blanksRight.length);
  393. Assert.assertTrue(resultLen == 3);
  394. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
  395. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  396. resultLen = StringExpr.rightTrim(blanksBoth, 0, blanksBoth.length);
  397. Assert.assertTrue(resultLen == 5);
  398. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
  399. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  400. resultLen = StringExpr.rightTrim(blankString, 0, blankString.length);
  401. Assert.assertTrue(resultLen == 0);
  402. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
  403. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  404. resultLen = StringExpr.rightTrim(blankRanges, 0, blankRanges.length);
  405. Assert.assertTrue(resultLen == blankRanges.length - 4);
  406. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
  407. // Offset trims.
  408. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  409. resultLen = StringExpr.rightTrim(blanksRight, 1, blanksRight.length - 1);
  410. Assert.assertTrue(resultLen == 2);
  411. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
  412. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  413. resultLen = StringExpr.rightTrim(blanksBoth, 4, blanksBoth.length - 4);
  414. Assert.assertTrue(resultLen == 1);
  415. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
  416. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  417. resultLen = StringExpr.rightTrim(blanksBoth, 5, blanksBoth.length -5 );
  418. Assert.assertTrue(resultLen == 0);
  419. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
  420. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  421. resultLen = StringExpr.rightTrim(blankString, 1, blankString.length - 1);
  422. Assert.assertTrue(resultLen == 0);
  423. Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
  424. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  425. resultLen = StringExpr.rightTrim(blankRanges, 4, blankRanges.length - 4);
  426. Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
  427. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  428. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  429. resultLen = StringExpr.rightTrim(blankRanges, 6, blankRanges.length- 6);
  430. Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
  431. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
  432. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  433. resultLen = StringExpr.rightTrim(blankRanges, 7, blankRanges.length - 7);
  434. Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
  435. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
  436. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  437. resultLen = StringExpr.rightTrim(blankRanges, 7, 8 - 7);
  438. Assert.assertTrue(resultLen == 0);
  439. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
  440. // Multi-byte trims.
  441. byte[] multiByte = new byte[100];
  442. addMultiByteCharRightPadded1_1(multiByte);
  443. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  444. resultLen = StringExpr.rightTrim(multiByte, 0, 4);
  445. Assert.assertTrue(resultLen == 3);
  446. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  447. addMultiByteCharRightPadded1_2(multiByte);
  448. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  449. resultLen = StringExpr.rightTrim(multiByte, 0, 5);
  450. Assert.assertTrue(resultLen == 4);
  451. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  452. addMultiByteCharRightPadded1_3(multiByte);
  453. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  454. resultLen = StringExpr.rightTrim(multiByte, 0, 9);
  455. Assert.assertTrue(resultLen == 8);
  456. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
  457. addMultiByteCharRightPadded1_1(multiByte);
  458. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  459. resultLen = StringExpr.rightTrim(multiByte, 3, 1);
  460. Assert.assertTrue(resultLen == 0);
  461. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
  462. addMultiByteCharRightPadded1_2(multiByte);
  463. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  464. resultLen = StringExpr.rightTrim(multiByte, 3, 2);
  465. Assert.assertTrue(resultLen == 1);
  466. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  467. byte[] sentenceOne = new byte[100];
  468. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  469. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  470. resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen);
  471. Assert.assertTrue(resultLen == sentenceOneLen);
  472. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  473. resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen - 3);
  474. Assert.assertTrue(resultLen == sentenceOneLen - 3);
  475. byte[] sentenceTwo = new byte[100];
  476. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  477. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  478. resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen);
  479. Assert.assertTrue(resultLen == sentenceTwoLen);
  480. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  481. resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen - 5);
  482. Assert.assertTrue(resultLen == sentenceTwoLen - 5);
  483. int start;
  484. // Left pad longer strings with multi-byte characters.
  485. byte[] sentenceOnePaddedLeft = new byte[100];
  486. start = addPads(sentenceOnePaddedLeft, 0, 3);
  487. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  488. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  489. resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
  490. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
  491. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  492. resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
  493. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
  494. byte[] sentenceTwoPaddedLeft = new byte[100];
  495. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  496. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  497. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  498. resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
  499. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
  500. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  501. resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
  502. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
  503. // Right pad longer strings with multi-byte characters.
  504. byte[] sentenceOnePaddedRight = new byte[100];
  505. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  506. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  507. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  508. resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
  509. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
  510. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  511. resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
  512. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
  513. byte[] sentenceTwoPaddedRight = new byte[100];
  514. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  515. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  516. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  517. resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
  518. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
  519. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  520. resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
  521. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
  522. // Multi-byte characters with blank ranges.
  523. byte[] sentenceBlankRanges = new byte[100];
  524. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  525. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  526. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen);
  527. Assert.assertTrue(resultLen == sentenceBlankRangesLen);
  528. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  529. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  530. Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
  531. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  532. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 7, 17);
  533. Assert.assertTrue(resultLen == 12);
  534. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
  535. }
  536. @Test
  537. // Test basic right trim to vector.
  538. public void testRightTrimBytesColumnVector() {
  539. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  540. outV.initBuffer(30); // initialize with estimated element size 35
  541. int i = 0;
  542. int expectedResultLen;
  543. // Nothing to trim (ASCII).
  544. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  545. StringExpr.rightTrim(outV, i, blue, 0, blue.length);
  546. expectedResultLen = blue.length;
  547. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  548. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  549. i++;
  550. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  551. StringExpr.rightTrim(outV, i, redgreen, 0, redgreen.length);
  552. expectedResultLen = redgreen.length;
  553. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  554. i++;
  555. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  556. StringExpr.rightTrim(outV, i, ascii_sentence, 0, ascii_sentence.length);
  557. expectedResultLen = ascii_sentence.length;
  558. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  559. i++;
  560. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  561. StringExpr.rightTrim(outV, i, blanksLeft, 0, blanksLeft.length);
  562. expectedResultLen = blanksLeft.length;
  563. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  564. i++;
  565. // Simple trims.
  566. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  567. StringExpr.rightTrim(outV, i, blanksRight, 0, blanksRight.length);
  568. expectedResultLen = 3;
  569. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
  570. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  571. i++;
  572. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  573. StringExpr.rightTrim(outV, i, blanksBoth, 0, blanksBoth.length);
  574. expectedResultLen = 5;
  575. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  576. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  577. i++;
  578. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  579. StringExpr.rightTrim(outV, i, blankString, 0, blankString.length);
  580. expectedResultLen = 0;
  581. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  582. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  583. i++;
  584. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  585. StringExpr.rightTrim(outV, i, blankRanges, 0, blankRanges.length);
  586. expectedResultLen = blankRanges.length - 4;
  587. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  588. Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
  589. i++;
  590. // Offset trims.
  591. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  592. StringExpr.rightTrim(outV, i, blanksRight, 1, blanksRight.length - 1);
  593. expectedResultLen = 2;
  594. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  595. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  596. i++;
  597. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  598. StringExpr.rightTrim(outV, i, blanksBoth, 4, blanksBoth.length - 4);
  599. expectedResultLen = 1;
  600. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  601. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  602. i++;
  603. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  604. StringExpr.rightTrim(outV, i, blanksBoth, 5, blanksBoth.length -5 );
  605. expectedResultLen = 0;
  606. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  607. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  608. i++;
  609. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  610. StringExpr.rightTrim(outV, i, blankString, 1, blankString.length - 1);
  611. expectedResultLen = 0;
  612. Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
  613. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  614. i++;
  615. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  616. StringExpr.rightTrim(outV, i, blankRanges, 4, blankRanges.length - 4);
  617. expectedResultLen = blankRanges.length - 4 -4;
  618. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
  619. Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
  620. i++;
  621. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  622. StringExpr.rightTrim(outV, i, blankRanges, 6, blankRanges.length- 6);
  623. expectedResultLen = blankRanges.length - 6 - 4;
  624. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  625. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  626. i++;
  627. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  628. StringExpr.rightTrim(outV, i, blankRanges, 7, blankRanges.length - 7);
  629. expectedResultLen = blankRanges.length - 7 - 4;
  630. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  631. Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
  632. i++;
  633. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  634. StringExpr.rightTrim(outV, i, blankRanges, 7, 8 - 7);
  635. expectedResultLen = 0;
  636. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  637. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  638. i++;
  639. // Multi-byte trims.
  640. byte[] multiByte = new byte[100];
  641. addMultiByteCharRightPadded1_1(multiByte);
  642. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  643. StringExpr.rightTrim(outV, i, multiByte, 0, 4);
  644. expectedResultLen = 3;
  645. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  646. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  647. i++;
  648. addMultiByteCharRightPadded1_2(multiByte);
  649. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  650. StringExpr.rightTrim(outV, i, multiByte, 0, 5);
  651. expectedResultLen = 4;
  652. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  653. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  654. i++;
  655. addMultiByteCharRightPadded1_3(multiByte);
  656. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  657. StringExpr.rightTrim(outV, i, multiByte, 0, 9);
  658. expectedResultLen = 8;
  659. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  660. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  661. i++;
  662. addMultiByteCharRightPadded1_1(multiByte);
  663. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  664. StringExpr.rightTrim(outV, i, multiByte, 3, 1);
  665. expectedResultLen = 0;
  666. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  667. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  668. i++;
  669. addMultiByteCharRightPadded1_2(multiByte);
  670. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  671. StringExpr.rightTrim(outV, i, multiByte, 3, 2);
  672. expectedResultLen = 1;
  673. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  674. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  675. i++;
  676. byte[] sentenceOne = new byte[100];
  677. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  678. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  679. StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen);
  680. expectedResultLen = sentenceOneLen;
  681. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  682. i++;
  683. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  684. StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen - 3);
  685. expectedResultLen = sentenceOneLen - 3;
  686. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  687. i++;
  688. byte[] sentenceTwo = new byte[100];
  689. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  690. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  691. StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen);
  692. expectedResultLen = sentenceTwoLen;
  693. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  694. i++;
  695. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  696. StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen - 5);
  697. expectedResultLen = sentenceTwoLen - 5;
  698. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  699. i++;
  700. int start;
  701. // Left pad longer strings with multi-byte characters.
  702. byte[] sentenceOnePaddedLeft = new byte[100];
  703. start = addPads(sentenceOnePaddedLeft, 0, 3);
  704. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  705. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  706. StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
  707. expectedResultLen = sentenceOnePaddedLeftLen;
  708. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  709. i++;
  710. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  711. StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
  712. expectedResultLen = sentenceOnePaddedLeftLen - 3;
  713. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  714. i++;
  715. byte[] sentenceTwoPaddedLeft = new byte[100];
  716. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  717. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  718. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  719. StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
  720. expectedResultLen = sentenceTwoPaddedLeftLen;
  721. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  722. i++;
  723. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  724. StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
  725. expectedResultLen = sentenceTwoPaddedLeftLen - 5;
  726. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  727. i++;
  728. // Right pad longer strings with multi-byte characters.
  729. byte[] sentenceOnePaddedRight = new byte[100];
  730. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  731. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  732. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  733. StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
  734. expectedResultLen = sentenceOnePaddedRightLen - 4;
  735. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  736. i++;
  737. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  738. StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
  739. expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
  740. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  741. i++;
  742. byte[] sentenceTwoPaddedRight = new byte[100];
  743. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  744. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  745. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  746. StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
  747. expectedResultLen = sentenceTwoPaddedRightLen - 1;
  748. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  749. i++;
  750. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  751. StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
  752. expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
  753. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  754. i++;
  755. // Multi-byte characters with blank ranges.
  756. byte[] sentenceBlankRanges = new byte[100];
  757. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  758. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  759. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
  760. expectedResultLen = sentenceBlankRangesLen;
  761. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  762. i++;
  763. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  764. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  765. expectedResultLen = sentenceBlankRangesLen - 3;
  766. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  767. i++;
  768. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  769. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 7, 17);
  770. expectedResultLen = 12;
  771. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  772. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  773. }
  774. @Test
  775. // Test basic truncate of bytes slice.
  776. public void testTruncateBytesSlice() {
  777. int largeMaxLength = 100;
  778. int resultLen;
  779. // No truncate (ASCII) -- maximum length large.
  780. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  781. resultLen = StringExpr.truncate(blue, 0, blue.length, largeMaxLength);
  782. Assert.assertTrue(resultLen == blue.length);
  783. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  784. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  785. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, largeMaxLength);
  786. Assert.assertTrue(resultLen == redgreen.length);
  787. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  788. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
  789. Assert.assertTrue(resultLen == ascii_sentence.length);
  790. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  791. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, largeMaxLength);
  792. Assert.assertTrue(resultLen == blanksLeft.length);
  793. // No truncate (ASCII) -- same maximum length.
  794. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  795. resultLen = StringExpr.truncate(blue, 0, blue.length, 4);
  796. Assert.assertTrue(resultLen == blue.length);
  797. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  798. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  799. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 8);
  800. Assert.assertTrue(resultLen == redgreen.length);
  801. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  802. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 31);
  803. Assert.assertTrue(resultLen == ascii_sentence.length);
  804. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  805. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 5);
  806. Assert.assertTrue(resultLen == blanksLeft.length);
  807. // Simple truncation.
  808. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  809. resultLen = StringExpr.truncate(blue, 0, blue.length, 3);
  810. Assert.assertTrue(resultLen == 3);
  811. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3);
  812. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  813. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 6);
  814. Assert.assertTrue(resultLen == 6);
  815. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  816. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 14);
  817. Assert.assertTrue(resultLen == 14);
  818. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  819. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 2);
  820. Assert.assertTrue(resultLen == 2);
  821. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  822. resultLen = StringExpr.truncate(blanksRight, 0, blanksRight.length, 4);
  823. Assert.assertTrue(resultLen == 4);
  824. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 4);
  825. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  826. resultLen = StringExpr.truncate(blanksBoth, 0, blanksBoth.length, 2);
  827. Assert.assertTrue(resultLen == 2);
  828. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 2);
  829. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  830. resultLen = StringExpr.truncate(blankString, 0, blankString.length, 1);
  831. Assert.assertTrue(resultLen == 1);
  832. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 1);
  833. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  834. resultLen = StringExpr.truncate(blankRanges, 0, blankRanges.length, 29);
  835. Assert.assertTrue(resultLen == 29);
  836. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 29);
  837. // Offset truncation.
  838. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  839. resultLen = StringExpr.truncate(blanksRight, 1, blanksRight.length - 1, 3);
  840. Assert.assertTrue(resultLen == 3);
  841. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 3);
  842. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  843. resultLen = StringExpr.truncate(blanksBoth, 4, blanksBoth.length - 4, 2);
  844. Assert.assertTrue(resultLen == 2);
  845. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 2);
  846. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  847. resultLen = StringExpr.truncate(blanksBoth, 5, blanksBoth.length -5, 1);
  848. Assert.assertTrue(resultLen == 1);
  849. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 1);
  850. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  851. resultLen = StringExpr.truncate(blankRanges, 4, blankRanges.length - 4, 22);
  852. Assert.assertTrue(resultLen == 22);
  853. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  854. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  855. resultLen = StringExpr.truncate(blankRanges, 6, blankRanges.length- 6, 7);
  856. Assert.assertTrue(resultLen == 7);
  857. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 7);
  858. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  859. resultLen = StringExpr.truncate(blankRanges, 7, blankRanges.length - 7, 20);
  860. Assert.assertTrue(resultLen == 20);
  861. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 20);
  862. // Multi-byte truncation.
  863. byte[] multiByte = new byte[100];
  864. addMultiByteCharRightPadded1_1(multiByte);
  865. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  866. resultLen = StringExpr.truncate(multiByte, 0, 4, 1);
  867. Assert.assertTrue(resultLen == 3);
  868. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  869. addMultiByteCharRightPadded1_2(multiByte);
  870. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  871. resultLen = StringExpr.truncate(multiByte, 0, 5, 2);
  872. Assert.assertTrue(resultLen == 4);
  873. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  874. addMultiByteCharRightPadded1_3(multiByte);
  875. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  876. resultLen = StringExpr.truncate(multiByte, 0, 9, 2);
  877. Assert.assertTrue(resultLen == 5);
  878. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  879. addMultiByteCharRightPadded1_2(multiByte);
  880. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  881. resultLen = StringExpr.truncate(multiByte, 3, 2, 1);
  882. Assert.assertTrue(resultLen == 1);
  883. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  884. byte[] sentenceOne = new byte[100];
  885. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  886. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  887. resultLen = StringExpr.truncate(sentenceOne, 0, sentenceOneLen, 8);
  888. Assert.assertTrue(resultLen == 20);
  889. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  890. resultLen = StringExpr.truncate(sentenceOne, 0, sentenceOneLen - 3, 3);
  891. Assert.assertTrue(resultLen == 9);
  892. byte[] sentenceTwo = new byte[100];
  893. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  894. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  895. resultLen = StringExpr.truncate(sentenceTwo, 0, sentenceTwoLen, 9);
  896. Assert.assertTrue(resultLen == 16);
  897. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  898. resultLen = StringExpr.truncate(sentenceTwo, 0, sentenceTwoLen - 5, 6);
  899. Assert.assertTrue(resultLen == 11);
  900. int start;
  901. // Left pad longer strings with multi-byte characters.
  902. byte[] sentenceOnePaddedLeft = new byte[100];
  903. start = addPads(sentenceOnePaddedLeft, 0, 3);
  904. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  905. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  906. resultLen = StringExpr.truncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 4);
  907. Assert.assertTrue(resultLen == 6);
  908. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  909. resultLen = StringExpr.truncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 7);
  910. Assert.assertTrue(resultLen == 13);
  911. byte[] sentenceTwoPaddedLeft = new byte[100];
  912. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  913. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  914. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  915. resultLen = StringExpr.truncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 14);
  916. Assert.assertTrue(resultLen == 24);
  917. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  918. resultLen = StringExpr.truncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 9);
  919. Assert.assertTrue(resultLen == 15);
  920. // Right pad longer strings with multi-byte characters.
  921. byte[] sentenceOnePaddedRight = new byte[100];
  922. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  923. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  924. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  925. resultLen = StringExpr.truncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 1);
  926. Assert.assertTrue(resultLen == 3);
  927. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  928. resultLen = StringExpr.truncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 5);
  929. Assert.assertTrue(resultLen == 13);
  930. byte[] sentenceTwoPaddedRight = new byte[100];
  931. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  932. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  933. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  934. resultLen = StringExpr.truncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 6);
  935. Assert.assertTrue(resultLen == 11);
  936. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  937. resultLen = StringExpr.truncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 8);
  938. Assert.assertTrue(resultLen == 14);
  939. // Multi-byte characters with blank ranges.
  940. byte[] sentenceBlankRanges = new byte[100];
  941. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  942. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  943. resultLen = StringExpr.truncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 4);
  944. Assert.assertTrue(resultLen == 9);
  945. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  946. resultLen = StringExpr.truncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 14);
  947. Assert.assertTrue(resultLen == 23);
  948. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  949. resultLen = StringExpr.truncate(sentenceBlankRanges, 7, 17, 11);
  950. Assert.assertTrue(resultLen == 15);
  951. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 11);
  952. }
  953. @Test
  954. // Test basic truncate to vector.
  955. public void testTruncateBytesColumnVector() {
  956. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  957. outV.initBuffer(35); // initialize with estimated element size 35
  958. int i = 0;
  959. int largeMaxLength = 100;
  960. int expectedResultLen;
  961. // No truncate (ASCII) -- maximum length large.
  962. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  963. StringExpr.truncate(outV, i, blue, 0, blue.length, largeMaxLength);
  964. expectedResultLen = blue.length;
  965. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  966. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  967. i++;
  968. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  969. StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, largeMaxLength);
  970. expectedResultLen = redgreen.length;
  971. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  972. i++;
  973. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  974. StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
  975. expectedResultLen = ascii_sentence.length;
  976. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  977. i++;
  978. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  979. StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, largeMaxLength);
  980. expectedResultLen = blanksLeft.length;
  981. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  982. i++;
  983. // No truncate (ASCII) -- same maximum length.
  984. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  985. StringExpr.truncate(outV, i, blue, 0, blue.length, 4);
  986. expectedResultLen = blue.length;
  987. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  988. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  989. i++;
  990. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  991. StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, 8);
  992. expectedResultLen = redgreen.length;
  993. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  994. i++;
  995. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  996. StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 31);
  997. expectedResultLen = ascii_sentence.length;
  998. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  999. i++;
  1000. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1001. StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, 5);
  1002. expectedResultLen = blanksLeft.length;
  1003. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  1004. i++;
  1005. // Simple truncation.
  1006. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  1007. StringExpr.truncate(outV, i, blue, 0, blue.length, 3);
  1008. expectedResultLen = 3;
  1009. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  1010. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  1011. i++;
  1012. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  1013. StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, 6);
  1014. expectedResultLen = 6;
  1015. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  1016. i++;
  1017. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  1018. StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 14);
  1019. expectedResultLen = 14;
  1020. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  1021. i++;
  1022. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1023. StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, 2);
  1024. expectedResultLen = 2;
  1025. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  1026. i++;
  1027. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  1028. StringExpr.truncate(outV, i, blanksRight, 0, blanksRight.length, 4);
  1029. expectedResultLen = 4;
  1030. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  1031. i++;
  1032. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  1033. StringExpr.truncate(outV, i, blanksBoth, 0, blanksBoth.length, 2);
  1034. expectedResultLen = 2;
  1035. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  1036. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1037. i++;
  1038. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  1039. StringExpr.truncate(outV, i, blankString, 0, blankString.length, 1);
  1040. expectedResultLen = 1;
  1041. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  1042. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1043. i++;
  1044. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  1045. StringExpr.truncate(outV, i, blankRanges, 0, blankRanges.length, 29);
  1046. expectedResultLen = 29;
  1047. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  1048. Assert.assertTrue(vectorCharacterCount(outV, i) == 29);
  1049. i++;
  1050. // Offset truncation.
  1051. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  1052. StringExpr.truncate(outV, i, blanksRight, 1, blanksRight.length - 1, 3);
  1053. expectedResultLen = 3;
  1054. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  1055. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  1056. i++;
  1057. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  1058. StringExpr.truncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 2);
  1059. expectedResultLen = 2;
  1060. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  1061. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1062. i++;
  1063. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  1064. StringExpr.truncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 1);
  1065. expectedResultLen = 1;
  1066. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  1067. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1068. i++;
  1069. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  1070. StringExpr.truncate(outV, i, blankRanges, 4, blankRanges.length - 4, 22);
  1071. expectedResultLen = 22;
  1072. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
  1073. Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
  1074. i++;
  1075. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  1076. StringExpr.truncate(outV, i, blankRanges, 6, blankRanges.length- 6, 7);
  1077. expectedResultLen = 7;
  1078. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  1079. Assert.assertTrue(vectorCharacterCount(outV, i) == 7);
  1080. i++;
  1081. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  1082. StringExpr.truncate(outV, i, blankRanges, 7, blankRanges.length - 7, 20);
  1083. expectedResultLen = 20;
  1084. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  1085. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  1086. i++;
  1087. // Multi-byte truncation.
  1088. byte[] multiByte = new byte[100];
  1089. addMultiByteCharRightPadded1_1(multiByte);
  1090. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1091. StringExpr.truncate(outV, i, multiByte, 0, 4, 1);
  1092. expectedResultLen = 3;
  1093. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1094. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1095. i++;
  1096. addMultiByteCharRightPadded1_2(multiByte);
  1097. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1098. StringExpr.truncate(outV, i, multiByte, 0, 5, 2);
  1099. expectedResultLen = 4;
  1100. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1101. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1102. i++;
  1103. addMultiByteCharRightPadded1_3(multiByte);
  1104. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1105. StringExpr.truncate(outV, i, multiByte, 0, 9, 2);
  1106. expectedResultLen = 5;
  1107. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1108. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1109. i++;
  1110. addMultiByteCharRightPadded1_2(multiByte);
  1111. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  1112. StringExpr.truncate(outV, i, multiByte, 3, 2, 1);
  1113. expectedResultLen = 1;
  1114. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  1115. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1116. i++;
  1117. byte[] sentenceOne = new byte[100];
  1118. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  1119. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  1120. StringExpr.truncate(outV, i, sentenceOne, 0, sentenceOneLen, 8);
  1121. expectedResultLen = 20;
  1122. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  1123. i++;
  1124. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  1125. StringExpr.truncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 3);
  1126. expectedResultLen = 9;
  1127. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  1128. i++;
  1129. byte[] sentenceTwo = new byte[100];
  1130. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  1131. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  1132. StringExpr.truncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 9);
  1133. expectedResultLen = 16;
  1134. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  1135. i++;
  1136. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  1137. StringExpr.truncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 6);
  1138. expectedResultLen = 11;
  1139. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  1140. i++;
  1141. int start;
  1142. // Left pad longer strings with multi-byte characters.
  1143. byte[] sentenceOnePaddedLeft = new byte[100];
  1144. start = addPads(sentenceOnePaddedLeft, 0, 3);
  1145. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  1146. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  1147. StringExpr.truncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 4);
  1148. expectedResultLen = 6;
  1149. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  1150. i++;
  1151. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  1152. StringExpr.truncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 7);
  1153. expectedResultLen = 13;
  1154. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  1155. i++;
  1156. byte[] sentenceTwoPaddedLeft = new byte[100];
  1157. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  1158. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  1159. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  1160. StringExpr.truncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 14);
  1161. expectedResultLen = 24;
  1162. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  1163. i++;
  1164. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  1165. StringExpr.truncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 9);
  1166. expectedResultLen = 15;
  1167. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  1168. i++;
  1169. // Right pad longer strings with multi-byte characters.
  1170. byte[] sentenceOnePaddedRight = new byte[100];
  1171. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  1172. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  1173. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  1174. StringExpr.truncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 1);
  1175. expectedResultLen = 3;
  1176. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  1177. i++;
  1178. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  1179. StringExpr.truncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 5);
  1180. expectedResultLen = 13;
  1181. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  1182. i++;
  1183. byte[] sentenceTwoPaddedRight = new byte[100];
  1184. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  1185. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  1186. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  1187. StringExpr.truncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 6);
  1188. expectedResultLen = 11;
  1189. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  1190. i++;
  1191. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  1192. StringExpr.truncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 8);
  1193. expectedResultLen = 14;
  1194. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  1195. i++;
  1196. // Multi-byte characters with blank ranges.
  1197. byte[] sentenceBlankRanges = new byte[100];
  1198. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  1199. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  1200. StringExpr.truncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 4);
  1201. expectedResultLen = 9;
  1202. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  1203. i++;
  1204. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  1205. StringExpr.truncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 14);
  1206. expectedResultLen = 23;
  1207. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges,0, expectedResultLen));
  1208. i++;
  1209. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  1210. StringExpr.truncate(outV, i, sentenceBlankRanges, 7, 17, 11);
  1211. expectedResultLen = 15;
  1212. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  1213. Assert.assertTrue(vectorCharacterCount(outV, i) == 11);
  1214. i++;
  1215. }
  1216. @Test
  1217. // Test basic truncate to vector.
  1218. public void testTruncateScalar() {
  1219. int largeMaxLength = 100;
  1220. byte[] result;
  1221. // No truncate (ASCII) -- maximum length large.
  1222. Assert.assertTrue(StringExpr.characterCount(blue) == 4);
  1223. result = StringExpr.truncateScalar(blue, largeMaxLength);
  1224. Assert.assertTrue(Arrays.equals(blue, result));
  1225. Assert.assertTrue(StringExpr.characterCount(redgreen) == 8);
  1226. result = StringExpr.truncateScalar(redgreen, largeMaxLength);
  1227. Assert.assertTrue(Arrays.equals(redgreen, result));
  1228. Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31);
  1229. result = StringExpr.truncateScalar(ascii_sentence, largeMaxLength);
  1230. Assert.assertTrue(Arrays.equals(ascii_sentence, result));
  1231. Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5);
  1232. result = StringExpr.truncateScalar(blanksLeft, largeMaxLength);
  1233. Assert.assertTrue(Arrays.equals(blanksLeft, result));
  1234. // No truncate (ASCII) -- same maximum length.
  1235. Assert.assertTrue(StringExpr.characterCount(blue) == 4);
  1236. result = StringExpr.truncateScalar(blue, blue.length);
  1237. Assert.assertTrue(Arrays.equals(blue, result));
  1238. Assert.assertTrue(StringExpr.characterCount(redgreen) == 8);
  1239. result = StringExpr.truncateScalar(redgreen, redgreen.length);
  1240. Assert.assertTrue(Arrays.equals(redgreen, result));
  1241. Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31);
  1242. result = StringExpr.truncateScalar(ascii_sentence, ascii_sentence.length);
  1243. Assert.assertTrue(Arrays.equals(ascii_sentence, result));
  1244. Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5);
  1245. result = StringExpr.truncateScalar(blanksLeft, blanksLeft.length);
  1246. Assert.assertTrue(Arrays.equals(blanksLeft, result));
  1247. // Simple truncation.
  1248. result = StringExpr.truncateScalar(blue, 3);
  1249. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blue, 3), result));
  1250. result = StringExpr.truncateScalar(redgreen, 6);
  1251. Assert.assertTrue(Arrays.equals(Arrays.copyOf(redgreen, 6), result));
  1252. result = StringExpr.truncateScalar(ascii_sentence, 14);
  1253. Assert.assertTrue(Arrays.equals(Arrays.copyOf(ascii_sentence, 14), result));
  1254. result = StringExpr.truncateScalar(blanksLeft, 2);
  1255. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksLeft, 2), result));
  1256. result = StringExpr.truncateScalar(blanksRight, 4);
  1257. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksRight, 4), result));
  1258. result = StringExpr.truncateScalar(blanksBoth, 2);
  1259. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksBoth, 2), result));
  1260. result = StringExpr.truncateScalar(blankString, 1);
  1261. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankString, 1), result));
  1262. result = StringExpr.truncateScalar(blankRanges, 29);
  1263. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankRanges, 29), result));
  1264. // Multi-byte truncation.
  1265. byte[] scratch = new byte[100];
  1266. byte[] multiByte;
  1267. addMultiByteCharRightPadded1_1(scratch);
  1268. multiByte = Arrays.copyOf(scratch, 4);
  1269. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1270. result = StringExpr.truncateScalar(multiByte, 1);
  1271. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 3), result));
  1272. addMultiByteCharRightPadded1_2(scratch);
  1273. multiByte = Arrays.copyOf(scratch, 5);
  1274. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1275. result = StringExpr.truncateScalar(multiByte, 2);
  1276. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 4), result));
  1277. addMultiByteCharRightPadded1_3(scratch);
  1278. multiByte = Arrays.copyOf(scratch, 9);
  1279. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1280. result = StringExpr.truncateScalar(multiByte, 2);
  1281. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 5), result));
  1282. addMultiByteCharRightPadded1_2(scratch);
  1283. multiByte = Arrays.copyOfRange(scratch, 3, 3 + 2);
  1284. Assert.assertTrue(StringExpr.characterCount(multiByte) == 2);
  1285. result = StringExpr.truncateScalar(multiByte, 1);
  1286. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 1), result));
  1287. int sentenceOneLen = addMultiByteCharSentenceOne(scratch, 0);
  1288. byte[] sentenceOne = Arrays.copyOf(scratch, sentenceOneLen);
  1289. Assert.assertTrue(StringExpr.characterCount(sentenceOne) == 10);
  1290. result = StringExpr.truncateScalar(sentenceOne, 8);
  1291. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOne, 20), result));
  1292. byte[] sentenceOnePortion = Arrays.copyOf(sentenceOne, sentenceOneLen - 3);
  1293. Assert.assertTrue(StringExpr.characterCount(sentenceOnePortion) == 9);
  1294. result = StringExpr.truncateScalar(sentenceOnePortion, 3);
  1295. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePortion, 9), result));
  1296. int sentenceTwoLen = addMultiByteCharSentenceTwo(scratch, 0);
  1297. byte[] sentenceTwo = Arrays.copyOf(scratch, sentenceTwoLen);
  1298. Assert.assertTrue(StringExpr.characterCount(sentenceTwo) == 13);
  1299. result = StringExpr.truncateScalar(sentenceTwo, 9);
  1300. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwo, 16), result));
  1301. byte[] sentenceTwoPortion = Arrays.copyOf(sentenceTwo, sentenceTwoLen - 5);
  1302. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPortion) == 10);
  1303. result = StringExpr.truncateScalar(sentenceTwoPortion, 6);
  1304. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPortion, 11), result));
  1305. int start;
  1306. // Left pad longer strings with multi-byte characters.
  1307. start = addPads(scratch, 0, 3);
  1308. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(scratch, start);
  1309. byte[] sentenceOnePaddedLeft = Arrays.copyOf(scratch, sentenceOnePaddedLeftLen);
  1310. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft) == 3 + 10);
  1311. result = StringExpr.truncateScalar(sentenceOnePaddedLeft, 4);
  1312. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeft, 6), result));
  1313. byte[] sentenceOnePaddedLeftPortion = Arrays.copyOf(sentenceOnePaddedLeft, sentenceOnePaddedLeftLen - 3);
  1314. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeftPortion) == 3 + 9);
  1315. result = StringExpr.truncateScalar(sentenceOnePaddedLeftPortion, 7);
  1316. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeftPortion, 13), result));
  1317. start = addPads(scratch, 0, 2);
  1318. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(scratch, start);
  1319. byte[] sentenceTwoPaddedLeft = Arrays.copyOf(scratch, sentenceTwoPaddedLeftLen);
  1320. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft) == 2 + 13);
  1321. result = StringExpr.truncateScalar(sentenceTwoPaddedLeft, 14);
  1322. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeft, 24), result));
  1323. byte[] sentenceTwoPaddedLeftPortion = Arrays.copyOf(sentenceTwoPaddedLeft, sentenceTwoPaddedLeftLen - 5);
  1324. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeftPortion) == 2 + 10);
  1325. result = StringExpr.truncateScalar(sentenceTwoPaddedLeftPortion, 9);
  1326. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeftPortion, 15), result));
  1327. // Right pad longer strings with multi-byte characters.
  1328. start = addMultiByteCharSentenceOne(scratch, 0);
  1329. int sentenceOnePaddedRightLen = addPads(scratch, start, 4);
  1330. byte[] sentenceOnePaddedRight = Arrays.copyOf(scratch, sentenceOnePaddedRightLen);
  1331. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight) == 10 + 4);
  1332. result = StringExpr.truncateScalar(sentenceOnePaddedRight, 1);
  1333. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRight, 3), result));
  1334. byte[] sentenceOnePaddedRightPortion = Arrays.copyOf(sentenceOnePaddedRight, sentenceOnePaddedRightLen - 3 - 4);
  1335. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRightPortion) == 9);
  1336. result = StringExpr.truncateScalar(sentenceOnePaddedRightPortion, 5);
  1337. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRightPortion, 13), result));
  1338. start = addMultiByteCharSentenceTwo(scratch, 0);
  1339. int sentenceTwoPaddedRightLen = addPads(scratch, start, 1);
  1340. byte[] sentenceTwoPaddedRight = Arrays.copyOf(scratch, sentenceTwoPaddedRightLen);
  1341. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight) == 13 + 1);
  1342. result = StringExpr.truncateScalar(sentenceTwoPaddedRight, 6);
  1343. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRight, 11), result));
  1344. byte[] sentenceTwoPaddedRightPortion = Arrays.copyOf(sentenceTwoPaddedRight, sentenceTwoPaddedRightLen - 5 - 1);
  1345. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRightPortion) == 10);
  1346. result = StringExpr.truncateScalar(sentenceTwoPaddedRightPortion, 8);
  1347. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRightPortion, 14), result));
  1348. // Multi-byte characters with blank ranges.
  1349. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(scratch, 0);
  1350. byte[] sentenceBlankRanges = Arrays.copyOf(scratch, sentenceBlankRangesLen);
  1351. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges) == 17);
  1352. result = StringExpr.truncateScalar(sentenceBlankRanges, 4);
  1353. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRanges, 9), result));
  1354. byte[] sentenceBlankRangesPortion = Arrays.copyOf(sentenceBlankRanges, sentenceBlankRangesLen - 3);
  1355. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 16);
  1356. result = StringExpr.truncateScalar(sentenceBlankRangesPortion, 14);
  1357. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 23), result));
  1358. sentenceBlankRangesPortion = Arrays.copyOfRange(sentenceBlankRanges, 7, 7 + 17);
  1359. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 13);
  1360. result = StringExpr.truncateScalar(sentenceBlankRangesPortion, 11);
  1361. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 15), result));
  1362. Assert.assertTrue(StringExpr.characterCount(result) == 11);
  1363. }
  1364. @Test
  1365. // Test basic right trim and truncate to vector.
  1366. public void testRightTrimAndTruncateBytesSlice() {
  1367. // This first section repeats the tests of testRightTrimWithOffset with a large maxLength parameter.
  1368. // (i.e. too large to have an effect).
  1369. int largeMaxLength = 100;
  1370. int resultLen;
  1371. // Nothing to trim (ASCII).
  1372. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  1373. resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, largeMaxLength);
  1374. Assert.assertTrue(resultLen == blue.length);
  1375. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  1376. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  1377. resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, largeMaxLength);
  1378. Assert.assertTrue(resultLen == redgreen.length);
  1379. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  1380. resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
  1381. Assert.assertTrue(resultLen == ascii_sentence.length);
  1382. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1383. resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, largeMaxLength);
  1384. Assert.assertTrue(resultLen == blanksLeft.length);
  1385. // Simple trims.
  1386. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  1387. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, largeMaxLength);
  1388. Assert.assertTrue(resultLen == 3);
  1389. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
  1390. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  1391. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, largeMaxLength);
  1392. Assert.assertTrue(resultLen == 5);
  1393. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
  1394. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  1395. resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, largeMaxLength);
  1396. Assert.assertTrue(resultLen == 0);
  1397. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
  1398. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  1399. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, largeMaxLength);
  1400. Assert.assertTrue(resultLen == blankRanges.length - 4);
  1401. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
  1402. // Offset trims.
  1403. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  1404. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, largeMaxLength);
  1405. Assert.assertTrue(resultLen == 2);
  1406. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
  1407. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  1408. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, largeMaxLength);
  1409. Assert.assertTrue(resultLen == 1);
  1410. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
  1411. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  1412. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, largeMaxLength);
  1413. Assert.assertTrue(resultLen == 0);
  1414. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
  1415. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  1416. resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, largeMaxLength);
  1417. Assert.assertTrue(resultLen == 0);
  1418. Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
  1419. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  1420. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 4, blankRanges.length - 4, largeMaxLength);
  1421. Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
  1422. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  1423. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  1424. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, largeMaxLength);
  1425. Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
  1426. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
  1427. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  1428. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, largeMaxLength);
  1429. Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
  1430. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
  1431. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  1432. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, largeMaxLength);
  1433. Assert.assertTrue(resultLen == 0);
  1434. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
  1435. // Multi-byte trims.
  1436. byte[] multiByte = new byte[100];
  1437. addMultiByteCharRightPadded1_1(multiByte);
  1438. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1439. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, largeMaxLength);
  1440. Assert.assertTrue(resultLen == 3);
  1441. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  1442. addMultiByteCharRightPadded1_2(multiByte);
  1443. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1444. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, largeMaxLength);
  1445. Assert.assertTrue(resultLen == 4);
  1446. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  1447. addMultiByteCharRightPadded1_3(multiByte);
  1448. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1449. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, largeMaxLength);
  1450. Assert.assertTrue(resultLen == 8);
  1451. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
  1452. addMultiByteCharRightPadded1_1(multiByte);
  1453. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  1454. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, largeMaxLength);
  1455. Assert.assertTrue(resultLen == 0);
  1456. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
  1457. addMultiByteCharRightPadded1_2(multiByte);
  1458. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  1459. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 2, largeMaxLength);
  1460. Assert.assertTrue(resultLen == 1);
  1461. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  1462. byte[] sentenceOne = new byte[100];
  1463. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  1464. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  1465. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, largeMaxLength);
  1466. Assert.assertTrue(resultLen == sentenceOneLen);
  1467. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  1468. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, largeMaxLength);
  1469. Assert.assertTrue(resultLen == sentenceOneLen - 3);
  1470. byte[] sentenceTwo = new byte[100];
  1471. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  1472. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  1473. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, largeMaxLength);
  1474. Assert.assertTrue(resultLen == sentenceTwoLen);
  1475. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  1476. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, largeMaxLength);
  1477. Assert.assertTrue(resultLen == sentenceTwoLen - 5);
  1478. int start;
  1479. // Left pad longer strings with multi-byte characters.
  1480. byte[] sentenceOnePaddedLeft = new byte[100];
  1481. start = addPads(sentenceOnePaddedLeft, 0, 3);
  1482. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  1483. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  1484. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, largeMaxLength);
  1485. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
  1486. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  1487. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, largeMaxLength);
  1488. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
  1489. byte[] sentenceTwoPaddedLeft = new byte[100];
  1490. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  1491. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  1492. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  1493. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, largeMaxLength);
  1494. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
  1495. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  1496. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, largeMaxLength);
  1497. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
  1498. // Right pad longer strings with multi-byte characters.
  1499. byte[] sentenceOnePaddedRight = new byte[100];
  1500. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  1501. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  1502. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  1503. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, largeMaxLength);
  1504. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
  1505. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  1506. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, largeMaxLength);
  1507. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
  1508. byte[] sentenceTwoPaddedRight = new byte[100];
  1509. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  1510. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  1511. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  1512. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, largeMaxLength);
  1513. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
  1514. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  1515. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, largeMaxLength);
  1516. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
  1517. // Multi-byte characters with blank ranges.
  1518. byte[] sentenceBlankRanges = new byte[100];
  1519. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  1520. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  1521. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, largeMaxLength);
  1522. Assert.assertTrue(resultLen == sentenceBlankRangesLen);
  1523. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  1524. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, largeMaxLength);
  1525. Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
  1526. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  1527. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, largeMaxLength);
  1528. Assert.assertTrue(resultLen == 12);
  1529. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
  1530. // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is
  1531. // exactly the number of current characters in the string. This shouldn't affect the trim.
  1532. // Nothing to trim (ASCII).
  1533. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  1534. resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, 4);
  1535. Assert.assertTrue(resultLen == blue.length);
  1536. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  1537. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  1538. resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, 8);
  1539. Assert.assertTrue(resultLen == redgreen.length);
  1540. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  1541. resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, 31);
  1542. Assert.assertTrue(resultLen == ascii_sentence.length);
  1543. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1544. resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, 5);
  1545. Assert.assertTrue(resultLen == blanksLeft.length);
  1546. // Truncate everything (maxLength of 0), so nothing is left to trim.
  1547. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1548. resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, 0);
  1549. Assert.assertTrue(resultLen == 0);
  1550. // Simple trims.
  1551. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  1552. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, 5);
  1553. Assert.assertTrue(resultLen == 3);
  1554. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
  1555. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  1556. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, 7);
  1557. Assert.assertTrue(resultLen == 5);
  1558. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
  1559. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  1560. resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, 2);
  1561. Assert.assertTrue(resultLen == 0);
  1562. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
  1563. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  1564. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, 30);
  1565. Assert.assertTrue(resultLen == blankRanges.length - 4);
  1566. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
  1567. // Offset trims.
  1568. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  1569. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, 4);
  1570. Assert.assertTrue(resultLen == 2);
  1571. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
  1572. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  1573. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, 3);
  1574. Assert.assertTrue(resultLen == 1);
  1575. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
  1576. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  1577. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, 2);
  1578. Assert.assertTrue(resultLen == 0);
  1579. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
  1580. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  1581. resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, 1);
  1582. Assert.assertTrue(resultLen == 0);
  1583. Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
  1584. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  1585. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 4, blankRanges.length - 4, 26);
  1586. Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
  1587. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  1588. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  1589. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, 24);
  1590. Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
  1591. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
  1592. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  1593. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, 23);
  1594. Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
  1595. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
  1596. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  1597. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, 1);
  1598. Assert.assertTrue(resultLen == 0);
  1599. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
  1600. // Multi-byte trims.
  1601. multiByte = new byte[100];
  1602. addMultiByteCharRightPadded1_1(multiByte);
  1603. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1604. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, 2);
  1605. Assert.assertTrue(resultLen == 3);
  1606. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  1607. addMultiByteCharRightPadded1_2(multiByte);
  1608. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1609. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, 3);
  1610. Assert.assertTrue(resultLen == 4);
  1611. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  1612. addMultiByteCharRightPadded1_3(multiByte);
  1613. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1614. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, 4);
  1615. Assert.assertTrue(resultLen == 8);
  1616. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
  1617. addMultiByteCharRightPadded1_1(multiByte);
  1618. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  1619. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, 1);
  1620. Assert.assertTrue(resultLen == 0);
  1621. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
  1622. addMultiByteCharRightPadded1_2(multiByte);
  1623. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  1624. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 2, 2);
  1625. Assert.assertTrue(resultLen == 1);
  1626. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  1627. sentenceOne = new byte[100];
  1628. sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  1629. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  1630. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, 10);
  1631. Assert.assertTrue(resultLen == sentenceOneLen);
  1632. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  1633. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, 9);
  1634. Assert.assertTrue(resultLen == sentenceOneLen - 3);
  1635. sentenceTwo = new byte[100];
  1636. sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  1637. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  1638. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, 13);
  1639. Assert.assertTrue(resultLen == sentenceTwoLen);
  1640. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  1641. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, 10);
  1642. Assert.assertTrue(resultLen == sentenceTwoLen - 5);
  1643. // Left pad longer strings with multi-byte characters.
  1644. sentenceOnePaddedLeft = new byte[100];
  1645. start = addPads(sentenceOnePaddedLeft, 0, 3);
  1646. sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  1647. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  1648. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 10);
  1649. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
  1650. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  1651. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 9);
  1652. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
  1653. sentenceTwoPaddedLeft = new byte[100];
  1654. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  1655. sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  1656. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  1657. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 2 + 13);
  1658. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
  1659. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  1660. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 2 + 10);
  1661. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
  1662. // Right pad longer strings with multi-byte characters.
  1663. sentenceOnePaddedRight = new byte[100];
  1664. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  1665. sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  1666. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  1667. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10 + 4);
  1668. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
  1669. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  1670. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 9);
  1671. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
  1672. sentenceTwoPaddedRight = new byte[100];
  1673. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  1674. sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  1675. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  1676. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13 + 1);
  1677. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
  1678. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  1679. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 10);
  1680. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
  1681. // Multi-byte characters with blank ranges.
  1682. sentenceBlankRanges = new byte[100];
  1683. sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  1684. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  1685. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 17);
  1686. Assert.assertTrue(resultLen == sentenceBlankRangesLen);
  1687. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  1688. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 16);
  1689. Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
  1690. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  1691. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, largeMaxLength);
  1692. Assert.assertTrue(resultLen == 12);
  1693. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
  1694. // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is
  1695. // less than the number of current characters in the string and thus affects the trim.
  1696. // Nothing to trim (ASCII).
  1697. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  1698. resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, 3);
  1699. Assert.assertTrue(resultLen == 3);
  1700. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3);
  1701. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  1702. resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, 6);
  1703. Assert.assertTrue(resultLen == 6);
  1704. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  1705. resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, 30);
  1706. Assert.assertTrue(resultLen == 30);
  1707. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1708. resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, 1);
  1709. Assert.assertTrue(resultLen == 0);
  1710. // Simple trims.
  1711. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  1712. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, 4);
  1713. Assert.assertTrue(resultLen == 3);
  1714. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
  1715. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  1716. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, 6);
  1717. Assert.assertTrue(resultLen == 5);
  1718. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
  1719. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  1720. resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, 1);
  1721. Assert.assertTrue(resultLen == 0);
  1722. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
  1723. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  1724. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, 19);
  1725. Assert.assertTrue(resultLen == 15);
  1726. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 15);
  1727. // Offset trims.
  1728. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  1729. resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, 3);
  1730. Assert.assertTrue(resultLen == 2);
  1731. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
  1732. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  1733. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, 2);
  1734. Assert.assertTrue(resultLen == 1);
  1735. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
  1736. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  1737. resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, 1);
  1738. Assert.assertTrue(resultLen == 0);
  1739. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
  1740. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  1741. resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, 1);
  1742. Assert.assertTrue(resultLen == 0);
  1743. Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
  1744. Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, 6) == 6);
  1745. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 3, 6, 5);
  1746. Assert.assertTrue(resultLen == 4);
  1747. Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, resultLen) == 4);
  1748. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  1749. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, 22);
  1750. Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
  1751. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
  1752. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  1753. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, 10);
  1754. Assert.assertTrue(resultLen == 8);
  1755. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 8);
  1756. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  1757. resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, 1);
  1758. Assert.assertTrue(resultLen == 0);
  1759. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
  1760. // Multi-byte trims.
  1761. multiByte = new byte[100];
  1762. addMultiByteCharRightPadded1_1(multiByte);
  1763. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1764. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, 1);
  1765. Assert.assertTrue(resultLen == 3);
  1766. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  1767. addMultiByteCharRightPadded1_2(multiByte);
  1768. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1769. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, 2);
  1770. Assert.assertTrue(resultLen == 4);
  1771. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  1772. addMultiByteCharRightPadded1_3(multiByte);
  1773. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1774. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, 3);
  1775. Assert.assertTrue(resultLen == 8);
  1776. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
  1777. addMultiByteCharRightPadded1_1(multiByte);
  1778. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  1779. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, 1);
  1780. Assert.assertTrue(resultLen == 0);
  1781. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
  1782. addMultiByteCharRightPadded1_2(multiByte);
  1783. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  1784. resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 2, 1);
  1785. Assert.assertTrue(resultLen == 1);
  1786. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  1787. sentenceOne = new byte[100];
  1788. sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  1789. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  1790. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, 7);
  1791. Assert.assertTrue(resultLen == sentenceOneLen - 9);
  1792. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  1793. resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, 6);
  1794. Assert.assertTrue(resultLen == 13);
  1795. sentenceTwo = new byte[100];
  1796. sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  1797. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  1798. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, 13);
  1799. Assert.assertTrue(resultLen == sentenceTwoLen);
  1800. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  1801. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, 10);
  1802. Assert.assertTrue(resultLen == sentenceTwoLen - 5);
  1803. // Left pad longer strings with multi-byte characters.
  1804. sentenceOnePaddedLeft = new byte[100];
  1805. start = addPads(sentenceOnePaddedLeft, 0, 3);
  1806. sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  1807. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  1808. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 8);
  1809. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 6);
  1810. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  1811. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 6);
  1812. Assert.assertTrue(resultLen == 16);
  1813. sentenceTwoPaddedLeft = new byte[100];
  1814. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  1815. sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  1816. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  1817. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 7);
  1818. Assert.assertTrue(resultLen == 10);
  1819. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  1820. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 6);
  1821. Assert.assertTrue(resultLen == 10);
  1822. // Right pad longer strings with multi-byte characters.
  1823. sentenceOnePaddedRight = new byte[100];
  1824. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  1825. sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  1826. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  1827. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10);
  1828. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
  1829. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  1830. resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 7);
  1831. Assert.assertTrue(resultLen == 17);
  1832. sentenceTwoPaddedRight = new byte[100];
  1833. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  1834. sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  1835. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  1836. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13);
  1837. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
  1838. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  1839. resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 4);
  1840. Assert.assertTrue(resultLen == 8);
  1841. // Multi-byte characters with blank ranges.
  1842. sentenceBlankRanges = new byte[100];
  1843. sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  1844. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  1845. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 4);
  1846. Assert.assertTrue(resultLen == 7);
  1847. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  1848. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 6);
  1849. Assert.assertTrue(resultLen == 11);
  1850. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, 12) == 8);
  1851. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 4, 12, 6);
  1852. Assert.assertTrue(resultLen == 7);
  1853. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, resultLen) == 5);
  1854. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  1855. resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, 11);
  1856. Assert.assertTrue(resultLen == 12);
  1857. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
  1858. }
  1859. @Test
  1860. // Test basic right trim and truncate to vector.
  1861. public void testRightTrimAndTruncateBytesColumnVector() {
  1862. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  1863. outV.initBuffer(35); // initialize with estimated element size 35
  1864. int i = 0;
  1865. // This first section repeats the tests of testRightTrimWithOffset with a large maxLength parameter.
  1866. // (i.e. too large to have an effect).
  1867. int largeMaxLength = 100;
  1868. int expectedResultLen;
  1869. // Nothing to trim (ASCII).
  1870. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  1871. StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, largeMaxLength);
  1872. expectedResultLen = blue.length;
  1873. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  1874. i++;
  1875. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  1876. StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, largeMaxLength);
  1877. expectedResultLen = redgreen.length;
  1878. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  1879. i++;
  1880. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  1881. StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
  1882. expectedResultLen = ascii_sentence.length;
  1883. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  1884. i++;
  1885. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  1886. StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, largeMaxLength);
  1887. expectedResultLen = blanksLeft.length;
  1888. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  1889. i++;
  1890. // Simple trims.
  1891. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  1892. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, largeMaxLength);
  1893. expectedResultLen = 3;
  1894. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
  1895. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  1896. i++;
  1897. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  1898. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, largeMaxLength);
  1899. expectedResultLen = 5;
  1900. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  1901. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  1902. i++;
  1903. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  1904. StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, largeMaxLength);
  1905. expectedResultLen = 0;
  1906. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  1907. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  1908. i++;
  1909. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  1910. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, largeMaxLength);
  1911. expectedResultLen = blankRanges.length - 4;
  1912. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  1913. Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
  1914. i++;
  1915. // Offset trims.
  1916. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  1917. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, largeMaxLength);
  1918. expectedResultLen = 2;
  1919. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  1920. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1921. i++;
  1922. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  1923. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, largeMaxLength);
  1924. expectedResultLen = 1;
  1925. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  1926. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1927. i++;
  1928. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  1929. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, largeMaxLength);
  1930. expectedResultLen = 0;
  1931. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  1932. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  1933. i++;
  1934. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  1935. StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, largeMaxLength);
  1936. expectedResultLen = 0;
  1937. Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
  1938. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  1939. i++;
  1940. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  1941. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 4, blankRanges.length - 4, largeMaxLength);
  1942. expectedResultLen = blankRanges.length - 4 -4;
  1943. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
  1944. Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
  1945. i++;
  1946. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  1947. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, largeMaxLength);
  1948. expectedResultLen = blankRanges.length - 6 - 4;
  1949. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  1950. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  1951. i++;
  1952. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  1953. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, largeMaxLength);
  1954. expectedResultLen = blankRanges.length - 7 - 4;
  1955. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  1956. Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
  1957. i++;
  1958. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  1959. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, largeMaxLength);
  1960. expectedResultLen = 0;
  1961. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  1962. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  1963. i++;
  1964. // Multi-byte trims.
  1965. byte[] multiByte = new byte[100];
  1966. addMultiByteCharRightPadded1_1(multiByte);
  1967. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  1968. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, largeMaxLength);
  1969. expectedResultLen = 3;
  1970. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1971. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  1972. i++;
  1973. addMultiByteCharRightPadded1_2(multiByte);
  1974. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  1975. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, largeMaxLength);
  1976. expectedResultLen = 4;
  1977. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1978. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  1979. i++;
  1980. addMultiByteCharRightPadded1_3(multiByte);
  1981. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  1982. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, largeMaxLength);
  1983. expectedResultLen = 8;
  1984. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1985. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  1986. i++;
  1987. addMultiByteCharRightPadded1_1(multiByte);
  1988. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  1989. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, largeMaxLength);
  1990. expectedResultLen = 0;
  1991. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  1992. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  1993. i++;
  1994. addMultiByteCharRightPadded1_2(multiByte);
  1995. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  1996. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, largeMaxLength);
  1997. expectedResultLen = 1;
  1998. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  1999. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2000. i++;
  2001. byte[] sentenceOne = new byte[100];
  2002. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  2003. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  2004. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, largeMaxLength);
  2005. expectedResultLen = sentenceOneLen;
  2006. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2007. i++;
  2008. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  2009. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, largeMaxLength);
  2010. expectedResultLen = sentenceOneLen - 3;
  2011. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2012. i++;
  2013. byte[] sentenceTwo = new byte[100];
  2014. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  2015. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  2016. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, largeMaxLength);
  2017. expectedResultLen = sentenceTwoLen;
  2018. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2019. i++;
  2020. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  2021. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, largeMaxLength);
  2022. expectedResultLen = sentenceTwoLen - 5;
  2023. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2024. i++;
  2025. int start;
  2026. // Left pad longer strings with multi-byte characters.
  2027. byte[] sentenceOnePaddedLeft = new byte[100];
  2028. start = addPads(sentenceOnePaddedLeft, 0, 3);
  2029. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  2030. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  2031. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, largeMaxLength);
  2032. expectedResultLen = sentenceOnePaddedLeftLen;
  2033. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2034. i++;
  2035. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  2036. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, largeMaxLength);
  2037. expectedResultLen = sentenceOnePaddedLeftLen - 3;
  2038. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2039. i++;
  2040. byte[] sentenceTwoPaddedLeft = new byte[100];
  2041. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  2042. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  2043. i++;
  2044. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  2045. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, largeMaxLength);
  2046. expectedResultLen = sentenceTwoPaddedLeftLen;
  2047. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2048. i++;
  2049. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  2050. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, largeMaxLength);
  2051. expectedResultLen = sentenceTwoPaddedLeftLen - 5;
  2052. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2053. i++;
  2054. // Right pad longer strings with multi-byte characters.
  2055. byte[] sentenceOnePaddedRight = new byte[100];
  2056. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  2057. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  2058. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  2059. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, largeMaxLength);
  2060. expectedResultLen = sentenceOnePaddedRightLen - 4;
  2061. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2062. i++;
  2063. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  2064. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, largeMaxLength);
  2065. expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
  2066. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2067. i++;
  2068. byte[] sentenceTwoPaddedRight = new byte[100];
  2069. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  2070. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  2071. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  2072. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, largeMaxLength);
  2073. expectedResultLen = sentenceTwoPaddedRightLen - 1;
  2074. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2075. i++;
  2076. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  2077. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, largeMaxLength);
  2078. expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
  2079. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2080. i++;
  2081. // Multi-byte characters with blank ranges.
  2082. byte[] sentenceBlankRanges = new byte[100];
  2083. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  2084. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  2085. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, largeMaxLength);
  2086. expectedResultLen = sentenceBlankRangesLen;
  2087. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2088. i++;
  2089. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  2090. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, largeMaxLength);
  2091. expectedResultLen = sentenceBlankRangesLen - 3;
  2092. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2093. i++;
  2094. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  2095. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, largeMaxLength);
  2096. expectedResultLen = 12;
  2097. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  2098. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  2099. i++;
  2100. // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is
  2101. // exactly the number of current characters in the string. This shouldn't affect the trim.
  2102. // Nothing to trim (ASCII).
  2103. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  2104. StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, 4);
  2105. expectedResultLen = blue.length;
  2106. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  2107. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  2108. i++;
  2109. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  2110. StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, 8);
  2111. expectedResultLen = redgreen.length;
  2112. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  2113. i++;
  2114. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  2115. StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 31);
  2116. expectedResultLen = ascii_sentence.length;
  2117. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  2118. i++;
  2119. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  2120. StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, 5);
  2121. expectedResultLen = blanksLeft.length;
  2122. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  2123. i++;
  2124. // Simple trims.
  2125. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  2126. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, 5);
  2127. expectedResultLen = 3;
  2128. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
  2129. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  2130. i++;
  2131. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  2132. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, 7);
  2133. expectedResultLen = 5;
  2134. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  2135. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  2136. i++;
  2137. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  2138. StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, 2);
  2139. expectedResultLen = 0;
  2140. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  2141. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2142. i++;
  2143. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  2144. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, 30);
  2145. expectedResultLen = blankRanges.length - 4;
  2146. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  2147. Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
  2148. i++;
  2149. // Offset trims.
  2150. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  2151. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, 4);
  2152. expectedResultLen = 2;
  2153. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  2154. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  2155. i++;
  2156. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  2157. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 3);
  2158. expectedResultLen = 1;
  2159. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  2160. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2161. i++;
  2162. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  2163. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 2);
  2164. expectedResultLen = 0;
  2165. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  2166. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2167. i++;
  2168. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  2169. StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, 1);
  2170. expectedResultLen = 0;
  2171. Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
  2172. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2173. i++;
  2174. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  2175. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 4, blankRanges.length - 4, 26);
  2176. expectedResultLen = blankRanges.length - 4 -4;
  2177. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
  2178. Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
  2179. i++;
  2180. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  2181. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, 24);
  2182. expectedResultLen = blankRanges.length - 6 - 4;
  2183. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  2184. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  2185. i++;
  2186. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  2187. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, 23);
  2188. expectedResultLen = blankRanges.length - 7 - 4;
  2189. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  2190. Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
  2191. i++;
  2192. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  2193. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, 1);
  2194. expectedResultLen = 0;
  2195. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  2196. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2197. i++;
  2198. // Multi-byte trims.
  2199. multiByte = new byte[100];
  2200. addMultiByteCharRightPadded1_1(multiByte);
  2201. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  2202. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, 2);
  2203. expectedResultLen = 3;
  2204. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2205. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2206. i++;
  2207. addMultiByteCharRightPadded1_2(multiByte);
  2208. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  2209. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, 3);
  2210. expectedResultLen = 4;
  2211. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2212. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  2213. i++;
  2214. addMultiByteCharRightPadded1_3(multiByte);
  2215. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  2216. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, 4);
  2217. expectedResultLen = 8;
  2218. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2219. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  2220. i++;
  2221. addMultiByteCharRightPadded1_1(multiByte);
  2222. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  2223. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, 1);
  2224. expectedResultLen = 0;
  2225. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  2226. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2227. i++;
  2228. addMultiByteCharRightPadded1_2(multiByte);
  2229. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  2230. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, 2);
  2231. expectedResultLen = 1;
  2232. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  2233. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2234. i++;
  2235. sentenceOne = new byte[100];
  2236. sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  2237. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  2238. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, 10);
  2239. expectedResultLen = sentenceOneLen;
  2240. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2241. i++;
  2242. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  2243. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 9);
  2244. expectedResultLen = sentenceOneLen - 3;
  2245. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2246. i++;
  2247. sentenceTwo = new byte[100];
  2248. sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  2249. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  2250. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 13);
  2251. expectedResultLen = sentenceTwoLen;
  2252. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2253. i++;
  2254. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  2255. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 10);
  2256. expectedResultLen = sentenceTwoLen - 5;
  2257. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2258. i++;
  2259. // Left pad longer strings with multi-byte characters.
  2260. sentenceOnePaddedLeft = new byte[100];
  2261. start = addPads(sentenceOnePaddedLeft, 0, 3);
  2262. sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  2263. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  2264. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 10);
  2265. expectedResultLen = sentenceOnePaddedLeftLen;
  2266. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2267. i++;
  2268. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  2269. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 9);
  2270. expectedResultLen = sentenceOnePaddedLeftLen - 3;
  2271. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2272. i++;
  2273. sentenceTwoPaddedLeft = new byte[100];
  2274. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  2275. sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  2276. i++;
  2277. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  2278. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 2 + 13);
  2279. expectedResultLen = sentenceTwoPaddedLeftLen;
  2280. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2281. i++;
  2282. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  2283. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 2 + 10);
  2284. expectedResultLen = sentenceTwoPaddedLeftLen - 5;
  2285. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2286. i++;
  2287. // Right pad longer strings with multi-byte characters.
  2288. sentenceOnePaddedRight = new byte[100];
  2289. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  2290. sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  2291. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  2292. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10 + 4);
  2293. expectedResultLen = sentenceOnePaddedRightLen - 4;
  2294. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2295. i++;
  2296. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  2297. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 9);
  2298. expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
  2299. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2300. i++;
  2301. sentenceTwoPaddedRight = new byte[100];
  2302. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  2303. sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  2304. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  2305. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13 + 1);
  2306. expectedResultLen = sentenceTwoPaddedRightLen - 1;
  2307. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2308. i++;
  2309. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  2310. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 10);
  2311. expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
  2312. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2313. i++;
  2314. // Multi-byte characters with blank ranges.
  2315. sentenceBlankRanges = new byte[100];
  2316. sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  2317. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  2318. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 17);
  2319. expectedResultLen = sentenceBlankRangesLen;
  2320. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2321. i++;
  2322. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  2323. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 16);
  2324. expectedResultLen = sentenceBlankRangesLen - 3;
  2325. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2326. i++;
  2327. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  2328. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, largeMaxLength);
  2329. expectedResultLen = 12;
  2330. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  2331. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  2332. i++;
  2333. // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is
  2334. // less than the number of current characters in the string and thus affects the trim.
  2335. // Inputs with nothing to trim at full length (ASCII), now cut short by maxLength.
  2336. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  2337. StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, 3);
  2338. expectedResultLen = 3;
  2339. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  2340. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  2341. i++;
  2342. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  2343. StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, 6);
  2344. expectedResultLen = 6;
  2345. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  2346. i++;
  2347. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  2348. StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 30);
  2349. expectedResultLen = 30;
  2350. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  2351. i++;
  2352. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  2353. StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, 1);
  2354. expectedResultLen = 0;
  2355. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  2356. i++;
  2357. // Simple trims.
  2358. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  2359. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, 4);
  2360. expectedResultLen = 3;
  2361. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
  2362. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  2363. i++;
  2364. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  2365. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, 6);
  2366. expectedResultLen = 5;
  2367. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  2368. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  2369. i++;
  2370. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  2371. StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, 1);
  2372. expectedResultLen = 0;
  2373. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  2374. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2375. i++;
  2376. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  2377. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, 19);
  2378. expectedResultLen = 15;
  2379. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  2380. Assert.assertTrue(vectorCharacterCount(outV, i) == 15);
  2381. i++;
  2382. // Offset trims.
  2383. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  2384. StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, 3);
  2385. expectedResultLen = 2;
  2386. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  2387. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  2388. i++;
  2389. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  2390. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 2);
  2391. expectedResultLen = 1;
  2392. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  2393. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2394. i++;
  2395. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  2396. StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 1);
  2397. expectedResultLen = 0;
  2398. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  2399. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2400. i++;
  2401. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  2402. StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, 1);
  2403. expectedResultLen = 0;
  2404. Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
  2405. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2406. i++;
  2407. Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, 6) == 6);
  2408. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 3, 6, 5);
  2409. expectedResultLen = 4;
  2410. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 3, expectedResultLen));
  2411. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  2412. i++;
  2413. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  2414. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, 22);
  2415. expectedResultLen = blankRanges.length - 6 - 4;
  2416. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  2417. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  2418. i++;
  2419. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  2420. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, 10);
  2421. expectedResultLen = 8;
  2422. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  2423. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  2424. i++;
  2425. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  2426. StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, 1);
  2427. expectedResultLen = 0;
  2428. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  2429. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2430. i++;
  2431. // Multi-byte trims.
  2432. multiByte = new byte[100];
  2433. addMultiByteCharRightPadded1_1(multiByte);
  2434. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  2435. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, 1);
  2436. expectedResultLen = 3;
  2437. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2438. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2439. i++;
  2440. addMultiByteCharRightPadded1_2(multiByte);
  2441. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  2442. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, 2);
  2443. expectedResultLen = 4;
  2444. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2445. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  2446. i++;
  2447. addMultiByteCharRightPadded1_3(multiByte);
  2448. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  2449. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, 3);
  2450. expectedResultLen = 8;
  2451. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  2452. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  2453. i++;
  2454. addMultiByteCharRightPadded1_1(multiByte);
  2455. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  2456. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, 1);
  2457. expectedResultLen = 0;
  2458. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  2459. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  2460. i++;
  2461. addMultiByteCharRightPadded1_2(multiByte);
  2462. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  2463. StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, 1);
  2464. expectedResultLen = 1;
  2465. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  2466. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  2467. i++;
  2468. sentenceOne = new byte[100];
  2469. sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  2470. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  2471. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, 7);
  2472. expectedResultLen = sentenceOneLen - 9;
  2473. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2474. i++;
  2475. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  2476. StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 6);
  2477. expectedResultLen = 13;
  2478. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  2479. i++;
  2480. sentenceTwo = new byte[100];
  2481. sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  2482. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  2483. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 13);
  2484. expectedResultLen = sentenceTwoLen;
  2485. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2486. i++;
  2487. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  2488. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 10);
  2489. expectedResultLen = sentenceTwoLen - 5;
  2490. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  2491. i++;
  2492. // Left pad longer strings with multi-byte characters.
  2493. sentenceOnePaddedLeft = new byte[100];
  2494. start = addPads(sentenceOnePaddedLeft, 0, 3);
  2495. sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  2496. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  2497. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 8);
  2498. expectedResultLen = sentenceOnePaddedLeftLen - 6;
  2499. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2500. i++;
  2501. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  2502. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 6);
  2503. expectedResultLen = 16;
  2504. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  2505. i++;
  2506. sentenceTwoPaddedLeft = new byte[100];
  2507. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  2508. sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  2509. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  2510. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 7);
  2511. expectedResultLen = 10;
  2512. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2513. i++;
  2514. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  2515. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 6);
  2516. expectedResultLen = 10;
  2517. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  2518. i++;
  2519. // Right pad longer strings with multi-byte characters.
  2520. sentenceOnePaddedRight = new byte[100];
  2521. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  2522. sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  2523. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  2524. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10);
  2525. expectedResultLen = sentenceOnePaddedRightLen - 4;
  2526. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2527. i++;
  2528. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  2529. StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 7);
  2530. expectedResultLen = 17;
  2531. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  2532. i++;
  2533. sentenceTwoPaddedRight = new byte[100];
  2534. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  2535. sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  2536. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  2537. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13);
  2538. expectedResultLen = sentenceTwoPaddedRightLen - 1;
  2539. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2540. i++;
  2541. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  2542. StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 4);
  2543. expectedResultLen = 8;
  2544. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  2545. i++;
  2546. // Multi-byte characters with blank ranges.
  2547. sentenceBlankRanges = new byte[100];
  2548. sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  2549. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  2550. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 4);
  2551. expectedResultLen = 7;
  2552. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2553. i++;
  2554. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  2555. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 6);
  2556. expectedResultLen = 11;
  2557. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  2558. i++;
  2559. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, 12) == 8);
  2560. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 4, 12, 6);
  2561. expectedResultLen = 7;
  2562. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen));
  2563. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  2564. i++;
  2565. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  2566. StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, 11);
  2567. expectedResultLen = 12;
  2568. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  2569. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  2570. i++;
  2571. }
  2572. @Test
  2573. // Test basic truncate to vector.
  2574. public void testRightTrimAndTruncateScalar() {
  2575. int largeMaxLength = 100;
  2576. byte[] result;
  2577. // No truncate (ASCII) -- maximum length large.
  2578. Assert.assertTrue(StringExpr.characterCount(blue) == 4);
  2579. result = StringExpr.rightTrimAndTruncateScalar(blue, largeMaxLength);
  2580. Assert.assertTrue(Arrays.equals(blue, result));
  2581. Assert.assertTrue(StringExpr.characterCount(redgreen) == 8);
  2582. result = StringExpr.rightTrimAndTruncateScalar(redgreen, largeMaxLength);
  2583. Assert.assertTrue(Arrays.equals(redgreen, result));
  2584. Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31);
  2585. result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, largeMaxLength);
  2586. Assert.assertTrue(Arrays.equals(ascii_sentence, result));
  2587. Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5);
  2588. result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, largeMaxLength);
  2589. Assert.assertTrue(Arrays.equals(blanksLeft, result));
  2590. // No truncate (ASCII) -- same maximum length.
  2591. Assert.assertTrue(StringExpr.characterCount(blue) == 4);
  2592. result = StringExpr.rightTrimAndTruncateScalar(blue, blue.length);
  2593. Assert.assertTrue(Arrays.equals(blue, result));
  2594. Assert.assertTrue(StringExpr.characterCount(redgreen) == 8);
  2595. result = StringExpr.rightTrimAndTruncateScalar(redgreen, redgreen.length);
  2596. Assert.assertTrue(Arrays.equals(redgreen, result));
  2597. Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31);
  2598. result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, ascii_sentence.length);
  2599. Assert.assertTrue(Arrays.equals(ascii_sentence, result));
  2600. Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5);
  2601. result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, blanksLeft.length);
  2602. Assert.assertTrue(Arrays.equals(blanksLeft, result));
  2603. // Simple truncation.
  2604. result = StringExpr.rightTrimAndTruncateScalar(blue, 3);
  2605. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blue, 3), result));
  2606. result = StringExpr.rightTrimAndTruncateScalar(redgreen, 6);
  2607. Assert.assertTrue(Arrays.equals(Arrays.copyOf(redgreen, 6), result));
  2608. result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, 14);
  2609. Assert.assertTrue(Arrays.equals(Arrays.copyOf(ascii_sentence, 14), result));
  2610. result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, 2);
  2611. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksLeft, 0), result));
  2612. result = StringExpr.rightTrimAndTruncateScalar(blanksRight, 4);
  2613. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksRight, 3), result));
  2614. result = StringExpr.rightTrimAndTruncateScalar(blanksBoth, 2);
  2615. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksBoth, 0), result));
  2616. result = StringExpr.rightTrimAndTruncateScalar(blankString, 1);
  2617. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankString, 0), result));
  2618. result = StringExpr.rightTrimAndTruncateScalar(blankRanges, 29);
  2619. Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankRanges, 26), result));
  2620. // Multi-byte truncation.
  2621. byte[] scratch = new byte[100];
  2622. byte[] multiByte;
  2623. addMultiByteCharRightPadded1_1(scratch);
  2624. multiByte = Arrays.copyOf(scratch, 4);
  2625. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  2626. result = StringExpr.rightTrimAndTruncateScalar(multiByte, 1);
  2627. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 3), result));
  2628. addMultiByteCharRightPadded1_2(scratch);
  2629. multiByte = Arrays.copyOf(scratch, 5);
  2630. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  2631. result = StringExpr.rightTrimAndTruncateScalar(multiByte, 2);
  2632. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 4), result));
  2633. addMultiByteCharRightPadded1_3(scratch);
  2634. multiByte = Arrays.copyOf(scratch, 9);
  2635. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  2636. result = StringExpr.rightTrimAndTruncateScalar(multiByte, 2);
  2637. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 5), result));
  2638. addMultiByteCharRightPadded1_2(scratch);
  2639. multiByte = Arrays.copyOfRange(scratch, 3, 3 + 2);
  2640. Assert.assertTrue(StringExpr.characterCount(multiByte) == 2);
  2641. result = StringExpr.rightTrimAndTruncateScalar(multiByte, 1);
  2642. Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 1), result));
  2643. int sentenceOneLen = addMultiByteCharSentenceOne(scratch, 0);
  2644. byte[] sentenceOne = Arrays.copyOf(scratch, sentenceOneLen);
  2645. Assert.assertTrue(StringExpr.characterCount(sentenceOne) == 10);
  2646. result = StringExpr.rightTrimAndTruncateScalar(sentenceOne, 8);
  2647. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOne, 20), result));
  2648. byte[] sentenceOnePortion = Arrays.copyOf(sentenceOne, sentenceOneLen - 3);
  2649. Assert.assertTrue(StringExpr.characterCount(sentenceOnePortion) == 9);
  2650. result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePortion, 3);
  2651. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePortion, 9), result));
  2652. int sentenceTwoLen = addMultiByteCharSentenceTwo(scratch, 0);
  2653. byte[] sentenceTwo = Arrays.copyOf(scratch, sentenceTwoLen);
  2654. Assert.assertTrue(StringExpr.characterCount(sentenceTwo) == 13);
  2655. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwo, 9);
  2656. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwo, 16), result));
  2657. byte[] sentenceTwoPortion = Arrays.copyOf(sentenceTwo, sentenceTwoLen - 5);
  2658. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPortion) == 10);
  2659. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPortion, 6);
  2660. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPortion, 11), result));
  2661. int start;
  2662. // Left pad longer strings with multi-byte characters.
  2663. start = addPads(scratch, 0, 3);
  2664. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(scratch, start);
  2665. byte[] sentenceOnePaddedLeft = Arrays.copyOf(scratch, sentenceOnePaddedLeftLen);
  2666. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft) == 3 + 10);
  2667. result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedLeft, 4);
  2668. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeft, 6), result));
  2669. byte[] sentenceOnePaddedLeftPortion = Arrays.copyOf(sentenceOnePaddedLeft, sentenceOnePaddedLeftLen - 3);
  2670. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeftPortion) == 3 + 9);
  2671. result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedLeftPortion, 7);
  2672. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeftPortion, 12), result));
  2673. start = addPads(scratch, 0, 2);
  2674. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(scratch, start);
  2675. byte[] sentenceTwoPaddedLeft = Arrays.copyOf(scratch, sentenceTwoPaddedLeftLen);
  2676. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft) == 2 + 13);
  2677. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedLeft, 14);
  2678. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeft, 24), result));
  2679. byte[] sentenceTwoPaddedLeftPortion = Arrays.copyOf(sentenceTwoPaddedLeft, sentenceTwoPaddedLeftLen - 5);
  2680. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeftPortion) == 2 + 10);
  2681. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedLeftPortion, 9);
  2682. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeftPortion, 15), result));
  2683. // Right pad longer strings with multi-byte characters.
  2684. start = addMultiByteCharSentenceOne(scratch, 0);
  2685. int sentenceOnePaddedRightLen = addPads(scratch, start, 4);
  2686. byte[] sentenceOnePaddedRight = Arrays.copyOf(scratch, sentenceOnePaddedRightLen);
  2687. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight) == 10 + 4);
  2688. result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedRight, 1);
  2689. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRight, 3), result));
  2690. byte[] sentenceOnePaddedRightPortion = Arrays.copyOf(sentenceOnePaddedRight, sentenceOnePaddedRightLen - 3 - 4);
  2691. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRightPortion) == 9);
  2692. result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedRightPortion, 5);
  2693. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRightPortion, 13), result));
  2694. start = addMultiByteCharSentenceTwo(scratch, 0);
  2695. int sentenceTwoPaddedRightLen = addPads(scratch, start, 1);
  2696. byte[] sentenceTwoPaddedRight = Arrays.copyOf(scratch, sentenceTwoPaddedRightLen);
  2697. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight) == 13 + 1);
  2698. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedRight, 6);
  2699. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRight, 11), result));
  2700. byte[] sentenceTwoPaddedRightPortion = Arrays.copyOf(sentenceTwoPaddedRight, sentenceTwoPaddedRightLen - 5 - 1);
  2701. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRightPortion) == 10);
  2702. result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedRightPortion, 8);
  2703. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRightPortion, 13), result));
  2704. // Multi-byte characters with blank ranges.
  2705. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(scratch, 0);
  2706. byte[] sentenceBlankRanges = Arrays.copyOf(scratch, sentenceBlankRangesLen);
  2707. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges) == 17);
  2708. result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRanges, 4);
  2709. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRanges, 7), result));
  2710. byte[] sentenceBlankRangesPortion = Arrays.copyOf(sentenceBlankRanges, sentenceBlankRangesLen - 3);
  2711. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 16);
  2712. result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRangesPortion, 14);
  2713. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 19), result));
  2714. sentenceBlankRangesPortion = Arrays.copyOfRange(sentenceBlankRanges, 7, 7 + 17);
  2715. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 13);
  2716. result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRangesPortion, 11);
  2717. Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 12), result));
  2718. Assert.assertTrue(StringExpr.characterCount(result) == 8);
  2719. }
  2720. @Test
  2721. // Load a BytesColumnVector by copying in large data, enough to force
  2722. // the buffer to expand.
  2723. public void testLoadBytesColumnVectorByValueLargeData() {
  2724. BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  2725. bcv.initBuffer(10); // initialize with estimated element size 10
  2726. // Record initial buffer size
  2727. int initialBufferSize = bcv.bufferSize();
  2728. String s = "0123456789";
  2729. while (s.length() < 500) {
  2730. s += s;
  2731. }
  2732. byte[] b = s.getBytes(StandardCharsets.UTF_8);
  2733. for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
  2734. bcv.setVal(i, b, 0, b.length);
  2735. }
  2736. // Current buffer size should be larger than initial size
  2737. Assert.assertTrue(bcv.bufferSize() > initialBufferSize);
  2738. }
  2739. @Test
  2740. // set values by reference, copy the data out, and verify equality
  2741. public void testLoadBytesColumnVectorByRef() {
  2742. BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  2743. String s = "red";
  2744. byte[] b = s.getBytes(StandardCharsets.UTF_8);
  2745. for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
  2746. bcv.setRef(i, b, 0, b.length);
  2747. }
  2748. // verify
  2749. byte[] v = new byte[b.length];
  2750. for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
  2751. Assert.assertTrue(bcv.length[i] == b.length);
  2752. System.arraycopy(bcv.vector[i], bcv.start[i], v, 0, b.length);
  2753. Assert.assertTrue(Arrays.equals(b, v));
  2754. }
  2755. }
  2756. @Test
  2757. // Test string column to string literal comparison
  2758. public void testStringColCompareStringScalarFilter() throws HiveException {
  2759. VectorizedRowBatch batch = makeStringBatch();
  2760. VectorExpression expr;
  2761. expr = new FilterStringGroupColEqualStringScalar(0, red2);
  2762. expr.evaluate(batch);
  2763. // only red qualifies, and it's in entry 0
  2764. Assert.assertTrue(batch.size == 1);
  2765. Assert.assertTrue(batch.selected[0] == 0);
  2766. batch = makeStringBatch();
  2767. expr = new FilterStringGroupColLessStringScalar(0, red2);
  2768. expr.evaluate(batch);
  2769. // only green qualifies, and it's in entry 1
  2770. Assert.assertTrue(batch.size == 1);
  2771. Assert.assertTrue(batch.selected[0] == 1);
  2772. batch = makeStringBatch();
  2773. expr = new FilterStringGroupColGreaterEqualStringScalar(0, green);
  2774. expr.evaluate(batch);
  2775. // green and red qualify
  2776. Assert.assertTrue(batch.size == 2);
  2777. Assert.assertTrue(batch.selected[0] == 0);
  2778. Assert.assertTrue(batch.selected[1] == 1);
  2779. }
  2780. @Test
  2781. // Test string column to CHAR literal comparison
  2782. public void testStringColCompareCharScalarFilter() throws HiveException {
  2783. VectorizedRowBatch batch = makeStringBatch();
  2784. VectorExpression expr;
  2785. expr =
  2786. new FilterStringGroupColEqualCharScalar(
  2787. 0, new HiveChar(new String(red2), 10).getStrippedValue().getBytes());
  2788. expr.evaluate(batch);
  2789. // only red qualifies, and it's in entry 0
  2790. Assert.assertTrue(batch.size == 1);
  2791. Assert.assertTrue(batch.selected[0] == 0);
  2792. batch = makeStringBatch();
  2793. expr =
  2794. new FilterStringGroupColLessCharScalar(
  2795. 0, new HiveChar(new String(red2), 8).getStrippedValue().getBytes());
  2796. expr.evaluate(batch);
  2797. // only green qualifies, and it's in entry 1
  2798. Assert.assertTrue(batch.size == 1);
  2799. Assert.assertTrue(batch.selected[0] == 1);
  2800. batch = makeStringBatch();
  2801. expr =
  2802. new FilterStringGroupColGreaterEqualCharScalar(
  2803. 0, new HiveChar(new String(green), 12).getStrippedValue().getBytes());
  2804. expr.evaluate(batch);
  2805. // green and red qualify
  2806. Assert.assertTrue(batch.size == 2);
  2807. Assert.assertTrue(batch.selected[0] == 0);
  2808. Assert.assertTrue(batch.selected[1] == 1);
  2809. }
  2810. @Test
  2811. // Test string column to VARCHAR literal comparison
  2812. public void testStringColCompareVarCharScalarFilter() throws HiveException {
  2813. VectorizedRowBatch batch = makeStringBatch();
  2814. VectorExpression expr;
  2815. expr =
  2816. new FilterStringGroupColEqualVarCharScalar(
  2817. 0, new HiveVarchar(new String(red2), 10).getValue().getBytes());
  2818. expr.evaluate(batch);
  2819. // only red qualifies, and it's in entry 0
  2820. Assert.assertTrue(batch.size == 1);
  2821. Assert.assertTrue(batch.selected[0] == 0);
  2822. batch = makeStringBatch();
  2823. expr =
  2824. new FilterStringGroupColLessVarCharScalar(
  2825. 0, new HiveVarchar(new String(red2), 8).getValue().getBytes());
  2826. expr.evaluate(batch);
  2827. // only green qualifies, and it's in entry 1
  2828. Assert.assertTrue(batch.size == 1);
  2829. Assert.assertTrue(batch.selected[0] == 1);
  2830. batch = makeStringBatch();
  2831. expr =
  2832. new FilterStringGroupColGreaterEqualVarCharScalar(
  2833. 0, new HiveVarchar(new String(green), 12).getValue().getBytes());
  2834. expr.evaluate(batch);
  2835. // green and red qualify
  2836. Assert.assertTrue(batch.size == 2);
  2837. Assert.assertTrue(batch.selected[0] == 0);
  2838. Assert.assertTrue(batch.selected[1] == 1);
  2839. }
  2840. @Test
  2841. public void testStringColCompareStringScalarProjection() throws HiveException {
  2842. VectorizedRowBatch batch = makeStringBatch();
  2843. VectorExpression expr;
  2844. expr = new StringGroupColEqualStringScalar(0, red2, 2);
  2845. expr.evaluate(batch);
  2846. Assert.assertEquals(3, batch.size);
  2847. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  2848. Assert.assertEquals(1, outVector.vector[0]);
  2849. Assert.assertEquals(0, outVector.vector[1]);
  2850. Assert.assertEquals(0, outVector.vector[2]);
  2851. batch = makeStringBatch();
  2852. expr = new StringGroupColEqualStringScalar(0, green, 2);
  2853. expr.evaluate(batch);
  2854. Assert.assertEquals(3, batch.size);
  2855. outVector = (LongColumnVector) batch.cols[2];
  2856. Assert.assertEquals(0, outVector.vector[0]);
  2857. Assert.assertEquals(1, outVector.vector[1]);
  2858. Assert.assertEquals(0, outVector.vector[2]);
  2859. }
  2860. @Test
  2861. public void testStringColCompareCharScalarProjection() throws HiveException {
  2862. VectorizedRowBatch batch = makeStringBatch();
  2863. VectorExpression expr;
  2864. expr =
  2865. new StringGroupColEqualCharScalar(
  2866. 0, new HiveChar(new String(red2), 8).getStrippedValue().getBytes(), 2);
  2867. expr.evaluate(batch);
  2868. Assert.assertEquals(3, batch.size);
  2869. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  2870. Assert.assertEquals(1, outVector.vector[0]);
  2871. Assert.assertEquals(0, outVector.vector[1]);
  2872. Assert.assertEquals(0, outVector.vector[2]);
  2873. batch = makeStringBatch();
  2874. expr =
  2875. new StringGroupColEqualCharScalar(
  2876. 0, new HiveChar(new String(green), 10).getStrippedValue().getBytes(), 2);
  2877. expr.evaluate(batch);
  2878. Assert.assertEquals(3, batch.size);
  2879. outVector = (LongColumnVector) batch.cols[2];
  2880. Assert.assertEquals(0, outVector.vector[0]);
  2881. Assert.assertEquals(1, outVector.vector[1]);
  2882. Assert.assertEquals(0, outVector.vector[2]);
  2883. }
  2884. @Test
  2885. public void testStringColCompareVarCharScalarProjection() throws HiveException {
  2886. VectorizedRowBatch batch = makeStringBatch();
  2887. VectorExpression expr;
  2888. expr =
  2889. new StringGroupColEqualVarCharScalar(
  2890. 0, new HiveVarchar(new String(red2), 8).getValue().getBytes(), 2);
  2891. expr.evaluate(batch);
  2892. Assert.assertEquals(3, batch.size);
  2893. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  2894. Assert.assertEquals(1, outVector.vector[0]);
  2895. Assert.assertEquals(0, outVector.vector[1]);
  2896. Assert.assertEquals(0, outVector.vector[2]);
  2897. batch = makeStringBatch();
  2898. expr =
  2899. new StringGroupColEqualVarCharScalar(
  2900. 0, new HiveVarchar(new String(green), 10).getValue().getBytes(), 2);
  2901. expr.evaluate(batch);
  2902. Assert.assertEquals(3, batch.size);
  2903. outVector = (LongColumnVector) batch.cols[2];
  2904. Assert.assertEquals(0, outVector.vector[0]);
  2905. Assert.assertEquals(1, outVector.vector[1]);
  2906. Assert.assertEquals(0, outVector.vector[2]);
  2907. }
  2908. @Test
  2909. // Test string literal to string column comparison
  2910. public void testStringScalarCompareStringCol() throws HiveException {
  2911. VectorizedRowBatch batch = makeStringBatch();
  2912. VectorExpression expr;
  2913. expr = new FilterStringScalarEqualStringGroupColumn(red2, 0);
  2914. expr.evaluate(batch);
  2915. // only red qualifies, and it's in entry 0
  2916. Assert.assertTrue(batch.size == 1);
  2917. Assert.assertTrue(batch.selected[0] == 0);
  2918. batch = makeStringBatch();
  2919. expr = new FilterStringScalarGreaterStringGroupColumn(red2, 0);
  2920. expr.evaluate(batch);
  2921. // only green qualifies, and it's in entry 1
  2922. Assert.assertTrue(batch.size == 1);
  2923. Assert.assertTrue(batch.selected[0] == 1);
  2924. batch = makeStringBatch();
  2925. expr = new FilterStringScalarLessEqualStringGroupColumn(green, 0);
  2926. expr.evaluate(batch);
  2927. // green and red qualify
  2928. Assert.assertTrue(batch.size == 2);
  2929. Assert.assertTrue(batch.selected[0] == 0);
  2930. Assert.assertTrue(batch.selected[1] == 1);
  2931. }
  2932. @Test
  2933. // Test CHAR literal to string column comparison
  2934. public void testCharScalarCompareStringCol() throws HiveException {
  2935. VectorizedRowBatch batch = makeStringBatch();
  2936. VectorExpression expr;
  2937. expr =
  2938. new FilterCharScalarEqualStringGroupColumn(
  2939. new HiveChar(new String(red2), 8).getStrippedValue().getBytes(), 0);
  2940. expr.evaluate(batch);
  2941. // only red qualifies, and it's in entry 0
  2942. Assert.assertTrue(batch.size == 1);
  2943. Assert.assertTrue(batch.selected[0] == 0);
  2944. batch = makeStringBatch();
  2945. expr =
  2946. new FilterCharScalarGreaterStringGroupColumn(
  2947. new HiveChar(new String(red2), 8).getStrippedValue().getBytes(), 0);
  2948. expr.evaluate(batch);
  2949. // only green qualifies, and it's in entry 1
  2950. Assert.assertTrue(batch.size == 1);
  2951. Assert.assertTrue(batch.selected[0] == 1);
  2952. batch = makeStringBatch();
  2953. expr =
  2954. new FilterCharScalarLessEqualStringGroupColumn(
  2955. new HiveChar(new String(green), 10).getStrippedValue().getBytes(), 0);
  2956. expr.evaluate(batch);
  2957. // green and red qualify
  2958. Assert.assertTrue(batch.size == 2);
  2959. Assert.assertTrue(batch.selected[0] == 0);
  2960. Assert.assertTrue(batch.selected[1] == 1);
  2961. }
  2962. @Test
  2963. // Test VARCHAR literal to string column comparison
  2964. public void testVarCharScalarCompareStringCol() throws HiveException {
  2965. VectorizedRowBatch batch = makeStringBatch();
  2966. VectorExpression expr;
  2967. expr =
  2968. new FilterVarCharScalarEqualStringGroupColumn(
  2969. new HiveVarchar(new String(red2), 8).getValue().getBytes(), 0);
  2970. expr.evaluate(batch);
  2971. // only red qualifies, and it's in entry 0
  2972. Assert.assertTrue(batch.size == 1);
  2973. Assert.assertTrue(batch.selected[0] == 0);
  2974. batch = makeStringBatch();
  2975. expr =
  2976. new FilterVarCharScalarGreaterStringGroupColumn(
  2977. new HiveVarchar(new String(red2), 8).getValue().getBytes(), 0);
  2978. expr.evaluate(batch);
  2979. // only green qualifies, and it's in entry 1
  2980. Assert.assertTrue(batch.size == 1);
  2981. Assert.assertTrue(batch.selected[0] == 1);
  2982. batch = makeStringBatch();
  2983. expr =
  2984. new FilterVarCharScalarLessEqualStringGroupColumn(
  2985. new HiveVarchar(new String(green), 10).getValue().getBytes(), 0);
  2986. expr.evaluate(batch);
  2987. // green and red qualify
  2988. Assert.assertTrue(batch.size == 2);
  2989. Assert.assertTrue(batch.selected[0] == 0);
  2990. Assert.assertTrue(batch.selected[1] == 1);
  2991. }
  2992. @Test
  2993. public void testStringScalarCompareStringColProjection() throws HiveException {
  2994. VectorizedRowBatch batch = makeStringBatch();
  2995. VectorExpression expr;
  2996. expr = new StringScalarEqualStringGroupColumn(red2, 0, 2);
  2997. expr.evaluate(batch);
  2998. Assert.assertEquals(3, batch.size);
  2999. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  3000. Assert.assertEquals(1, outVector.vector[0]);
  3001. Assert.assertEquals(0, outVector.vector[1]);
  3002. Assert.assertEquals(0, outVector.vector[2]);
  3003. batch = makeStringBatch();
  3004. expr = new StringScalarEqualStringGroupColumn(green, 0, 2);
  3005. expr.evaluate(batch);
  3006. Assert.assertEquals(3, batch.size);
  3007. outVector = (LongColumnVector) batch.cols[2];
  3008. Assert.assertEquals(0, outVector.vector[0]);
  3009. Assert.assertEquals(1, outVector.vector[1]);
  3010. Assert.assertEquals(0, outVector.vector[2]);
  3011. }
  3012. @Test
  3013. public void testCharScalarCompareStringColProjection() throws HiveException {
  3014. VectorizedRowBatch batch = makeStringBatch();
  3015. VectorExpression expr;
  3016. expr =
  3017. new CharScalarEqualStringGroupColumn(
  3018. new HiveChar(new String(red2), 8).getStrippedValue().getBytes(), 0, 2);
  3019. expr.evaluate(batch);
  3020. Assert.assertEquals(3, batch.size);
  3021. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  3022. Assert.assertEquals(1, outVector.vector[0]);
  3023. Assert.assertEquals(0, outVector.vector[1]);
  3024. Assert.assertEquals(0, outVector.vector[2]);
  3025. batch = makeStringBatch();
  3026. expr =
  3027. new CharScalarEqualStringGroupColumn(
  3028. new HiveChar(new String(green), 10).getStrippedValue().getBytes(), 0, 2);
  3029. expr.evaluate(batch);
  3030. Assert.assertEquals(3, batch.size);
  3031. outVector = (LongColumnVector) batch.cols[2];
  3032. Assert.assertEquals(0, outVector.vector[0]);
  3033. Assert.assertEquals(1, outVector.vector[1]);
  3034. Assert.assertEquals(0, outVector.vector[2]);
  3035. }
  3036. @Test
  3037. public void testVarCharScalarCompareStringColProjection() throws HiveException {
  3038. VectorizedRowBatch batch = makeStringBatch();
  3039. VectorExpression expr;
  3040. expr =
  3041. new VarCharScalarEqualStringGroupColumn(
  3042. new HiveVarchar(new String(red2), 8).getValue().getBytes(), 0, 2);
  3043. expr.evaluate(batch);
  3044. Assert.assertEquals(3, batch.size);
  3045. LongColumnVector outVector = (LongColumnVector) batch.cols[2];
  3046. Assert.assertEquals(1, outVector.vector[0]);
  3047. Assert.assertEquals(0, outVector.vector[1]);
  3048. Assert.assertEquals(0, outVector.vector[2]);
  3049. batch = makeStringBatch();
  3050. expr =
  3051. new VarCharScalarEqualStringGroupColumn(
  3052. new HiveVarchar(new String(green), 10).getValue().getBytes(), 0, 2);
  3053. expr.evaluate(batch);
  3054. Assert.assertEquals(3, batch.size);
  3055. outVector = (LongColumnVector) batch.cols[2];
  3056. Assert.assertEquals(0, outVector.vector[0]);
  3057. Assert.assertEquals(1, outVector.vector[1]);
  3058. Assert.assertEquals(0, outVector.vector[2]);
  3059. }
  3060. @Test
  3061. public void testStringColCompareStringColFilter() throws HiveException {
  3062. VectorizedRowBatch batch;
  3063. VectorExpression expr;
  3064. /* input data
  3065. *
  3066. * col0 col1
  3067. * ===============
  3068. * blue red
  3069. * green green
  3070. * red blue
  3071. * NULL red col0 data is empty string if we un-set NULL property
  3072. */
  3073. // nulls possible on left, right
  3074. batch = makeStringBatchForColColCompare();
  3075. expr = new FilterStringGroupColLessStringGroupColumn(0,1);
  3076. expr.evaluate(batch);
  3077. Assert.assertEquals(1, batch.size);
  3078. Assert.assertEquals(0, batch.selected[0]);
  3079. // no nulls possible
  3080. batch = makeStringBatchForColColCompare();
  3081. batch.cols[0].noNulls = true;
  3082. batch.cols[1].noNulls = true;
  3083. expr.evaluate(batch);
  3084. Assert.assertEquals(2, batch.size);
  3085. Assert.assertEquals(3, batch.selected[1]);
  3086. // nulls on left, no nulls on right
  3087. batch = makeStringBatchForColColCompare();
  3088. batch.cols[1].noNulls = true;
  3089. expr.evaluate(batch);
  3090. Assert.assertEquals(1, batch.size);
  3091. Assert.assertEquals(0, batch.selected[0]);
  3092. // nulls on right, no nulls on left
  3093. batch = makeStringBatchForColColCompare();
  3094. batch.cols[0].noNulls = true;
  3095. batch.cols[1].isNull[3] = true;
  3096. expr.evaluate(batch);
  3097. Assert.assertEquals(1, batch.size);
  3098. Assert.assertEquals(0, batch.selected[0]);
  3099. // Now vary isRepeating
  3100. // nulls possible on left, right
  3101. // left repeats
  3102. batch = makeStringBatchForColColCompare();
  3103. batch.cols[0].isRepeating = true;
  3104. expr.evaluate(batch);
  3105. Assert.assertEquals(3, batch.size);
  3106. Assert.assertEquals(3, batch.selected[2]);
  3107. // right repeats
  3108. batch = makeStringBatchForColColCompare();
  3109. batch.cols[1].isRepeating = true;
  3110. expr.evaluate(batch);
  3111. Assert.assertEquals(2, batch.size); // first 2 qualify
  3112. Assert.assertEquals(1, batch.selected[1]);
  3113. // left and right repeat
  3114. batch = makeStringBatchForColColCompare();
  3115. batch.cols[0].isRepeating = true;
  3116. batch.cols[1].isRepeating = true;
  3117. expr.evaluate(batch);
  3118. Assert.assertEquals(4, batch.size);
  3119. // Now vary isRepeating
  3120. // nulls possible only on left
  3121. // left repeats
  3122. batch = makeStringBatchForColColCompare();
  3123. batch.cols[0].isRepeating = true;
  3124. batch.cols[1].noNulls = true;
  3125. expr.evaluate(batch);
  3126. Assert.assertEquals(3, batch.size);
  3127. Assert.assertEquals(3, batch.selected[2]);
  3128. // left repeats and is null
  3129. batch = makeStringBatchForColColCompare();
  3130. batch.cols[0].isRepeating = true;
  3131. batch.cols[1].noNulls = true;
  3132. batch.cols[0].isNull[0] = true;
  3133. expr.evaluate(batch);
  3134. Assert.assertEquals(0, batch.size);
  3135. // right repeats
  3136. batch = makeStringBatchForColColCompare();
  3137. batch.cols[1].isRepeating = true;
  3138. batch.cols[1].noNulls = true;
  3139. expr.evaluate(batch);
  3140. Assert.assertEquals(2, batch.size);
  3141. Assert.assertEquals(0, batch.selected[0]);
  3142. Assert.assertEquals(1, batch.selected[1]);
  3143. // left and right repeat
  3144. batch = makeStringBatchForColColCompare();
  3145. batch.cols[0].isRepeating = true;
  3146. batch.cols[1].isRepeating = true;
  3147. batch.cols[1].noNulls = true;
  3148. expr.evaluate(batch);
  3149. Assert.assertEquals(4, batch.size);
  3150. // Now vary isRepeating
  3151. // nulls possible only on right
  3152. // left repeats
  3153. batch = makeStringBatchForColColCompare();
  3154. batch.cols[0].isRepeating = true;
  3155. batch.cols[0].noNulls = true;
  3156. batch.cols[1].isNull[0] = true;
  3157. expr.evaluate(batch);
  3158. Assert.assertEquals(2, batch.size);
  3159. Assert.assertEquals(3, batch.selected[1]);
  3160. // right repeats
  3161. batch = makeStringBatchForColColCompare();
  3162. batch.cols[1].isRepeating = true;
  3163. batch.cols[0].noNulls = true;
  3164. expr.evaluate(batch);
  3165. Assert.assertEquals(3, batch.size);
  3166. Assert.assertEquals(3, batch.selected[2]);
  3167. // right repeats and is null
  3168. batch = makeStringBatchForColColCompare();
  3169. batch.cols[1].isRepeating = true;
  3170. batch.cols[0].noNulls = true;
  3171. batch.cols[1].isNull[0] = true;
  3172. expr.evaluate(batch);
  3173. Assert.assertEquals(0, batch.size);
  3174. // left and right repeat
  3175. batch = makeStringBatchForColColCompare();
  3176. batch.cols[0].isRepeating = true;
  3177. batch.cols[1].isRepeating = true;
  3178. batch.cols[0].noNulls = true;
  3179. expr.evaluate(batch);
  3180. Assert.assertEquals(4, batch.size);
  3181. // left and right repeat and right is null
  3182. batch = makeStringBatchForColColCompare();
  3183. batch.cols[0].isRepeating = true;
  3184. batch.cols[1].isRepeating = true;
  3185. batch.cols[0].noNulls = true;
  3186. batch.cols[1].isNull[0] = true;
  3187. expr.evaluate(batch);
  3188. Assert.assertEquals(0, batch.size);
  3189. }
  3190. @Test
  3191. public void testStringColCompareStringColProjection() throws HiveException {
  3192. VectorizedRowBatch batch;
  3193. VectorExpression expr;
  3194. long [] outVector;
  3195. /* input data
  3196. *
  3197. * col0 col1
  3198. * ===============
  3199. * blue red
  3200. * green green
  3201. * red blue
  3202. * NULL red col0 data is empty string if we un-set NULL property
  3203. */
  3204. // nulls possible on left, right
  3205. batch = makeStringBatchForColColCompare();
  3206. expr = new StringGroupColLessStringGroupColumn(0, 1, 3);
  3207. expr.evaluate(batch);
  3208. Assert.assertEquals(4, batch.size);
  3209. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3210. Assert.assertFalse(batch.cols[3].isNull[0]);
  3211. Assert.assertEquals(1, outVector[0]);
  3212. Assert.assertFalse(batch.cols[3].isNull[1]);
  3213. Assert.assertEquals(0, outVector[1]);
  3214. Assert.assertFalse(batch.cols[3].isNull[2]);
  3215. Assert.assertEquals(0, outVector[2]);
  3216. Assert.assertTrue(batch.cols[3].isNull[3]);
  3217. // no nulls possible
  3218. batch = makeStringBatchForColColCompare();
  3219. batch.cols[0].noNulls = true;
  3220. batch.cols[1].noNulls = true;
  3221. expr.evaluate(batch);
  3222. Assert.assertEquals(4, batch.size);
  3223. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3224. Assert.assertTrue(batch.cols[3].noNulls);
  3225. Assert.assertFalse(batch.cols[3].isNull[0]);
  3226. Assert.assertEquals(1, outVector[0]);
  3227. Assert.assertFalse(batch.cols[3].isNull[1]);
  3228. Assert.assertEquals(0, outVector[1]);
  3229. Assert.assertFalse(batch.cols[3].isNull[2]);
  3230. Assert.assertEquals(0, outVector[2]);
  3231. Assert.assertFalse(batch.cols[3].isNull[3]);
  3232. Assert.assertEquals(1, outVector[3]);
  3233. // nulls on left, no nulls on right
  3234. batch = makeStringBatchForColColCompare();
  3235. batch.cols[1].noNulls = true;
  3236. expr.evaluate(batch);
  3237. Assert.assertEquals(4, batch.size);
  3238. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3239. Assert.assertFalse(batch.cols[3].isNull[0]);
  3240. Assert.assertEquals(1, outVector[0]);
  3241. Assert.assertFalse(batch.cols[3].isNull[1]);
  3242. Assert.assertEquals(0, outVector[1]);
  3243. Assert.assertFalse(batch.cols[3].isNull[2]);
  3244. Assert.assertEquals(0, outVector[2]);
  3245. Assert.assertTrue(batch.cols[3].isNull[3]);
  3246. // nulls on right, no nulls on left
  3247. batch = makeStringBatchForColColCompare();
  3248. batch.cols[0].noNulls = true;
  3249. batch.cols[1].isNull[3] = true;
  3250. expr.evaluate(batch);
  3251. Assert.assertEquals(4, batch.size);
  3252. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3253. Assert.assertFalse(batch.cols[3].isNull[0]);
  3254. Assert.assertEquals(1, outVector[0]);
  3255. Assert.assertFalse(batch.cols[3].isNull[1]);
  3256. Assert.assertEquals(0, outVector[1]);
  3257. Assert.assertFalse(batch.cols[3].isNull[2]);
  3258. Assert.assertEquals(0, outVector[2]);
  3259. Assert.assertTrue(batch.cols[3].isNull[3]);
  3260. // Now vary isRepeating
  3261. // nulls possible on left, right
  3262. // left repeats
  3263. batch = makeStringBatchForColColCompare();
  3264. batch.cols[0].isRepeating = true;
  3265. expr.evaluate(batch);
  3266. Assert.assertEquals(4, batch.size);
  3267. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3268. Assert.assertFalse(batch.cols[3].isNull[0]);
  3269. Assert.assertEquals(1, outVector[0]);
  3270. Assert.assertFalse(batch.cols[3].isNull[1]);
  3271. Assert.assertEquals(1, outVector[1]);
  3272. Assert.assertFalse(batch.cols[3].isNull[2]);
  3273. Assert.assertEquals(0, outVector[2]);
  3274. Assert.assertFalse(batch.cols[3].isNull[3]);
  3275. Assert.assertEquals(1, outVector[3]);
  3276. // right repeats
  3277. batch = makeStringBatchForColColCompare();
  3278. batch.cols[1].isRepeating = true;
  3279. expr.evaluate(batch);
  3280. Assert.assertEquals(4, batch.size);
  3281. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3282. Assert.assertFalse(batch.cols[3].noNulls);
  3283. Assert.assertFalse(batch.cols[3].isNull[0]);
  3284. Assert.assertEquals(1, outVector[0]);
  3285. Assert.assertFalse(batch.cols[3].isNull[1]);
  3286. Assert.assertEquals(1, outVector[1]);
  3287. Assert.assertFalse(batch.cols[3].isNull[2]);
  3288. Assert.assertEquals(0, outVector[2]);
  3289. Assert.assertTrue(batch.cols[3].isNull[3]);
  3290. // left and right repeat
  3291. batch = makeStringBatchForColColCompare();
  3292. batch.cols[0].isRepeating = true;
  3293. batch.cols[1].isRepeating = true;
  3294. expr.evaluate(batch);
  3295. Assert.assertEquals(4, batch.size);
  3296. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3297. Assert.assertTrue(batch.cols[3].isRepeating);
  3298. Assert.assertFalse(batch.cols[3].isNull[0]);
  3299. Assert.assertEquals(1, outVector[0]);
  3300. // Now vary isRepeating
  3301. // nulls possible only on left
  3302. // left repeats
  3303. batch = makeStringBatchForColColCompare();
  3304. batch.cols[0].isRepeating = true;
  3305. batch.cols[1].noNulls = true;
  3306. expr.evaluate(batch);
  3307. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3308. Assert.assertEquals(4, batch.size);
  3309. Assert.assertFalse(batch.cols[3].isNull[0]);
  3310. Assert.assertEquals(1, outVector[0]);
  3311. Assert.assertFalse(batch.cols[3].isNull[1]);
  3312. Assert.assertEquals(1, outVector[1]);
  3313. Assert.assertFalse(batch.cols[3].isNull[2]);
  3314. Assert.assertEquals(0, outVector[2]);
  3315. Assert.assertFalse(batch.cols[3].isNull[3]);
  3316. Assert.assertEquals(1, outVector[3]);
  3317. // left repeats and is null
  3318. batch = makeStringBatchForColColCompare();
  3319. batch.cols[0].isRepeating = true;
  3320. batch.cols[1].noNulls = true;
  3321. batch.cols[0].isNull[0] = true;
  3322. expr.evaluate(batch);
  3323. Assert.assertEquals(4, batch.size);
  3324. Assert.assertFalse(batch.cols[3].noNulls);
  3325. Assert.assertTrue(batch.cols[3].isRepeating);
  3326. Assert.assertTrue(batch.cols[3].isNull[0]);
  3327. // right repeats
  3328. batch = makeStringBatchForColColCompare();
  3329. batch.cols[1].isRepeating = true;
  3330. batch.cols[1].noNulls = true;
  3331. expr.evaluate(batch);
  3332. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3333. Assert.assertEquals(4, batch.size);
  3334. Assert.assertFalse(batch.cols[3].noNulls);
  3335. Assert.assertFalse(batch.cols[3].isNull[0]);
  3336. Assert.assertEquals(1, outVector[0]);
  3337. Assert.assertFalse(batch.cols[3].isNull[1]);
  3338. Assert.assertEquals(1, outVector[1]);
  3339. Assert.assertFalse(batch.cols[3].isNull[2]);
  3340. Assert.assertEquals(0, outVector[2]);
  3341. Assert.assertTrue(batch.cols[3].isNull[3]);
  3342. // left and right repeat
  3343. batch = makeStringBatchForColColCompare();
  3344. batch.cols[0].isRepeating = true;
  3345. batch.cols[1].isRepeating = true;
  3346. batch.cols[1].noNulls = true;
  3347. expr.evaluate(batch);
  3348. Assert.assertEquals(4, batch.size);
  3349. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3350. Assert.assertTrue(batch.cols[3].isRepeating);
  3351. Assert.assertFalse(batch.cols[3].isNull[0]);
  3352. Assert.assertEquals(1, outVector[0]);
  3353. // Now vary isRepeating
  3354. // nulls possible only on right
  3355. // left repeats
  3356. batch = makeStringBatchForColColCompare();
  3357. batch.cols[0].isRepeating = true;
  3358. batch.cols[0].noNulls = true;
  3359. batch.cols[1].isNull[0] = true;
  3360. expr.evaluate(batch);
  3361. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3362. Assert.assertEquals(4, batch.size);
  3363. Assert.assertFalse(batch.cols[3].noNulls);
  3364. Assert.assertTrue(batch.cols[3].isNull[0]);
  3365. Assert.assertFalse(batch.cols[3].isNull[1]);
  3366. Assert.assertEquals(1, outVector[1]);
  3367. Assert.assertFalse(batch.cols[3].isNull[2]);
  3368. Assert.assertEquals(0, outVector[2]);
  3369. Assert.assertFalse(batch.cols[3].isNull[3]);
  3370. Assert.assertEquals(1, outVector[3]);
  3371. // right repeats
  3372. batch = makeStringBatchForColColCompare();
  3373. batch.cols[1].isRepeating = true;
  3374. batch.cols[0].noNulls = true;
  3375. expr.evaluate(batch);
  3376. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3377. Assert.assertEquals(4, batch.size);
  3378. Assert.assertFalse(batch.cols[3].noNulls);
  3379. Assert.assertFalse(batch.cols[3].isNull[0]);
  3380. Assert.assertEquals(1, outVector[0]);
  3381. Assert.assertFalse(batch.cols[3].isNull[1]);
  3382. Assert.assertEquals(1, outVector[1]);
  3383. Assert.assertFalse(batch.cols[3].isNull[2]);
  3384. Assert.assertEquals(0, outVector[2]);
  3385. Assert.assertFalse(batch.cols[3].isNull[3]);
  3386. Assert.assertEquals(1, outVector[3]);
  3387. // right repeats and is null
  3388. batch = makeStringBatchForColColCompare();
  3389. batch.cols[1].isRepeating = true;
  3390. batch.cols[0].noNulls = true;
  3391. batch.cols[1].isNull[0] = true;
  3392. expr.evaluate(batch);
  3393. Assert.assertEquals(4, batch.size);
  3394. Assert.assertFalse(batch.cols[3].noNulls);
  3395. Assert.assertTrue(batch.cols[3].isRepeating);
  3396. Assert.assertTrue(batch.cols[3].isNull[0]);
  3397. // left and right repeat
  3398. batch = makeStringBatchForColColCompare();
  3399. batch.cols[0].isRepeating = true;
  3400. batch.cols[1].isRepeating = true;
  3401. batch.cols[0].noNulls = true;
  3402. expr.evaluate(batch);
  3403. Assert.assertEquals(4, batch.size);
  3404. outVector = ((LongColumnVector) batch.cols[3]).vector;
  3405. Assert.assertTrue(batch.cols[3].isRepeating);
  3406. Assert.assertFalse(batch.cols[3].isNull[0]);
  3407. Assert.assertEquals(1, outVector[0]);
  3408. // left and right repeat and right is null
  3409. batch = makeStringBatchForColColCompare();
  3410. batch.cols[0].isRepeating = true;
  3411. batch.cols[1].isRepeating = true;
  3412. batch.cols[0].noNulls = true;
  3413. batch.cols[1].isNull[0] = true;
  3414. expr.evaluate(batch);
  3415. Assert.assertEquals(4, batch.size);
  3416. Assert.assertFalse(batch.cols[3].noNulls);
  3417. Assert.assertTrue(batch.cols[3].isRepeating);
  3418. Assert.assertTrue(batch.cols[3].isNull[0]);
  3419. }
  3420. VectorizedRowBatch makeStringBatch() {
  3421. // create a batch with one string ("Bytes") column
  3422. VectorizedRowBatch batch = new VectorizedRowBatch(3);
  3423. BytesColumnVector v = new BytesColumnVector();
  3424. batch.cols[0] = v;
  3425. batch.cols[1] = new BytesColumnVector(); // to hold output if needed
  3426. batch.cols[2] = new LongColumnVector(batch.size); // to hold boolean output
  3427. /*
  3428. * Add these 3 values:
  3429. *
  3430. * red
  3431. * green
  3432. * NULL
  3433. */
  3434. v.setRef(0, red, 0, red.length);
  3435. v.isNull[0] = false;
  3436. v.setRef(1, green, 0, green.length);
  3437. v.isNull[1] = false;
  3438. v.setRef(2, emptyString, 0, emptyString.length);
  3439. v.isNull[2] = true;
  3440. v.noNulls = false;
  3441. batch.size = 3;
  3442. return batch;
  3443. }
  3444. VectorizedRowBatch makeStringBatchMixedCase() {
  3445. // create a batch with two string ("Bytes") columns
  3446. VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE);
  3447. BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  3448. batch.cols[0] = v;
  3449. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  3450. outV.initBuffer();
  3451. batch.cols[1] = outV;
  3452. /*
  3453. * Add these 3 values:
  3454. *
  3455. * mixedUp
  3456. * green
  3457. * NULL
  3458. */
  3459. v.setRef(0, mixedUp, 0, mixedUp.length);
  3460. v.isNull[0] = false;
  3461. v.setRef(1, green, 0, green.length);
  3462. v.isNull[1] = false;
  3463. v.setRef(2, emptyString, 0, emptyString.length);
  3464. v.isNull[2] = true;
  3465. v.noNulls = false;
  3466. batch.size = 3;
  3467. return batch;
  3468. }
  3469. VectorizedRowBatch makeStringBatchMixedCharSize() {
  3470. // create a new batch with one char column (for input) and one long column (for output)
  3471. VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE);
  3472. BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  3473. batch.cols[0] = v;
  3474. LongColumnVector outV = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  3475. batch.cols[1] = outV;
  3476. /*
  3477. * Add these 3 values:
  3478. *
  3479. * mixedUp
  3480. * green
  3481. * NULL
  3482. * <4 char string with mult-byte chars>
  3483. */
  3484. v.setRef(0, mixedUp, 0, mixedUp.length);
  3485. v.isNull[0] = false;
  3486. v.setRef(1, green, 0, green.length);
  3487. v.isNull[1] = false;
  3488. v.setRef(2, emptyString, 0, emptyString.length);
  3489. v.isNull[2] = true;
  3490. v.noNulls = false;
  3491. v.setRef(3, multiByte, 0, 10);
  3492. v.isNull[3] = false;
  3493. batch.size = 4;
  3494. return batch;
  3495. }
  3496. @Test
  3497. public void testColLower() throws HiveException {
  3498. // has nulls, not repeating
  3499. VectorizedRowBatch batch = makeStringBatchMixedCase();
  3500. StringLower expr = new StringLower(0, 1);
  3501. expr.evaluate(batch);
  3502. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  3503. int cmp = StringExpr.compare(mixedUpLower, 0, mixedUpLower.length, outCol.vector[0],
  3504. outCol.start[0], outCol.length[0]);
  3505. Assert.assertEquals(0, cmp);
  3506. Assert.assertTrue(outCol.isNull[2]);
  3507. int cmp2 = StringExpr.compare(green, 0, green.length, outCol.vector[1],
  3508. outCol.start[1], outCol.length[1]);
  3509. Assert.assertEquals(0, cmp2);
  3510. // no nulls, not repeating
  3511. batch = makeStringBatchMixedCase();
  3512. batch.cols[0].noNulls = true;
  3513. expr.evaluate(batch);
  3514. outCol = (BytesColumnVector) batch.cols[1];
  3515. cmp = StringExpr.compare(mixedUpLower, 0, mixedUpLower.length, outCol.vector[0],
  3516. outCol.start[0], outCol.length[0]);
  3517. Assert.assertEquals(0, cmp);
  3518. Assert.assertTrue(outCol.noNulls);
  3519. // has nulls, is repeating
  3520. batch = makeStringBatchMixedCase();
  3521. batch.cols[0].isRepeating = true;
  3522. expr.evaluate(batch);
  3523. outCol = (BytesColumnVector) batch.cols[1];
  3524. cmp = StringExpr.compare(mixedUpLower, 0, mixedUpLower.length, outCol.vector[0],
  3525. outCol.start[0], outCol.length[0]);
  3526. Assert.assertEquals(0, cmp);
  3527. Assert.assertTrue(outCol.isRepeating);
  3528. // no nulls, is repeating
  3529. batch = makeStringBatchMixedCase();
  3530. batch.cols[0].isRepeating = true;
  3531. batch.cols[0].noNulls = true;
  3532. expr.evaluate(batch);
  3533. outCol = (BytesColumnVector) batch.cols[1];
  3534. cmp = StringExpr.compare(mixedUpLower, 0, mixedUpLower.length, outCol.vector[0],
  3535. outCol.start[0], outCol.length[0]);
  3536. Assert.assertEquals(0, cmp);
  3537. Assert.assertTrue(outCol.isRepeating);
  3538. Assert.assertTrue(outCol.noNulls);
  3539. }
  3540. @Test
  3541. public void testColUpper() throws HiveException {
  3542. // no nulls, not repeating
  3543. /* We don't test all the combinations because (at least currently)
  3544. * the logic is inherited to be the same as testColLower, which checks all the cases).
  3545. */
  3546. VectorizedRowBatch batch = makeStringBatchMixedCase();
  3547. StringUpper expr = new StringUpper(0, 1);
  3548. batch.cols[0].noNulls = true;
  3549. expr.evaluate(batch);
  3550. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  3551. int cmp = StringExpr.compare(mixedUpUpper, 0, mixedUpUpper.length, outCol.vector[0],
  3552. outCol.start[0], outCol.length[0]);
  3553. Assert.assertEquals(0, cmp);
  3554. Assert.assertTrue(outCol.noNulls);
  3555. }
  3556. @Test
  3557. public void testStringLength() throws HiveException {
  3558. // has nulls, not repeating
  3559. VectorizedRowBatch batch = makeStringBatchMixedCharSize();
  3560. StringLength expr = new StringLength(0, 1);
  3561. expr.evaluate(batch);
  3562. LongColumnVector outCol = (LongColumnVector) batch.cols[1];
  3563. Assert.assertEquals(5, outCol.vector[1]); // length of green is 5
  3564. Assert.assertTrue(outCol.isNull[2]);
  3565. Assert.assertEquals(4, outCol.vector[3]); // this one has the mixed-size chars
  3566. // no nulls, not repeating
  3567. batch = makeStringBatchMixedCharSize();
  3568. batch.cols[0].noNulls = true;
  3569. expr.evaluate(batch);
  3570. outCol = (LongColumnVector) batch.cols[1];
  3571. Assert.assertTrue(outCol.noNulls);
  3572. Assert.assertEquals(4, outCol.vector[3]); // this one has the mixed-size chars
  3573. // has nulls, is repeating
  3574. batch = makeStringBatchMixedCharSize();
  3575. batch.cols[0].isRepeating = true;
  3576. expr.evaluate(batch);
  3577. outCol = (LongColumnVector) batch.cols[1];
  3578. Assert.assertTrue(outCol.isRepeating);
  3579. Assert.assertEquals(7, outCol.vector[0]); // length of "mixedUp"
  3580. // no nulls, is repeating
  3581. batch = makeStringBatchMixedCharSize();
  3582. batch.cols[0].isRepeating = true;
  3583. batch.cols[0].noNulls = true;
  3584. expr.evaluate(batch);
  3585. outCol = (LongColumnVector) batch.cols[1];
  3586. Assert.assertEquals(7, outCol.vector[0]); // length of "mixedUp"
  3587. Assert.assertTrue(outCol.isRepeating);
  3588. Assert.assertTrue(outCol.noNulls);
  3589. }
  3590. private VectorizedRowBatch makeStringBatch2In1Out() {
  3591. VectorizedRowBatch batch = new VectorizedRowBatch(3);
  3592. BytesColumnVector v = new BytesColumnVector();
  3593. batch.cols[0] = v;
  3594. BytesColumnVector v2 = new BytesColumnVector();
  3595. batch.cols[1] = v2;
  3596. batch.cols[2] = new BytesColumnVector();
  3597. v.setRef(0, red, 0, red.length);
  3598. v.isNull[0] = false;
  3599. v.setRef(1, green, 0, green.length);
  3600. v.isNull[1] = false;
  3601. v.setRef(2, emptyString, 0, emptyString.length);
  3602. v.isNull[2] = true;
  3603. v.noNulls = false;
  3604. v2.setRef(0, red, 0, red.length);
  3605. v2.isNull[0] = false;
  3606. v2.setRef(1, green, 0, green.length);
  3607. v2.isNull[1] = false;
  3608. v2.setRef(2, emptyString, 0, emptyString.length);
  3609. v2.isNull[2] = true;
  3610. v2.noNulls = false;
  3611. batch.size = 3;
  3612. return batch;
  3613. }
  3614. private VectorizedRowBatch makeStringBatchForColColCompare() {
  3615. VectorizedRowBatch batch = new VectorizedRowBatch(4);
  3616. BytesColumnVector v = new BytesColumnVector();
  3617. batch.cols[0] = v;
  3618. BytesColumnVector v2 = new BytesColumnVector();
  3619. batch.cols[1] = v2;
  3620. batch.cols[2] = new BytesColumnVector();
  3621. batch.cols[3] = new LongColumnVector();
  3622. v.setRef(0, blue, 0, blue.length);
  3623. v.isNull[0] = false;
  3624. v.setRef(1, green, 0, green.length);
  3625. v.isNull[1] = false;
  3626. v.setRef(2, red, 0, red.length);
  3627. v.isNull[2] = false;
  3628. v.setRef(3, emptyString, 0, emptyString.length);
  3629. v.isNull[3] = true;
  3630. v.noNulls = false;
  3631. v2.setRef(0, red, 0, red.length);
  3632. v2.isNull[0] = false;
  3633. v2.setRef(1, green, 0, green.length);
  3634. v2.isNull[1] = false;
  3635. v2.setRef(2, blue, 0, blue.length);
  3636. v2.isNull[2] = false;
  3637. v2.setRef(3, red, 0, red.length);
  3638. v2.isNull[3] = false;
  3639. v2.noNulls = false;
  3640. batch.size = 4;
  3641. return batch;
  3642. }
  3643. @Test
  3644. public void testStringLike() throws HiveException {
  3645. // has nulls, not repeating
  3646. VectorizedRowBatch batch;
  3647. Text pattern;
  3648. int initialBatchSize;
  3649. batch = makeStringBatchMixedCharSize();
  3650. pattern = new Text(mixPercentPattern);
  3651. FilterStringColLikeStringScalar expr = new FilterStringColLikeStringScalar(0, mixPercentPattern);
  3652. expr.transientInit(hiveConf);
  3653. expr.evaluate(batch);
  3654. // verify that the beginning entry is the only one that matches
  3655. Assert.assertEquals(1, batch.size);
  3656. Assert.assertEquals(0, batch.selected[0]);
  3657. // no nulls, not repeating
  3658. batch = makeStringBatchMixedCharSize();
  3659. batch.cols[0].noNulls = true;
  3660. expr.evaluate(batch);
  3661. // verify that the beginning entry is the only one that matches
  3662. Assert.assertEquals(1, batch.size);
  3663. Assert.assertEquals(0, batch.selected[0]);
  3664. // has nulls, is repeating
  3665. batch = makeStringBatchMixedCharSize();
  3666. initialBatchSize = batch.size;
  3667. batch.cols[0].isRepeating = true;
  3668. expr.evaluate(batch);
  3669. // all rows qualify
  3670. Assert.assertEquals(initialBatchSize, batch.size);
  3671. // same, but repeating value is null
  3672. batch = makeStringBatchMixedCharSize();
  3673. batch.cols[0].isRepeating = true;
  3674. batch.cols[0].isNull[0] = true;
  3675. expr.evaluate(batch);
  3676. // no rows qualify
  3677. Assert.assertEquals(0, batch.size);
  3678. // no nulls, is repeating
  3679. batch = makeStringBatchMixedCharSize();
  3680. initialBatchSize = batch.size;
  3681. batch.cols[0].isRepeating = true;
  3682. batch.cols[0].noNulls = true;
  3683. expr.evaluate(batch);
  3684. // all rows qualify
  3685. Assert.assertEquals(initialBatchSize, batch.size);
  3686. }
  3687. @Test
  3688. public void testStringLikePatternType() throws HiveException {
  3689. FilterStringColLikeStringScalar expr;
  3690. VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 1, 1);
  3691. vrb.cols[0] = new BytesColumnVector(1);
  3692. BytesColumnVector bcv = (BytesColumnVector) vrb.cols[0];
  3693. vrb.size = 0;
  3694. // BEGIN pattern
  3695. expr = new FilterStringColLikeStringScalar(0, "abc%".getBytes());
  3696. expr.transientInit(hiveConf);
  3697. expr.evaluate(vrb);
  3698. Assert.assertEquals(FilterStringColLikeStringScalar.BeginChecker.class,
  3699. expr.checker.getClass());
  3700. // END pattern
  3701. expr = new FilterStringColLikeStringScalar(0, "%abc".getBytes(StandardCharsets.UTF_8));
  3702. expr.transientInit(hiveConf);
  3703. expr.evaluate(vrb);
  3704. Assert.assertEquals(FilterStringColLikeStringScalar.EndChecker.class,
  3705. expr.checker.getClass());
  3706. // MIDDLE pattern
  3707. expr = new FilterStringColLikeStringScalar(0, "%abc%".getBytes());
  3708. expr.transientInit(hiveConf);
  3709. expr.evaluate(vrb);
  3710. Assert.assertEquals(FilterStringColLikeStringScalar.MiddleChecker.class,
  3711. expr.checker.getClass());
  3712. // CHAIN pattern
  3713. expr = new FilterStringColLikeStringScalar(0, "%abc%de".getBytes());
  3714. expr.transientInit(hiveConf);
  3715. expr.evaluate(vrb);
  3716. Assert.assertEquals(FilterStringColLikeStringScalar.ChainedChecker.class,
  3717. expr.checker.getClass());
  3718. // COMPLEX pattern
  3719. expr = new FilterStringColLikeStringScalar(0, "%abc_%de".getBytes());
  3720. expr.transientInit(hiveConf);
  3721. expr.evaluate(vrb);
  3722. Assert.assertEquals(FilterStringColLikeStringScalar.ComplexChecker.class,
  3723. expr.checker.getClass());
  3724. // NONE pattern
  3725. expr = new FilterStringColLikeStringScalar(0, "abc".getBytes());
  3726. expr.transientInit(hiveConf);
  3727. expr.evaluate(vrb);
  3728. Assert.assertEquals(FilterStringColLikeStringScalar.NoneChecker.class,
  3729. expr.checker.getClass());
  3730. }
  3731. @Test
  3732. public void testStringLikeMultiByte() throws HiveException {
  3733. FilterStringColLikeStringScalar expr;
  3734. VectorizedRowBatch batch;
  3735. // verify that a multi byte LIKE expression matches a matching string
  3736. batch = makeStringBatchMixedCharSize();
  3737. expr = new FilterStringColLikeStringScalar(0, ('%' + new String(multiByte) + '%').getBytes(StandardCharsets.UTF_8));
  3738. expr.transientInit(hiveConf);
  3739. expr.evaluate(batch);
  3740. Assert.assertEquals(1, batch.size);
  3741. // verify that a multi byte LIKE expression doesn't match a non-matching string
  3742. batch = makeStringBatchMixedCharSize();
  3743. expr = new FilterStringColLikeStringScalar(0, ('%' + new String(multiByte) + 'x').getBytes(StandardCharsets.UTF_8));
  3744. expr.transientInit(hiveConf);
  3745. expr.evaluate(batch);
  3746. Assert.assertEquals(0, batch.size);
  3747. }
  3748. private String randomizePattern(Random control, String value) {
  3749. switch (control.nextInt(10)) {
  3750. default:
  3751. case 0: {
  3752. return value;
  3753. }
  3754. case 1: {
  3755. return control.nextInt(1000) + value;
  3756. }
  3757. case 2: {
  3758. return value + control.nextInt(1000);
  3759. }
  3760. case 3: {
  3761. return control.nextInt(1000) + value.substring(1);
  3762. }
  3763. case 4: {
  3764. return value.substring(1) + control.nextInt(1000);
  3765. }
  3766. case 5: {
  3767. return control.nextInt(1000) + value.substring(0, value.length() - 1);
  3768. }
  3769. case 6: {
  3770. return "";
  3771. }
  3772. case 7: {
  3773. return value.toLowerCase();
  3774. }
  3775. case 8: {
  3776. StringBuilder sb = new StringBuilder();
  3777. for (int i = 0; i < control.nextInt(12); i++) {
  3778. sb.append((char) ('a' + control.nextInt(26)));
  3779. }
  3780. return sb.toString();
  3781. }
  3782. case 9: {
  3783. StringBuilder sb = new StringBuilder();
  3784. for (int i = 0; i < control.nextInt(12); i++) {
  3785. sb.append((char) ('A' + control.nextInt(26)));
  3786. }
  3787. return sb.toString();
  3788. }
  3789. }
  3790. }
  3791. private String generateCandidate(Random control, String pattern) {
  3792. StringBuilder sb = new StringBuilder();
  3793. final StringTokenizer tokens = new StringTokenizer(pattern, "%");
  3794. final boolean leftAnchor = pattern.startsWith("%");
  3795. final boolean rightAnchor = pattern.endsWith("%");
  3796. for (int i = 0; tokens.hasMoreTokens(); i++) {
  3797. String chunk = tokens.nextToken();
  3798. if (leftAnchor && i == 0) {
  3799. // first item
  3800. sb.append(randomizePattern(control, chunk));
  3801. } else if (rightAnchor && tokens.hasMoreTokens() == false) {
  3802. // last item
  3803. sb.append(randomizePattern(control, chunk));
  3804. } else {
  3805. // middle item
  3806. sb.append(randomizePattern(control, chunk));
  3807. }
  3808. }
  3809. return sb.toString();
  3810. }
  3811. @Test
  3812. public void testStringLikeRandomized() throws HiveException {
  3813. final String [] patterns = new String[] {
  3814. "ABC%",
  3815. "%ABC",
  3816. "%ABC%",
  3817. "ABC%DEF",
  3818. "ABC%DEF%",
  3819. "%ABC%DEF",
  3820. "%ABC%DEF%",
  3821. "ABC%DEF%EFG",
  3822. "%ABC%DEF%EFG",
  3823. "%ABC%DEF%EFG%H",
  3824. };
  3825. long positive = 0;
  3826. long negative = 0;
  3827. Random control = new Random(1234);
  3828. UDFLike udf = new UDFLike();
  3829. for (String pattern : patterns) {
  3830. VectorExpression expr = new FilterStringColLikeStringScalar(0, pattern.getBytes(StandardCharsets.UTF_8));
  3831. expr.transientInit(hiveConf);
  3832. VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(1, 1, 1);
  3833. batch.cols[0] = new BytesColumnVector(1);
  3834. BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
  3835. Text pText = new Text(pattern);
  3836. for (int i=0; i < 1024; i++) {
  3837. String input = generateCandidate(control,pattern);
  3838. BooleanWritable like = udf.evaluate(new Text(input), pText);
  3839. batch.reset();
  3840. bcv.initBuffer();
  3841. byte[] utf8 = input.getBytes(StandardCharsets.UTF_8);
  3842. bcv.setVal(0, utf8, 0, utf8.length);
  3843. bcv.noNulls = true;
  3844. batch.size = 1;
  3845. expr.evaluate(batch);
  3846. if (like.get()) {
  3847. positive++;
  3848. } else {
  3849. negative++;
  3850. }
  3851. assertEquals(String.format("Checking '%s' against '%s'", input, pattern), like.get(), (batch.size != 0));
  3852. }
  3853. }
  3854. LOG.info(String.format("Randomized testing: ran %d positive tests and %d negative tests",
  3855. positive, negative));
  3856. }
  3857. @Test
  3858. public void testColConcatStringScalar() throws HiveException {
  3859. // has nulls, not repeating
  3860. VectorizedRowBatch batch = makeStringBatch();
  3861. StringGroupColConcatStringScalar expr = new StringGroupColConcatStringScalar(0, red, 1);
  3862. expr.evaluate(batch);
  3863. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  3864. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3865. outCol.start[0], outCol.length[0]);
  3866. Assert.assertEquals(0, cmp);
  3867. Assert.assertTrue(outCol.isNull[2]);
  3868. int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3869. outCol.start[1], outCol.length[1]);
  3870. Assert.assertEquals(0, cmp2);
  3871. Assert.assertFalse(outCol.noNulls);
  3872. Assert.assertFalse(outCol.isRepeating);
  3873. // no nulls, not repeating
  3874. batch = makeStringBatch();
  3875. batch.cols[0].noNulls = true;
  3876. expr.evaluate(batch);
  3877. outCol = (BytesColumnVector) batch.cols[1];
  3878. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3879. outCol.start[0], outCol.length[0]);
  3880. Assert.assertEquals(0, cmp);
  3881. cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3882. outCol.start[1], outCol.length[1]);
  3883. Assert.assertEquals(0, cmp2);
  3884. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  3885. outCol.start[2], outCol.length[2]);
  3886. Assert.assertEquals(0, cmp3);
  3887. Assert.assertTrue(outCol.noNulls);
  3888. Assert.assertFalse(outCol.isRepeating);
  3889. // has nulls, is repeating
  3890. batch = makeStringBatch();
  3891. batch.cols[0].isRepeating = true;
  3892. expr.evaluate(batch);
  3893. outCol = (BytesColumnVector) batch.cols[1];
  3894. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3895. outCol.start[0], outCol.length[0]);
  3896. Assert.assertEquals(0, cmp);
  3897. Assert.assertTrue(outCol.isRepeating);
  3898. // no nulls, is repeating
  3899. batch = makeStringBatch();
  3900. batch.cols[0].isRepeating = true;
  3901. batch.cols[0].noNulls = true;
  3902. expr.evaluate(batch);
  3903. outCol = (BytesColumnVector) batch.cols[1];
  3904. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3905. outCol.start[0], outCol.length[0]);
  3906. Assert.assertEquals(0, cmp);
  3907. Assert.assertTrue(outCol.isRepeating);
  3908. Assert.assertTrue(outCol.noNulls);
  3909. }
  3910. @Test
  3911. public void testColConcatCharScalar() throws HiveException {
  3912. // has nulls, not repeating
  3913. VectorizedRowBatch batch = makeStringBatch();
  3914. StringGroupColConcatStringScalar expr =
  3915. new StringGroupColConcatStringScalar(
  3916. 0, new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 1);
  3917. expr.evaluate(batch);
  3918. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  3919. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3920. outCol.start[0], outCol.length[0]);
  3921. Assert.assertEquals(0, cmp);
  3922. Assert.assertTrue(outCol.isNull[2]);
  3923. int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3924. outCol.start[1], outCol.length[1]);
  3925. Assert.assertEquals(0, cmp2);
  3926. Assert.assertFalse(outCol.noNulls);
  3927. Assert.assertFalse(outCol.isRepeating);
  3928. // no nulls, not repeating
  3929. batch = makeStringBatch();
  3930. batch.cols[0].noNulls = true;
  3931. expr.evaluate(batch);
  3932. outCol = (BytesColumnVector) batch.cols[1];
  3933. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3934. outCol.start[0], outCol.length[0]);
  3935. Assert.assertEquals(0, cmp);
  3936. cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3937. outCol.start[1], outCol.length[1]);
  3938. Assert.assertEquals(0, cmp2);
  3939. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  3940. outCol.start[2], outCol.length[2]);
  3941. Assert.assertEquals(0, cmp3);
  3942. Assert.assertTrue(outCol.noNulls);
  3943. Assert.assertFalse(outCol.isRepeating);
  3944. // has nulls, is repeating
  3945. batch = makeStringBatch();
  3946. batch.cols[0].isRepeating = true;
  3947. expr.evaluate(batch);
  3948. outCol = (BytesColumnVector) batch.cols[1];
  3949. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3950. outCol.start[0], outCol.length[0]);
  3951. Assert.assertEquals(0, cmp);
  3952. Assert.assertTrue(outCol.isRepeating);
  3953. // no nulls, is repeating
  3954. batch = makeStringBatch();
  3955. batch.cols[0].isRepeating = true;
  3956. batch.cols[0].noNulls = true;
  3957. expr.evaluate(batch);
  3958. outCol = (BytesColumnVector) batch.cols[1];
  3959. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3960. outCol.start[0], outCol.length[0]);
  3961. Assert.assertEquals(0, cmp);
  3962. Assert.assertTrue(outCol.isRepeating);
  3963. Assert.assertTrue(outCol.noNulls);
  3964. }
  3965. @Test
  3966. public void testColConcatVarCharScalar() throws HiveException {
  3967. // has nulls, not repeating
  3968. VectorizedRowBatch batch = makeStringBatch();
  3969. StringGroupColConcatStringScalar expr =
  3970. new StringGroupColConcatStringScalar(
  3971. 0, new HiveVarchar(new String(red), 14).getValue().getBytes(), 1);
  3972. expr.evaluate(batch);
  3973. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  3974. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3975. outCol.start[0], outCol.length[0]);
  3976. Assert.assertEquals(0, cmp);
  3977. Assert.assertTrue(outCol.isNull[2]);
  3978. int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3979. outCol.start[1], outCol.length[1]);
  3980. Assert.assertEquals(0, cmp2);
  3981. Assert.assertFalse(outCol.noNulls);
  3982. Assert.assertFalse(outCol.isRepeating);
  3983. // no nulls, not repeating
  3984. batch = makeStringBatch();
  3985. batch.cols[0].noNulls = true;
  3986. expr.evaluate(batch);
  3987. outCol = (BytesColumnVector) batch.cols[1];
  3988. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  3989. outCol.start[0], outCol.length[0]);
  3990. Assert.assertEquals(0, cmp);
  3991. cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  3992. outCol.start[1], outCol.length[1]);
  3993. Assert.assertEquals(0, cmp2);
  3994. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  3995. outCol.start[2], outCol.length[2]);
  3996. Assert.assertEquals(0, cmp3);
  3997. Assert.assertTrue(outCol.noNulls);
  3998. Assert.assertFalse(outCol.isRepeating);
  3999. // has nulls, is repeating
  4000. batch = makeStringBatch();
  4001. batch.cols[0].isRepeating = true;
  4002. expr.evaluate(batch);
  4003. outCol = (BytesColumnVector) batch.cols[1];
  4004. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4005. outCol.start[0], outCol.length[0]);
  4006. Assert.assertEquals(0, cmp);
  4007. Assert.assertTrue(outCol.isRepeating);
  4008. // no nulls, is repeating
  4009. batch = makeStringBatch();
  4010. batch.cols[0].isRepeating = true;
  4011. batch.cols[0].noNulls = true;
  4012. expr.evaluate(batch);
  4013. outCol = (BytesColumnVector) batch.cols[1];
  4014. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4015. outCol.start[0], outCol.length[0]);
  4016. Assert.assertEquals(0, cmp);
  4017. Assert.assertTrue(outCol.isRepeating);
  4018. Assert.assertTrue(outCol.noNulls);
  4019. }
  4020. @Test
  4021. public void testStringScalarConcatCol() throws HiveException {
  4022. // has nulls, not repeating
  4023. VectorizedRowBatch batch = makeStringBatch();
  4024. StringScalarConcatStringGroupCol expr = new StringScalarConcatStringGroupCol(red, 0, 1);
  4025. expr.evaluate(batch);
  4026. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  4027. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4028. outCol.start[0], outCol.length[0]);
  4029. Assert.assertEquals(0, cmp);
  4030. Assert.assertTrue(outCol.isNull[2]);
  4031. int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4032. outCol.start[1], outCol.length[1]);
  4033. Assert.assertEquals(0, cmp2);
  4034. Assert.assertFalse(outCol.noNulls);
  4035. Assert.assertFalse(outCol.isRepeating);
  4036. // no nulls, not repeating
  4037. batch = makeStringBatch();
  4038. batch.cols[0].noNulls = true;
  4039. expr.evaluate(batch);
  4040. outCol = (BytesColumnVector) batch.cols[1];
  4041. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4042. outCol.start[0], outCol.length[0]);
  4043. Assert.assertEquals(0, cmp);
  4044. cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4045. outCol.start[1], outCol.length[1]);
  4046. Assert.assertEquals(0, cmp2);
  4047. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  4048. outCol.start[2], outCol.length[2]);
  4049. Assert.assertEquals(0, cmp3);
  4050. Assert.assertTrue(outCol.noNulls);
  4051. Assert.assertFalse(outCol.isRepeating);
  4052. // has nulls, is repeating
  4053. batch = makeStringBatch();
  4054. batch.cols[0].isRepeating = true;
  4055. expr.evaluate(batch);
  4056. outCol = (BytesColumnVector) batch.cols[1];
  4057. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4058. outCol.start[0], outCol.length[0]);
  4059. Assert.assertEquals(0, cmp);
  4060. Assert.assertTrue(outCol.isRepeating);
  4061. // no nulls, is repeating
  4062. batch = makeStringBatch();
  4063. batch.cols[0].isRepeating = true;
  4064. batch.cols[0].noNulls = true;
  4065. expr.evaluate(batch);
  4066. outCol = (BytesColumnVector) batch.cols[1];
  4067. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4068. outCol.start[0], outCol.length[0]);
  4069. Assert.assertEquals(0, cmp);
  4070. Assert.assertTrue(outCol.isRepeating);
  4071. Assert.assertTrue(outCol.noNulls);
  4072. }
  4073. @Test
  4074. public void testCharScalarConcatCol() throws HiveException {
  4075. // has nulls, not repeating
  4076. VectorizedRowBatch batch = makeStringBatch();
  4077. StringScalarConcatStringGroupCol expr =
  4078. new StringScalarConcatStringGroupCol(
  4079. new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 0, 1);
  4080. expr.evaluate(batch);
  4081. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  4082. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4083. outCol.start[0], outCol.length[0]);
  4084. Assert.assertEquals(0, cmp);
  4085. Assert.assertTrue(outCol.isNull[2]);
  4086. int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4087. outCol.start[1], outCol.length[1]);
  4088. Assert.assertEquals(0, cmp2);
  4089. Assert.assertFalse(outCol.noNulls);
  4090. Assert.assertFalse(outCol.isRepeating);
  4091. // no nulls, not repeating
  4092. batch = makeStringBatch();
  4093. batch.cols[0].noNulls = true;
  4094. expr.evaluate(batch);
  4095. outCol = (BytesColumnVector) batch.cols[1];
  4096. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4097. outCol.start[0], outCol.length[0]);
  4098. Assert.assertEquals(0, cmp);
  4099. cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4100. outCol.start[1], outCol.length[1]);
  4101. Assert.assertEquals(0, cmp2);
  4102. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  4103. outCol.start[2], outCol.length[2]);
  4104. Assert.assertEquals(0, cmp3);
  4105. Assert.assertTrue(outCol.noNulls);
  4106. Assert.assertFalse(outCol.isRepeating);
  4107. // has nulls, is repeating
  4108. batch = makeStringBatch();
  4109. batch.cols[0].isRepeating = true;
  4110. expr.evaluate(batch);
  4111. outCol = (BytesColumnVector) batch.cols[1];
  4112. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4113. outCol.start[0], outCol.length[0]);
  4114. Assert.assertEquals(0, cmp);
  4115. Assert.assertTrue(outCol.isRepeating);
  4116. // no nulls, is repeating
  4117. batch = makeStringBatch();
  4118. batch.cols[0].isRepeating = true;
  4119. batch.cols[0].noNulls = true;
  4120. expr.evaluate(batch);
  4121. outCol = (BytesColumnVector) batch.cols[1];
  4122. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4123. outCol.start[0], outCol.length[0]);
  4124. Assert.assertEquals(0, cmp);
  4125. Assert.assertTrue(outCol.isRepeating);
  4126. Assert.assertTrue(outCol.noNulls);
  4127. }
  4128. @Test
  4129. public void testVarCharScalarConcatCol() throws HiveException {
  4130. // has nulls, not repeating
  4131. VectorizedRowBatch batch = makeStringBatch();
  4132. StringScalarConcatStringGroupCol expr =
  4133. new StringScalarConcatStringGroupCol(
  4134. new HiveVarchar(new String(red), 14).getValue().getBytes(), 0, 1);
  4135. expr.evaluate(batch);
  4136. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  4137. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4138. outCol.start[0], outCol.length[0]);
  4139. Assert.assertEquals(0, cmp);
  4140. Assert.assertTrue(outCol.isNull[2]);
  4141. int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4142. outCol.start[1], outCol.length[1]);
  4143. Assert.assertEquals(0, cmp2);
  4144. Assert.assertFalse(outCol.noNulls);
  4145. Assert.assertFalse(outCol.isRepeating);
  4146. // no nulls, not repeating
  4147. batch = makeStringBatch();
  4148. batch.cols[0].noNulls = true;
  4149. expr.evaluate(batch);
  4150. outCol = (BytesColumnVector) batch.cols[1];
  4151. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4152. outCol.start[0], outCol.length[0]);
  4153. Assert.assertEquals(0, cmp);
  4154. cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4155. outCol.start[1], outCol.length[1]);
  4156. Assert.assertEquals(0, cmp2);
  4157. int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  4158. outCol.start[2], outCol.length[2]);
  4159. Assert.assertEquals(0, cmp3);
  4160. Assert.assertTrue(outCol.noNulls);
  4161. Assert.assertFalse(outCol.isRepeating);
  4162. // has nulls, is repeating
  4163. batch = makeStringBatch();
  4164. batch.cols[0].isRepeating = true;
  4165. expr.evaluate(batch);
  4166. outCol = (BytesColumnVector) batch.cols[1];
  4167. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4168. outCol.start[0], outCol.length[0]);
  4169. Assert.assertEquals(0, cmp);
  4170. Assert.assertTrue(outCol.isRepeating);
  4171. // no nulls, is repeating
  4172. batch = makeStringBatch();
  4173. batch.cols[0].isRepeating = true;
  4174. batch.cols[0].noNulls = true;
  4175. expr.evaluate(batch);
  4176. outCol = (BytesColumnVector) batch.cols[1];
  4177. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4178. outCol.start[0], outCol.length[0]);
  4179. Assert.assertEquals(0, cmp);
  4180. Assert.assertTrue(outCol.isRepeating);
  4181. Assert.assertTrue(outCol.noNulls);
  4182. }
  4183. @Test
  4184. public void testColConcatCol() throws HiveException {
  4185. // has nulls, not repeating
  4186. VectorizedRowBatch batch = makeStringBatch2In1Out();
  4187. StringGroupConcatColCol expr = new StringGroupConcatColCol(0, 1, 2);
  4188. expr.evaluate(batch);
  4189. BytesColumnVector outCol = (BytesColumnVector) batch.cols[2];
  4190. int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4191. outCol.start[0], outCol.length[0]);
  4192. Assert.assertEquals(0, cmp);
  4193. Assert.assertTrue(outCol.isNull[2]);
  4194. int cmp2 = StringExpr.compare(greengreen, 0, greengreen.length, outCol.vector[1],
  4195. outCol.start[1], outCol.length[1]);
  4196. Assert.assertEquals(0, cmp2);
  4197. Assert.assertFalse(outCol.noNulls);
  4198. Assert.assertFalse(outCol.isRepeating);
  4199. // no nulls, not repeating
  4200. batch = makeStringBatch2In1Out();
  4201. batch.cols[0].noNulls = true;
  4202. batch.cols[1].noNulls = true;
  4203. expr.evaluate(batch);
  4204. outCol = (BytesColumnVector) batch.cols[2];
  4205. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4206. outCol.start[0], outCol.length[0]);
  4207. Assert.assertEquals(0, cmp);
  4208. cmp2 = StringExpr.compare(greengreen, 0, greengreen.length, outCol.vector[1],
  4209. outCol.start[1], outCol.length[1]);
  4210. Assert.assertEquals(0, cmp2);
  4211. int cmp3 = StringExpr.compare(emptyString, 0, emptyString.length,
  4212. outCol.vector[2], outCol.start[2], outCol.length[2]);
  4213. Assert.assertEquals(0, cmp3);
  4214. Assert.assertTrue(outCol.noNulls);
  4215. Assert.assertFalse(outCol.isRepeating);
  4216. // has nulls, is repeating
  4217. batch = makeStringBatch2In1Out();
  4218. batch.cols[0].isRepeating = true; // only left input repeating
  4219. batch.cols[0].isNull[0] = true;
  4220. expr.evaluate(batch);
  4221. outCol = (BytesColumnVector) batch.cols[2];
  4222. Assert.assertEquals(3, batch.size);
  4223. Assert.assertEquals(true, outCol.isRepeating);
  4224. Assert.assertEquals(true, outCol.isNull[0]);
  4225. // same, but repeating input is not null
  4226. batch = makeStringBatch2In1Out();
  4227. batch.cols[0].isRepeating = true;
  4228. expr.evaluate(batch);
  4229. outCol = (BytesColumnVector) batch.cols[2];
  4230. Assert.assertEquals(false, outCol.isRepeating); //TEST FAILED
  4231. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4232. outCol.start[0], outCol.length[0]);
  4233. Assert.assertEquals(0, cmp);
  4234. Assert.assertEquals(true, outCol.isNull[2]);
  4235. batch = makeStringBatch2In1Out();
  4236. batch.cols[1].isRepeating = true; // only right input repeating
  4237. batch.cols[1].isNull[0] = true;
  4238. expr.evaluate(batch);
  4239. outCol = (BytesColumnVector) batch.cols[2];
  4240. Assert.assertEquals(3, batch.size);
  4241. Assert.assertEquals(true, outCol.isRepeating);
  4242. Assert.assertEquals(true, outCol.isNull[0]);
  4243. batch = makeStringBatch2In1Out();
  4244. batch.cols[0].isRepeating = true; // both inputs repeat
  4245. batch.cols[0].isNull[0] = true;
  4246. batch.cols[1].isRepeating = true;
  4247. batch.cols[1].isNull[0] = true;
  4248. expr.evaluate(batch);
  4249. outCol = (BytesColumnVector) batch.cols[2];
  4250. Assert.assertEquals(3, batch.size);
  4251. Assert.assertEquals(true, outCol.isRepeating);
  4252. Assert.assertEquals(true, outCol.isNull[0]);
  4253. // no nulls, is repeating
  4254. batch = makeStringBatch2In1Out();
  4255. batch.cols[1].isRepeating = true; // only right input repeating and has no nulls
  4256. batch.cols[1].noNulls = true;
  4257. expr.evaluate(batch);
  4258. outCol = (BytesColumnVector) batch.cols[2];
  4259. Assert.assertEquals(3, batch.size);
  4260. Assert.assertEquals(false, outCol.isRepeating);
  4261. Assert.assertEquals(false, outCol.isNull[0]);
  4262. Assert.assertEquals(false, outCol.noNulls);
  4263. Assert.assertEquals(true, outCol.isNull[2]);
  4264. cmp = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1],
  4265. outCol.start[1], outCol.length[1]);
  4266. Assert.assertEquals(0, cmp);
  4267. // try again with left input also having no nulls
  4268. batch.cols[0].noNulls = true;
  4269. expr.evaluate(batch);
  4270. Assert.assertEquals(false, outCol.isRepeating);
  4271. cmp = StringExpr.compare(red, 0, red.length, outCol.vector[2],
  4272. outCol.start[2], outCol.length[2]);
  4273. Assert.assertEquals(0, cmp);
  4274. batch = makeStringBatch2In1Out();
  4275. batch.cols[0].isRepeating = true; // only left input repeating and has no nulls
  4276. batch.cols[0].noNulls = true;
  4277. expr.evaluate(batch);
  4278. outCol = (BytesColumnVector) batch.cols[2];
  4279. Assert.assertEquals(3, batch.size);
  4280. Assert.assertEquals(false, outCol.isRepeating);
  4281. Assert.assertEquals(false, outCol.isNull[0]);
  4282. Assert.assertEquals(false, outCol.noNulls);
  4283. Assert.assertEquals(true, outCol.isNull[2]);
  4284. cmp = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1],
  4285. outCol.start[1], outCol.length[1]);
  4286. Assert.assertEquals(0, cmp);
  4287. batch = makeStringBatch2In1Out();
  4288. batch.cols[0].isRepeating = true; // both inputs repeat
  4289. batch.cols[0].noNulls = true;
  4290. batch.cols[1].isRepeating = true;
  4291. batch.cols[1].noNulls = true;
  4292. expr.evaluate(batch);
  4293. outCol = (BytesColumnVector) batch.cols[2];
  4294. Assert.assertEquals(3, batch.size);
  4295. Assert.assertEquals(true, outCol.isRepeating);
  4296. Assert.assertEquals(false, outCol.isNull[0]);
  4297. cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0],
  4298. outCol.start[0], outCol.length[0]);
  4299. Assert.assertEquals(0, cmp);
  4300. }
  4301. @Test
  4302. public void testSubstrStart() throws HiveException {
  4303. // Testing no nulls and no repeating
  4304. VectorizedRowBatch batch = new VectorizedRowBatch(2);
  4305. BytesColumnVector v = new BytesColumnVector();
  4306. batch.cols[0] = v;
  4307. BytesColumnVector outV = new BytesColumnVector();
  4308. batch.cols[1] = outV;
  4309. byte[] data1 = "abcd string".getBytes(StandardCharsets.UTF_8);
  4310. byte[] data2 = "efgh string".getBytes(StandardCharsets.UTF_8);
  4311. byte[] data3 = "efgh".getBytes(StandardCharsets.UTF_8);
  4312. batch.size = 3;
  4313. v.noNulls = true;
  4314. v.setRef(0, data1, 0, data1.length);
  4315. v.isNull[0] = false;
  4316. v.setRef(1, data2, 0, data2.length);
  4317. v.isNull[1] = false;
  4318. v.setRef(2, data3, 0, data3.length);
  4319. v.isNull[2] = false;
  4320. StringSubstrColStart expr = new StringSubstrColStart(0, 6, 1);
  4321. expr.evaluate(batch);
  4322. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  4323. Assert.assertEquals(3, batch.size);
  4324. Assert.assertTrue(outCol.noNulls);
  4325. Assert.assertFalse(outCol.isRepeating);
  4326. byte[] expected = "string".getBytes(StandardCharsets.UTF_8);
  4327. Assert.assertEquals(0,
  4328. StringExpr.compare(
  4329. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4330. )
  4331. );
  4332. Assert.assertEquals(0,
  4333. StringExpr.compare(
  4334. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4335. )
  4336. );
  4337. // This yields empty because starting idx is out of bounds.
  4338. Assert.assertEquals(0,
  4339. StringExpr.compare(
  4340. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4341. )
  4342. );
  4343. outCol.noNulls = false;
  4344. outCol.isRepeating = true;
  4345. // Testing negative substring index.
  4346. // Start index -6 should yield the last 6 characters of the string
  4347. expr = new StringSubstrColStart(0, -6, 1);
  4348. expr.evaluate(batch);
  4349. outCol = (BytesColumnVector) batch.cols[1];
  4350. Assert.assertEquals(3, batch.size);
  4351. Assert.assertFalse(outCol.isRepeating);
  4352. Assert.assertEquals(0,
  4353. StringExpr.compare(
  4354. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4355. )
  4356. );
  4357. Assert.assertEquals(0,
  4358. StringExpr.compare(
  4359. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4360. )
  4361. );
  4362. Assert.assertEquals(0,
  4363. StringExpr.compare(
  4364. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4365. )
  4366. );
  4367. outCol.noNulls = false;
  4368. outCol.isRepeating = true;
  4369. // Testing substring starting from index 1
  4370. expr = new StringSubstrColStart(0, 1, 1);
  4371. expr.evaluate(batch);
  4372. Assert.assertEquals(3, batch.size);
  4373. Assert.assertFalse(outCol.isRepeating);
  4374. Assert.assertEquals(0,
  4375. StringExpr.compare(
  4376. data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4377. )
  4378. );
  4379. Assert.assertEquals(0,
  4380. StringExpr.compare(
  4381. data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4382. )
  4383. );
  4384. Assert.assertEquals(0,
  4385. StringExpr.compare(
  4386. data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4387. )
  4388. );
  4389. outV.noNulls = false;
  4390. outV.isRepeating = true;
  4391. // Testing with nulls
  4392. expr = new StringSubstrColStart(0, 6, 1);
  4393. v.noNulls = false;
  4394. v.isNull[0] = true;
  4395. expr.evaluate(batch);
  4396. Assert.assertEquals(3, batch.size);
  4397. Assert.assertFalse(outV.noNulls);
  4398. Assert.assertTrue(outV.isNull[0]);
  4399. Assert.assertEquals(0,
  4400. StringExpr.compare(
  4401. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4402. )
  4403. );
  4404. Assert.assertEquals(0,
  4405. StringExpr.compare(
  4406. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4407. )
  4408. );
  4409. outCol.noNulls = false;
  4410. outCol.isRepeating = false;
  4411. // Testing with repeating and no nulls
  4412. outV = new BytesColumnVector();
  4413. v = new BytesColumnVector();
  4414. v.isRepeating = true;
  4415. v.noNulls = true;
  4416. v.setRef(0, data1, 0, data1.length);
  4417. batch = new VectorizedRowBatch(2);
  4418. batch.cols[0] = v;
  4419. batch.cols[1] = outV;
  4420. expr.evaluate(batch);
  4421. outCol = (BytesColumnVector) batch.cols[1];
  4422. expected = "string".getBytes(StandardCharsets.UTF_8);
  4423. Assert.assertTrue(outV.isRepeating);
  4424. Assert.assertTrue(outV.noNulls);
  4425. Assert.assertEquals(0,
  4426. StringExpr.compare(
  4427. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4428. )
  4429. );
  4430. // Testing multiByte string substring
  4431. v = new BytesColumnVector();
  4432. v.isRepeating = false;
  4433. v.noNulls = true;
  4434. v.setRef(0, multiByte, 0, 10);
  4435. batch.cols[0] = v;
  4436. batch.cols[1] = outV;
  4437. outV.isRepeating = true;
  4438. outV.noNulls = false;
  4439. expr = new StringSubstrColStart(0, 3, 1);
  4440. batch.size = 1;
  4441. expr.evaluate(batch);
  4442. outCol = (BytesColumnVector) batch.cols[1];
  4443. Assert.assertFalse(outV.isRepeating);
  4444. Assert.assertEquals(0,
  4445. StringExpr.compare(
  4446. // 3nd char starts from index 3 and total length should be 7 bytes as max is 10
  4447. multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]
  4448. )
  4449. );
  4450. // Testing multiByte string with reference starting mid array
  4451. v = new BytesColumnVector();
  4452. v.isRepeating = false;
  4453. v.noNulls = true;
  4454. // string is 2 chars long (a 3 byte and a 4 byte char)
  4455. v.setRef(0, multiByte, 3, 7);
  4456. batch.cols[0] = v;
  4457. batch.cols[1] = outV;
  4458. outV.isRepeating = true;
  4459. outV.noNulls = false;
  4460. outCol = (BytesColumnVector) batch.cols[1];
  4461. expr = new StringSubstrColStart(0, 2, 1);
  4462. expr.evaluate(batch);
  4463. Assert.assertFalse(outV.isRepeating);
  4464. Assert.assertEquals(0,
  4465. StringExpr.compare(
  4466. // the result is the last 1 character, which occupies 4 bytes
  4467. multiByte, 6, 4, outCol.vector[0], outCol.start[0], outCol.length[0]
  4468. )
  4469. );
  4470. }
  4471. @Test
  4472. public void testSubstrStartLen() throws HiveException {
  4473. // Testing no nulls and no repeating
  4474. VectorizedRowBatch batch = new VectorizedRowBatch(2);
  4475. BytesColumnVector v = new BytesColumnVector();
  4476. batch.cols[0] = v;
  4477. BytesColumnVector outV = new BytesColumnVector();
  4478. batch.cols[1] = outV;
  4479. byte[] data1 = "abcd string".getBytes(StandardCharsets.UTF_8);
  4480. byte[] data2 = "efgh string".getBytes(StandardCharsets.UTF_8);
  4481. byte[] data3 = "efgh".getBytes(StandardCharsets.UTF_8);
  4482. batch.size = 3;
  4483. v.noNulls = true;
  4484. v.setRef(0, data1, 0, data1.length);
  4485. v.isNull[0] = false;
  4486. v.setRef(1, data2, 0, data2.length);
  4487. v.isNull[1] = false;
  4488. v.setRef(2, data3, 0, data3.length);
  4489. v.isNull[2] = false;
  4490. outV.isRepeating = true;
  4491. outV.noNulls = false;
  4492. StringSubstrColStartLen expr = new StringSubstrColStartLen(0, 6, 6, 1);
  4493. expr.evaluate(batch);
  4494. BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
  4495. Assert.assertEquals(3, batch.size);
  4496. Assert.assertFalse(outCol.isRepeating);
  4497. byte[] expected = "string".getBytes(StandardCharsets.UTF_8);
  4498. Assert.assertEquals(0,
  4499. StringExpr.compare(
  4500. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4501. )
  4502. );
  4503. Assert.assertEquals(0,
  4504. StringExpr.compare(
  4505. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4506. )
  4507. );
  4508. Assert.assertEquals(0,
  4509. StringExpr.compare(
  4510. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4511. )
  4512. );
  4513. // Testing negative substring index
  4514. outV.isRepeating = true;
  4515. outV.noNulls = false;
  4516. expr = new StringSubstrColStartLen(0, -6, 6, 1);
  4517. expr.evaluate(batch);
  4518. outCol = (BytesColumnVector) batch.cols[1];
  4519. Assert.assertFalse(outCol.isRepeating);
  4520. Assert.assertEquals(3, batch.size);
  4521. Assert.assertEquals(0,
  4522. StringExpr.compare(
  4523. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4524. )
  4525. );
  4526. Assert.assertEquals(0,
  4527. StringExpr.compare(
  4528. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4529. )
  4530. );
  4531. // This yields empty because starting index is out of bounds
  4532. Assert.assertEquals(0,
  4533. StringExpr.compare(
  4534. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4535. )
  4536. );
  4537. //Testing substring index starting with 1 and zero length
  4538. outV.isRepeating = true;
  4539. outV.noNulls = false;
  4540. expr = new StringSubstrColStartLen(0, 1, 0, 1);
  4541. outCol = (BytesColumnVector) batch.cols[1];
  4542. expr.evaluate(batch);
  4543. Assert.assertEquals(3, batch.size);
  4544. Assert.assertFalse(outCol.isRepeating);
  4545. Assert.assertEquals(0,
  4546. StringExpr.compare(
  4547. data1, 1, 0, outCol.vector[0], outCol.start[0], outCol.length[0]
  4548. )
  4549. );
  4550. Assert.assertEquals(0,
  4551. StringExpr.compare(
  4552. data2, 1, 0, outCol.vector[1], outCol.start[1], outCol.length[1]
  4553. )
  4554. );
  4555. Assert.assertEquals(0,
  4556. StringExpr.compare(
  4557. data3, 1, 0, outCol.vector[2], outCol.start[2], outCol.length[2]
  4558. )
  4559. );
  4560. //Testing substring index starting with 0 and length equal to array length
  4561. outV.isRepeating = true;
  4562. outV.noNulls = false;
  4563. expr = new StringSubstrColStartLen(0, 0, 11, 1);
  4564. outCol = (BytesColumnVector) batch.cols[1];
  4565. expr.evaluate(batch);
  4566. Assert.assertEquals(3, batch.size);
  4567. Assert.assertFalse(outCol.isRepeating);
  4568. Assert.assertEquals(0,
  4569. StringExpr.compare(
  4570. data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4571. )
  4572. );
  4573. Assert.assertEquals(0,
  4574. StringExpr.compare(
  4575. data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4576. )
  4577. );
  4578. Assert.assertEquals(0,
  4579. StringExpr.compare(
  4580. data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4581. )
  4582. );
  4583. // Testing setting length larger than array length, which should cap to the length itself
  4584. outV.isRepeating = true;
  4585. outV.noNulls = false;
  4586. expr = new StringSubstrColStartLen(0, 6, 10, 1);
  4587. expr.evaluate(batch);
  4588. outCol = (BytesColumnVector) batch.cols[1];
  4589. Assert.assertEquals(3, batch.size);
  4590. Assert.assertFalse(outCol.isRepeating);
  4591. Assert.assertEquals(0,
  4592. StringExpr.compare(
  4593. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4594. )
  4595. );
  4596. Assert.assertEquals(0,
  4597. StringExpr.compare(
  4598. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4599. )
  4600. );
  4601. Assert.assertEquals(0,
  4602. StringExpr.compare(
  4603. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4604. )
  4605. );
  4606. outV.isRepeating = true;
  4607. outV.noNulls = true;
  4608. // Testing with nulls
  4609. v.noNulls = false;
  4610. v.isNull[0] = true;
  4611. expr.evaluate(batch);
  4612. Assert.assertEquals(3, batch.size);
  4613. Assert.assertFalse(outV.noNulls);
  4614. Assert.assertTrue(outV.isNull[0]);
  4615. Assert.assertFalse(outCol.isRepeating);
  4616. Assert.assertEquals(0,
  4617. StringExpr.compare(
  4618. expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
  4619. )
  4620. );
  4621. Assert.assertEquals(0,
  4622. StringExpr.compare(
  4623. emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
  4624. )
  4625. );
  4626. // Testing with repeating and no nulls
  4627. outV = new BytesColumnVector();
  4628. v = new BytesColumnVector();
  4629. outV.isRepeating = false;
  4630. outV.noNulls = true;
  4631. v.isRepeating = true;
  4632. v.noNulls = false;
  4633. v.setRef(0, data1, 0, data1.length);
  4634. batch = new VectorizedRowBatch(2);
  4635. batch.cols[0] = v;
  4636. batch.cols[1] = outV;
  4637. expr.evaluate(batch);
  4638. outCol = (BytesColumnVector) batch.cols[1];
  4639. Assert.assertTrue(outCol.noNulls);
  4640. Assert.assertTrue(outCol.isRepeating);
  4641. Assert.assertEquals(0,
  4642. StringExpr.compare(
  4643. expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
  4644. )
  4645. );
  4646. // Testing with multiByte String
  4647. v = new BytesColumnVector();
  4648. v.isRepeating = false;
  4649. v.noNulls = true;
  4650. batch.size = 1;
  4651. v.setRef(0, multiByte, 0, 10);
  4652. batch.cols[0] = v;
  4653. batch.cols[1] = outV;
  4654. outV.isRepeating = true;
  4655. outV.noNulls = false;
  4656. expr = new StringSubstrColStartLen(0, 3, 2, 1);
  4657. expr.evaluate(batch);
  4658. Assert.assertEquals(1, batch.size);
  4659. Assert.assertFalse(outV.isRepeating);
  4660. Assert.assertEquals(0,
  4661. StringExpr.compare(
  4662. // 3rd char starts at index 3, and with length 2 it is covering the rest of the array.
  4663. multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]
  4664. )
  4665. );
  4666. // Testing multiByte string with reference set to mid array
  4667. v = new BytesColumnVector();
  4668. v.isRepeating = false;
  4669. v.noNulls = true;
  4670. outV = new BytesColumnVector();
  4671. batch.size = 1;
  4672. v.setRef(0, multiByte, 3, 7);
  4673. batch.cols[0] = v;
  4674. batch.cols[1] = outV;
  4675. outV.isRepeating = true;
  4676. outV.noNulls = false;
  4677. expr = new StringSubstrColStartLen(0, 2, 2, 1);
  4678. expr.evaluate(batch);
  4679. outCol = (BytesColumnVector) batch.cols[1];
  4680. Assert.assertEquals(1, batch.size);
  4681. Assert.assertFalse(outV.isRepeating);
  4682. Assert.assertEquals(0,
  4683. StringExpr.compare(
  4684. // 2nd substring index refers to the 6th index (last char in the array)
  4685. multiByte, 6, 10 - 6, outCol.vector[0], outCol.start[0], outCol.length[0]
  4686. )
  4687. );
  4688. }
  4689. @Test
  4690. public void testVectorLTrim() throws HiveException {
  4691. VectorizedRowBatch b = makeTrimBatch();
  4692. VectorExpression expr = new StringLTrimCol(0, 1);
  4693. expr.evaluate(b);
  4694. BytesColumnVector outV = (BytesColumnVector) b.cols[1];
  4695. Assert.assertEquals(0,
  4696. StringExpr.compare(emptyString, 0, 0, outV.vector[0], 0, 0));
  4697. Assert.assertEquals(0,
  4698. StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
  4699. Assert.assertEquals(0,
  4700. StringExpr.compare(blanksRight, 0, 5, outV.vector[2], outV.start[2], outV.length[2]));
  4701. Assert.assertEquals(0,
  4702. StringExpr.compare(blanksBoth, 2, 5, outV.vector[3], outV.start[3], outV.length[3]));
  4703. Assert.assertEquals(0,
  4704. StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
  4705. Assert.assertEquals(0,
  4706. StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
  4707. }
  4708. @Test
  4709. public void testVectorRTrim() throws HiveException {
  4710. VectorizedRowBatch b = makeTrimBatch();
  4711. VectorExpression expr = new StringRTrimCol(0, 1);
  4712. expr.evaluate(b);
  4713. BytesColumnVector outV = (BytesColumnVector) b.cols[1];
  4714. Assert.assertEquals(0,
  4715. StringExpr.compare(emptyString, 0, 0, outV.vector[0], 0, 0));
  4716. Assert.assertEquals(0,
  4717. StringExpr.compare(blanksLeft, 0, 5, outV.vector[1], outV.start[1], outV.length[1]));
  4718. Assert.assertEquals(0,
  4719. StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
  4720. Assert.assertEquals(0,
  4721. StringExpr.compare(blanksBoth, 0, 5, outV.vector[3], outV.start[3], outV.length[3]));
  4722. Assert.assertEquals(0,
  4723. StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
  4724. Assert.assertEquals(0,
  4725. StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
  4726. }
  4727. @Test
  4728. public void testVectorTrim() throws HiveException {
  4729. VectorizedRowBatch b = makeTrimBatch();
  4730. VectorExpression expr = new StringTrimCol(0, 1);
  4731. expr.evaluate(b);
  4732. BytesColumnVector outV = (BytesColumnVector) b.cols[1];
  4733. Assert.assertEquals(0,
  4734. StringExpr.compare(emptyString, 0, 0, outV.vector[0], 0, 0));
  4735. Assert.assertEquals(0,
  4736. StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
  4737. Assert.assertEquals(0,
  4738. StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
  4739. Assert.assertEquals(0,
  4740. StringExpr.compare(blanksBoth, 2, 3, outV.vector[3], outV.start[3], outV.length[3]));
  4741. Assert.assertEquals(0,
  4742. StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
  4743. Assert.assertEquals(0,
  4744. StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
  4745. }
  4746. // Make a batch to test the trim functions.
  4747. private VectorizedRowBatch makeTrimBatch() {
  4748. VectorizedRowBatch b = new VectorizedRowBatch(2);
  4749. BytesColumnVector inV = new BytesColumnVector();
  4750. BytesColumnVector outV = new BytesColumnVector();
  4751. b.cols[0] = inV;
  4752. b.cols[1] = outV;
  4753. inV.setRef(0, emptyString, 0, 0);
  4754. inV.setRef(1, blanksLeft, 0, blanksLeft.length);
  4755. inV.setRef(2, blanksRight, 0, blanksRight.length);
  4756. inV.setRef(3, blanksBoth, 0, blanksBoth.length);
  4757. inV.setRef(4, red, 0, red.length);
  4758. inV.setRef(5, blankString, 0, blankString.length);
  4759. b.size = 5;
  4760. return b;
  4761. }
  4762. // Test boolean-valued (non-filter) IN expression for strings
  4763. @Test
  4764. public void testStringInExpr() throws HiveException {
  4765. // test basic operation
  4766. VectorizedRowBatch b = makeStringBatch();
  4767. b.size = 2;
  4768. b.cols[0].noNulls = true;
  4769. byte[][] inVals = new byte[2][];
  4770. inVals[0] = red;
  4771. inVals[1] = blue;
  4772. StringColumnInList expr = new StringColumnInList(0, 2);
  4773. expr.setInListValues(inVals);
  4774. expr.evaluate(b);
  4775. LongColumnVector outV = (LongColumnVector) b.cols[2];
  4776. Assert.assertEquals(1, outV.vector[0]);
  4777. Assert.assertEquals(0, outV.vector[1]);
  4778. // test null input
  4779. b = makeStringBatch();
  4780. b.size = 2;
  4781. b.cols[0].noNulls = false;
  4782. b.cols[0].isNull[0] = true;
  4783. expr.evaluate(b);
  4784. outV = (LongColumnVector) b.cols[2];
  4785. Assert.assertEquals(true, !outV.noNulls && outV.isNull[0] && !outV.isNull[1]);
  4786. Assert.assertEquals(0, outV.vector[1]);
  4787. // test repeating logic
  4788. b = makeStringBatch();
  4789. b.size = 2;
  4790. b.cols[0].noNulls = true;
  4791. b.cols[0].isRepeating = true;
  4792. expr.evaluate(b);
  4793. outV = (LongColumnVector) b.cols[2];
  4794. Assert.assertEquals(1, outV.vector[0]);
  4795. Assert.assertEquals(true, outV.isRepeating);
  4796. }
  4797. /**
  4798. * Test vectorized regex expression.
  4799. */
  4800. @Test
  4801. public void testRegex() throws HiveException {
  4802. VectorizedRowBatch b = makeStringBatch();
  4803. FilterStringColRegExpStringScalar expr = new FilterStringColRegExpStringScalar(0, "a.*".getBytes());
  4804. expr.transientInit(hiveConf);
  4805. b.size = 5;
  4806. b.selectedInUse = false;
  4807. BytesColumnVector v = (BytesColumnVector) b.cols[0];
  4808. v.isRepeating = false;
  4809. v.noNulls = false;
  4810. String s1 = "4kMasVoB7lX1wc5i64bNk";
  4811. String s2 = "a27V63IL7jK3o";
  4812. String s3 = "27V63IL7jK3oa";
  4813. String s4 = "27V63IL7jK3o";
  4814. v.isNull[0] = false;
  4815. v.setRef(0, s1.getBytes(), 0, s1.getBytes().length);
  4816. v.isNull[1] = true;
  4817. v.vector[1] = null;
  4818. v.isNull[2] = false;
  4819. v.setRef(2, s2.getBytes(), 0, s2.getBytes().length);
  4820. v.isNull[3] = false;
  4821. v.setRef(3, s3.getBytes(), 0, s3.getBytes().length);
  4822. v.isNull[4] = false;
  4823. v.setRef(4, s4.getBytes(), 0, s4.getBytes().length);
  4824. expr.evaluate(b);
  4825. Assert.assertTrue(b.selectedInUse);
  4826. Assert.assertEquals(3,b.size);
  4827. Assert.assertEquals(0,b.selected[0]);
  4828. Assert.assertEquals(2,b.selected[1]);
  4829. Assert.assertEquals(3,b.selected[2]);
  4830. }
  4831. }