PageRenderTime 59ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 2ms

/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

http://github.com/apache/hive
Java | 5602 lines | 4473 code | 771 blank | 358 comment | 886 complexity | f975af9609e8ad5e31d7471c9db930f3 MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec.vector.expressions;
  19. import static org.junit.Assert.assertEquals;
  20. import java.nio.charset.StandardCharsets;
  21. import java.util.Arrays;
  22. import java.util.Random;
  23. import java.util.StringTokenizer;
  24. import org.apache.hadoop.hive.conf.HiveConf;
  25. import org.junit.Assert;
  26. import org.apache.commons.codec.binary.Hex;
  27. import org.apache.hadoop.hive.common.type.HiveChar;
  28. import org.apache.hadoop.hive.common.type.HiveVarchar;
  29. import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
  30. import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  31. import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  32. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharScalarEqualStringGroupColumn;
  33. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarEqualStringGroupColumn;
  34. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarGreaterStringGroupColumn;
  35. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarLessEqualStringGroupColumn;
  36. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualCharScalar;
  37. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualStringScalar;
  38. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar;
  39. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualCharScalar;
  40. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualStringScalar;
  41. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar;
  42. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessCharScalar;
  43. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringGroupColumn;
  44. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringScalar;
  45. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar;
  46. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringGroupColumn;
  47. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringGroupColumn;
  48. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringGroupColumn;
  49. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarEqualStringGroupColumn;
  50. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarGreaterStringGroupColumn;
  51. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarLessEqualStringGroupColumn;
  52. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualCharScalar;
  53. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringScalar;
  54. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar;
  55. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColLessStringGroupColumn;
  56. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringGroupColumn;
  57. import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharScalarEqualStringGroupColumn;
  58. import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
  59. import org.apache.hadoop.hive.ql.metadata.HiveException;
  60. import org.apache.hadoop.hive.ql.udf.UDFLike;
  61. import org.apache.hadoop.io.BooleanWritable;
  62. import org.apache.hadoop.io.Text;
  63. import org.junit.Test;
  64. import org.slf4j.Logger;
  65. import org.slf4j.LoggerFactory;
  66. /**
  67. * Test vectorized expression and filter evaluation for strings.
  68. */
  69. public class TestVectorStringExpressions {
  70. private static final Logger LOG = LoggerFactory
  71. .getLogger(TestVectorStringExpressions.class);
  72. private static byte[] red;
  73. private static byte[] redred;
  74. private static byte[] red2; // second copy of red, different object
  75. private static byte[] green;
  76. private static byte[] greenred;
  77. private static byte[] redgreen;
  78. private static byte[] greengreen;
  79. private static byte[] blue;
  80. private static byte[] emptyString;
  81. private static byte[] mixedUp;
  82. private static byte[] mixedUpLower;
  83. private static byte[] mixedUpUpper;
  84. private static byte[] multiByte;
  85. private static byte[] mixPercentPattern;
  86. private static byte[] blanksLeft;
  87. private static byte[] blanksRight;
  88. private static byte[] blanksBoth;
  89. private static byte[] blankString;
  90. private static byte[] blankRanges;
  91. private static byte[] ascii_sentence;
  // Fills the shared fixtures once, as UTF-8 bytes.
  //
  // The blank-padded literals are whitespace sensitive. Their lengths and padding are
  // pinned by the trim tests in this class:
  //   blanksLeft  = 5 characters, nothing to right-trim
  //   blanksRight = 5 characters, right-trims to 3
  //   blanksBoth  = 7 characters, right-trims to 5
  //   blankString = 2 characters, right-trims to 0
  //   blankRanges = 30 characters, right-trims to 26, with a blank at index 7
  static {
    blue = "blue".getBytes(StandardCharsets.UTF_8);
    red = "red".getBytes(StandardCharsets.UTF_8);
    redred = "redred".getBytes(StandardCharsets.UTF_8);
    green = "green".getBytes(StandardCharsets.UTF_8);
    greenred = "greenred".getBytes(StandardCharsets.UTF_8);
    redgreen = "redgreen".getBytes(StandardCharsets.UTF_8);
    greengreen = "greengreen".getBytes(StandardCharsets.UTF_8);
    emptyString = "".getBytes(StandardCharsets.UTF_8);
    mixedUp = "mixedUp".getBytes(StandardCharsets.UTF_8);
    mixedUpLower = "mixedup".getBytes(StandardCharsets.UTF_8);
    mixedUpUpper = "MIXEDUP".getBytes(StandardCharsets.UTF_8);

    // for use as wildcard pattern to test LIKE
    mixPercentPattern = "mix%".getBytes(StandardCharsets.UTF_8);

    multiByte = new byte[10];
    addMultiByteChars(multiByte);

    blanksLeft = "  foo".getBytes(StandardCharsets.UTF_8);   // two leading blanks
    blanksRight = "foo  ".getBytes(StandardCharsets.UTF_8);  // two trailing blanks
    blanksBoth = "  foo  ".getBytes(StandardCharsets.UTF_8); // two blanks on each side
    blankString = "  ".getBytes(StandardCharsets.UTF_8);     // two blanks only

    // NOTE(review): total length (30), the four trailing blanks and the blank at index 7
    // follow from the assertions in this class; the remaining interior spacing was
    // reconstructed -- confirm against the upstream file.
    blankRanges =
        "   more  than a    bargain    ".getBytes(StandardCharsets.UTF_8);
      // 012345678901234567890123456789

    ascii_sentence =
        "The fox trotted over the fence.".getBytes(StandardCharsets.UTF_8);
      // 0123456789012345678901234567890

    // Same content as red, but a distinct array object.
    red2 = new byte[red.length];
    System.arraycopy(red, 0, red2, 0, red.length);
  }
  121. // add some multi-byte characters to test length routine later.
  122. // total characters = 4; byte length = 10
  123. static void addMultiByteChars(byte[] b) {
  124. int i = 0;
  125. b[i++] = (byte) 0x41; // letter "A" (1 byte)
  126. b[i++] = (byte) 0xC3; // Latin capital A with grave (2 bytes)
  127. b[i++] = (byte) 0x80;
  128. b[i++] = (byte) 0xE2; // Euro sign (3 bytes)
  129. b[i++] = (byte) 0x82;
  130. b[i++] = (byte) 0xAC;
  131. b[i++] = (byte) 0xF0; // Asian character U+24B62 (4 bytes)
  132. b[i++] = (byte) 0xA4;
  133. b[i++] = (byte) 0xAD;
  134. b[i++] = (byte) 0xA2;
  135. }
  136. //-------------------------------------------------------------
  137. // total characters = 2; byte length = 3
  138. static void addMultiByteCharLeftPadded1_1(byte[] b) {
  139. int i = 0;
  140. b[i++] = (byte) 0x20; // blank " " (1 byte)
  141. b[i++] = (byte) 0xD0; // Cyrillic Capital DJE U+402 (2 bytes)
  142. b[i++] = (byte) 0x82;
  143. }
  144. // total characters = 3; byte length = 9
  145. static void addMultiByteCharLeftPadded1_2(byte[] b) {
  146. int i = 0;
  147. b[i++] = (byte) 0x20; // blank " " (1 byte)
  148. b[i++] = (byte) 0xF0; // Smiling Face with Open Mouth and Smiling Eyes U+1F604 (4 bytes)
  149. b[i++] = (byte) 0x9F;
  150. b[i++] = (byte) 0x98;
  151. b[i++] = (byte) 0x84;
  152. b[i++] = (byte) 0xF0; // Grimacing Face U+1F62C (4 bytes)
  153. b[i++] = (byte) 0x9F;
  154. b[i++] = (byte) 0x98;
  155. b[i++] = (byte) 0xAC;
  156. }
  157. // total characters = 4; byte length = 6
  158. static void addMultiByteCharLeftPadded3_1(byte[] b) {
  159. int i = 0;
  160. b[i++] = (byte) 0x20; // blank " " (1 byte)
  161. b[i++] = (byte) 0x20; // blank " " (1 byte)
  162. b[i++] = (byte) 0x20; // blank " " (1 byte)
  163. b[i++] = (byte) 0xE4; // Asian character U+4824 (3 bytes)
  164. b[i++] = (byte) 0xA0;
  165. b[i++] = (byte) 0xA4;
  166. }
  167. //-------------------------------------------------------------
  168. // total characters = 2; byte length = 4
  169. static void addMultiByteCharRightPadded1_1(byte[] b) {
  170. int i = 0;
  171. b[i++] = (byte) 0xE0; // Tamil Om U+0BD0 (3 bytes)
  172. b[i++] = (byte) 0xAF;
  173. b[i++] = (byte) 0x90;
  174. b[i++] = (byte) 0x20; // blank " " (1 byte)
  175. }
  176. // total characters = 3; byte length = 5
  177. static void addMultiByteCharRightPadded1_2(byte[] b) {
  178. int i = 0;
  179. b[i++] = (byte) 0xEA; // Va Syllable MEE U+A521 (3 bytes)
  180. b[i++] = (byte) 0x94;
  181. b[i++] = (byte) 0xA1;
  182. b[i++] = (byte) 0x5A; // Latin Capital Letter Z U+005A (1 bytes)
  183. b[i++] = (byte) 0x20; // blank " " (1 byte)
  184. }
  185. // total characters = 4; byte length = 9
  186. static void addMultiByteCharRightPadded1_3(byte[] b) {
  187. int i = 0;
  188. b[i++] = (byte) 0xCC; // COMBINING ACUTE ACENT U+0301 (2 bytes)
  189. b[i++] = (byte) 0x81;
  190. b[i++] = (byte) 0xE0; // DEVENAGARI LETTER KA U+0915 (3 bytes)
  191. b[i++] = (byte) 0xA4;
  192. b[i++] = (byte) 0x95;
  193. b[i++] = (byte) 0xE0; // DEVENAGARI SIGN VIRAMA U+094D (3 bytes)
  194. b[i++] = (byte) 0xA5;
  195. b[i++] = (byte) 0x8D;
  196. b[i++] = (byte) 0x20; // blank " " (1 byte)
  197. }
  198. // total characters = 10; byte length = 26
  199. static int addMultiByteCharSentenceOne(byte[] b, int start) {
  200. int i = start;
  201. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER THA U+1992 (3 bytes)
  202. b[i++] = (byte) 0xA6;
  203. b[i++] = (byte) 0x92;
  204. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW XA U+1986 (3 bytes)
  205. b[i++] = (byte) 0xA6;
  206. b[i++] = (byte) 0x86;
  207. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH MA U+1996 (3 bytes)
  208. b[i++] = (byte) 0xA6;
  209. b[i++] = (byte) 0x96;
  210. b[i++] = (byte) 0x20; // blank " " (1 byte)
  211. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW QA U+1981 (3 bytes)
  212. b[i++] = (byte) 0xA6;
  213. b[i++] = (byte) 0x81;
  214. b[i++] = (byte) 0x20; // blank " " (1 byte)
  215. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW BA U+19A5 (3 bytes)
  216. b[i++] = (byte) 0xA6;
  217. b[i++] = (byte) 0xA5;
  218. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH LA U+199C (3 bytes)
  219. b[i++] = (byte) 0xA6;
  220. b[i++] = (byte) 0x9C;
  221. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW KVA U+19A8 (3 bytes)
  222. b[i++] = (byte) 0xA6;
  223. b[i++] = (byte) 0xA8;
  224. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
  225. b[i++] = (byte) 0xA6;
  226. b[i++] = (byte) 0x9D;
  227. return i;
  228. }
  229. // total characters = 13; byte length = 23
  230. static int addMultiByteCharSentenceTwo(byte[] b, int start) {
  231. int i = start;
  232. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED A U+0250 (2 bytes)
  233. b[i++] = (byte) 0x90;
  234. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER GAMMA U+0263 (2 bytes)
  235. b[i++] = (byte) 0xA3;
  236. b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED M U+026F (2 bytes)
  237. b[i++] = (byte) 0xAF;
  238. b[i++] = (byte) 0xCA; // LATIN SMALL LETTER S WITH HOOK U+0282 (2 bytes)
  239. b[i++] = (byte) 0x82;
  240. b[i++] = (byte) 0x20; // blank " " (1 byte)
  241. b[i++] = (byte) 0xCA; // LATIN LETTER SMALL CAPITAL L U+029F (2 bytes)
  242. b[i++] = (byte) 0x9F;
  243. b[i++] = (byte) 0xCB; // MODIFIER LETTER TRIANGULAR COLON U+02D0 (2 bytes)
  244. b[i++] = (byte) 0x90;
  245. b[i++] = (byte) 0x20; // blank " " (1 byte)
  246. b[i++] = (byte) 0xCB; // RING ABOVE U+02DA (2 bytes)
  247. b[i++] = (byte) 0x9A;
  248. b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL L U+02E1 (2 bytes)
  249. b[i++] = (byte) 0xA1;
  250. b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL X U+02E3 (2 bytes)
  251. b[i++] = (byte) 0xA3;
  252. b[i++] = (byte) 0xCB; // MODIFIER LETTER UP ARROWHEAD U+02C4 (2 bytes)
  253. b[i++] = (byte) 0x84;
  254. b[i++] = (byte) 0x2E; // FULL STOP "." (1 byte)
  255. return i;
  256. }
  257. // total characters = 17; byte length = 30
  258. static int addMultiByteCharSentenceBlankRanges(byte[] b, int start) {
  259. int i = start;
  260. b[i++] = (byte) 0xF0; // INSCRIPTIONAL YODH U+10B49 (4 bytes)
  261. b[i++] = (byte) 0x90;
  262. b[i++] = (byte) 0xAD;
  263. b[i++] = (byte) 0x89;
  264. b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes)
  265. b[i++] = (byte) 0xA6;
  266. b[i++] = (byte) 0x9D;
  267. b[i++] = (byte) 0x20; // blank " " (1 byte)
  268. b[i++] = (byte) 0x20; // blank " " (1 byte)
  269. b[i++] = (byte) 0x20; // blank " " (1 byte)
  270. b[i++] = (byte) 0x2D; // hyphen-minus "-" U-002D (1 byte)
  271. b[i++] = (byte) 0x20; // blank " " (1 byte)
  272. b[i++] = (byte) 0x60; // grave accent "-" U-0060 (1 byte)
  273. b[i++] = (byte) 0xE2; // BLACK SUN WITH RAYS U+2600 (3 bytes)
  274. b[i++] = (byte) 0x98;
  275. b[i++] = (byte) 0x80;
  276. b[i++] = (byte) 0xE2; // BALLOT BOX WITH X U+2612 (3 bytes)
  277. b[i++] = (byte) 0x98;
  278. b[i++] = (byte) 0x92;
  279. b[i++] = (byte) 0x20; // blank " " (1 byte)
  280. b[i++] = (byte) 0x20; // blank " " (1 byte)
  281. b[i++] = (byte) 0x20; // blank " " (1 byte)
  282. b[i++] = (byte) 0x20; // blank " " (1 byte)
  283. b[i++] = (byte) 0x20; // blank " " (1 byte)
  284. b[i++] = (byte) 0xE2; // WHITE START U+2606 (3 bytes)
  285. b[i++] = (byte) 0x98;
  286. b[i++] = (byte) 0x86;
  287. b[i++] = (byte) 0xE2; // WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE U+26FF (3 bytes)
  288. b[i++] = (byte) 0x9B;
  289. b[i++] = (byte) 0xBF;
  290. return i;
  291. }
  292. static int addPads(byte[] b, int start, int count) {
  293. int i = start;
  294. int end = start + count;
  295. for ( ; i < end; i++) {
  296. b[i] = (byte) 0x20; // blank " " (1 byte)
  297. }
  298. return i;
  299. }
  // Per-instance Hive configuration, built with the no-argument HiveConf constructor.
  // NOTE(review): not referenced in the portion of the file visible here; presumably
  // used by tests further down -- confirm before removing.
  private HiveConf hiveConf = new HiveConf();
  301. private boolean vectorEqual(BytesColumnVector vector, int i, byte[] bytes, int offset, int length) {
  302. byte[] bytesSlice = new byte[length];
  303. System.arraycopy(bytes, offset, bytesSlice, 0, length);
  304. int vectorLength = vector.length[i];
  305. byte[] vectorSlice = new byte[vectorLength];
  306. System.arraycopy(vector.vector[i], vector.start[i], vectorSlice, 0, vectorLength);
  307. boolean equals = Arrays.equals(bytesSlice, vectorSlice);
  308. if (!equals) {
  309. System.out.println("vectorEqual offset " + offset + " length " + length + " vectorSlice.length " + vectorSlice.length);
  310. System.out.println("vectorEqual bytesSlice " + Hex.encodeHexString(bytesSlice));
  311. System.out.println("vectorEqual vectorSlice " + Hex.encodeHexString(vectorSlice));
  312. }
  313. return equals;
  314. }
  315. private int vectorCharacterCount(BytesColumnVector vector, int i) {
  316. return StringExpr.characterCount(vector.vector[i], vector.start[i], vector.length[i]);
  317. }
  318. @Test
  319. // Test basic assign to vector.
  320. public void testAssignBytesColumnVector() {
  321. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  322. outV.initBuffer(35); // initialize with estimated element size 35
  323. int i = 0;
  324. int expectedResultLen;
  325. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  326. StringExpr.assign(outV, i, blue, 0, blue.length);
  327. expectedResultLen = blue.length;
  328. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  329. i++;
  330. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  331. StringExpr.assign(outV, i, redgreen, 0, redgreen.length);
  332. expectedResultLen = redgreen.length;
  333. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  334. i++;
  335. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  336. StringExpr.assign(outV, i, ascii_sentence, 0, ascii_sentence.length);
  337. expectedResultLen = ascii_sentence.length;
  338. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  339. i++;
  340. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  341. StringExpr.assign(outV, i, blanksLeft, 0, blanksLeft.length);
  342. expectedResultLen = blanksLeft.length;
  343. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  344. i++;
  345. // Multi-byte characters with blank ranges.
  346. byte[] sentenceBlankRanges = new byte[100];
  347. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  348. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  349. StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
  350. expectedResultLen = sentenceBlankRangesLen;
  351. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  352. i++;
  353. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  354. StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  355. expectedResultLen = sentenceBlankRangesLen - 3;
  356. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  357. i++;
  358. // Some non-zero offsets.
  359. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, sentenceBlankRangesLen - 4) == 16);
  360. StringExpr.assign(outV, i, sentenceBlankRanges, 4, sentenceBlankRangesLen - 4);
  361. expectedResultLen = sentenceBlankRangesLen - 4;
  362. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen));
  363. Assert.assertTrue(vectorCharacterCount(outV, i) == 16);
  364. i++;
  365. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  366. StringExpr.assign(outV, i, sentenceBlankRanges, 7, 17);
  367. expectedResultLen = 17;
  368. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  369. Assert.assertTrue(vectorCharacterCount(outV, i) == 13);
  370. i++;
  371. }
  372. @Test
  373. // Test basic right trim of bytes slice.
  374. public void testRightTrimBytesSlice() {
  375. int resultLen;
  376. // Nothing to trim (ASCII).
  377. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  378. resultLen = StringExpr.rightTrim(blue, 0, blue.length);
  379. Assert.assertTrue(resultLen == blue.length);
  380. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  381. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  382. resultLen = StringExpr.rightTrim(redgreen, 0, redgreen.length);
  383. Assert.assertTrue(resultLen == redgreen.length);
  384. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  385. resultLen = StringExpr.rightTrim(ascii_sentence, 0, ascii_sentence.length);
  386. Assert.assertTrue(resultLen == ascii_sentence.length);
  387. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  388. resultLen = StringExpr.rightTrim(blanksLeft, 0, blanksLeft.length);
  389. Assert.assertTrue(resultLen == blanksLeft.length);
  390. // Simple trims.
  391. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  392. resultLen = StringExpr.rightTrim(blanksRight, 0, blanksRight.length);
  393. Assert.assertTrue(resultLen == 3);
  394. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3);
  395. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  396. resultLen = StringExpr.rightTrim(blanksBoth, 0, blanksBoth.length);
  397. Assert.assertTrue(resultLen == 5);
  398. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5);
  399. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  400. resultLen = StringExpr.rightTrim(blankString, 0, blankString.length);
  401. Assert.assertTrue(resultLen == 0);
  402. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0);
  403. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  404. resultLen = StringExpr.rightTrim(blankRanges, 0, blankRanges.length);
  405. Assert.assertTrue(resultLen == blankRanges.length - 4);
  406. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26);
  407. // Offset trims.
  408. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  409. resultLen = StringExpr.rightTrim(blanksRight, 1, blanksRight.length - 1);
  410. Assert.assertTrue(resultLen == 2);
  411. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2);
  412. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  413. resultLen = StringExpr.rightTrim(blanksBoth, 4, blanksBoth.length - 4);
  414. Assert.assertTrue(resultLen == 1);
  415. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1);
  416. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  417. resultLen = StringExpr.rightTrim(blanksBoth, 5, blanksBoth.length -5 );
  418. Assert.assertTrue(resultLen == 0);
  419. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0);
  420. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  421. resultLen = StringExpr.rightTrim(blankString, 1, blankString.length - 1);
  422. Assert.assertTrue(resultLen == 0);
  423. Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0);
  424. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  425. resultLen = StringExpr.rightTrim(blankRanges, 4, blankRanges.length - 4);
  426. Assert.assertTrue(resultLen == blankRanges.length - 4 -4);
  427. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  428. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  429. resultLen = StringExpr.rightTrim(blankRanges, 6, blankRanges.length- 6);
  430. Assert.assertTrue(resultLen == blankRanges.length - 6 - 4);
  431. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20);
  432. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  433. resultLen = StringExpr.rightTrim(blankRanges, 7, blankRanges.length - 7);
  434. Assert.assertTrue(resultLen == blankRanges.length - 7 - 4);
  435. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19);
  436. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  437. resultLen = StringExpr.rightTrim(blankRanges, 7, 8 - 7);
  438. Assert.assertTrue(resultLen == 0);
  439. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0);
  440. // Multi-byte trims.
  441. byte[] multiByte = new byte[100];
  442. addMultiByteCharRightPadded1_1(multiByte);
  443. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  444. resultLen = StringExpr.rightTrim(multiByte, 0, 4);
  445. Assert.assertTrue(resultLen == 3);
  446. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1);
  447. addMultiByteCharRightPadded1_2(multiByte);
  448. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  449. resultLen = StringExpr.rightTrim(multiByte, 0, 5);
  450. Assert.assertTrue(resultLen == 4);
  451. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2);
  452. addMultiByteCharRightPadded1_3(multiByte);
  453. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  454. resultLen = StringExpr.rightTrim(multiByte, 0, 9);
  455. Assert.assertTrue(resultLen == 8);
  456. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3);
  457. addMultiByteCharRightPadded1_1(multiByte);
  458. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  459. resultLen = StringExpr.rightTrim(multiByte, 3, 1);
  460. Assert.assertTrue(resultLen == 0);
  461. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0);
  462. addMultiByteCharRightPadded1_2(multiByte);
  463. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  464. resultLen = StringExpr.rightTrim(multiByte, 3, 2);
  465. Assert.assertTrue(resultLen == 1);
  466. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1);
  467. byte[] sentenceOne = new byte[100];
  468. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  469. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  470. resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen);
  471. Assert.assertTrue(resultLen == sentenceOneLen);
  472. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  473. resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen - 3);
  474. Assert.assertTrue(resultLen == sentenceOneLen - 3);
  475. byte[] sentenceTwo = new byte[100];
  476. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  477. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  478. resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen);
  479. Assert.assertTrue(resultLen == sentenceTwoLen);
  480. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  481. resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen - 5);
  482. Assert.assertTrue(resultLen == sentenceTwoLen - 5);
  483. int start;
  484. // Left pad longer strings with multi-byte characters.
  485. byte[] sentenceOnePaddedLeft = new byte[100];
  486. start = addPads(sentenceOnePaddedLeft, 0, 3);
  487. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  488. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  489. resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
  490. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen);
  491. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  492. resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
  493. Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3);
  494. byte[] sentenceTwoPaddedLeft = new byte[100];
  495. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  496. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  497. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  498. resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
  499. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen);
  500. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  501. resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
  502. Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5);
  503. // Right pad longer strings with multi-byte characters.
  504. byte[] sentenceOnePaddedRight = new byte[100];
  505. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  506. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  507. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  508. resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
  509. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4);
  510. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  511. resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
  512. Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4);
  513. byte[] sentenceTwoPaddedRight = new byte[100];
  514. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  515. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  516. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  517. resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
  518. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1);
  519. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  520. resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
  521. Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1);
  522. // Multi-byte characters with blank ranges.
  523. byte[] sentenceBlankRanges = new byte[100];
  524. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  525. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  526. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen);
  527. Assert.assertTrue(resultLen == sentenceBlankRangesLen);
  528. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  529. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  530. Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3);
  531. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  532. resultLen = StringExpr.rightTrim(sentenceBlankRanges, 7, 17);
  533. Assert.assertTrue(resultLen == 12);
  534. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8);
  535. }
  536. @Test
  537. // Test basic right trim to vector.
  538. public void testRightTrimBytesColumnVector() {
  539. BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  540. outV.initBuffer(30); // initialize with estimated element size 35
  541. int i = 0;
  542. int expectedResultLen;
  543. // Nothing to trim (ASCII).
  544. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  545. StringExpr.rightTrim(outV, i, blue, 0, blue.length);
  546. expectedResultLen = blue.length;
  547. Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen));
  548. Assert.assertTrue(vectorCharacterCount(outV, i) == 4);
  549. i++;
  550. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  551. StringExpr.rightTrim(outV, i, redgreen, 0, redgreen.length);
  552. expectedResultLen = redgreen.length;
  553. Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen));
  554. i++;
  555. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  556. StringExpr.rightTrim(outV, i, ascii_sentence, 0, ascii_sentence.length);
  557. expectedResultLen = ascii_sentence.length;
  558. Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen));
  559. i++;
  560. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  561. StringExpr.rightTrim(outV, i, blanksLeft, 0, blanksLeft.length);
  562. expectedResultLen = blanksLeft.length;
  563. Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen));
  564. i++;
  565. // Simple trims.
  566. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  567. StringExpr.rightTrim(outV, i, blanksRight, 0, blanksRight.length);
  568. expectedResultLen = 3;
  569. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen));
  570. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  571. i++;
  572. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  573. StringExpr.rightTrim(outV, i, blanksBoth, 0, blanksBoth.length);
  574. expectedResultLen = 5;
  575. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen));
  576. Assert.assertTrue(vectorCharacterCount(outV, i) == 5);
  577. i++;
  578. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  579. StringExpr.rightTrim(outV, i, blankString, 0, blankString.length);
  580. expectedResultLen = 0;
  581. Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen));
  582. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  583. i++;
  584. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  585. StringExpr.rightTrim(outV, i, blankRanges, 0, blankRanges.length);
  586. expectedResultLen = blankRanges.length - 4;
  587. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen));
  588. Assert.assertTrue(vectorCharacterCount(outV, i) == 26);
  589. i++;
  590. // Offset trims.
  591. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  592. StringExpr.rightTrim(outV, i, blanksRight, 1, blanksRight.length - 1);
  593. expectedResultLen = 2;
  594. Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen));
  595. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  596. i++;
  597. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  598. StringExpr.rightTrim(outV, i, blanksBoth, 4, blanksBoth.length - 4);
  599. expectedResultLen = 1;
  600. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen));
  601. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  602. i++;
  603. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  604. StringExpr.rightTrim(outV, i, blanksBoth, 5, blanksBoth.length -5 );
  605. expectedResultLen = 0;
  606. Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen));
  607. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  608. i++;
  609. Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1);
  610. StringExpr.rightTrim(outV, i, blankString, 1, blankString.length - 1);
  611. expectedResultLen = 0;
  612. Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen));
  613. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  614. i++;
  615. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  616. StringExpr.rightTrim(outV, i, blankRanges, 4, blankRanges.length - 4);
  617. expectedResultLen = blankRanges.length - 4 -4;
  618. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen));
  619. Assert.assertTrue(vectorCharacterCount(outV, i) == 22);
  620. i++;
  621. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  622. StringExpr.rightTrim(outV, i, blankRanges, 6, blankRanges.length- 6);
  623. expectedResultLen = blankRanges.length - 6 - 4;
  624. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen));
  625. Assert.assertTrue(vectorCharacterCount(outV, i) == 20);
  626. i++;
  627. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23);
  628. StringExpr.rightTrim(outV, i, blankRanges, 7, blankRanges.length - 7);
  629. expectedResultLen = blankRanges.length - 7 - 4;
  630. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  631. Assert.assertTrue(vectorCharacterCount(outV, i) == 19);
  632. i++;
  633. Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1);
  634. StringExpr.rightTrim(outV, i, blankRanges, 7, 8 - 7);
  635. expectedResultLen = 0;
  636. Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen));
  637. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  638. i++;
  639. // Multi-byte trims.
  640. byte[] multiByte = new byte[100];
  641. addMultiByteCharRightPadded1_1(multiByte);
  642. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2);
  643. StringExpr.rightTrim(outV, i, multiByte, 0, 4);
  644. expectedResultLen = 3;
  645. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  646. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  647. i++;
  648. addMultiByteCharRightPadded1_2(multiByte);
  649. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3);
  650. StringExpr.rightTrim(outV, i, multiByte, 0, 5);
  651. expectedResultLen = 4;
  652. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  653. Assert.assertTrue(vectorCharacterCount(outV, i) == 2);
  654. i++;
  655. addMultiByteCharRightPadded1_3(multiByte);
  656. Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4);
  657. StringExpr.rightTrim(outV, i, multiByte, 0, 9);
  658. expectedResultLen = 8;
  659. Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen));
  660. Assert.assertTrue(vectorCharacterCount(outV, i) == 3);
  661. i++;
  662. addMultiByteCharRightPadded1_1(multiByte);
  663. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1);
  664. StringExpr.rightTrim(outV, i, multiByte, 3, 1);
  665. expectedResultLen = 0;
  666. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  667. Assert.assertTrue(vectorCharacterCount(outV, i) == 0);
  668. i++;
  669. addMultiByteCharRightPadded1_2(multiByte);
  670. Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2);
  671. StringExpr.rightTrim(outV, i, multiByte, 3, 2);
  672. expectedResultLen = 1;
  673. Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen));
  674. Assert.assertTrue(vectorCharacterCount(outV, i) == 1);
  675. i++;
  676. byte[] sentenceOne = new byte[100];
  677. int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0);
  678. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10);
  679. StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen);
  680. expectedResultLen = sentenceOneLen;
  681. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  682. i++;
  683. Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9);
  684. StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen - 3);
  685. expectedResultLen = sentenceOneLen - 3;
  686. Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen));
  687. i++;
  688. byte[] sentenceTwo = new byte[100];
  689. int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0);
  690. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13);
  691. StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen);
  692. expectedResultLen = sentenceTwoLen;
  693. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  694. i++;
  695. Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10);
  696. StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen - 5);
  697. expectedResultLen = sentenceTwoLen - 5;
  698. Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen));
  699. i++;
  700. int start;
  701. // Left pad longer strings with multi-byte characters.
  702. byte[] sentenceOnePaddedLeft = new byte[100];
  703. start = addPads(sentenceOnePaddedLeft, 0, 3);
  704. int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start);
  705. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10);
  706. StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen);
  707. expectedResultLen = sentenceOnePaddedLeftLen;
  708. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  709. i++;
  710. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9);
  711. StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3);
  712. expectedResultLen = sentenceOnePaddedLeftLen - 3;
  713. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen));
  714. i++;
  715. byte[] sentenceTwoPaddedLeft = new byte[100];
  716. start = addPads(sentenceTwoPaddedLeft, 0, 2);
  717. int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start);
  718. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13);
  719. StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen);
  720. expectedResultLen = sentenceTwoPaddedLeftLen;
  721. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  722. i++;
  723. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10);
  724. StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5);
  725. expectedResultLen = sentenceTwoPaddedLeftLen - 5;
  726. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen));
  727. i++;
  728. // Right pad longer strings with multi-byte characters.
  729. byte[] sentenceOnePaddedRight = new byte[100];
  730. start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0);
  731. int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4);
  732. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4);
  733. StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen);
  734. expectedResultLen = sentenceOnePaddedRightLen - 4;
  735. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  736. i++;
  737. Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9);
  738. StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4);
  739. expectedResultLen = sentenceOnePaddedRightLen - 3 - 4;
  740. Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen));
  741. i++;
  742. byte[] sentenceTwoPaddedRight = new byte[100];
  743. start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0);
  744. int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1);
  745. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1);
  746. StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen);
  747. expectedResultLen = sentenceTwoPaddedRightLen - 1;
  748. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  749. i++;
  750. Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10);
  751. StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1);
  752. expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1;
  753. Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen));
  754. i++;
  755. // Multi-byte characters with blank ranges.
  756. byte[] sentenceBlankRanges = new byte[100];
  757. int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0);
  758. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17);
  759. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen);
  760. expectedResultLen = sentenceBlankRangesLen;
  761. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  762. i++;
  763. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16);
  764. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3);
  765. expectedResultLen = sentenceBlankRangesLen - 3;
  766. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen));
  767. i++;
  768. Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13);
  769. StringExpr.rightTrim(outV, i, sentenceBlankRanges, 7, 17);
  770. expectedResultLen = 12;
  771. Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen));
  772. Assert.assertTrue(vectorCharacterCount(outV, i) == 8);
  773. }
  774. @Test
  775. // Test basic truncate of bytes slice.
  776. public void testTruncateBytesSlice() {
  777. int largeMaxLength = 100;
  778. int resultLen;
  779. // No truncate (ASCII) -- maximum length large.
  780. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  781. resultLen = StringExpr.truncate(blue, 0, blue.length, largeMaxLength);
  782. Assert.assertTrue(resultLen == blue.length);
  783. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  784. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  785. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, largeMaxLength);
  786. Assert.assertTrue(resultLen == redgreen.length);
  787. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  788. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength);
  789. Assert.assertTrue(resultLen == ascii_sentence.length);
  790. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  791. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, largeMaxLength);
  792. Assert.assertTrue(resultLen == blanksLeft.length);
  793. // No truncate (ASCII) -- same maximum length.
  794. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  795. resultLen = StringExpr.truncate(blue, 0, blue.length, 4);
  796. Assert.assertTrue(resultLen == blue.length);
  797. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4);
  798. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  799. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 8);
  800. Assert.assertTrue(resultLen == redgreen.length);
  801. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  802. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 31);
  803. Assert.assertTrue(resultLen == ascii_sentence.length);
  804. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  805. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 5);
  806. Assert.assertTrue(resultLen == blanksLeft.length);
  807. // Simple truncation.
  808. Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4);
  809. resultLen = StringExpr.truncate(blue, 0, blue.length, 3);
  810. Assert.assertTrue(resultLen == 3);
  811. Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3);
  812. Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8);
  813. resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 6);
  814. Assert.assertTrue(resultLen == 6);
  815. Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31);
  816. resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 14);
  817. Assert.assertTrue(resultLen == 14);
  818. Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5);
  819. resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 2);
  820. Assert.assertTrue(resultLen == 2);
  821. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5);
  822. resultLen = StringExpr.truncate(blanksRight, 0, blanksRight.length, 4);
  823. Assert.assertTrue(resultLen == 4);
  824. Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 4);
  825. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7);
  826. resultLen = StringExpr.truncate(blanksBoth, 0, blanksBoth.length, 2);
  827. Assert.assertTrue(resultLen == 2);
  828. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 2);
  829. Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2);
  830. resultLen = StringExpr.truncate(blankString, 0, blankString.length, 1);
  831. Assert.assertTrue(resultLen == 1);
  832. Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 1);
  833. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30);
  834. resultLen = StringExpr.truncate(blankRanges, 0, blankRanges.length, 29);
  835. Assert.assertTrue(resultLen == 29);
  836. Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 29);
  837. // Offset truncation.
  838. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4);
  839. resultLen = StringExpr.truncate(blanksRight, 1, blanksRight.length - 1, 3);
  840. Assert.assertTrue(resultLen == 3);
  841. Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 3);
  842. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3);
  843. resultLen = StringExpr.truncate(blanksBoth, 4, blanksBoth.length - 4, 2);
  844. Assert.assertTrue(resultLen == 2);
  845. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 2);
  846. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2);
  847. resultLen = StringExpr.truncate(blanksBoth, 5, blanksBoth.length -5, 1);
  848. Assert.assertTrue(resultLen == 1);
  849. Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 1);
  850. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26);
  851. resultLen = StringExpr.truncate(blankRanges, 4, blankRanges.length - 4, 22);
  852. Assert.assertTrue(resultLen == 22);
  853. Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22);
  854. Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24);
  855. resultLen = StringExpr.truncate(blankRanges, 6, blankRanges.length- 6, 7);
  856. Assert.assertTrue(resultLen == 7);
  857. Assert.assert

Large files are truncated, but you can click here to view the full file