PageRenderTime 37ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/tags/release-0.1-rc2/hive/external/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java

#
Java | 238 lines | 118 code | 24 blank | 96 comment | 24 complexity | 739ff4c0ecf2699bb24cdd368bbc8e74 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.serde2.lazybinary;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.List;
  22. import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
  23. import org.apache.hadoop.hive.serde2.lazy.LazyObject;
  24. import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
  25. import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
  26. import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
  27. import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
  28. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  29. /**
  30. * LazyBinaryArray is serialized as follows: start A b b b b b b end bytes[] ->
  31. * |--------|---|---|---|---| ... |---|---|
  32. *
  33. * Section A is the null-bytes. Suppose the list has N elements, then there are
  34. * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
  35. * indicates whether that element is null (0) or not null (1).
  36. *
  37. * After A, all b(s) represent the elements of the list. Each of them is again a
  38. * LazyBinaryObject.
  39. *
  40. */
  41. public class LazyBinaryArray extends
  42. LazyBinaryNonPrimitive<LazyBinaryListObjectInspector> {
  43. /**
  44. * Whether the data is already parsed or not.
  45. */
  46. boolean parsed = false;
  47. /**
  48. * The length of the array. Only valid when the data is parsed.
  49. */
  50. int arraySize = 0;
  51. /**
  52. * The start positions and lengths of array elements. Only valid when the data
  53. * is parsed.
  54. */
  55. int[] elementStart;
  56. int[] elementLength;
  57. /**
  58. * Whether an element is initialized or not.
  59. */
  60. boolean[] elementInited;
  61. /**
  62. * Whether an element is null or not. Because length is 0 does not means the
  63. * field is null. In particular, a 0-length string is not null.
  64. */
  65. boolean[] elementIsNull;
  66. /**
  67. * The elements of the array. Note that we call arrayElements[i].init(bytes,
  68. * begin, length) only when that element is accessed.
  69. */
  70. LazyBinaryObject[] arrayElements;
  71. /**
  72. * Construct a LazyBinaryArray object with the ObjectInspector.
  73. *
  74. * @param oi
  75. * the oi representing the type of this LazyBinaryArray
  76. */
  77. protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
  78. super(oi);
  79. }
  80. /**
  81. * Set the row data for this LazyBinaryArray.
  82. *
  83. * @see LazyObject#init(ByteArrayRef, int, int)
  84. */
  85. @Override
  86. public void init(ByteArrayRef bytes, int start, int length) {
  87. super.init(bytes, start, length);
  88. parsed = false;
  89. }
  90. /**
  91. * Enlarge the size of arrays storing information for the elements inside the
  92. * array.
  93. */
  94. private void adjustArraySize(int newSize) {
  95. if (elementStart == null || elementStart.length < newSize) {
  96. elementStart = new int[newSize];
  97. elementLength = new int[newSize];
  98. elementInited = new boolean[newSize];
  99. elementIsNull = new boolean[newSize];
  100. arrayElements = new LazyBinaryObject[newSize];
  101. }
  102. }
  103. VInt vInt = new LazyBinaryUtils.VInt();
  104. RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
  105. /**
  106. * Parse the bytes and fill elementStart, elementLength, elementInited and
  107. * elementIsNull.
  108. */
  109. private void parse() {
  110. byte[] bytes = this.bytes.getData();
  111. // get the vlong that represents the map size
  112. LazyBinaryUtils.readVInt(bytes, start, vInt);
  113. arraySize = vInt.value;
  114. if (0 == arraySize) {
  115. parsed = true;
  116. return;
  117. }
  118. // adjust arrays
  119. adjustArraySize(arraySize);
  120. // find out the null-bytes
  121. int arryByteStart = start + vInt.length;
  122. int nullByteCur = arryByteStart;
  123. int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
  124. // the begin the real elements
  125. int lastElementByteEnd = nullByteEnd;
  126. // the list element object inspector
  127. ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi)
  128. .getListElementObjectInspector();
  129. // parsing elements one by one
  130. for (int i = 0; i < arraySize; i++) {
  131. elementIsNull[i] = true;
  132. if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
  133. elementIsNull[i] = false;
  134. LazyBinaryUtils.checkObjectByteInfo(listEleObjectInspector, bytes,
  135. lastElementByteEnd, recordInfo);
  136. elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
  137. elementLength[i] = recordInfo.elementSize;
  138. lastElementByteEnd = elementStart[i] + elementLength[i];
  139. }
  140. // move onto the next null byte
  141. if (7 == (i % 8)) {
  142. nullByteCur++;
  143. }
  144. }
  145. Arrays.fill(elementInited, 0, arraySize, false);
  146. parsed = true;
  147. }
  148. /**
  149. * Returns the actual primitive object at the index position inside the array
  150. * represented by this LazyBinaryObject.
  151. */
  152. public Object getListElementObject(int index) {
  153. if (!parsed) {
  154. parse();
  155. }
  156. if (index < 0 || index >= arraySize) {
  157. return null;
  158. }
  159. return uncheckedGetElement(index);
  160. }
  161. /**
  162. * Get the element without checking out-of-bound index.
  163. *
  164. * @param index
  165. * index to the array element
  166. */
  167. private Object uncheckedGetElement(int index) {
  168. if (elementIsNull[index]) {
  169. return null;
  170. } else {
  171. if (!elementInited[index]) {
  172. elementInited[index] = true;
  173. if (arrayElements[index] == null) {
  174. arrayElements[index] = LazyBinaryFactory.createLazyBinaryObject((oi)
  175. .getListElementObjectInspector());
  176. }
  177. arrayElements[index].init(bytes, elementStart[index],
  178. elementLength[index]);
  179. }
  180. }
  181. return arrayElements[index].getObject();
  182. }
  183. /**
  184. * Returns the array size.
  185. */
  186. public int getListLength() {
  187. if (!parsed) {
  188. parse();
  189. }
  190. return arraySize;
  191. }
  192. /**
  193. * cachedList is reused every time getList is called. Different
  194. * LazyBianryArray instances cannot share the same cachedList.
  195. */
  196. ArrayList<Object> cachedList;
  197. /**
  198. * Returns the List of actual primitive objects. Returns null for null array.
  199. */
  200. public List<Object> getList() {
  201. if (!parsed) {
  202. parse();
  203. }
  204. if (cachedList == null) {
  205. cachedList = new ArrayList<Object>(arraySize);
  206. } else {
  207. cachedList.clear();
  208. }
  209. for (int index = 0; index < arraySize; index++) {
  210. cachedList.add(uncheckedGetElement(index));
  211. }
  212. return cachedList;
  213. }
  214. }