PageRenderTime 51ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java

#
Java | 240 lines | 123 code | 27 blank | 90 comment | 24 complexity | 56a98452de37adb671a4d0def0e78cfe MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.serde2.lazybinary;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.List;
  22. import org.apache.commons.logging.Log;
  23. import org.apache.commons.logging.LogFactory;
  24. import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
  25. import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
  26. import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
  27. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  28. import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  29. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  30. /**
  31. * LazyBinaryStruct is serialized as follows: start A B A B A B end bytes[] ->
  32. * |-----|---------|--- ... ---|-----|---------|
  33. *
  34. * Section A is one null-byte, corresponding to eight struct fields in Section
  35. * B. Each bit indicates whether the corresponding field is null (0) or not null
  36. * (1). Each field is a LazyBinaryObject.
  37. *
  38. * Following B, there is another section A and B. This pattern repeats until the
  39. * all struct fields are serialized.
  40. */
  41. public class LazyBinaryStruct extends
  42. LazyBinaryNonPrimitive<LazyBinaryStructObjectInspector> {
  43. private static Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName());
  44. /**
  45. * Whether the data is already parsed or not.
  46. */
  47. boolean parsed;
  48. /**
  49. * The fields of the struct.
  50. */
  51. LazyBinaryObject[] fields;
  52. /**
  53. * Whether a field is initialized or not.
  54. */
  55. boolean[] fieldInited;
  56. /**
  57. * Whether a field is null or not. Because length is 0 does not means the
  58. * field is null. In particular, a 0-length string is not null.
  59. */
  60. boolean[] fieldIsNull;
  61. /**
  62. * The start positions and lengths of struct fields. Only valid when the data
  63. * is parsed.
  64. */
  65. int[] fieldStart;
  66. int[] fieldLength;
  67. /**
  68. * Construct a LazyBinaryStruct object with an ObjectInspector.
  69. */
  70. protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
  71. super(oi);
  72. }
  73. @Override
  74. public void init(ByteArrayRef bytes, int start, int length) {
  75. super.init(bytes, start, length);
  76. parsed = false;
  77. }
  78. RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
  79. boolean missingFieldWarned = false;
  80. boolean extraFieldWarned = false;
  81. /**
  82. * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
  83. * fieldIsNull.
  84. */
  85. private void parse() {
  86. List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
  87. .getAllStructFieldRefs();
  88. if (fields == null) {
  89. fields = new LazyBinaryObject[fieldRefs.size()];
  90. for (int i = 0; i < fields.length; i++) {
  91. ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector() ;
  92. fields[i] = insp == null? null: LazyBinaryFactory.createLazyBinaryObject(insp);
  93. }
  94. fieldInited = new boolean[fields.length];
  95. fieldIsNull = new boolean[fields.length];
  96. fieldStart = new int[fields.length];
  97. fieldLength = new int[fields.length];
  98. }
  99. /**
  100. * Please note that one null byte is followed by eight fields, then more
  101. * null byte and fields.
  102. */
  103. int fieldId = 0;
  104. int structByteEnd = start + length;
  105. byte[] bytes = this.bytes.getData();
  106. byte nullByte = bytes[start];
  107. int lastFieldByteEnd = start + 1;
  108. // Go through all bytes in the byte[]
  109. for (int i = 0; i < fields.length; i++) {
  110. fieldIsNull[i] = true;
  111. if ((nullByte & (1 << (i % 8))) != 0) {
  112. fieldIsNull[i] = false;
  113. LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i)
  114. .getFieldObjectInspector(), bytes, lastFieldByteEnd, recordInfo);
  115. fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
  116. fieldLength[i] = recordInfo.elementSize;
  117. lastFieldByteEnd = fieldStart[i] + fieldLength[i];
  118. }
  119. // count how many fields are there
  120. if (lastFieldByteEnd <= structByteEnd) {
  121. fieldId++;
  122. }
  123. // next byte is a null byte if there are more bytes to go
  124. if (7 == (i % 8)) {
  125. if (lastFieldByteEnd < structByteEnd) {
  126. nullByte = bytes[lastFieldByteEnd];
  127. lastFieldByteEnd++;
  128. } else {
  129. // otherwise all null afterwards
  130. nullByte = 0;
  131. lastFieldByteEnd++;
  132. }
  133. }
  134. }
  135. // Extra bytes at the end?
  136. if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
  137. extraFieldWarned = true;
  138. LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
  139. + "problems.");
  140. }
  141. // Missing fields?
  142. if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
  143. missingFieldWarned = true;
  144. LOG.warn("Missing fields! Expected " + fields.length + " fields but "
  145. + "only got " + fieldId + "! Ignoring similar problems.");
  146. }
  147. Arrays.fill(fieldInited, false);
  148. parsed = true;
  149. }
  150. /**
  151. * Get one field out of the struct.
  152. *
  153. * If the field is a primitive field, return the actual object. Otherwise
  154. * return the LazyObject. This is because PrimitiveObjectInspector does not
  155. * have control over the object used by the user - the user simply directly
  156. * use the Object instead of going through Object
  157. * PrimitiveObjectInspector.get(Object).
  158. *
  159. * @param fieldID
  160. * The field ID
  161. * @return The field as a LazyObject
  162. */
  163. public Object getField(int fieldID) {
  164. if (!parsed) {
  165. parse();
  166. }
  167. return uncheckedGetField(fieldID);
  168. }
  169. /**
  170. * Get the field out of the row without checking parsed. This is called by
  171. * both getField and getFieldsAsList.
  172. *
  173. * @param fieldID
  174. * The id of the field starting from 0.
  175. * @return The value of the field
  176. */
  177. private Object uncheckedGetField(int fieldID) {
  178. // Test the length first so in most cases we avoid doing a byte[]
  179. // comparison.
  180. if (fieldIsNull[fieldID]) {
  181. return null;
  182. }
  183. if (!fieldInited[fieldID]) {
  184. fieldInited[fieldID] = true;
  185. fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
  186. }
  187. return fields[fieldID].getObject();
  188. }
  189. ArrayList<Object> cachedList;
  190. /**
  191. * Get the values of the fields as an ArrayList.
  192. *
  193. * @return The values of the fields as an ArrayList.
  194. */
  195. public ArrayList<Object> getFieldsAsList() {
  196. if (!parsed) {
  197. parse();
  198. }
  199. if (cachedList == null) {
  200. cachedList = new ArrayList<Object>();
  201. } else {
  202. cachedList.clear();
  203. }
  204. for (int i = 0; i < fields.length; i++) {
  205. cachedList.add(uncheckedGetField(i));
  206. }
  207. return cachedList;
  208. }
  209. @Override
  210. public Object getObject() {
  211. return this;
  212. }
  213. }