PageRenderTime 27ms CodeModel.GetById 9ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java

#
Java | 240 lines | 123 code | 27 blank | 90 comment | 24 complexity | 56a98452de37adb671a4d0def0e78cfe MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.serde2.lazybinary;
 19
 20import java.util.ArrayList;
 21import java.util.Arrays;
 22import java.util.List;
 23
 24import org.apache.commons.logging.Log;
 25import org.apache.commons.logging.LogFactory;
 26import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 27import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
 28import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
 29import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 30import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 31import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 32
 33/**
 34 * LazyBinaryStruct is serialized as follows: start A B A B A B end bytes[] ->
 35 * |-----|---------|--- ... ---|-----|---------|
 36 *
 37 * Section A is one null-byte, corresponding to eight struct fields in Section
 38 * B. Each bit indicates whether the corresponding field is null (0) or not null
 39 * (1). Each field is a LazyBinaryObject.
 40 *
 41 * Following B, there is another section A and B. This pattern repeats until the
 42 * all struct fields are serialized.
 43 */
 44public class LazyBinaryStruct extends
 45    LazyBinaryNonPrimitive<LazyBinaryStructObjectInspector> {
 46
 47  private static Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName());
 48
 49  /**
 50   * Whether the data is already parsed or not.
 51   */
 52  boolean parsed;
 53
 54  /**
 55   * The fields of the struct.
 56   */
 57  LazyBinaryObject[] fields;
 58
 59  /**
 60   * Whether a field is initialized or not.
 61   */
 62  boolean[] fieldInited;
 63
 64  /**
 65   * Whether a field is null or not. Because length is 0 does not means the
 66   * field is null. In particular, a 0-length string is not null.
 67   */
 68  boolean[] fieldIsNull;
 69
 70  /**
 71   * The start positions and lengths of struct fields. Only valid when the data
 72   * is parsed.
 73   */
 74  int[] fieldStart;
 75  int[] fieldLength;
 76
 77  /**
 78   * Construct a LazyBinaryStruct object with an ObjectInspector.
 79   */
 80  protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
 81    super(oi);
 82  }
 83
 84  @Override
 85  public void init(ByteArrayRef bytes, int start, int length) {
 86    super.init(bytes, start, length);
 87    parsed = false;
 88  }
 89
 90  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
 91  boolean missingFieldWarned = false;
 92  boolean extraFieldWarned = false;
 93
 94  /**
 95   * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
 96   * fieldIsNull.
 97   */
 98  private void parse() {
 99
100    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
101        .getAllStructFieldRefs();
102
103    if (fields == null) {
104      fields = new LazyBinaryObject[fieldRefs.size()];
105      for (int i = 0; i < fields.length; i++) {
106        ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector() ;
107        fields[i] = insp == null? null: LazyBinaryFactory.createLazyBinaryObject(insp);
108      }
109      fieldInited = new boolean[fields.length];
110      fieldIsNull = new boolean[fields.length];
111      fieldStart = new int[fields.length];
112      fieldLength = new int[fields.length];
113    }
114
115    /**
116     * Please note that one null byte is followed by eight fields, then more
117     * null byte and fields.
118     */
119
120    int fieldId = 0;
121    int structByteEnd = start + length;
122    byte[] bytes = this.bytes.getData();
123
124    byte nullByte = bytes[start];
125    int lastFieldByteEnd = start + 1;
126    // Go through all bytes in the byte[]
127    for (int i = 0; i < fields.length; i++) {
128      fieldIsNull[i] = true;
129      if ((nullByte & (1 << (i % 8))) != 0) {
130        fieldIsNull[i] = false;
131        LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i)
132            .getFieldObjectInspector(), bytes, lastFieldByteEnd, recordInfo);
133        fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
134        fieldLength[i] = recordInfo.elementSize;
135        lastFieldByteEnd = fieldStart[i] + fieldLength[i];
136      }
137
138      // count how many fields are there
139      if (lastFieldByteEnd <= structByteEnd) {
140        fieldId++;
141      }
142      // next byte is a null byte if there are more bytes to go
143      if (7 == (i % 8)) {
144        if (lastFieldByteEnd < structByteEnd) {
145          nullByte = bytes[lastFieldByteEnd];
146          lastFieldByteEnd++;
147        } else {
148          // otherwise all null afterwards
149          nullByte = 0;
150          lastFieldByteEnd++;
151        }
152      }
153    }
154
155    // Extra bytes at the end?
156    if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
157      extraFieldWarned = true;
158      LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
159          + "problems.");
160    }
161
162    // Missing fields?
163    if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
164      missingFieldWarned = true;
165      LOG.warn("Missing fields! Expected " + fields.length + " fields but "
166          + "only got " + fieldId + "! Ignoring similar problems.");
167    }
168
169    Arrays.fill(fieldInited, false);
170    parsed = true;
171  }
172
173  /**
174   * Get one field out of the struct.
175   *
176   * If the field is a primitive field, return the actual object. Otherwise
177   * return the LazyObject. This is because PrimitiveObjectInspector does not
178   * have control over the object used by the user - the user simply directly
179   * use the Object instead of going through Object
180   * PrimitiveObjectInspector.get(Object).
181   *
182   * @param fieldID
183   *          The field ID
184   * @return The field as a LazyObject
185   */
186  public Object getField(int fieldID) {
187    if (!parsed) {
188      parse();
189    }
190    return uncheckedGetField(fieldID);
191  }
192
193  /**
194   * Get the field out of the row without checking parsed. This is called by
195   * both getField and getFieldsAsList.
196   *
197   * @param fieldID
198   *          The id of the field starting from 0.
199   * @return The value of the field
200   */
201  private Object uncheckedGetField(int fieldID) {
202    // Test the length first so in most cases we avoid doing a byte[]
203    // comparison.
204    if (fieldIsNull[fieldID]) {
205      return null;
206    }
207    if (!fieldInited[fieldID]) {
208      fieldInited[fieldID] = true;
209      fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
210    }
211    return fields[fieldID].getObject();
212  }
213
214  ArrayList<Object> cachedList;
215
216  /**
217   * Get the values of the fields as an ArrayList.
218   *
219   * @return The values of the fields as an ArrayList.
220   */
221  public ArrayList<Object> getFieldsAsList() {
222    if (!parsed) {
223      parse();
224    }
225    if (cachedList == null) {
226      cachedList = new ArrayList<Object>();
227    } else {
228      cachedList.clear();
229    }
230    for (int i = 0; i < fields.length; i++) {
231      cachedList.add(uncheckedGetField(i));
232    }
233    return cachedList;
234  }
235
236  @Override
237  public Object getObject() {
238    return this;
239  }
240}