PageRenderTime 23ms CodeModel.GetById 9ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java

#
Java | 238 lines | 118 code | 24 blank | 96 comment | 24 complexity | 739ff4c0ecf2699bb24cdd368bbc8e74 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.serde2.lazybinary;
 19
 20import java.util.ArrayList;
 21import java.util.Arrays;
 22import java.util.List;
 23
 24import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 25import org.apache.hadoop.hive.serde2.lazy.LazyObject;
 26import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
 27import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 28import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
 29import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 30import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 31
 32/**
 33 * LazyBinaryArray is serialized as follows: start A b b b b b b end bytes[] ->
 34 * |--------|---|---|---|---| ... |---|---|
 35 * 
 36 * Section A is the null-bytes. Suppose the list has N elements, then there are
 37 * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
 38 * indicates whether that element is null (0) or not null (1).
 39 * 
 40 * After A, all b(s) represent the elements of the list. Each of them is again a
 41 * LazyBinaryObject.
 42 * 
 43 */
 44
 45public class LazyBinaryArray extends
 46    LazyBinaryNonPrimitive<LazyBinaryListObjectInspector> {
 47
 48  /**
 49   * Whether the data is already parsed or not.
 50   */
 51  boolean parsed = false;
 52  /**
 53   * The length of the array. Only valid when the data is parsed.
 54   */
 55  int arraySize = 0;
 56
 57  /**
 58   * The start positions and lengths of array elements. Only valid when the data
 59   * is parsed.
 60   */
 61  int[] elementStart;
 62  int[] elementLength;
 63
 64  /**
 65   * Whether an element is initialized or not.
 66   */
 67  boolean[] elementInited;
 68
 69  /**
 70   * Whether an element is null or not. Because length is 0 does not means the
 71   * field is null. In particular, a 0-length string is not null.
 72   */
 73  boolean[] elementIsNull;
 74
 75  /**
 76   * The elements of the array. Note that we call arrayElements[i].init(bytes,
 77   * begin, length) only when that element is accessed.
 78   */
 79  LazyBinaryObject[] arrayElements;
 80
 81  /**
 82   * Construct a LazyBinaryArray object with the ObjectInspector.
 83   * 
 84   * @param oi
 85   *          the oi representing the type of this LazyBinaryArray
 86   */
 87  protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
 88    super(oi);
 89  }
 90
 91  /**
 92   * Set the row data for this LazyBinaryArray.
 93   * 
 94   * @see LazyObject#init(ByteArrayRef, int, int)
 95   */
 96  @Override
 97  public void init(ByteArrayRef bytes, int start, int length) {
 98    super.init(bytes, start, length);
 99    parsed = false;
100  }
101
102  /**
103   * Enlarge the size of arrays storing information for the elements inside the
104   * array.
105   */
106  private void adjustArraySize(int newSize) {
107    if (elementStart == null || elementStart.length < newSize) {
108      elementStart = new int[newSize];
109      elementLength = new int[newSize];
110      elementInited = new boolean[newSize];
111      elementIsNull = new boolean[newSize];
112      arrayElements = new LazyBinaryObject[newSize];
113    }
114  }
115
116  VInt vInt = new LazyBinaryUtils.VInt();
117  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
118
119  /**
120   * Parse the bytes and fill elementStart, elementLength, elementInited and
121   * elementIsNull.
122   */
123  private void parse() {
124
125    byte[] bytes = this.bytes.getData();
126
127    // get the vlong that represents the map size
128    LazyBinaryUtils.readVInt(bytes, start, vInt);
129    arraySize = vInt.value;
130    if (0 == arraySize) {
131      parsed = true;
132      return;
133    }
134
135    // adjust arrays
136    adjustArraySize(arraySize);
137    // find out the null-bytes
138    int arryByteStart = start + vInt.length;
139    int nullByteCur = arryByteStart;
140    int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
141    // the begin the real elements
142    int lastElementByteEnd = nullByteEnd;
143    // the list element object inspector
144    ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi)
145        .getListElementObjectInspector();
146    // parsing elements one by one
147    for (int i = 0; i < arraySize; i++) {
148      elementIsNull[i] = true;
149      if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
150        elementIsNull[i] = false;
151        LazyBinaryUtils.checkObjectByteInfo(listEleObjectInspector, bytes,
152            lastElementByteEnd, recordInfo);
153        elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
154        elementLength[i] = recordInfo.elementSize;
155        lastElementByteEnd = elementStart[i] + elementLength[i];
156      }
157      // move onto the next null byte
158      if (7 == (i % 8)) {
159        nullByteCur++;
160      }
161    }
162
163    Arrays.fill(elementInited, 0, arraySize, false);
164    parsed = true;
165  }
166
167  /**
168   * Returns the actual primitive object at the index position inside the array
169   * represented by this LazyBinaryObject.
170   */
171  public Object getListElementObject(int index) {
172    if (!parsed) {
173      parse();
174    }
175    if (index < 0 || index >= arraySize) {
176      return null;
177    }
178    return uncheckedGetElement(index);
179  }
180
181  /**
182   * Get the element without checking out-of-bound index.
183   * 
184   * @param index
185   *          index to the array element
186   */
187  private Object uncheckedGetElement(int index) {
188
189    if (elementIsNull[index]) {
190      return null;
191    } else {
192      if (!elementInited[index]) {
193        elementInited[index] = true;
194        if (arrayElements[index] == null) {
195          arrayElements[index] = LazyBinaryFactory.createLazyBinaryObject((oi)
196              .getListElementObjectInspector());
197        }
198        arrayElements[index].init(bytes, elementStart[index],
199            elementLength[index]);
200      }
201    }
202    return arrayElements[index].getObject();
203  }
204
205  /**
206   * Returns the array size.
207   */
208  public int getListLength() {
209    if (!parsed) {
210      parse();
211    }
212    return arraySize;
213  }
214
215  /**
216   * cachedList is reused every time getList is called. Different
217   * LazyBianryArray instances cannot share the same cachedList.
218   */
219  ArrayList<Object> cachedList;
220
221  /**
222   * Returns the List of actual primitive objects. Returns null for null array.
223   */
224  public List<Object> getList() {
225    if (!parsed) {
226      parse();
227    }
228    if (cachedList == null) {
229      cachedList = new ArrayList<Object>(arraySize);
230    } else {
231      cachedList.clear();
232    }
233    for (int index = 0; index < arraySize; index++) {
234      cachedList.add(uncheckedGetElement(index));
235    }
236    return cachedList;
237  }
238}