PageRenderTime 64ms CodeModel.GetById 8ms app.highlight 51ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/release-0.0.0-rc0/hive/external/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesInput.java

#
Java | 533 lines | 286 code | 40 blank | 207 comment | 92 complexity | d71f330de7bae6faefd5e77924a43376 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.contrib.util.typedbytes;
 20
 21import java.io.DataInput;
 22import java.io.EOFException;
 23import java.io.IOException;
 24import java.util.ArrayList;
 25import java.util.List;
 26import java.util.TreeMap;
 27
 28import org.apache.hadoop.io.WritableUtils;
 29import org.apache.hadoop.record.Buffer;
 30
 31/**
 32 * Provides functionality for reading typed bytes.
 33 */
 34public class TypedBytesInput {
 35
 36  private DataInput in;
 37
 38  private TypedBytesInput() {
 39  }
 40
 41  private void setDataInput(DataInput in) {
 42    this.in = in;
 43  }
 44
 45  private static ThreadLocal tbIn = new ThreadLocal() {
 46    @Override
 47    protected synchronized Object initialValue() {
 48      return new TypedBytesInput();
 49    }
 50  };
 51
 52  /**
 53   * Get a thread-local typed bytes input for the supplied {@link DataInput}.
 54   * 
 55   * @param in
 56   *          data input object
 57   * @return typed bytes input corresponding to the supplied {@link DataInput}.
 58   */
 59  public static TypedBytesInput get(DataInput in) {
 60    TypedBytesInput bin = (TypedBytesInput) tbIn.get();
 61    bin.setDataInput(in);
 62    return bin;
 63  }
 64
 65  /** Creates a new instance of TypedBytesInput. */
 66  public TypedBytesInput(DataInput in) {
 67    this.in = in;
 68  }
 69
 70  /**
 71   * Reads a typed bytes sequence and converts it to a Java object. The first
 72   * byte is interpreted as a type code, and then the right number of subsequent
 73   * bytes are read depending on the obtained type.
 74   * 
 75   * @return the obtained object or null when the end of the file is reached
 76   * @throws IOException
 77   */
 78  public Object read() throws IOException {
 79    int code = 1;
 80    try {
 81      code = in.readUnsignedByte();
 82    } catch (EOFException eof) {
 83      return null;
 84    }
 85    if (code == Type.BYTES.code) {
 86      return new Buffer(readBytes());
 87    } else if (code == Type.BYTE.code) {
 88      return readByte();
 89    } else if (code == Type.BOOL.code) {
 90      return readBool();
 91    } else if (code == Type.INT.code) {
 92      return readInt();
 93    } else if (code == Type.SHORT.code) {
 94      return readShort();
 95    } else if (code == Type.LONG.code) {
 96      return readLong();
 97    } else if (code == Type.FLOAT.code) {
 98      return readFloat();
 99    } else if (code == Type.DOUBLE.code) {
100      return readDouble();
101    } else if (code == Type.STRING.code) {
102      return readString();
103    } else if (code == Type.VECTOR.code) {
104      return readVector();
105    } else if (code == Type.LIST.code) {
106      return readList();
107    } else if (code == Type.MAP.code) {
108      return readMap();
109    } else if (code == Type.MARKER.code) {
110      return null;
111    } else if (50 <= code && code <= 200) { // application-specific typecodes
112      return new Buffer(readBytes());
113    } else {
114      throw new RuntimeException("unknown type");
115    }
116  }
117
118  /**
119   * Reads a typed bytes sequence. The first byte is interpreted as a type code,
120   * and then the right number of subsequent bytes are read depending on the
121   * obtained type.
122   * 
123   * @return the obtained typed bytes sequence or null when the end of the file
124   *         is reached
125   * @throws IOException
126   */
127  public byte[] readRaw() throws IOException {
128    int code = -1;
129    try {
130      code = in.readUnsignedByte();
131    } catch (EOFException eof) {
132      return null;
133    }
134    if (code == Type.BYTES.code) {
135      return readRawBytes();
136    } else if (code == Type.BYTE.code) {
137      return readRawByte();
138    } else if (code == Type.BOOL.code) {
139      return readRawBool();
140    } else if (code == Type.INT.code) {
141      return readRawInt();
142    } else if (code == Type.LONG.code) {
143      return readRawLong();
144    } else if (code == Type.FLOAT.code) {
145      return readRawFloat();
146    } else if (code == Type.DOUBLE.code) {
147      return readRawDouble();
148    } else if (code == Type.STRING.code) {
149      return readRawString();
150    } else if (code == Type.VECTOR.code) {
151      return readRawVector();
152    } else if (code == Type.LIST.code) {
153      return readRawList();
154    } else if (code == Type.MAP.code) {
155      return readRawMap();
156    } else if (code == Type.MARKER.code) {
157      return null;
158    } else if (50 <= code && code <= 200) { // application-specific typecodes
159      return readRawBytes();
160    } else {
161      throw new RuntimeException("unknown type");
162    }
163  }
164
165  /**
166   * Reads a type byte and returns the corresponding {@link Type}.
167   * 
168   * @return the obtained Type or null when the end of the file is reached
169   * @throws IOException
170   */
171  public Type readType() throws IOException {
172    int code = -1;
173    try {
174      code = in.readUnsignedByte();
175    } catch (EOFException eof) {
176      return null;
177    }
178    for (Type type : Type.values()) {
179      if (type.code == code) {
180        return type;
181      }
182    }
183    return null;
184  }
185
186  /**
187   * Skips a type byte.
188   * 
189   * @return true iff the end of the file was not reached
190   * @throws IOException
191   */
192  public boolean skipType() throws IOException {
193    try {
194      in.readByte();
195      return true;
196    } catch (EOFException eof) {
197      return false;
198    }
199  }
200
201  /**
202   * Reads the bytes following a <code>Type.BYTES</code> code.
203   * 
204   * @return the obtained bytes sequence
205   * @throws IOException
206   */
207  public byte[] readBytes() throws IOException {
208    int length = in.readInt();
209    byte[] bytes = new byte[length];
210    in.readFully(bytes);
211    return bytes;
212  }
213
214  /**
215   * Reads the raw bytes following a <code>Type.BYTES</code> code.
216   * 
217   * @return the obtained bytes sequence
218   * @throws IOException
219   */
220  public byte[] readRawBytes() throws IOException {
221    int length = in.readInt();
222    byte[] bytes = new byte[5 + length];
223    bytes[0] = (byte) Type.BYTES.code;
224    bytes[1] = (byte) (0xff & (length >> 24));
225    bytes[2] = (byte) (0xff & (length >> 16));
226    bytes[3] = (byte) (0xff & (length >> 8));
227    bytes[4] = (byte) (0xff & length);
228    in.readFully(bytes, 5, length);
229    return bytes;
230  }
231
232  /**
233   * Reads the byte following a <code>Type.BYTE</code> code.
234   * 
235   * @return the obtained byte
236   * @throws IOException
237   */
238  public byte readByte() throws IOException {
239    return in.readByte();
240  }
241
242  /**
243   * Reads the raw byte following a <code>Type.BYTE</code> code.
244   * 
245   * @return the obtained byte
246   * @throws IOException
247   */
248  public byte[] readRawByte() throws IOException {
249    byte[] bytes = new byte[2];
250    bytes[0] = (byte) Type.BYTE.code;
251    in.readFully(bytes, 1, 1);
252    return bytes;
253  }
254
255  /**
256   * Reads the boolean following a <code>Type.BOOL</code> code.
257   * 
258   * @return the obtained boolean
259   * @throws IOException
260   */
261  public boolean readBool() throws IOException {
262    return in.readBoolean();
263  }
264
265  /**
266   * Reads the raw bytes following a <code>Type.BOOL</code> code.
267   * 
268   * @return the obtained bytes sequence
269   * @throws IOException
270   */
271  public byte[] readRawBool() throws IOException {
272    byte[] bytes = new byte[2];
273    bytes[0] = (byte) Type.BOOL.code;
274    in.readFully(bytes, 1, 1);
275    return bytes;
276  }
277
278  /**
279   * Reads the integer following a <code>Type.INT</code> code.
280   * 
281   * @return the obtained integer
282   * @throws IOException
283   */
284  public int readInt() throws IOException {
285    return in.readInt();
286  }
287
288  /**
289   * Reads the short following a <code>Type.SHORT</code> code.
290   * 
291   * @return the obtained short
292   * @throws IOException
293   */
294  public short readShort() throws IOException {
295    return in.readShort();
296  }
297
298  /**
299   * Reads the raw bytes following a <code>Type.INT</code> code.
300   * 
301   * @return the obtained bytes sequence
302   * @throws IOException
303   */
304  public byte[] readRawInt() throws IOException {
305    byte[] bytes = new byte[5];
306    bytes[0] = (byte) Type.INT.code;
307    in.readFully(bytes, 1, 4);
308    return bytes;
309  }
310
311  /**
312   * Reads the long following a <code>Type.LONG</code> code.
313   * 
314   * @return the obtained long
315   * @throws IOException
316   */
317  public long readLong() throws IOException {
318    return in.readLong();
319  }
320
321  /**
322   * Reads the raw bytes following a <code>Type.LONG</code> code.
323   * 
324   * @return the obtained bytes sequence
325   * @throws IOException
326   */
327  public byte[] readRawLong() throws IOException {
328    byte[] bytes = new byte[9];
329    bytes[0] = (byte) Type.LONG.code;
330    in.readFully(bytes, 1, 8);
331    return bytes;
332  }
333
334  /**
335   * Reads the float following a <code>Type.FLOAT</code> code.
336   * 
337   * @return the obtained float
338   * @throws IOException
339   */
340  public float readFloat() throws IOException {
341    return in.readFloat();
342  }
343
344  /**
345   * Reads the raw bytes following a <code>Type.FLOAT</code> code.
346   * 
347   * @return the obtained bytes sequence
348   * @throws IOException
349   */
350  public byte[] readRawFloat() throws IOException {
351    byte[] bytes = new byte[5];
352    bytes[0] = (byte) Type.FLOAT.code;
353    in.readFully(bytes, 1, 4);
354    return bytes;
355  }
356
357  /**
358   * Reads the double following a <code>Type.DOUBLE</code> code.
359   * 
360   * @return the obtained double
361   * @throws IOException
362   */
363  public double readDouble() throws IOException {
364    return in.readDouble();
365  }
366
367  /**
368   * Reads the raw bytes following a <code>Type.DOUBLE</code> code.
369   * 
370   * @return the obtained bytes sequence
371   * @throws IOException
372   */
373  public byte[] readRawDouble() throws IOException {
374    byte[] bytes = new byte[9];
375    bytes[0] = (byte) Type.DOUBLE.code;
376    in.readFully(bytes, 1, 8);
377    return bytes;
378  }
379
380  /**
381   * Reads the string following a <code>Type.STRING</code> code.
382   * 
383   * @return the obtained string
384   * @throws IOException
385   */
386  public String readString() throws IOException {
387    return WritableUtils.readString(in);
388  }
389
390  /**
391   * Reads the raw bytes following a <code>Type.STRING</code> code.
392   * 
393   * @return the obtained bytes sequence
394   * @throws IOException
395   */
396  public byte[] readRawString() throws IOException {
397    int length = in.readInt();
398    byte[] bytes = new byte[5 + length];
399    bytes[0] = (byte) Type.STRING.code;
400    bytes[1] = (byte) (0xff & (length >> 24));
401    bytes[2] = (byte) (0xff & (length >> 16));
402    bytes[3] = (byte) (0xff & (length >> 8));
403    bytes[4] = (byte) (0xff & length);
404    in.readFully(bytes, 5, length);
405    return bytes;
406  }
407
408  /**
409   * Reads the vector following a <code>Type.VECTOR</code> code.
410   * 
411   * @return the obtained vector
412   * @throws IOException
413   */
414  @SuppressWarnings("unchecked")
415  public ArrayList readVector() throws IOException {
416    int length = readVectorHeader();
417    ArrayList result = new ArrayList(length);
418    for (int i = 0; i < length; i++) {
419      result.add(read());
420    }
421    return result;
422  }
423
424  /**
425   * Reads the raw bytes following a <code>Type.VECTOR</code> code.
426   * 
427   * @return the obtained bytes sequence
428   * @throws IOException
429   */
430  public byte[] readRawVector() throws IOException {
431    Buffer buffer = new Buffer();
432    int length = readVectorHeader();
433    buffer.append(new byte[] {(byte) Type.VECTOR.code,
434        (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)),
435        (byte) (0xff & (length >> 8)), (byte) (0xff & length)});
436    for (int i = 0; i < length; i++) {
437      buffer.append(readRaw());
438    }
439    return buffer.get();
440  }
441
442  /**
443   * Reads the header following a <code>Type.VECTOR</code> code.
444   * 
445   * @return the number of elements in the vector
446   * @throws IOException
447   */
448  public int readVectorHeader() throws IOException {
449    return in.readInt();
450  }
451
452  /**
453   * Reads the list following a <code>Type.LIST</code> code.
454   * 
455   * @return the obtained list
456   * @throws IOException
457   */
458  @SuppressWarnings("unchecked")
459  public List readList() throws IOException {
460    List list = new ArrayList();
461    Object obj = read();
462    while (obj != null) {
463      list.add(obj);
464      obj = read();
465    }
466    return list;
467  }
468
469  /**
470   * Reads the raw bytes following a <code>Type.LIST</code> code.
471   * 
472   * @return the obtained bytes sequence
473   * @throws IOException
474   */
475  public byte[] readRawList() throws IOException {
476    Buffer buffer = new Buffer(new byte[] {(byte) Type.LIST.code});
477    byte[] bytes = readRaw();
478    while (bytes != null) {
479      buffer.append(bytes);
480      bytes = readRaw();
481    }
482    buffer.append(new byte[] {(byte) Type.MARKER.code});
483    return buffer.get();
484  }
485
486  /**
487   * Reads the map following a <code>Type.MAP</code> code.
488   * 
489   * @return the obtained map
490   * @throws IOException
491   */
492  @SuppressWarnings("unchecked")
493  public TreeMap readMap() throws IOException {
494    int length = readMapHeader();
495    TreeMap result = new TreeMap();
496    for (int i = 0; i < length; i++) {
497      Object key = read();
498      Object value = read();
499      result.put(key, value);
500    }
501    return result;
502  }
503
504  /**
505   * Reads the raw bytes following a <code>Type.MAP</code> code.
506   * 
507   * @return the obtained bytes sequence
508   * @throws IOException
509   */
510  public byte[] readRawMap() throws IOException {
511    Buffer buffer = new Buffer();
512    int length = readMapHeader();
513    buffer.append(new byte[] {(byte) Type.MAP.code,
514        (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)),
515        (byte) (0xff & (length >> 8)), (byte) (0xff & length)});
516    for (int i = 0; i < length; i++) {
517      buffer.append(readRaw());
518      buffer.append(readRaw());
519    }
520    return buffer.get();
521  }
522
523  /**
524   * Reads the header following a <code>Type.MAP</code> code.
525   * 
526   * @return the number of key-value pairs in the map
527   * @throws IOException
528   */
529  public int readMapHeader() throws IOException {
530    return in.readInt();
531  }
532
533}