
/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveRecordReader.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io;

import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

/**
 * BucketizedHiveRecordReader is a wrapper around a list of RecordReaders. It
 * behaves like HiveRecordReader, except that it reads its records from the
 * several chunks of one file in sequence.
 */
public class BucketizedHiveRecordReader<K extends WritableComparable, V extends Writable>
    extends HiveContextAwareRecordReader<K, V> {
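  // State for stepping through the chunks of a single BucketizedHiveInputSplit.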
  protected final BucketizedHiveInputSplit split;
  protected final InputFormat inputFormat;
  protected final JobConf jobConf;
  protected final Reporter reporter;
  protected RecordReader curReader;
  protected long progress;
  protected int idx;

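  /**
   * Eagerly opens the reader for the first chunk; subsequent chunks are
   * opened lazily as each reader is exhausted.
   */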
  public BucketizedHiveRecordReader(InputFormat inputFormat,
      BucketizedHiveInputSplit bucketizedSplit, JobConf jobConf,
      Reporter reporter) throws IOException {
    this.split = bucketizedSplit;
    this.inputFormat = inputFormat;
    this.jobConf = jobConf;
    this.reporter = reporter;
    initNextRecordReader();
  }

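  /**
   * Closes the reader of the current chunk, if any, and resets the chunk
   * index.
   */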
  public void doClose() throws IOException {
    if (curReader != null) {
      curReader.close();
      curReader = null;
    }
    idx = 0;
  }

  public K createKey() {
    return (K) curReader.createKey();
  }

  public V createValue() {
    return (V) curReader.createValue();
  }

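  /**
   * Returns the position of the current chunk's reader, or 0 if no reader is
   * open.
   */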
  public long getPos() throws IOException {
    if (curReader != null) {
      return curReader.getPos();
    } else {
      return 0;
    }
  }

  public float getProgress() throws IOException {
    // This calculation depends on the assumption that all chunks come from
    // the same file, so the current reader's position is an absolute offset
    // into that file.
    return Math.min(1.0f, ((curReader == null) ? progress : curReader.getPos())
        / (float) (split.getLength()));
  }

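  /**
   * Reads the next key/value pair, transparently switching to the next
   * chunk's reader whenever the current one is exhausted. Returns false only
   * once every chunk has been consumed.
   */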
  public boolean doNext(K key, V value) throws IOException {
    while ((curReader == null) || !curReader.next(key, value)) {
      if (!initNextRecordReader()) {
        return false;
      }
    }
    return true;
  }

  /**
   * Gets the record reader for the next chunk in this
   * BucketizedHiveRecordReader, closing the current reader and adding its
   * chunk length to the accumulated progress. Returns false once all chunks
   * have been processed.
   */
  protected boolean initNextRecordReader() throws IOException {
    if (curReader != null) {
      curReader.close();
      curReader = null;
      if (idx > 0) {
        progress += split.getLength(idx - 1); // add the length of the chunk just finished
      }
    }

    // if all chunks have been processed, nothing more to do.
    if (idx == split.getNumSplits()) {
      return false;
    }

    // get a record reader for the idx-th chunk
    try {
      curReader = inputFormat.getRecordReader(split.getSplit(idx), jobConf,
          reporter);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    idx++;
    return true;
  }
}
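
For context, a minimal sketch of how a caller might drive this reader end to end. It is illustrative only: the file path, chunk offsets, and choice of TextInputFormat are made up; the BucketizedHiveInputSplit(InputSplit[], String) constructor shape is assumed; and next()/close() are assumed to be routed to doNext()/doClose() by the HiveContextAwareRecordReader base class.

package org.apache.hadoop.hive.ql.io;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class BucketizedReaderSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    TextInputFormat format = new TextInputFormat();
    format.configure(job);

    // Two contiguous chunks of the same (hypothetical) file, matching the
    // same-file assumption that getProgress() relies on.
    InputSplit[] chunks = new InputSplit[] {
        new FileSplit(new Path("/tmp/bucket_0"), 0L, 1024L, (String[]) null),
        new FileSplit(new Path("/tmp/bucket_0"), 1024L, 1024L, (String[]) null),
    };
    // Assumed constructor shape; see BucketizedHiveInputSplit for the real one.
    BucketizedHiveInputSplit split =
        new BucketizedHiveInputSplit(chunks, TextInputFormat.class.getName());

    BucketizedHiveRecordReader<LongWritable, Text> reader =
        new BucketizedHiveRecordReader<LongWritable, Text>(format, split, job,
            Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) { // chunk switching happens inside doNext()
      // consume one record per iteration
    }
    reader.close();
  }
}

The point of the wrapper is that the caller sees one continuous record stream, even though several underlying readers are opened and closed along the way.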