/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveRecordReader.java

# · Java · 125 lines · 81 code · 14 blank · 30 comment · 15 complexity · e40a898eef61b963e08562405b77b858 MD5 · raw file

  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.io;
  19. import java.io.IOException;
  20. import org.apache.hadoop.io.Writable;
  21. import org.apache.hadoop.io.WritableComparable;
  22. import org.apache.hadoop.mapred.InputFormat;
  23. import org.apache.hadoop.mapred.InputSplit;
  24. import org.apache.hadoop.mapred.JobConf;
  25. import org.apache.hadoop.mapred.RecordReader;
  26. import org.apache.hadoop.mapred.Reporter;
  27. import org.apache.hadoop.mapred.SequenceFileInputFormat;
  28. /**
  29. * BucketizedHiveRecordReader is a wrapper on a list of RecordReader. It behaves
  30. * similar as HiveRecordReader while it wraps a list of RecordReader from one
  31. * file.
  32. */
  33. public class BucketizedHiveRecordReader<K extends WritableComparable, V extends Writable>
  34. extends HiveContextAwareRecordReader<K, V> {
  35. protected final BucketizedHiveInputSplit split;
  36. protected final InputFormat inputFormat;
  37. protected final JobConf jobConf;
  38. protected final Reporter reporter;
  39. protected RecordReader curReader;
  40. protected long progress;
  41. protected int idx;
  42. public BucketizedHiveRecordReader(InputFormat inputFormat,
  43. BucketizedHiveInputSplit bucketizedSplit, JobConf jobConf,
  44. Reporter reporter) throws IOException {
  45. this.split = bucketizedSplit;
  46. this.inputFormat = inputFormat;
  47. this.jobConf = jobConf;
  48. this.reporter = reporter;
  49. initNextRecordReader();
  50. }
  51. public void doClose() throws IOException {
  52. if (curReader != null) {
  53. curReader.close();
  54. curReader = null;
  55. }
  56. idx = 0;
  57. }
  58. public K createKey() {
  59. return (K) curReader.createKey();
  60. }
  61. public V createValue() {
  62. return (V) curReader.createValue();
  63. }
  64. public long getPos() throws IOException {
  65. if (curReader != null) {
  66. return curReader.getPos();
  67. } else {
  68. return 0;
  69. }
  70. }
  71. public float getProgress() throws IOException {
  72. // The calculation is strongly dependent on the assumption that all splits
  73. // came from the same file
  74. return Math.min(1.0f, ((curReader == null) ? progress : curReader.getPos())
  75. / (float) (split.getLength()));
  76. }
  77. public boolean doNext(K key, V value) throws IOException {
  78. while ((curReader == null) || !curReader.next(key, value)) {
  79. if (!initNextRecordReader()) {
  80. return false;
  81. }
  82. }
  83. return true;
  84. }
  85. /**
  86. * Get the record reader for the next chunk in this
  87. * BucketizedHiveRecordReader.
  88. */
  89. protected boolean initNextRecordReader() throws IOException {
  90. if (curReader != null) {
  91. curReader.close();
  92. curReader = null;
  93. if (idx > 0) {
  94. progress += split.getLength(idx - 1); // done processing so far
  95. }
  96. }
  97. // if all chunks have been processed, nothing more to do.
  98. if (idx == split.getNumSplits()) {
  99. return false;
  100. }
  101. // get a record reader for the idx-th chunk
  102. try {
  103. curReader = inputFormat.getRecordReader(split.getSplit(idx), jobConf,
  104. reporter);
  105. } catch (Exception e) {
  106. throw new RuntimeException(e);
  107. }
  108. idx++;
  109. return true;
  110. }
  111. }