PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java

https://github.com/steeve/hive
Java | 198 lines | 151 code | 22 blank | 25 comment | 25 complexity | 2ed74176a682fe41fe20cdedc3b8caf5 MD5 | raw file
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.io;
  19. import java.io.DataInput;
  20. import java.io.DataOutput;
  21. import java.io.File;
  22. import java.io.IOException;
  23. import java.io.Serializable;
  24. import java.net.URI;
  25. import java.net.URISyntaxException;
  26. import java.util.ArrayList;
  27. import java.util.HashMap;
  28. import java.util.Iterator;
  29. import java.util.Map;
  30. import java.util.Map.Entry;
  31. import org.apache.commons.logging.Log;
  32. import org.apache.commons.logging.LogFactory;
  33. import org.apache.hadoop.conf.Configurable;
  34. import org.apache.hadoop.conf.Configuration;
  35. import org.apache.hadoop.fs.Path;
  36. import org.apache.hadoop.fs.PathFilter;
  37. import org.apache.hadoop.hive.ql.exec.ExecMapper;
  38. import org.apache.hadoop.hive.ql.exec.Operator;
  39. import org.apache.hadoop.hive.ql.exec.TableScanOperator;
  40. import org.apache.hadoop.hive.ql.exec.Utilities;
  41. import org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit;
  42. import org.apache.hadoop.hive.ql.plan.MapredWork;
  43. import org.apache.hadoop.hive.ql.plan.PartitionDesc;
  44. import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
  45. import org.apache.hadoop.hive.shims.ShimLoader;
  46. import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim;
  47. import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim;
  48. import org.apache.hadoop.io.Writable;
  49. import org.apache.hadoop.io.WritableComparable;
  50. import org.apache.hadoop.mapred.FileInputFormat;
  51. import org.apache.hadoop.mapred.FileSplit;
  52. import org.apache.hadoop.mapred.InputFormat;
  53. import org.apache.hadoop.mapred.InputSplit;
  54. import org.apache.hadoop.mapred.JobConf;
  55. import org.apache.hadoop.mapred.JobConfigurable;
  56. import org.apache.hadoop.mapred.RecordReader;
  57. import org.apache.hadoop.mapred.Reporter;
  58. import org.apache.hadoop.util.ReflectionUtils;
  59. /**
  60. * HiveInputSplit encapsulates an InputSplit with its corresponding
  61. * inputFormatClass. The reason that it derives from FileSplit is to make sure
  62. * "map.input.file" in MapTask.
  63. */
  64. public class BucketizedHiveInputSplit extends HiveInputSplit {
  65. protected InputSplit[] inputSplits;
  66. protected String inputFormatClassName;
  67. public String getInputFormatClassName() {
  68. return inputFormatClassName;
  69. }
  70. public void setInputFormatClassName(String inputFormatClassName) {
  71. this.inputFormatClassName = inputFormatClassName;
  72. }
  73. public BucketizedHiveInputSplit() {
  74. // This is the only public constructor of FileSplit
  75. super();
  76. }
  77. public BucketizedHiveInputSplit(InputSplit[] inputSplits,
  78. String inputFormatClassName) {
  79. // This is the only public constructor of FileSplit
  80. super();
  81. assert (inputSplits != null && inputSplits.length > 0);
  82. this.inputSplits = inputSplits;
  83. this.inputFormatClassName = inputFormatClassName;
  84. }
  85. public int getNumSplits() {
  86. return inputSplits.length;
  87. }
  88. public InputSplit getSplit(int idx) {
  89. assert (idx >= 0 && idx < inputSplits.length);
  90. return inputSplits[idx];
  91. }
  92. public String inputFormatClassName() {
  93. return inputFormatClassName;
  94. }
  95. @Override
  96. public Path getPath() {
  97. if (inputSplits != null && inputSplits.length > 0
  98. && inputSplits[0] instanceof FileSplit) {
  99. return ((FileSplit) inputSplits[0]).getPath();
  100. }
  101. return new Path("");
  102. }
  103. /** The position of the first byte in the file to process. */
  104. @Override
  105. public long getStart() {
  106. if (inputSplits != null && inputSplits.length > 0
  107. && inputSplits[0] instanceof FileSplit) {
  108. return ((FileSplit) inputSplits[0]).getStart();
  109. }
  110. return 0;
  111. }
  112. @Override
  113. public String toString() {
  114. if (inputSplits != null && inputSplits.length > 0) {
  115. return inputFormatClassName + ":" + inputSplits[0].toString();
  116. }
  117. return inputFormatClassName + ":null";
  118. }
  119. @Override
  120. public long getLength() {
  121. long r = 0;
  122. if (inputSplits != null) {
  123. try {
  124. for (InputSplit inputSplit : inputSplits) {
  125. r += inputSplit.getLength();
  126. }
  127. } catch (Exception e) {
  128. throw new RuntimeException(e);
  129. }
  130. }
  131. return r;
  132. }
  133. public long getLength(int idx) {
  134. if (inputSplits != null) {
  135. try {
  136. return inputSplits[idx].getLength();
  137. } catch (Exception e) {
  138. throw new RuntimeException(e);
  139. }
  140. }
  141. return -1;
  142. }
  143. @Override
  144. public String[] getLocations() throws IOException {
  145. assert (inputSplits != null && inputSplits.length > 0);
  146. return inputSplits[0].getLocations();
  147. }
  148. @Override
  149. public void readFields(DataInput in) throws IOException {
  150. String inputSplitClassName = in.readUTF();
  151. int numSplits = in.readInt();
  152. inputSplits = new InputSplit[numSplits];
  153. for (int i = 0; i < numSplits; i++) {
  154. try {
  155. inputSplits[i] = (InputSplit) ReflectionUtils.newInstance(conf
  156. .getClassByName(inputSplitClassName), conf);
  157. } catch (Exception e) {
  158. throw new IOException(
  159. "Cannot create an instance of InputSplit class = "
  160. + inputSplitClassName + ":" + e.getMessage());
  161. }
  162. inputSplits[i].readFields(in);
  163. }
  164. inputFormatClassName = in.readUTF();
  165. }
  166. @Override
  167. public void write(DataOutput out) throws IOException {
  168. assert (inputSplits != null && inputSplits.length > 0);
  169. out.writeUTF(inputSplits[0].getClass().getName());
  170. out.writeInt(inputSplits.length);
  171. for (InputSplit inputSplit : inputSplits) {
  172. inputSplit.write(out);
  173. }
  174. out.writeUTF(inputFormatClassName);
  175. }
  176. }