/tags/release-0.2.0-rc0/hive/external/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.shims;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import javax.security.auth.login.LoginException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.security.UserGroupInformation;
/**
 * In order to be compatible with multiple versions of Hadoop, all parts
 * of the Hadoop interface that are not cross-version compatible are
 * encapsulated in an implementation of this class. Users should use
 * the ShimLoader class as a factory to obtain an implementation of
 * HadoopShims corresponding to the version of Hadoop currently on the
 * classpath.
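 *
 * A minimal usage sketch (illustrative only; it assumes the ShimLoader
 * factory in this package exposes a static getHadoopShims() method, and
 * that fileStatus is a FileStatus obtained elsewhere):
 *
 * <pre>
 *   HadoopShims shims = ShimLoader.getHadoopShims();
 *   long lastAccess = shims.getAccessTime(fileStatus); // -1 if unsupported
 * </pre>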
 */
public interface HadoopShims {

  static final Log LOG = LogFactory.getLog(HadoopShims.class);

  /**
   * Return true if the current version of Hadoop uses the JobShell for
   * command-line interpretation.
   */
  boolean usesJobShell();

  /**
   * Return true if the job has not yet switched to the RUNNING state
   * and is still in the PREP state.
   */
  boolean isJobPreparing(RunningJob job) throws IOException;

  /**
   * Calls fs.deleteOnExit(path) if such a function exists.
   *
   * @return true if the call was successful
   */
  boolean fileSystemDeleteOnExit(FileSystem fs, Path path) throws IOException;

  /**
   * Calls fmt.validateInput(conf) if such a function exists.
   */
  void inputFormatValidateInput(InputFormat fmt, JobConf conf) throws IOException;

  /**
   * If JobClient.getCommandLineConfig exists, sets the given
   * property/value pair in that Configuration object.
   *
   * This applies to Hadoop 0.17 through 0.19.
   */
  void setTmpFiles(String prop, String files);

  /**
   * Return the last access time of the given file.
   * @param file
   * @return the last access time, or -1 if not supported.
   */
  long getAccessTime(FileStatus file);

  /**
   * Returns a shim to wrap MiniDFSCluster. This is necessary since this class
   * was moved from org.apache.hadoop.dfs to org.apache.hadoop.hdfs.
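   *
   * A sketch of how a test might use this (illustrative only; the shims
   * variable is assumed to hold a HadoopShims instance obtained via ShimLoader):
   *
   * <pre>
   *   MiniDFSShim dfs = shims.getMiniDfs(conf, 4, true, null);
   *   FileSystem fs = dfs.getFileSystem();
   *   // ... run the test against fs ...
   *   dfs.shutdown();
   * </pre>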
   */
  MiniDFSShim getMiniDfs(Configuration conf,
      int numDataNodes,
      boolean format,
      String[] racks) throws IOException;

  /**
   * Shim around the functions in MiniDFSCluster that Hive uses.
   */
  public interface MiniDFSShim {
    FileSystem getFileSystem() throws IOException;

    void shutdown() throws IOException;
  }

  /**
   * We define this function here to make the code compatible between
   * Hadoop 0.17 and Hadoop 0.20.
   *
   * A Hive binary that compiled Text.compareTo(Text) against Hadoop 0.20 won't
   * work with Hadoop 0.17 because in Hadoop 0.20, Text.compareTo(Text) is
   * implemented in org.apache.hadoop.io.BinaryComparable, and the Java compiler
   * references that class, which is not available in Hadoop 0.17.
   */
  int compareText(Text a, Text b);

  CombineFileInputFormatShim getCombineFileInputFormat();

  String getInputFormatClassName();

  /**
   * Wrapper for Configuration.setFloat, which was not introduced
   * until 0.20.
   */
  void setFloatConf(Configuration conf, String varName, float val);

  /**
   * getTaskJobIDs returns an array of String with two elements. The first
   * element is a string representing the task id and the second is a string
   * representing the job id. This is necessary because TaskID and TaskAttemptID
   * are not supported in Hadoop 0.17.
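   *
   * A small sketch of the intended use (illustrative only; event is an
   * assumed TaskCompletionEvent and shims an assumed HadoopShims instance):
   *
   * <pre>
   *   String[] ids = shims.getTaskJobIDs(event);
   *   String taskId = ids[0];
   *   String jobId = ids[1];
   * </pre>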
   */
  String[] getTaskJobIDs(TaskCompletionEvent t);

  int createHadoopArchive(Configuration conf, Path parentDir, Path destDir,
      String archiveName) throws Exception;

  /**
   * Hive uses side-effect files exclusively for its output. It also manages
   * the setup/cleanup/commit of output from the Hive client. As a result it does
   * not need support for the same inside the MR framework.
   *
   * This routine sets the appropriate options to set the output format and any
   * options related to bypassing setup/cleanup/commit support in the MR framework.
   */
  void setNullOutputFormat(JobConf conf);

  /**
   * Get the UGI that the given job configuration will run as.
   *
   * In secure versions of Hadoop, this simply returns the current
   * access control context's user, ignoring the configuration.
   */
  public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException;

  /**
   * Get the short name corresponding to the subject in the passed UGI.
   *
   * In secure versions of Hadoop, this returns the short name (after
   * undergoing the translation in the Kerberos name rule mapping).
   * In insecure versions of Hadoop, this returns the name of the subject.
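   *
   * For example (a sketch only; conf is an assumed job Configuration and
   * shims an assumed HadoopShims instance):
   *
   * <pre>
   *   UserGroupInformation ugi = shims.getUGIForConf(conf);
   *   String user = shims.getShortUserName(ugi);
   * </pre>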
   */
  public String getShortUserName(UserGroupInformation ugi);

  /**
   * Return true if the shim is based on the Hadoop security APIs.
   */
  public boolean isSecureShimImpl();

  /**
   * Get the string form of the token given a token signature.
   * The signature is used as the value of the "service" field in the token for lookup.
   * Ref: AbstractDelegationTokenSelector in Hadoop. If such a token exists
   * in the token cache (credential store) of the job, the lookup returns it.
   * This is relevant only when running against a "secure" Hadoop release.
   * The method gets hold of the tokens if they are set up by Hadoop; this should
   * happen on the map/reduce tasks if the client added the tokens into Hadoop's
   * credential store in the front end during job submission. The method
   * selects the Hive delegation token among the set of tokens and returns its
   * string form.
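   *
   * A hedged sketch of a typical call from a task (tokenSignature here is
   * an assumed application-chosen service name, not a fixed value, and shims
   * an assumed HadoopShims instance):
   *
   * <pre>
   *   String tokenStr = shims.getTokenStrForm(tokenSignature);
   *   if (tokenStr != null) {
   *     // use the serialized delegation token, e.g. to authenticate a client
   *   }
   * </pre>
   *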
   * @param tokenSignature
   * @return the string form of the token found
   * @throws IOException
   */
  String getTokenStrForm(String tokenSignature) throws IOException;

  /**
   * InputSplitShim.
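   *
   * As a sketch, a caller holding an InputSplitShim (assumed here to be the
   * variable split) can inspect its constituent files like this:
   *
   * <pre>
   *   Path[] paths = split.getPaths();
   *   long[] offsets = split.getStartOffsets();
   *   long[] lengths = split.getLengths();
   * </pre>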
   */
  public interface InputSplitShim extends InputSplit {
    JobConf getJob();

    long getLength();

    /** Returns an array containing the start offsets of the files in the split. */
    long[] getStartOffsets();

    /** Returns an array containing the lengths of the files in the split. */
    long[] getLengths();

    /** Returns the start offset of the i<sup>th</sup> Path. */
    long getOffset(int i);

    /** Returns the length of the i<sup>th</sup> Path. */
    long getLength(int i);

    /** Returns the number of Paths in the split. */
    int getNumPaths();

    /** Returns the i<sup>th</sup> Path. */
    Path getPath(int i);

    /** Returns all the Paths in the split. */
    Path[] getPaths();

    /** Returns all the locations where this input split resides. */
    String[] getLocations() throws IOException;

    void shrinkSplit(long length);

    String toString();

    void readFields(DataInput in) throws IOException;

    void write(DataOutput out) throws IOException;
  }

  /**
   * CombineFileInputFormatShim.
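   *
   * A rough sketch of how a caller might drive this shim (illustrative only;
   * job, reporter, rrClass, and shims are assumed to be set up by the caller):
   *
   * <pre>
   *   CombineFileInputFormatShim shim = shims.getCombineFileInputFormat();
   *   InputSplitShim[] splits = shim.getSplits(job, 1);
   *   RecordReader reader = shim.getRecordReader(job, splits[0], reporter, rrClass);
   * </pre>
   *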
   * @param <K>
   * @param <V>
   */
  interface CombineFileInputFormatShim<K, V> {
    Path[] getInputPathsShim(JobConf conf);

    void createPool(JobConf conf, PathFilter... filters);

    InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException;

    InputSplitShim getInputSplitShim() throws IOException;

    RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporter,
        Class<RecordReader<K, V>> rrClass) throws IOException;
  }
}