
/tags/release-0.2.0-rc0/hive/external/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.shims;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import javax.security.auth.login.LoginException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.security.UserGroupInformation;

/**
 * In order to be compatible with multiple versions of Hadoop, all parts
 * of the Hadoop interface that are not cross-version compatible are
 * encapsulated in an implementation of this class. Users should use
 * the ShimLoader class as a factory to obtain an implementation of
 * HadoopShims corresponding to the version of Hadoop currently on the
 * classpath.
 */
public interface HadoopShims {

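  // Usage sketch (added for illustration; not part of the original interface):
  // per the Javadoc above, callers obtain an implementation through the
  // ShimLoader factory. Assuming ShimLoader exposes a static getHadoopShims()
  // accessor, as in the Hive shims module, that looks roughly like:
  //
  //   HadoopShims shims = ShimLoader.getHadoopShims();
  //   boolean secure = shims.isSecureShimImpl();
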
  static final Log LOG = LogFactory.getLog(HadoopShims.class);

  /**
   * Return true if the current version of Hadoop uses the JobShell for
   * command line interpretation.
   */
  boolean usesJobShell();

  /**
   * Return true if the job has not switched to RUNNING state yet
   * and is still in PREP state
   */
  boolean isJobPreparing(RunningJob job) throws IOException;

  /**
   * Calls fs.deleteOnExit(path) if such a function exists.
   *
   * @return true if the call was successful
   */
  boolean fileSystemDeleteOnExit(FileSystem fs, Path path) throws IOException;

  /**
   * Calls fmt.validateInput(conf) if such a function exists.
   */
  void inputFormatValidateInput(InputFormat fmt, JobConf conf) throws IOException;

  /**
   * If JobClient.getCommandLineConfig exists, sets the given
   * property/value pair in that Configuration object.
   *
   * This applies for Hadoop 0.17 through 0.19
   */
  void setTmpFiles(String prop, String files);

  /**
   * Return the last access time of the given file.
   * @param file
   * @return last access time. -1 if not supported.
   */
  long getAccessTime(FileStatus file);

  /**
   * Returns a shim to wrap MiniDFSCluster. This is necessary since this class
   * was moved from org.apache.hadoop.dfs to org.apache.hadoop.hdfs
   */
  MiniDFSShim getMiniDfs(Configuration conf,
      int numDataNodes,
      boolean format,
      String[] racks) throws IOException;

  /**
   * Shim around the functions in MiniDFSCluster that Hive uses.
   */
  public interface MiniDFSShim {
    FileSystem getFileSystem() throws IOException;

    void shutdown() throws IOException;
  }
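
  // Test-setup sketch (added for illustration): driving the MiniDFSShim returned
  // by getMiniDfs above; `shims` is the instance obtained from ShimLoader, and
  // the argument values are made up.
  //
  //   MiniDFSShim dfs = shims.getMiniDfs(conf, 4 /* data nodes */, true /* format */, null /* racks */);
  //   FileSystem fs = dfs.getFileSystem();
  //   // ... exercise fs ...
  //   dfs.shutdown();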

  /**
   * We define this function here to make the code compatible between
   * hadoop 0.17 and hadoop 0.20.
   *
   * A Hive binary that compiled Text.compareTo(Text) against hadoop 0.20
   * won't work with hadoop 0.17, because in hadoop 0.20 Text.compareTo(Text)
   * is implemented in org.apache.hadoop.io.BinaryComparable, and the Java
   * compiler references that class, which is not available in hadoop 0.17.
   */
  int compareText(Text a, Text b);
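
  // Illustration (added): version-independent code compares Text keys through the
  // shim rather than calling Text.compareTo(Text) directly; `a` and `b` are
  // hypothetical Text values.
  //
  //   int cmp = shims.compareText(a, b);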

  CombineFileInputFormatShim getCombineFileInputFormat();

  String getInputFormatClassName();

  /**
   * Wrapper for Configuration.setFloat, which was not introduced
   * until 0.20.
   */
  void setFloatConf(Configuration conf, String varName, float val);

  /**
   * getTaskJobIDs returns an array of String with two elements. The first
   * element is a string representing the task id and the second is a string
   * representing the job id. This is necessary as TaskID and TaskAttemptID
   * are not supported in Hadoop 0.17.
   */
  String[] getTaskJobIDs(TaskCompletionEvent t);
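
  // Illustration (added): unpacking the result for a task completion event
  // `event`; index 0 holds the task id and index 1 holds the job id.
  //
  //   String[] ids = shims.getTaskJobIDs(event);
  //   String taskId = ids[0];
  //   String jobId = ids[1];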

  int createHadoopArchive(Configuration conf, Path parentDir, Path destDir,
      String archiveName) throws Exception;

  /**
   * Hive uses side effect files exclusively for its output. It also manages
   * the setup/cleanup/commit of output from the Hive client, so it does not
   * need the MR framework to provide the same support.
   *
   * This routine sets the output format and any options needed to bypass
   * setup/cleanup/commit support in the MR framework.
   */
  void setNullOutputFormat(JobConf conf);

  /**
   * Get the UGI that the given job configuration will run as.
   *
   * In secure versions of Hadoop, this simply returns the current
   * access control context's user, ignoring the configuration.
   */
  public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException;

  /**
   * Get the short name corresponding to the subject in the passed UGI.
   *
   * In secure versions of Hadoop, this returns the short name (after
   * undergoing the translation in the Kerberos name rule mapping).
   * In insecure versions of Hadoop, this returns the name of the subject.
   */
  public String getShortUserName(UserGroupInformation ugi);
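
  // Illustration (added): resolving the effective short user name for a job
  // configuration `jobConf` in a version-independent way.
  //
  //   UserGroupInformation ugi = shims.getUGIForConf(jobConf);
  //   String user = shims.getShortUserName(ugi);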

  /**
   * Return true if the Shim is based on Hadoop Security APIs.
   */
  public boolean isSecureShimImpl();

  /**
   * Get the string form of the token given a token signature.
   * The signature is used as the value of the "service" field in the token for lookup.
   * Ref: AbstractDelegationTokenSelector in Hadoop. If such a token exists
   * in the token cache (credential store) of the job, the lookup returns it.
   * This is relevant only when running against a "secure" hadoop release.
   * The method gets hold of the tokens if they are set up by hadoop - this should
   * happen on the map/reduce tasks if the client added the tokens into hadoop's
   * credential store in the front end during job submission. The method will
   * select the hive delegation token among the set of tokens and return its
   * string form.
   * @param tokenSignature
   * @return the string form of the token found
   * @throws IOException
   */
  String getTokenStrForm(String tokenSignature) throws IOException;
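
  // Illustration (added): on a map/reduce task of a secure cluster, after the
  // client stored a Hive delegation token under this signature at submission
  // time; the signature value is hypothetical.
  //
  //   String tokenStr = shims.getTokenStrForm("hiveMetastoreToken");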

  /**
   * InputSplitShim: shim over the combined input split handed out by
   * CombineFileInputFormatShim.
   */
  public interface InputSplitShim extends InputSplit {
    JobConf getJob();

    long getLength();

    /** Returns an array containing the start offsets of the files in the split. */
    long[] getStartOffsets();

    /** Returns an array containing the lengths of the files in the split. */
    long[] getLengths();

    /** Returns the start offset of the i<sup>th</sup> Path. */
    long getOffset(int i);

    /** Returns the length of the i<sup>th</sup> Path. */
    long getLength(int i);

    /** Returns the number of Paths in the split. */
    int getNumPaths();

    /** Returns the i<sup>th</sup> Path. */
    Path getPath(int i);

    /** Returns all the Paths in the split. */
    Path[] getPaths();

    /** Returns all the Paths where this input-split resides. */
    String[] getLocations() throws IOException;

    void shrinkSplit(long length);

    String toString();

    void readFields(DataInput in) throws IOException;

    void write(DataOutput out) throws IOException;
  }
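
  // Illustration (added): walking the files packed into a combined split `split`,
  // using only the methods declared on InputSplitShim above.
  //
  //   for (int i = 0; i < split.getNumPaths(); i++) {
  //     Path file = split.getPath(i);
  //     long offset = split.getOffset(i);
  //     long length = split.getLength(i);
  //     // open `file`, seek to `offset`, read `length` bytes ...
  //   }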

  /**
   * CombineFileInputFormatShim: shim over the combine-file input format.
   *
   * @param <K> record key type
   * @param <V> record value type
   */
  interface CombineFileInputFormatShim<K, V> {
    Path[] getInputPathsShim(JobConf conf);

    void createPool(JobConf conf, PathFilter... filters);

    InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException;

    InputSplitShim getInputSplitShim() throws IOException;

    RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporter,
        Class<RecordReader<K, V>> rrClass) throws IOException;
  }
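
  // Illustration (added): driving split generation and record reading through the
  // shim; `job`, `reporter`, `rrClass`, and `desiredNumSplits` are placeholders.
  //
  //   CombineFileInputFormatShim combineShim = shims.getCombineFileInputFormat();
  //   InputSplitShim[] splits = combineShim.getSplits(job, desiredNumSplits);
  //   RecordReader reader = combineShim.getRecordReader(job, splits[0], reporter, rrClass);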
}