PageRenderTime 29ms CodeModel.GetById 17ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/release-0.0.0-rc0/hive/external/common/src/java/org/apache/hadoop/hive/common/FileUtils.java

#
Java | 228 lines | 160 code | 16 blank | 52 comment | 15 complexity | 3d66bfe3097ce56e40201f5a0dfe1266 MD5 | raw file
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 18
 19package org.apache.hadoop.hive.common;
 20
 21import java.io.IOException;
 22import java.net.URI;
 23import java.util.BitSet;
 24import java.util.List;
 25
 26import org.apache.hadoop.conf.Configuration;
 27import org.apache.hadoop.fs.FileStatus;
 28import org.apache.hadoop.fs.FileSystem;
 29import org.apache.hadoop.fs.Path;
 30import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 31
 32/**
 33 * Collection of file manipulation utilities common across Hive.
 34 */
 35public final class FileUtils {
 36
 37  /**
 38   * Variant of Path.makeQualified that qualifies the input path against the default file system
 39   * indicated by the configuration
 40   *
 41   * This does not require a FileSystem handle in most cases - only requires the Filesystem URI.
 42   * This saves the cost of opening the Filesystem - which can involve RPCs - as well as cause
 43   * errors
 44   *
 45   * @param path
 46   *          path to be fully qualified
 47   * @param conf
 48   *          Configuration file
 49   * @return path qualified relative to default file system
 50   */
 51  public static Path makeQualified(Path path, Configuration conf) throws IOException {
 52
 53    if (!path.isAbsolute()) {
 54      // in this case we need to get the working directory
 55      // and this requires a FileSystem handle. So revert to
 56      // original method.
 57      return path.makeQualified(FileSystem.get(conf));
 58    }
 59
 60    URI fsUri = FileSystem.getDefaultUri(conf);
 61    URI pathUri = path.toUri();
 62
 63    String scheme = pathUri.getScheme();
 64    String authority = pathUri.getAuthority();
 65
 66    // validate/fill-in scheme and authority. this follows logic
 67    // identical to FileSystem.get(URI, conf) - but doesn't actually
 68    // obtain a file system handle
 69
 70    if (scheme == null) {
 71      // no scheme - use default file system uri
 72      scheme = fsUri.getScheme();
 73      authority = fsUri.getAuthority();
 74      if (authority == null) {
 75        authority = "";
 76      }
 77    } else {
 78      if (authority == null) {
 79        // no authority - use default one if it applies
 80        if (scheme.equals(fsUri.getScheme()) && fsUri.getAuthority() != null) {
 81          authority = fsUri.getAuthority();
 82        } else {
 83          authority = "";
 84        }
 85      }
 86    }
 87
 88    return new Path(scheme, authority, pathUri.getPath());
 89  }
 90
 91  private FileUtils() {
 92    // prevent instantiation
 93  }
 94
 95
 96  public static String makePartName(List<String> partCols, List<String> vals) {
 97
 98    StringBuilder name = new StringBuilder();
 99    for (int i = 0; i < partCols.size(); i++) {
100      if (i > 0) {
101        name.append(Path.SEPARATOR);
102      }
103      name.append(escapePathName((partCols.get(i)).toLowerCase()));
104      name.append('=');
105      name.append(escapePathName(vals.get(i)));
106    }
107    return name.toString();
108  }
109
110  // NOTE: This is for generating the internal path name for partitions. Users
111  // should always use the MetaStore API to get the path name for a partition.
112  // Users should not directly take partition values and turn it into a path
113  // name by themselves, because the logic below may change in the future.
114  //
115  // In the future, it's OK to add new chars to the escape list, and old data
116  // won't be corrupt, because the full path name in metastore is stored.
117  // In that case, Hive will continue to read the old data, but when it creates
118  // new partitions, it will use new names.
119  static BitSet charToEscape = new BitSet(128);
120  static {
121    for (char c = 0; c < ' '; c++) {
122      charToEscape.set(c);
123    }
124    char[] clist = new char[] {'"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
125        ']'};
126    for (char c : clist) {
127      charToEscape.set(c);
128    }
129  }
130
131  static boolean needsEscaping(char c) {
132    return c >= 0 && c < charToEscape.size() && charToEscape.get(c);
133  }
134
135  public static String escapePathName(String path) {
136
137    // __HIVE_DEFAULT_NULL__ is the system default value for null and empty string. We should
138    // TODO: we should allow user to specify default partition or HDFS file location.
139    if (path == null || path.length() == 0) {
140      return "__HIVE_DEFAULT_PARTITION__";
141    }
142
143    StringBuilder sb = new StringBuilder();
144    for (int i = 0; i < path.length(); i++) {
145      char c = path.charAt(i);
146      if (needsEscaping(c)) {
147        sb.append('%');
148        sb.append(String.format("%1$02X", (int) c));
149      } else {
150        sb.append(c);
151      }
152    }
153    return sb.toString();
154  }
155
156  public static String unescapePathName(String path) {
157    StringBuilder sb = new StringBuilder();
158    for (int i = 0; i < path.length(); i++) {
159      char c = path.charAt(i);
160      if (c == '%' && i + 2 < path.length()) {
161        int code = -1;
162        try {
163          code = Integer.valueOf(path.substring(i + 1, i + 3), 16);
164        } catch (Exception e) {
165          code = -1;
166        }
167        if (code >= 0) {
168          sb.append((char) code);
169          i += 2;
170          continue;
171        }
172      }
173      sb.append(c);
174    }
175    return sb.toString();
176  }
177
178  /**
179   * Recursively lists status for all files starting from a particular directory (or individual file
180   * as base case).
181   *
182   * @param fs
183   *          file system
184   *
185   * @param fileStatus
186   *          starting point in file system
187   *
188   * @param results
189   *          receives enumeration of all files found
190   */
191  public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
192      List<FileStatus> results) throws IOException {
193
194    if (fileStatus.isDir()) {
195      for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
196        listStatusRecursively(fs, stat, results);
197      }
198    } else {
199      results.add(fileStatus);
200    }
201  }
202
203  /**
204   * Archive all the files in the inputFiles into outputFile
205   *
206   * @param inputFiles
207   * @param outputFile
208   * @throws IOException
209   */
210  public static void tar(String parentDir, String[] inputFiles, String outputFile)
211      throws IOException {
212    StringBuffer tarCommand = new StringBuffer();
213    tarCommand.append("cd " + parentDir + " ; ");
214    tarCommand.append(" tar -zcvf ");
215    tarCommand.append(" " + outputFile);
216    for (int i = 0; i < inputFiles.length; i++) {
217      tarCommand.append(" " + inputFiles[i]);
218    }
219    String[] shellCmd = {"bash", "-c", tarCommand.toString()};
220    ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
221    shexec.execute();
222    int exitcode = shexec.getExitCode();
223    if (exitcode != 0) {
224      throw new IOException("Error tarring file " + outputFile
225          + ". Tar process exited with exit code " + exitcode);
226    }
227  }
228}