
/tags/release-0.0.0-rc0/hive/external/common/src/java/org/apache/hadoop/hive/common/FileUtils.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.common;

import java.io.IOException;
import java.net.URI;
import java.util.BitSet;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;

/**
 * Collection of file manipulation utilities common across Hive.
 */
public final class FileUtils {
  /**
   * Variant of Path.makeQualified that qualifies the input path against the default file system
   * indicated by the configuration.
   *
   * This does not require a FileSystem handle in most cases - only the FileSystem URI is needed.
   * That saves the cost of opening the FileSystem - which can involve RPCs - and avoids the
   * errors that opening it can cause.
   *
   * @param path
   *          path to be fully qualified
   * @param conf
   *          Hadoop configuration
   * @return path qualified relative to the default file system
   */
  public static Path makeQualified(Path path, Configuration conf) throws IOException {
    if (!path.isAbsolute()) {
      // in this case we need to get the working directory
      // and this requires a FileSystem handle. So revert to
      // original method.
      return path.makeQualified(FileSystem.get(conf));
    }

    URI fsUri = FileSystem.getDefaultUri(conf);
    URI pathUri = path.toUri();
    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();

    // validate/fill-in scheme and authority. this follows logic
    // identical to FileSystem.get(URI, conf) - but doesn't actually
    // obtain a file system handle
    if (scheme == null) {
      // no scheme - use default file system uri
      scheme = fsUri.getScheme();
      authority = fsUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    } else {
      if (authority == null) {
        // no authority - use default one if it applies
        if (scheme.equals(fsUri.getScheme()) && fsUri.getAuthority() != null) {
          authority = fsUri.getAuthority();
        } else {
          authority = "";
        }
      }
    }

    return new Path(scheme, authority, pathUri.getPath());
  }
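
  // A minimal usage sketch, not part of the original file: qualify a bare absolute path against
  // the default file system. The namenode URI below is a made-up example value.
  //
  //   Configuration conf = new Configuration();
  //   FileSystem.setDefaultUri(conf, "hdfs://namenode:8020");   // assumed cluster address
  //   Path qualified = FileUtils.makeQualified(new Path("/warehouse/t1"), conf);
  //   // qualified -> hdfs://namenode:8020/warehouse/t1, without opening a FileSystem handle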
  private FileUtils() {
    // prevent instantiation
  }

  public static String makePartName(List<String> partCols, List<String> vals) {
    StringBuilder name = new StringBuilder();
    for (int i = 0; i < partCols.size(); i++) {
      if (i > 0) {
        name.append(Path.SEPARATOR);
      }
      name.append(escapePathName((partCols.get(i)).toLowerCase()));
      name.append('=');
      name.append(escapePathName(vals.get(i)));
    }
    return name.toString();
  }
  // NOTE: This is for generating the internal path name for partitions. Users
  // should always use the MetaStore API to get the path name for a partition.
  // Users should not directly take partition values and turn them into a path
  // name by themselves, because the logic below may change in the future.
  //
  // In the future, it's OK to add new chars to the escape list, and old data
  // won't be corrupted, because the full path name is stored in the metastore.
  // In that case, Hive will continue to read the old data, but when it creates
  // new partitions, it will use the new names.
  static BitSet charToEscape = new BitSet(128);
  static {
    for (char c = 0; c < ' '; c++) {
      charToEscape.set(c);
    }
    char[] clist = new char[] {'"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
        ']'};
    for (char c : clist) {
      charToEscape.set(c);
    }
  }

  static boolean needsEscaping(char c) {
    return c >= 0 && c < charToEscape.size() && charToEscape.get(c);
  }
  public static String escapePathName(String path) {
    // __HIVE_DEFAULT_PARTITION__ is the system default value used for null and empty strings.
    // TODO: we should allow the user to specify the default partition name or HDFS file location.
    if (path == null || path.length() == 0) {
      return "__HIVE_DEFAULT_PARTITION__";
    }

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < path.length(); i++) {
      char c = path.charAt(i);
      if (needsEscaping(c)) {
        sb.append('%');
        sb.append(String.format("%1$02X", (int) c));
      } else {
        sb.append(c);
      }
    }
    return sb.toString();
  }
  public static String unescapePathName(String path) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < path.length(); i++) {
      char c = path.charAt(i);
      if (c == '%' && i + 2 < path.length()) {
        int code = -1;
        try {
          code = Integer.valueOf(path.substring(i + 1, i + 3), 16);
        } catch (Exception e) {
          code = -1;
        }
        if (code >= 0) {
          sb.append((char) code);
          i += 2;
          continue;
        }
      }
      sb.append(c);
    }
    return sb.toString();
  }
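
  // A minimal sketch, not part of the original file, of how escaping round-trips when a
  // partition name is built; the column and value strings are made-up examples and
  // java.util.Arrays is assumed to be imported.
  //
  //   String name = FileUtils.makePartName(Arrays.asList("ds", "country"),
  //       Arrays.asList("2009-01-01", "US/east"));
  //   // name -> "ds=2009-01-01/country=US%2Feast"  ('/' inside a value is escaped as %2F)
  //   // FileUtils.unescapePathName("US%2Feast") -> "US/east"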
  /**
   * Recursively lists status for all files starting from a particular directory (or individual
   * file as base case).
   *
   * @param fs
   *          file system
   * @param fileStatus
   *          starting point in file system
   * @param results
   *          receives enumeration of all files found
   */
  public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
      List<FileStatus> results) throws IOException {
    if (fileStatus.isDir()) {
      for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
        listStatusRecursively(fs, stat, results);
      }
    } else {
      results.add(fileStatus);
    }
  }
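
  // A minimal usage sketch, not part of the original file: collect every regular file under an
  // assumed directory. The path is a made-up example and java.util.ArrayList is assumed to be
  // imported; FileSystem.get and getFileStatus are standard Hadoop calls.
  //
  //   FileSystem fs = FileSystem.get(conf);
  //   List<FileStatus> files = new ArrayList<FileStatus>();
  //   FileUtils.listStatusRecursively(fs, fs.getFileStatus(new Path("/warehouse/t1")), files);
  //   // files now holds the FileStatus of every file (not directory) under /warehouse/t1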
  /**
   * Archives all the files in inputFiles into outputFile.
   *
   * @param parentDir directory the input file names are relative to; the tar command is run
   *          from this directory
   * @param inputFiles names of the files to archive
   * @param outputFile the gzipped tar file to create (a relative path is resolved against
   *          parentDir)
   * @throws IOException if the tar process exits with a non-zero code
   */
  public static void tar(String parentDir, String[] inputFiles, String outputFile)
      throws IOException {
    StringBuffer tarCommand = new StringBuffer();
    tarCommand.append("cd " + parentDir + " ; ");
    tarCommand.append(" tar -zcvf ");
    tarCommand.append(" " + outputFile);
    for (int i = 0; i < inputFiles.length; i++) {
      tarCommand.append(" " + inputFiles[i]);
    }
    String[] shellCmd = {"bash", "-c", tarCommand.toString()};
    ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
    shexec.execute();
    int exitcode = shexec.getExitCode();
    if (exitcode != 0) {
      throw new IOException("Error tarring file " + outputFile
          + ". Tar process exited with exit code " + exitcode);
    }
  }
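
  // A minimal usage sketch, not part of the original file; the directory and file names are
  // made-up examples. The input files must exist under the parent directory, and bash and tar
  // must be available locally, since the archive is built by shelling out.
  //
  //   FileUtils.tar("/tmp/export", new String[] {"part-00000", "part-00001"}, "export.tar.gz");
  //   // runs: bash -c "cd /tmp/export ;  tar -zcvf  export.tar.gz part-00000 part-00001"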
}