PageRenderTime 55ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java

#
Java | 270 lines | 192 code | 36 blank | 42 comment | 54 complexity | 9552ab1f5ffdb36764333ce68b065d88 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.parse;
  19. import java.io.IOException;
  20. import java.io.Serializable;
  21. import java.net.URI;
  22. import java.net.URISyntaxException;
  23. import java.util.LinkedHashMap;
  24. import java.util.List;
  25. import java.util.Map;
  26. import org.antlr.runtime.tree.Tree;
  27. import org.apache.commons.lang.StringUtils;
  28. import org.apache.hadoop.fs.FileStatus;
  29. import org.apache.hadoop.fs.FileSystem;
  30. import org.apache.hadoop.fs.Path;
  31. import org.apache.hadoop.hive.conf.HiveConf;
  32. import org.apache.hadoop.hive.metastore.api.FieldSchema;
  33. import org.apache.hadoop.hive.ql.exec.Task;
  34. import org.apache.hadoop.hive.ql.exec.TaskFactory;
  35. import org.apache.hadoop.hive.ql.exec.Utilities;
  36. import org.apache.hadoop.hive.ql.metadata.Hive;
  37. import org.apache.hadoop.hive.ql.metadata.HiveException;
  38. import org.apache.hadoop.hive.ql.metadata.Partition;
  39. import org.apache.hadoop.hive.ql.plan.CopyWork;
  40. import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
  41. import org.apache.hadoop.hive.ql.plan.MoveWork;
  42. /**
  43. * LoadSemanticAnalyzer.
  44. *
  45. */
  46. public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
  47. private boolean isLocal;
  48. private boolean isOverWrite;
  49. public LoadSemanticAnalyzer(HiveConf conf) throws SemanticException {
  50. super(conf);
  51. }
  52. public static FileStatus[] matchFilesOrDir(FileSystem fs, Path path)
  53. throws IOException {
  54. FileStatus[] srcs = fs.globStatus(path);
  55. if ((srcs != null) && srcs.length == 1) {
  56. if (srcs[0].isDir()) {
  57. srcs = fs.listStatus(srcs[0].getPath());
  58. }
  59. }
  60. return (srcs);
  61. }
  62. private URI initializeFromURI(String fromPath) throws IOException,
  63. URISyntaxException {
  64. URI fromURI = new Path(fromPath).toUri();
  65. String fromScheme = fromURI.getScheme();
  66. String fromAuthority = fromURI.getAuthority();
  67. String path = fromURI.getPath();
  68. // generate absolute path relative to current directory or hdfs home
  69. // directory
  70. if (!path.startsWith("/")) {
  71. if (isLocal) {
  72. path = new Path(System.getProperty("user.dir"), path).toString();
  73. } else {
  74. path = new Path(new Path("/user/" + System.getProperty("user.name")),
  75. path).toString();
  76. }
  77. }
  78. // set correct scheme and authority
  79. if (StringUtils.isEmpty(fromScheme)) {
  80. if (isLocal) {
  81. // file for local
  82. fromScheme = "file";
  83. } else {
  84. // use default values from fs.default.name
  85. URI defaultURI = FileSystem.get(conf).getUri();
  86. fromScheme = defaultURI.getScheme();
  87. fromAuthority = defaultURI.getAuthority();
  88. }
  89. }
  90. // if scheme is specified but not authority then use the default authority
  91. if (fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) {
  92. URI defaultURI = FileSystem.get(conf).getUri();
  93. fromAuthority = defaultURI.getAuthority();
  94. }
  95. LOG.debug(fromScheme + "@" + fromAuthority + "@" + path);
  96. return new URI(fromScheme, fromAuthority, path, null, null);
  97. }
  98. private void applyConstraints(URI fromURI, URI toURI, Tree ast,
  99. boolean isLocal) throws SemanticException {
  100. if (!fromURI.getScheme().equals("file")
  101. && !fromURI.getScheme().equals("hdfs")) {
  102. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
  103. "only \"file\" or \"hdfs\" file systems accepted"));
  104. }
  105. // local mode implies that scheme should be "file"
  106. // we can change this going forward
  107. if (isLocal && !fromURI.getScheme().equals("file")) {
  108. throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast,
  109. "Source file system should be \"file\" if \"local\" is specified"));
  110. }
  111. try {
  112. FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf),
  113. new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI
  114. .getPath()));
  115. if (srcs == null || srcs.length == 0) {
  116. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
  117. "No files matching path " + fromURI));
  118. }
  119. for (FileStatus oneSrc : srcs) {
  120. if (oneSrc.isDir()) {
  121. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
  122. "source contains directory: " + oneSrc.getPath().toString()));
  123. }
  124. }
  125. } catch (IOException e) {
  126. // Has to use full name to make sure it does not conflict with
  127. // org.apache.commons.lang.StringUtils
  128. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
  129. }
  130. // only in 'local' mode do we copy stuff from one place to another.
  131. // reject different scheme/authority in other cases.
  132. if (!isLocal
  133. && (!StringUtils.equals(fromURI.getScheme(), toURI.getScheme()) || !StringUtils
  134. .equals(fromURI.getAuthority(), toURI.getAuthority()))) {
  135. String reason = "Move from: " + fromURI.toString() + " to: "
  136. + toURI.toString() + " is not valid. "
  137. + "Please check that values for params \"default.fs.name\" and "
  138. + "\"hive.metastore.warehouse.dir\" do not conflict.";
  139. throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
  140. }
  141. }
  142. @Override
  143. public void analyzeInternal(ASTNode ast) throws SemanticException {
  144. isLocal = false;
  145. isOverWrite = false;
  146. Tree fromTree = ast.getChild(0);
  147. Tree tableTree = ast.getChild(1);
  148. if (ast.getChildCount() == 4) {
  149. isLocal = true;
  150. isOverWrite = true;
  151. }
  152. if (ast.getChildCount() == 3) {
  153. if (ast.getChild(2).getText().toLowerCase().equals("local")) {
  154. isLocal = true;
  155. } else {
  156. isOverWrite = true;
  157. }
  158. }
  159. // initialize load path
  160. URI fromURI;
  161. try {
  162. String fromPath = stripQuotes(fromTree.getText());
  163. fromURI = initializeFromURI(fromPath);
  164. } catch (IOException e) {
  165. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
  166. .getMessage()), e);
  167. } catch (URISyntaxException e) {
  168. throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
  169. .getMessage()), e);
  170. }
  171. // initialize destination table/partition
  172. tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);
  173. if (ts.tableHandle.isOffline()){
  174. throw new SemanticException(
  175. ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
  176. }
  177. if (ts.tableHandle.isView()) {
  178. throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  179. }
  180. if (ts.tableHandle.isNonNative()) {
  181. throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  182. }
  183. URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation()
  184. : ts.tableHandle.getDataLocation();
  185. List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
  186. if (isOverWrite && (parts != null && parts.size() > 0)
  187. && (ts.partSpec == null || ts.partSpec.size() == 0)) {
  188. throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
  189. }
  190. // make sure the arguments make sense
  191. applyConstraints(fromURI, toURI, fromTree, isLocal);
  192. Task<? extends Serializable> rTask = null;
  193. // create copy work
  194. if (isLocal) {
  195. // if the local keyword is specified - we will always make a copy. this
  196. // might seem redundant in the case
  197. // that the hive warehouse is also located in the local file system - but
  198. // that's just a test case.
  199. String copyURIStr = ctx.getExternalTmpFileURI(toURI);
  200. URI copyURI = URI.create(copyURIStr);
  201. rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr),
  202. conf);
  203. fromURI = copyURI;
  204. }
  205. // create final load/move work
  206. String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
  207. Map<String, String> partSpec = ts.getPartSpec();
  208. if (partSpec == null) {
  209. partSpec = new LinkedHashMap<String, String>();
  210. } else {
  211. try{
  212. Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
  213. if (part != null) {
  214. if (part.isOffline()) {
  215. throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
  216. getMsg(ts.tableName + ":" + part.getName()));
  217. }
  218. }
  219. } catch(HiveException e) {
  220. throw new SemanticException(e);
  221. }
  222. }
  223. LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(),
  224. loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);
  225. if (rTask != null) {
  226. rTask.addDependentTask(TaskFactory.get(new MoveWork(getInputs(),
  227. getOutputs(), loadTableWork, null, true), conf));
  228. } else {
  229. rTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(),
  230. loadTableWork, null, true), conf);
  231. }
  232. rootTasks.add(rTask);
  233. }
  234. }