/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.antlr.runtime.tree.Tree;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.plan.CopyWork;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;

/**
 * LoadSemanticAnalyzer analyzes the LOAD DATA statement and plans the tasks
 * that copy or move data files into the location of a table or partition.
 */
public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {

  // set from the optional LOCAL and OVERWRITE keywords of the statement
  private boolean isLocal;
  private boolean isOverWrite;

  public LoadSemanticAnalyzer(HiveConf conf) throws SemanticException {
    super(conf);
  }

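  /**
   * Expands the given path into the set of files it names: glob patterns are
   * resolved, and a single matching directory is expanded into the files it
   * contains.
   */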
  public static FileStatus[] matchFilesOrDir(FileSystem fs, Path path)
      throws IOException {
    FileStatus[] srcs = fs.globStatus(path);
    if ((srcs != null) && srcs.length == 1) {
      if (srcs[0].isDir()) {
        srcs = fs.listStatus(srcs[0].getPath());
      }
    }
    return srcs;
  }

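  /**
   * Turns the source path of the load into a fully qualified URI: a relative
   * path is made absolute (against the current working directory for LOCAL
   * loads, or the user's HDFS home directory otherwise), and a missing scheme
   * or authority is filled in from the default file system.
   */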
  private URI initializeFromURI(String fromPath) throws IOException,
      URISyntaxException {
    URI fromURI = new Path(fromPath).toUri();

    String fromScheme = fromURI.getScheme();
    String fromAuthority = fromURI.getAuthority();
    String path = fromURI.getPath();

    // generate absolute path relative to current directory or hdfs home
    // directory
    if (!path.startsWith("/")) {
      if (isLocal) {
        path = new Path(System.getProperty("user.dir"), path).toString();
      } else {
        path = new Path(new Path("/user/" + System.getProperty("user.name")),
            path).toString();
      }
    }

    // set correct scheme and authority
    if (StringUtils.isEmpty(fromScheme)) {
      if (isLocal) {
        // file for local
        fromScheme = "file";
      } else {
        // use default values from fs.default.name
        URI defaultURI = FileSystem.get(conf).getUri();
        fromScheme = defaultURI.getScheme();
        fromAuthority = defaultURI.getAuthority();
      }
    }

    // if scheme is specified but not authority then use the default authority
    if (fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) {
      URI defaultURI = FileSystem.get(conf).getUri();
      fromAuthority = defaultURI.getAuthority();
    }

    LOG.debug(fromScheme + "@" + fromAuthority + "@" + path);
    return new URI(fromScheme, fromAuthority, path, null, null);
  }

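  /**
   * Sanity-checks the source and destination of the load: only the "file" and
   * "hdfs" schemes are accepted, a LOCAL load must read from the local file
   * system, the source must match at least one file and may not contain
   * directories, and a non-LOCAL load may not move data across file systems.
   */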
  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
      boolean isLocal) throws SemanticException {
    if (!fromURI.getScheme().equals("file")
        && !fromURI.getScheme().equals("hdfs")) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
          "only \"file\" or \"hdfs\" file systems accepted"));
    }

    // local mode implies that scheme should be "file"
    // we can change this going forward
    if (isLocal && !fromURI.getScheme().equals("file")) {
      throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast,
          "Source file system should be \"file\" if \"local\" is specified"));
    }

    try {
      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf),
          new Path(fromURI.getScheme(), fromURI.getAuthority(),
              fromURI.getPath()));

      if (srcs == null || srcs.length == 0) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
            "No files matching path " + fromURI));
      }

      for (FileStatus oneSrc : srcs) {
        if (oneSrc.isDir()) {
          throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
              "source contains directory: " + oneSrc.getPath().toString()));
        }
      }
    } catch (IOException e) {
      throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
    }

    // only in 'local' mode do we copy data from one place to another.
    // reject a different scheme/authority in all other cases.
    if (!isLocal
        && (!StringUtils.equals(fromURI.getScheme(), toURI.getScheme())
            || !StringUtils.equals(fromURI.getAuthority(), toURI.getAuthority()))) {
      String reason = "Move from: " + fromURI.toString() + " to: "
          + toURI.toString() + " is not valid. "
          + "Please check that values for params \"fs.default.name\" and "
          + "\"hive.metastore.warehouse.dir\" do not conflict.";
      throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
    }
  }

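  /**
   * Entry point for LOAD DATA: reads the optional LOCAL and OVERWRITE
   * keywords, resolves and validates the source URI and the destination
   * table or partition, and generates the copy and move tasks that carry
   * out the load.
   */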
  @Override
  public void analyzeInternal(ASTNode ast) throws SemanticException {
    isLocal = false;
    isOverWrite = false;
    Tree fromTree = ast.getChild(0);
    Tree tableTree = ast.getChild(1);

    // the optional LOCAL and OVERWRITE keywords appear as extra children of
    // the AST: four children means both are present, three means one of them
    if (ast.getChildCount() == 4) {
      isLocal = true;
      isOverWrite = true;
    }

    if (ast.getChildCount() == 3) {
      if (ast.getChild(2).getText().toLowerCase().equals("local")) {
        isLocal = true;
      } else {
        isOverWrite = true;
      }
    }

    // initialize load path
    URI fromURI;
    try {
      String fromPath = stripQuotes(fromTree.getText());
      fromURI = initializeFromURI(fromPath);
    } catch (IOException e) {
      throw new SemanticException(
          ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    } catch (URISyntaxException e) {
      throw new SemanticException(
          ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    }

    // initialize destination table/partition
    tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);

    if (ts.tableHandle.isOffline()) {
      throw new SemanticException(
          ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + ts.tableName));
    }

    if (ts.tableHandle.isView()) {
      throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
      throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }
    URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation()
        : ts.tableHandle.getDataLocation();

    // an OVERWRITE of a partitioned table must name a target partition
    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if (isOverWrite && (parts != null && parts.size() > 0)
        && (ts.partSpec == null || ts.partSpec.size() == 0)) {
      throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
    }

    // make sure the arguments make sense
    applyConstraints(fromURI, toURI, fromTree, isLocal);

    Task<? extends Serializable> rTask = null;

    // create copy work
    if (isLocal) {
      // if the LOCAL keyword is specified we always make a copy. this might
      // seem redundant when the hive warehouse is itself on the local file
      // system, but that only happens in test setups.
      String copyURIStr = ctx.getExternalTmpFileURI(toURI);
      URI copyURI = URI.create(copyURIStr);
      rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr),
          conf);
      fromURI = copyURI;
    }

    // create final load/move work
    String loadTmpPath = ctx.getExternalTmpFileURI(toURI);
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
      partSpec = new LinkedHashMap<String, String>();
    } else {
      // reject loads into a partition that has been marked offline
      try {
        Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
        if (part != null) {
          if (part.isOffline()) {
            throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
                getMsg(ts.tableName + ":" + part.getName()));
          }
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
    }

    LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(),
        loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);

    // the move task depends on the copy task when one was created
    if (rTask != null) {
      rTask.addDependentTask(TaskFactory.get(new MoveWork(getInputs(),
          getOutputs(), loadTableWork, null, true), conf));
    } else {
      rTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(),
          loadTableWork, null, true), conf);
    }

    rootTasks.add(rTask);
  }
}