PageRenderTime 42ms CodeModel.GetById 14ms app.highlight 23ms RepoModel.GetById 2ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java

#
Java | 473 lines | 304 code | 75 blank | 94 comment | 48 complexity | e8d8b6a94175d023ec35c9c37bb9b5cc MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.exec;
 20
 21import java.io.IOException;
 22import java.io.Serializable;
 23import java.util.ArrayList;
 24import java.util.HashMap;
 25import java.util.LinkedList;
 26import java.util.List;
 27
 28import org.apache.commons.logging.Log;
 29import org.apache.commons.logging.LogFactory;
 30import org.apache.hadoop.hive.conf.HiveConf;
 31import org.apache.hadoop.hive.ql.Context;
 32import org.apache.hadoop.hive.ql.DriverContext;
 33import org.apache.hadoop.hive.ql.QueryPlan;
 34import org.apache.hadoop.hive.ql.lib.Node;
 35import org.apache.hadoop.hive.ql.metadata.Hive;
 36import org.apache.hadoop.hive.ql.metadata.HiveException;
 37import org.apache.hadoop.hive.ql.plan.api.StageType;
 38import org.apache.hadoop.hive.ql.session.SessionState;
 39import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 40import org.apache.hadoop.util.StringUtils;
 41
 42/**
 43 * Task implementation.
 44 **/
 45
 46public abstract class Task<T extends Serializable> implements Serializable, Node {
 47
 48  private static final long serialVersionUID = 1L;
 49  protected transient boolean started;
 50  protected transient boolean initialized;
 51  protected transient boolean isdone;
 52  protected transient boolean queued;
 53  protected transient HiveConf conf;
 54  protected transient Hive db;
 55  protected transient Log LOG;
 56  protected transient LogHelper console;
 57  protected transient QueryPlan queryPlan;
 58  protected transient TaskHandle taskHandle;
 59  protected transient HashMap<String, Long> taskCounters;
 60  protected transient DriverContext driverContext;
 61  protected transient boolean clonedConf = false;
 62  protected Task<? extends Serializable> backupTask;
 63  protected List<Task<? extends Serializable>> backupChildrenTasks = new ArrayList<Task<? extends Serializable>>();
 64  protected int taskTag;
 65  private boolean isLocalMode =false;
 66
 67  public static final int NO_TAG = 0;
 68  public static final int COMMON_JOIN = 1;
 69  public static final int CONVERTED_MAPJOIN = 2;
 70  public static final int CONVERTED_LOCAL_MAPJOIN = 3;
 71  public static final int BACKUP_COMMON_JOIN = 4;
 72  public static final int LOCAL_MAPJOIN=5;
 73
 74
 75  // Descendants tasks who subscribe feeds from this task
 76  protected transient List<Task<? extends Serializable>> feedSubscribers;
 77
 78  public static enum FeedType {
 79    DYNAMIC_PARTITIONS, // list of dynamic partitions
 80  };
 81
 82  // Bean methods
 83
 84  protected List<Task<? extends Serializable>> childTasks;
 85  protected List<Task<? extends Serializable>> parentTasks;
 86
 87  public Task() {
 88    isdone = false;
 89    started = false;
 90    initialized = false;
 91    queued = false;
 92    LOG = LogFactory.getLog(this.getClass().getName());
 93    this.taskCounters = new HashMap<String, Long>();
 94    taskTag = Task.NO_TAG;
 95  }
 96
 97  public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
 98    this.queryPlan = queryPlan;
 99    isdone = false;
100    started = false;
101    setInitialized();
102    this.conf = conf;
103
104    try {
105      db = Hive.get(conf);
106    } catch (HiveException e) {
107      // Bail out ungracefully - we should never hit
108      // this here - but would have hit it in SemanticAnalyzer
109      LOG.error(StringUtils.stringifyException(e));
110      throw new RuntimeException(e);
111    }
112    this.driverContext = driverContext;
113
114    console = new LogHelper(LOG);
115  }
116
117  /**
118   * This method is called in the Driver on every task. It updates counters and calls execute(),
119   * which is overridden in each task
120   *
121   * @return return value of execute()
122   */
123  public int executeTask() {
124    try {
125      SessionState ss = SessionState.get();
126      this.setStarted();
127      if (ss != null) {
128        ss.getHiveHistory().logPlanProgress(queryPlan);
129      }
130      int retval = execute(driverContext);
131      this.setDone();
132      if (ss != null) {
133        ss.getHiveHistory().logPlanProgress(queryPlan);
134      }
135      return retval;
136    } catch (IOException e) {
137      throw new RuntimeException(e.getMessage());
138    }
139  }
140
141  /**
142   * This method is overridden in each Task. TODO execute should return a TaskHandle.
143   *
144   * @return status of executing the task
145   */
146  protected abstract int execute(DriverContext driverContext);
147
148  // dummy method - FetchTask overwrites this
149  public boolean fetch(ArrayList<String> res) throws IOException {
150    assert false;
151    return false;
152  }
153
154  public void setChildTasks(List<Task<? extends Serializable>> childTasks) {
155    this.childTasks = childTasks;
156  }
157
158  public List<? extends Node> getChildren() {
159    return getChildTasks();
160  }
161
162  public List<Task<? extends Serializable>> getChildTasks() {
163    return childTasks;
164  }
165
166  public void setParentTasks(List<Task<? extends Serializable>> parentTasks) {
167    this.parentTasks = parentTasks;
168  }
169
170  public List<Task<? extends Serializable>> getParentTasks() {
171    return parentTasks;
172  }
173
174  public Task<? extends Serializable> getBackupTask() {
175    return backupTask;
176  }
177
178
179  public void setBackupTask(Task<? extends Serializable> backupTask) {
180    this.backupTask = backupTask;
181  }
182
183  public List<Task<? extends Serializable>> getBackupChildrenTasks() {
184    return backupChildrenTasks;
185  }
186
187  public void setBackupChildrenTasks(List<Task<? extends Serializable>> backupChildrenTasks) {
188    this.backupChildrenTasks = backupChildrenTasks;
189  }
190
191  public Task<? extends Serializable> getAndInitBackupTask() {
192    if (backupTask != null) {
193      // first set back the backup task with its children task.
194      for (Task<? extends Serializable> backupChild : backupChildrenTasks) {
195        backupChild.getParentTasks().add(backupTask);
196      }
197
198      // recursively remove task from its children tasks if this task doesn't have any parent task
199      this.removeFromChildrenTasks();
200    }
201    return backupTask;
202  }
203
204  public void removeFromChildrenTasks() {
205
206    List<Task<? extends Serializable>> childrenTasks = this.getChildTasks();
207    if (childrenTasks == null) {
208      return;
209    }
210
211    for (Task<? extends Serializable> childTsk : childrenTasks) {
212      // remove this task from its children tasks
213      childTsk.getParentTasks().remove(this);
214
215      // recursively remove non-parent task from its children
216      List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks();
217      if (siblingTasks == null || siblingTasks.size() == 0) {
218        childTsk.removeFromChildrenTasks();
219      }
220    }
221
222    return;
223  }
224
225
226  /**
227   * The default dependent tasks are just child tasks, but different types could implement their own
228   * (e.g. ConditionalTask will use the listTasks as dependents).
229   *
230   * @return a list of tasks that are dependent on this task.
231   */
232  public List<Task<? extends Serializable>> getDependentTasks() {
233    return getChildTasks();
234  }
235
236  /**
237   * Add a dependent task on the current task. Return if the dependency already existed or is this a
238   * new one
239   *
240   * @return true if the task got added false if it already existed
241   */
242  public boolean addDependentTask(Task<? extends Serializable> dependent) {
243    boolean ret = false;
244    if (getChildTasks() == null) {
245      setChildTasks(new ArrayList<Task<? extends Serializable>>());
246    }
247    if (!getChildTasks().contains(dependent)) {
248      ret = true;
249      getChildTasks().add(dependent);
250      if (dependent.getParentTasks() == null) {
251        dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
252      }
253      if (!dependent.getParentTasks().contains(this)) {
254        dependent.getParentTasks().add(this);
255      }
256    }
257    return ret;
258  }
259
260  /**
261   * Remove the dependent task.
262   *
263   * @param dependent
264   *          the task to remove
265   */
266  public void removeDependentTask(Task<? extends Serializable> dependent) {
267    if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
268      getChildTasks().remove(dependent);
269      if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
270        dependent.getParentTasks().remove(this);
271      }
272    }
273  }
274
275  public void setStarted() {
276    this.started = true;
277  }
278
279  public boolean started() {
280    return started;
281  }
282
283  public boolean done() {
284    return isdone;
285  }
286
287  public void setDone() {
288    isdone = true;
289  }
290
291  public void setQueued() {
292    queued = true;
293  }
294
295  public boolean getQueued() {
296    return queued;
297  }
298
299  public void setInitialized() {
300    initialized = true;
301  }
302
303  public boolean getInitialized() {
304    return initialized;
305  }
306
307  public boolean isRunnable() {
308    boolean isrunnable = true;
309    if (parentTasks != null) {
310      for (Task<? extends Serializable> parent : parentTasks) {
311        if (!parent.done()) {
312          isrunnable = false;
313          break;
314        }
315      }
316    }
317    return isrunnable;
318  }
319
320  protected String id;
321  protected T work;
322
323  public void setWork(T work) {
324    this.work = work;
325  }
326
327  public T getWork() {
328    return work;
329  }
330
331  public void setId(String id) {
332    this.id = id;
333  }
334
335  public String getId() {
336    return id;
337  }
338
339  public boolean isMapRedTask() {
340    return false;
341  }
342
343  public boolean isMapRedLocalTask() {
344    return false;
345  }
346
347  public boolean hasReduce() {
348    return false;
349  }
350
351  public HashMap<String, Long> getCounters() {
352    return taskCounters;
353  }
354
355  /**
356   * Should be overridden to return the type of the specific task among the types in StageType.
357   *
358   * @return StageType.* or null if not overridden
359   */
360  public abstract StageType getType();
361
362  /**
363   * If this task uses any map-reduce intermediate data (either for reading or for writing),
364   * localize them (using the supplied Context). Map-Reduce intermediate directories are allocated
365   * using Context.getMRTmpFileURI() and can be localized using localizeMRTmpFileURI().
366   *
367   * This method is declared abstract to force any task code to explicitly deal with this aspect of
368   * execution.
369   *
370   * @param ctx
371   *          context object with which to localize
372   */
373  abstract protected void localizeMRTmpFilesImpl(Context ctx);
374
375  /**
376   * Localize a task tree
377   *
378   * @param ctx
379   *          context object with which to localize
380   */
381  public final void localizeMRTmpFiles(Context ctx) {
382    localizeMRTmpFilesImpl(ctx);
383
384    if (childTasks == null) {
385      return;
386    }
387
388    for (Task<? extends Serializable> t : childTasks) {
389      t.localizeMRTmpFiles(ctx);
390    }
391  }
392
393  /**
394   * Subscribe the feed of publisher. To prevent cycles, a task can only subscribe to its ancestor.
395   * Feed is a generic form of execution-time feedback (type, value) pair from one task to another
396   * task. Examples include dynamic partitions (which are only available at execution time). The
397   * MoveTask may pass the list of dynamic partitions to the StatsTask since after the MoveTask the
398   * list of dynamic partitions are lost (MoveTask moves them to the table's destination directory
399   * which is mixed with old partitions).
400   *
401   * @param publisher
402   *          this feed provider.
403   */
404  public void subscribeFeed(Task<? extends Serializable> publisher) {
405    if (publisher != this && publisher.ancestorOrSelf(this)) {
406      if (publisher.getFeedSubscribers() == null) {
407        publisher.setFeedSubscribers(new LinkedList<Task<? extends Serializable>>());
408      }
409      publisher.getFeedSubscribers().add(this);
410    }
411  }
412
413  // return true if this task is an ancestor of itself of parameter desc
414  private boolean ancestorOrSelf(Task<? extends Serializable> desc) {
415    if (this == desc) {
416      return true;
417    }
418    List<Task<? extends Serializable>> deps = getDependentTasks();
419    if (deps != null) {
420      for (Task<? extends Serializable> d : deps) {
421        if (d.ancestorOrSelf(desc)) {
422          return true;
423        }
424      }
425    }
426    return false;
427  }
428
429  public List<Task<? extends Serializable>> getFeedSubscribers() {
430    return feedSubscribers;
431  }
432
433  public void setFeedSubscribers(List<Task<? extends Serializable>> s) {
434    feedSubscribers = s;
435  }
436
437  // push the feed to its subscribers
438  protected void pushFeed(FeedType feedType, Object feedValue) {
439    if (feedSubscribers != null) {
440      for (Task<? extends Serializable> s : feedSubscribers) {
441        s.receiveFeed(feedType, feedValue);
442      }
443    }
444  }
445
446  // a subscriber accept the feed and do something depending on the Task type
447  protected void receiveFeed(FeedType feedType, Object feedValue) {
448  }
449
450  protected void cloneConf() {
451    if (!clonedConf) {
452      clonedConf = true;
453      conf = new HiveConf(conf);
454    }
455  }
456
457
458  public int getTaskTag() {
459    return taskTag;
460  }
461
462  public void setTaskTag(int taskTag) {
463    this.taskTag = taskTag;
464  }
465
466  public boolean isLocalMode() {
467    return isLocalMode;
468  }
469
470  public void setLocalMode(boolean isLocalMode) {
471    this.isLocalMode = isLocalMode;
472  }
473}