PageRenderTime 46ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java

#
Java | 473 lines | 304 code | 75 blank | 94 comment | 48 complexity | e8d8b6a94175d023ec35c9c37bb9b5cc MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec;
  19. import java.io.IOException;
  20. import java.io.Serializable;
  21. import java.util.ArrayList;
  22. import java.util.HashMap;
  23. import java.util.LinkedList;
  24. import java.util.List;
  25. import org.apache.commons.logging.Log;
  26. import org.apache.commons.logging.LogFactory;
  27. import org.apache.hadoop.hive.conf.HiveConf;
  28. import org.apache.hadoop.hive.ql.Context;
  29. import org.apache.hadoop.hive.ql.DriverContext;
  30. import org.apache.hadoop.hive.ql.QueryPlan;
  31. import org.apache.hadoop.hive.ql.lib.Node;
  32. import org.apache.hadoop.hive.ql.metadata.Hive;
  33. import org.apache.hadoop.hive.ql.metadata.HiveException;
  34. import org.apache.hadoop.hive.ql.plan.api.StageType;
  35. import org.apache.hadoop.hive.ql.session.SessionState;
  36. import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
  37. import org.apache.hadoop.util.StringUtils;
  38. /**
  39. * Task implementation.
  40. **/
  41. public abstract class Task<T extends Serializable> implements Serializable, Node {
  42. private static final long serialVersionUID = 1L;
  43. protected transient boolean started;
  44. protected transient boolean initialized;
  45. protected transient boolean isdone;
  46. protected transient boolean queued;
  47. protected transient HiveConf conf;
  48. protected transient Hive db;
  49. protected transient Log LOG;
  50. protected transient LogHelper console;
  51. protected transient QueryPlan queryPlan;
  52. protected transient TaskHandle taskHandle;
  53. protected transient HashMap<String, Long> taskCounters;
  54. protected transient DriverContext driverContext;
  55. protected transient boolean clonedConf = false;
  56. protected Task<? extends Serializable> backupTask;
  57. protected List<Task<? extends Serializable>> backupChildrenTasks = new ArrayList<Task<? extends Serializable>>();
  58. protected int taskTag;
  59. private boolean isLocalMode =false;
  60. public static final int NO_TAG = 0;
  61. public static final int COMMON_JOIN = 1;
  62. public static final int CONVERTED_MAPJOIN = 2;
  63. public static final int CONVERTED_LOCAL_MAPJOIN = 3;
  64. public static final int BACKUP_COMMON_JOIN = 4;
  65. public static final int LOCAL_MAPJOIN=5;
  66. // Descendants tasks who subscribe feeds from this task
  67. protected transient List<Task<? extends Serializable>> feedSubscribers;
  68. public static enum FeedType {
  69. DYNAMIC_PARTITIONS, // list of dynamic partitions
  70. };
  71. // Bean methods
  72. protected List<Task<? extends Serializable>> childTasks;
  73. protected List<Task<? extends Serializable>> parentTasks;
  74. public Task() {
  75. isdone = false;
  76. started = false;
  77. initialized = false;
  78. queued = false;
  79. LOG = LogFactory.getLog(this.getClass().getName());
  80. this.taskCounters = new HashMap<String, Long>();
  81. taskTag = Task.NO_TAG;
  82. }
  83. public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  84. this.queryPlan = queryPlan;
  85. isdone = false;
  86. started = false;
  87. setInitialized();
  88. this.conf = conf;
  89. try {
  90. db = Hive.get(conf);
  91. } catch (HiveException e) {
  92. // Bail out ungracefully - we should never hit
  93. // this here - but would have hit it in SemanticAnalyzer
  94. LOG.error(StringUtils.stringifyException(e));
  95. throw new RuntimeException(e);
  96. }
  97. this.driverContext = driverContext;
  98. console = new LogHelper(LOG);
  99. }
  100. /**
  101. * This method is called in the Driver on every task. It updates counters and calls execute(),
  102. * which is overridden in each task
  103. *
  104. * @return return value of execute()
  105. */
  106. public int executeTask() {
  107. try {
  108. SessionState ss = SessionState.get();
  109. this.setStarted();
  110. if (ss != null) {
  111. ss.getHiveHistory().logPlanProgress(queryPlan);
  112. }
  113. int retval = execute(driverContext);
  114. this.setDone();
  115. if (ss != null) {
  116. ss.getHiveHistory().logPlanProgress(queryPlan);
  117. }
  118. return retval;
  119. } catch (IOException e) {
  120. throw new RuntimeException(e.getMessage());
  121. }
  122. }
  123. /**
  124. * This method is overridden in each Task. TODO execute should return a TaskHandle.
  125. *
  126. * @return status of executing the task
  127. */
  128. protected abstract int execute(DriverContext driverContext);
  129. // dummy method - FetchTask overwrites this
  130. public boolean fetch(ArrayList<String> res) throws IOException {
  131. assert false;
  132. return false;
  133. }
  134. public void setChildTasks(List<Task<? extends Serializable>> childTasks) {
  135. this.childTasks = childTasks;
  136. }
  137. public List<? extends Node> getChildren() {
  138. return getChildTasks();
  139. }
  140. public List<Task<? extends Serializable>> getChildTasks() {
  141. return childTasks;
  142. }
  143. public void setParentTasks(List<Task<? extends Serializable>> parentTasks) {
  144. this.parentTasks = parentTasks;
  145. }
  146. public List<Task<? extends Serializable>> getParentTasks() {
  147. return parentTasks;
  148. }
  149. public Task<? extends Serializable> getBackupTask() {
  150. return backupTask;
  151. }
  152. public void setBackupTask(Task<? extends Serializable> backupTask) {
  153. this.backupTask = backupTask;
  154. }
  155. public List<Task<? extends Serializable>> getBackupChildrenTasks() {
  156. return backupChildrenTasks;
  157. }
  158. public void setBackupChildrenTasks(List<Task<? extends Serializable>> backupChildrenTasks) {
  159. this.backupChildrenTasks = backupChildrenTasks;
  160. }
  161. public Task<? extends Serializable> getAndInitBackupTask() {
  162. if (backupTask != null) {
  163. // first set back the backup task with its children task.
  164. for (Task<? extends Serializable> backupChild : backupChildrenTasks) {
  165. backupChild.getParentTasks().add(backupTask);
  166. }
  167. // recursively remove task from its children tasks if this task doesn't have any parent task
  168. this.removeFromChildrenTasks();
  169. }
  170. return backupTask;
  171. }
  172. public void removeFromChildrenTasks() {
  173. List<Task<? extends Serializable>> childrenTasks = this.getChildTasks();
  174. if (childrenTasks == null) {
  175. return;
  176. }
  177. for (Task<? extends Serializable> childTsk : childrenTasks) {
  178. // remove this task from its children tasks
  179. childTsk.getParentTasks().remove(this);
  180. // recursively remove non-parent task from its children
  181. List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks();
  182. if (siblingTasks == null || siblingTasks.size() == 0) {
  183. childTsk.removeFromChildrenTasks();
  184. }
  185. }
  186. return;
  187. }
  188. /**
  189. * The default dependent tasks are just child tasks, but different types could implement their own
  190. * (e.g. ConditionalTask will use the listTasks as dependents).
  191. *
  192. * @return a list of tasks that are dependent on this task.
  193. */
  194. public List<Task<? extends Serializable>> getDependentTasks() {
  195. return getChildTasks();
  196. }
  197. /**
  198. * Add a dependent task on the current task. Return if the dependency already existed or is this a
  199. * new one
  200. *
  201. * @return true if the task got added false if it already existed
  202. */
  203. public boolean addDependentTask(Task<? extends Serializable> dependent) {
  204. boolean ret = false;
  205. if (getChildTasks() == null) {
  206. setChildTasks(new ArrayList<Task<? extends Serializable>>());
  207. }
  208. if (!getChildTasks().contains(dependent)) {
  209. ret = true;
  210. getChildTasks().add(dependent);
  211. if (dependent.getParentTasks() == null) {
  212. dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
  213. }
  214. if (!dependent.getParentTasks().contains(this)) {
  215. dependent.getParentTasks().add(this);
  216. }
  217. }
  218. return ret;
  219. }
  220. /**
  221. * Remove the dependent task.
  222. *
  223. * @param dependent
  224. * the task to remove
  225. */
  226. public void removeDependentTask(Task<? extends Serializable> dependent) {
  227. if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
  228. getChildTasks().remove(dependent);
  229. if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
  230. dependent.getParentTasks().remove(this);
  231. }
  232. }
  233. }
  234. public void setStarted() {
  235. this.started = true;
  236. }
  237. public boolean started() {
  238. return started;
  239. }
  240. public boolean done() {
  241. return isdone;
  242. }
  243. public void setDone() {
  244. isdone = true;
  245. }
  246. public void setQueued() {
  247. queued = true;
  248. }
  249. public boolean getQueued() {
  250. return queued;
  251. }
  252. public void setInitialized() {
  253. initialized = true;
  254. }
  255. public boolean getInitialized() {
  256. return initialized;
  257. }
  258. public boolean isRunnable() {
  259. boolean isrunnable = true;
  260. if (parentTasks != null) {
  261. for (Task<? extends Serializable> parent : parentTasks) {
  262. if (!parent.done()) {
  263. isrunnable = false;
  264. break;
  265. }
  266. }
  267. }
  268. return isrunnable;
  269. }
  270. protected String id;
  271. protected T work;
  272. public void setWork(T work) {
  273. this.work = work;
  274. }
  275. public T getWork() {
  276. return work;
  277. }
  278. public void setId(String id) {
  279. this.id = id;
  280. }
  281. public String getId() {
  282. return id;
  283. }
  284. public boolean isMapRedTask() {
  285. return false;
  286. }
  287. public boolean isMapRedLocalTask() {
  288. return false;
  289. }
  290. public boolean hasReduce() {
  291. return false;
  292. }
  293. public HashMap<String, Long> getCounters() {
  294. return taskCounters;
  295. }
  296. /**
  297. * Should be overridden to return the type of the specific task among the types in StageType.
  298. *
  299. * @return StageType.* or null if not overridden
  300. */
  301. public abstract StageType getType();
  302. /**
  303. * If this task uses any map-reduce intermediate data (either for reading or for writing),
  304. * localize them (using the supplied Context). Map-Reduce intermediate directories are allocated
  305. * using Context.getMRTmpFileURI() and can be localized using localizeMRTmpFileURI().
  306. *
  307. * This method is declared abstract to force any task code to explicitly deal with this aspect of
  308. * execution.
  309. *
  310. * @param ctx
  311. * context object with which to localize
  312. */
  313. abstract protected void localizeMRTmpFilesImpl(Context ctx);
  314. /**
  315. * Localize a task tree
  316. *
  317. * @param ctx
  318. * context object with which to localize
  319. */
  320. public final void localizeMRTmpFiles(Context ctx) {
  321. localizeMRTmpFilesImpl(ctx);
  322. if (childTasks == null) {
  323. return;
  324. }
  325. for (Task<? extends Serializable> t : childTasks) {
  326. t.localizeMRTmpFiles(ctx);
  327. }
  328. }
  329. /**
  330. * Subscribe the feed of publisher. To prevent cycles, a task can only subscribe to its ancestor.
  331. * Feed is a generic form of execution-time feedback (type, value) pair from one task to another
  332. * task. Examples include dynamic partitions (which are only available at execution time). The
  333. * MoveTask may pass the list of dynamic partitions to the StatsTask since after the MoveTask the
  334. * list of dynamic partitions are lost (MoveTask moves them to the table's destination directory
  335. * which is mixed with old partitions).
  336. *
  337. * @param publisher
  338. * this feed provider.
  339. */
  340. public void subscribeFeed(Task<? extends Serializable> publisher) {
  341. if (publisher != this && publisher.ancestorOrSelf(this)) {
  342. if (publisher.getFeedSubscribers() == null) {
  343. publisher.setFeedSubscribers(new LinkedList<Task<? extends Serializable>>());
  344. }
  345. publisher.getFeedSubscribers().add(this);
  346. }
  347. }
  348. // return true if this task is an ancestor of itself of parameter desc
  349. private boolean ancestorOrSelf(Task<? extends Serializable> desc) {
  350. if (this == desc) {
  351. return true;
  352. }
  353. List<Task<? extends Serializable>> deps = getDependentTasks();
  354. if (deps != null) {
  355. for (Task<? extends Serializable> d : deps) {
  356. if (d.ancestorOrSelf(desc)) {
  357. return true;
  358. }
  359. }
  360. }
  361. return false;
  362. }
  363. public List<Task<? extends Serializable>> getFeedSubscribers() {
  364. return feedSubscribers;
  365. }
  366. public void setFeedSubscribers(List<Task<? extends Serializable>> s) {
  367. feedSubscribers = s;
  368. }
  369. // push the feed to its subscribers
  370. protected void pushFeed(FeedType feedType, Object feedValue) {
  371. if (feedSubscribers != null) {
  372. for (Task<? extends Serializable> s : feedSubscribers) {
  373. s.receiveFeed(feedType, feedValue);
  374. }
  375. }
  376. }
  377. // a subscriber accept the feed and do something depending on the Task type
  378. protected void receiveFeed(FeedType feedType, Object feedValue) {
  379. }
  380. protected void cloneConf() {
  381. if (!clonedConf) {
  382. clonedConf = true;
  383. conf = new HiveConf(conf);
  384. }
  385. }
  386. public int getTaskTag() {
  387. return taskTag;
  388. }
  389. public void setTaskTag(int taskTag) {
  390. this.taskTag = taskTag;
  391. }
  392. public boolean isLocalMode() {
  393. return isLocalMode;
  394. }
  395. public void setLocalMode(boolean isLocalMode) {
  396. this.isLocalMode = isLocalMode;
  397. }
  398. }