PageRenderTime 65ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java

https://github.com/jlym/hive
Java | 475 lines | 306 code | 75 blank | 94 comment | 48 complexity | 72feac7958fc7934b8a2033bec344c20 MD5 | raw file
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec;
  19. import java.io.IOException;
  20. import java.io.Serializable;
  21. import java.util.ArrayList;
  22. import java.util.HashMap;
  23. import java.util.LinkedList;
  24. import java.util.List;
  25. import org.apache.commons.logging.Log;
  26. import org.apache.commons.logging.LogFactory;
  27. import org.apache.hadoop.hive.conf.HiveConf;
  28. import org.apache.hadoop.hive.ql.Context;
  29. import org.apache.hadoop.hive.ql.DriverContext;
  30. import org.apache.hadoop.hive.ql.QueryPlan;
  31. import org.apache.hadoop.hive.ql.lib.Node;
  32. import org.apache.hadoop.hive.ql.metadata.Hive;
  33. import org.apache.hadoop.hive.ql.metadata.HiveException;
  34. import org.apache.hadoop.hive.ql.session.SessionState;
  35. import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
  36. import org.apache.hadoop.util.StringUtils;
  37. /**
  38. * Task implementation.
  39. **/
  40. public abstract class Task<T extends Serializable> implements Serializable, Node {
  41. private static final long serialVersionUID = 1L;
  42. protected transient boolean started;
  43. protected transient boolean initialized;
  44. protected transient boolean isdone;
  45. protected transient boolean queued;
  46. protected transient HiveConf conf;
  47. protected transient Hive db;
  48. protected transient Log LOG;
  49. protected transient LogHelper console;
  50. protected transient QueryPlan queryPlan;
  51. protected transient TaskHandle taskHandle;
  52. protected transient HashMap<String, Long> taskCounters;
  53. protected transient DriverContext driverContext;
  54. protected transient boolean clonedConf = false;
  55. protected Task<? extends Serializable> backupTask;
  56. protected List<Task<? extends Serializable>> backupChildrenTasks = new ArrayList<Task<? extends Serializable>>();
  57. protected int taskTag;
  58. private boolean isLocalMode =false;
  59. public static final int NO_TAG = 0;
  60. public static final int COMMON_JOIN = 1;
  61. public static final int CONVERTED_MAPJOIN = 2;
  62. public static final int CONVERTED_LOCAL_MAPJOIN = 3;
  63. public static final int BACKUP_COMMON_JOIN = 4;
  64. public static final int LOCAL_MAPJOIN=5;
  65. // Descendants tasks who subscribe feeds from this task
  66. protected transient List<Task<? extends Serializable>> feedSubscribers;
  67. public static enum FeedType {
  68. DYNAMIC_PARTITIONS, // list of dynamic partitions
  69. };
  70. // Bean methods
  71. protected List<Task<? extends Serializable>> childTasks;
  72. protected List<Task<? extends Serializable>> parentTasks;
  73. public Task() {
  74. isdone = false;
  75. started = false;
  76. initialized = false;
  77. queued = false;
  78. LOG = LogFactory.getLog(this.getClass().getName());
  79. this.taskCounters = new HashMap<String, Long>();
  80. taskTag = Task.NO_TAG;
  81. }
  82. public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  83. this.queryPlan = queryPlan;
  84. isdone = false;
  85. started = false;
  86. setInitialized();
  87. this.conf = conf;
  88. try {
  89. db = Hive.get(conf);
  90. } catch (HiveException e) {
  91. // Bail out ungracefully - we should never hit
  92. // this here - but would have hit it in SemanticAnalyzer
  93. LOG.error(StringUtils.stringifyException(e));
  94. throw new RuntimeException(e);
  95. }
  96. this.driverContext = driverContext;
  97. console = new LogHelper(LOG);
  98. }
  99. /**
  100. * This method is called in the Driver on every task. It updates counters and calls execute(),
  101. * which is overridden in each task
  102. *
  103. * @return return value of execute()
  104. */
  105. public int executeTask() {
  106. try {
  107. SessionState ss = SessionState.get();
  108. this.setStarted();
  109. if (ss != null) {
  110. ss.getHiveHistory().logPlanProgress(queryPlan);
  111. }
  112. int retval = execute(driverContext);
  113. this.setDone();
  114. if (ss != null) {
  115. ss.getHiveHistory().logPlanProgress(queryPlan);
  116. }
  117. return retval;
  118. } catch (IOException e) {
  119. throw new RuntimeException(e.getMessage());
  120. }
  121. }
  122. /**
  123. * This method is overridden in each Task. TODO execute should return a TaskHandle.
  124. *
  125. * @return status of executing the task
  126. */
  127. protected abstract int execute(DriverContext driverContext);
  128. // dummy method - FetchTask overwrites this
  129. public boolean fetch(ArrayList<String> res) throws IOException {
  130. assert false;
  131. return false;
  132. }
  133. public void setChildTasks(List<Task<? extends Serializable>> childTasks) {
  134. this.childTasks = childTasks;
  135. }
  136. public List<? extends Node> getChildren() {
  137. return getChildTasks();
  138. }
  139. public List<Task<? extends Serializable>> getChildTasks() {
  140. return childTasks;
  141. }
  142. public void setParentTasks(List<Task<? extends Serializable>> parentTasks) {
  143. this.parentTasks = parentTasks;
  144. }
  145. public List<Task<? extends Serializable>> getParentTasks() {
  146. return parentTasks;
  147. }
  148. public Task<? extends Serializable> getBackupTask() {
  149. return backupTask;
  150. }
  151. public void setBackupTask(Task<? extends Serializable> backupTask) {
  152. this.backupTask = backupTask;
  153. }
  154. public List<Task<? extends Serializable>> getBackupChildrenTasks() {
  155. return backupChildrenTasks;
  156. }
  157. public void setBackupChildrenTasks(List<Task<? extends Serializable>> backupChildrenTasks) {
  158. this.backupChildrenTasks = backupChildrenTasks;
  159. }
  160. public Task<? extends Serializable> getAndInitBackupTask() {
  161. if (backupTask != null) {
  162. // first set back the backup task with its children task.
  163. for (Task<? extends Serializable> backupChild : backupChildrenTasks) {
  164. backupChild.getParentTasks().add(backupTask);
  165. }
  166. // recursively remove task from its children tasks if this task doesn't have any parent task
  167. this.removeFromChildrenTasks();
  168. }
  169. return backupTask;
  170. }
  171. public void removeFromChildrenTasks() {
  172. List<Task<? extends Serializable>> childrenTasks = this.getChildTasks();
  173. if (childrenTasks == null) {
  174. return;
  175. }
  176. for (Task<? extends Serializable> childTsk : childrenTasks) {
  177. // remove this task from its children tasks
  178. childTsk.getParentTasks().remove(this);
  179. // recursively remove non-parent task from its children
  180. List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks();
  181. if (siblingTasks == null || siblingTasks.size() == 0) {
  182. childTsk.removeFromChildrenTasks();
  183. }
  184. }
  185. return;
  186. }
  187. /**
  188. * The default dependent tasks are just child tasks, but different types could implement their own
  189. * (e.g. ConditionalTask will use the listTasks as dependents).
  190. *
  191. * @return a list of tasks that are dependent on this task.
  192. */
  193. public List<Task<? extends Serializable>> getDependentTasks() {
  194. return getChildTasks();
  195. }
  196. /**
  197. * Add a dependent task on the current task. Return if the dependency already existed or is this a
  198. * new one
  199. *
  200. * @return true if the task got added false if it already existed
  201. */
  202. public boolean addDependentTask(Task<? extends Serializable> dependent) {
  203. boolean ret = false;
  204. if (getChildTasks() == null) {
  205. setChildTasks(new ArrayList<Task<? extends Serializable>>());
  206. }
  207. if (!getChildTasks().contains(dependent)) {
  208. ret = true;
  209. getChildTasks().add(dependent);
  210. if (dependent.getParentTasks() == null) {
  211. dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
  212. }
  213. if (!dependent.getParentTasks().contains(this)) {
  214. dependent.getParentTasks().add(this);
  215. }
  216. }
  217. return ret;
  218. }
  219. /**
  220. * Remove the dependent task.
  221. *
  222. * @param dependent
  223. * the task to remove
  224. */
  225. public void removeDependentTask(Task<? extends Serializable> dependent) {
  226. if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
  227. getChildTasks().remove(dependent);
  228. if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
  229. dependent.getParentTasks().remove(this);
  230. }
  231. }
  232. }
  233. public void setStarted() {
  234. this.started = true;
  235. }
  236. public boolean started() {
  237. return started;
  238. }
  239. public boolean done() {
  240. return isdone;
  241. }
  242. public void setDone() {
  243. isdone = true;
  244. }
  245. public void setQueued() {
  246. queued = true;
  247. }
  248. public boolean getQueued() {
  249. return queued;
  250. }
  251. public void setInitialized() {
  252. initialized = true;
  253. }
  254. public boolean getInitialized() {
  255. return initialized;
  256. }
  257. public boolean isRunnable() {
  258. boolean isrunnable = true;
  259. if (parentTasks != null) {
  260. for (Task<? extends Serializable> parent : parentTasks) {
  261. if (!parent.done()) {
  262. isrunnable = false;
  263. break;
  264. }
  265. }
  266. }
  267. return isrunnable;
  268. }
  269. protected String id;
  270. protected T work;
  271. public void setWork(T work) {
  272. this.work = work;
  273. }
  274. public T getWork() {
  275. return work;
  276. }
  277. public void setId(String id) {
  278. this.id = id;
  279. }
  280. public String getId() {
  281. return id;
  282. }
  283. public boolean isMapRedTask() {
  284. return false;
  285. }
  286. public boolean isMapRedLocalTask() {
  287. return false;
  288. }
  289. public boolean hasReduce() {
  290. return false;
  291. }
  292. public HashMap<String, Long> getCounters() {
  293. return taskCounters;
  294. }
  295. /**
  296. * Should be overridden to return the type of the specific task among the types in TaskType.
  297. *
  298. * @return TaskTypeType.* or -1 if not overridden
  299. */
  300. public int getType() {
  301. assert false;
  302. return -1;
  303. }
  304. /**
  305. * If this task uses any map-reduce intermediate data (either for reading or for writing),
  306. * localize them (using the supplied Context). Map-Reduce intermediate directories are allocated
  307. * using Context.getMRTmpFileURI() and can be localized using localizeMRTmpFileURI().
  308. *
  309. * This method is declared abstract to force any task code to explicitly deal with this aspect of
  310. * execution.
  311. *
  312. * @param ctx
  313. * context object with which to localize
  314. */
  315. abstract protected void localizeMRTmpFilesImpl(Context ctx);
  316. /**
  317. * Localize a task tree
  318. *
  319. * @param ctx
  320. * context object with which to localize
  321. */
  322. public final void localizeMRTmpFiles(Context ctx) {
  323. localizeMRTmpFilesImpl(ctx);
  324. if (childTasks == null) {
  325. return;
  326. }
  327. for (Task<? extends Serializable> t : childTasks) {
  328. t.localizeMRTmpFiles(ctx);
  329. }
  330. }
  331. /**
  332. * Subscribe the feed of publisher. To prevent cycles, a task can only subscribe to its ancestor.
  333. * Feed is a generic form of execution-time feedback (type, value) pair from one task to another
  334. * task. Examples include dynamic partitions (which are only available at execution time). The
  335. * MoveTask may pass the list of dynamic partitions to the StatsTask since after the MoveTask the
  336. * list of dynamic partitions are lost (MoveTask moves them to the table's destination directory
  337. * which is mixed with old partitions).
  338. *
  339. * @param publisher
  340. * this feed provider.
  341. */
  342. public void subscribeFeed(Task<? extends Serializable> publisher) {
  343. if (publisher != this && publisher.ancestorOrSelf(this)) {
  344. if (publisher.getFeedSubscribers() == null) {
  345. publisher.setFeedSubscribers(new LinkedList<Task<? extends Serializable>>());
  346. }
  347. publisher.getFeedSubscribers().add(this);
  348. }
  349. }
  350. // return true if this task is an ancestor of itself of parameter desc
  351. private boolean ancestorOrSelf(Task<? extends Serializable> desc) {
  352. if (this == desc) {
  353. return true;
  354. }
  355. List<Task<? extends Serializable>> deps = getDependentTasks();
  356. if (deps != null) {
  357. for (Task<? extends Serializable> d : deps) {
  358. if (d.ancestorOrSelf(desc)) {
  359. return true;
  360. }
  361. }
  362. }
  363. return false;
  364. }
  365. public List<Task<? extends Serializable>> getFeedSubscribers() {
  366. return feedSubscribers;
  367. }
  368. public void setFeedSubscribers(List<Task<? extends Serializable>> s) {
  369. feedSubscribers = s;
  370. }
  371. // push the feed to its subscribers
  372. protected void pushFeed(FeedType feedType, Object feedValue) {
  373. if (feedSubscribers != null) {
  374. for (Task<? extends Serializable> s : feedSubscribers) {
  375. s.receiveFeed(feedType, feedValue);
  376. }
  377. }
  378. }
  379. // a subscriber accept the feed and do something depending on the Task type
  380. protected void receiveFeed(FeedType feedType, Object feedValue) {
  381. }
  382. protected void cloneConf() {
  383. if (!clonedConf) {
  384. clonedConf = true;
  385. conf = new HiveConf(conf);
  386. }
  387. }
  388. public int getTaskTag() {
  389. return taskTag;
  390. }
  391. public void setTaskTag(int taskTag) {
  392. this.taskTag = taskTag;
  393. }
  394. public boolean isLocalMode() {
  395. return isLocalMode;
  396. }
  397. public void setLocalMode(boolean isLocalMode) {
  398. this.isLocalMode = isLocalMode;
  399. }
  400. }