
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.physical;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin;
import org.apache.hadoop.hive.ql.plan.ConditionalWork;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

/**
 * GenMRSkewJoinProcessor.
 *
 */
public final class GenMRSkewJoinProcessor {

  private GenMRSkewJoinProcessor() {
    // prevent instantiation
  }
  /**
   * Create tasks for processing skew joins. The idea (HIVE-964) is to use
   * separate jobs and map-joins to handle skew joins.
   * <p>
   * <ul>
   * <li>
   * The number of MR jobs to handle skew keys is the number of tables minus 1
   * (we can stream the last table, so big keys in the last table will not be
   * a problem).
   * <li>
   * At runtime in Join, we output big keys in one table into one corresponding
   * directory, and all matching keys in the other tables into different dirs
   * (one for each table). The directories will look like:
   * <ul>
   * <li>
   * dir-T1-bigkeys (containing big keys in T1), dir-T2-keys (containing keys
   * which are big in T1), dir-T3-keys (containing keys which are big in T1), ...
   * <li>
   * dir-T1-keys (containing keys which are big in T2), dir-T2-bigkeys
   * (containing big keys in T2), dir-T3-keys (containing keys which are big
   * in T2), ...
   * <li>
   * dir-T1-keys (containing keys which are big in T3), dir-T2-keys (containing
   * keys which are big in T3), dir-T3-bigkeys (containing big keys in T3), ...
   * </ul>
   * </ul>
   * For each table, we launch one map-join job, taking the directory containing
   * big keys in this table and the corresponding dirs in the other tables as
   * input. (Actually, one job for one row in the list above.)
   * <p>
   * For more discussion, please check
   * https://issues.apache.org/jira/browse/HIVE-964.
   */
  public static void processSkewJoin(JoinOperator joinOp,
      Task<? extends Serializable> currTask, ParseContext parseCtx)
      throws SemanticException {

    // We are trying to add map-joins to handle skew keys, and map-join
    // currently does not work with outer joins.
    if (!GenMRSkewJoinProcessor.skewJoinEnabled(parseCtx.getConf(), joinOp)) {
      return;
    }

    String baseTmpDir = parseCtx.getContext().getMRTmpFileURI();

    JoinDesc joinDescriptor = joinOp.getConf();
    Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
    int numAliases = joinValues.size();

    Map<Byte, String> bigKeysDirMap = new HashMap<Byte, String>();
    Map<Byte, Map<Byte, String>> smallKeysDirMap = new HashMap<Byte, Map<Byte, String>>();
    Map<Byte, String> skewJoinJobResultsDir = new HashMap<Byte, String>();
    Byte[] tags = joinDescriptor.getTagOrder();
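
    // For each table alias (tag), allocate a big-keys directory, one
    // small-keys directory per other table, and a result directory under
    // the MR scratch dir; the join operator will spill skewed rows there.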
    for (int i = 0; i < numAliases; i++) {
      Byte alias = tags[i];
      String bigKeysDir = getBigKeysDir(baseTmpDir, alias);
      bigKeysDirMap.put(alias, bigKeysDir);
      Map<Byte, String> smallKeysMap = new HashMap<Byte, String>();
      smallKeysDirMap.put(alias, smallKeysMap);
      for (Byte src2 : tags) {
        if (!src2.equals(alias)) {
          smallKeysMap.put(src2, getSmallKeysDir(baseTmpDir, alias, src2));
        }
      }
      skewJoinJobResultsDir.put(alias,
          getBigKeysSkewJoinResultDir(baseTmpDir, alias));
    }
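
    // Record the skew handling bookkeeping on the join descriptor so the
    // JoinOperator knows where to spill big keys at runtime, and how many
    // rows with the same key qualify that key as skewed (HIVESKEWJOINKEY).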
    joinDescriptor.setHandleSkewJoin(true);
    joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
    joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
    joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(),
        HiveConf.ConfVars.HIVESKEWJOINKEY));

    HashMap<String, Task<? extends Serializable>> bigKeysDirToTaskMap =
        new HashMap<String, Task<? extends Serializable>>();
    List<Serializable> listWorks = new ArrayList<Serializable>();
    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();

    MapredWork currPlan = (MapredWork) currTask.getWork();
    TableDesc keyTblDesc = (TableDesc) currPlan.getKeyDesc().clone();
    List<String> joinKeys = Utilities
        .getColumnNames(keyTblDesc.getProperties());
    List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc
        .getProperties());

    Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
    Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
    // Used to create the MapJoinDesc; must be in tag order.
    List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
    for (Byte tag : tags) {
      newJoinValueTblDesc.add(null);
    }
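
    // For every alias, build the schema of the spilled table: value columns
    // first (renamed i_VALUE_k), then the join key columns, plus the matching
    // ExprNodeColumnDesc lists that the follow-up map-join will evaluate.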
    for (int i = 0; i < numAliases; i++) {
      Byte alias = tags[i];
      List<ExprNodeDesc> valueCols = joinValues.get(alias);
      String colNames = "";
      String colTypes = "";
      int columnSize = valueCols.size();
      List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
      List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();

      boolean first = true;
      for (int k = 0; k < columnSize; k++) {
        TypeInfo type = valueCols.get(k).getTypeInfo();
        String newColName = i + "_VALUE_" + k; // any name, it does not matter.
        newValueExpr
            .add(new ExprNodeColumnDesc(type, newColName, "" + i, false));
        if (!first) {
          colNames = colNames + ",";
          colTypes = colTypes + ",";
        }
        first = false;
        colNames = colNames + newColName;
        colTypes = colTypes + valueCols.get(k).getTypeString();
      }

      // The join keys are placed at the end of the spilled table.
      for (int k = 0; k < joinKeys.size(); k++) {
        if (!first) {
          colNames = colNames + ",";
          colTypes = colTypes + ",";
        }
        first = false;
        colNames = colNames + joinKeys.get(k);
        colTypes = colTypes + joinKeyTypes.get(k);
        newKeyExpr.add(new ExprNodeColumnDesc(TypeInfoFactory
            .getPrimitiveTypeInfo(joinKeyTypes.get(k)), joinKeys.get(k),
            "" + i, false));
      }

      newJoinValues.put(alias, newValueExpr);
      newJoinKeys.put(alias, newKeyExpr);
      tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));

      // Construct the value-only table desc (no join keys).
      String valueColNames = "";
      String valueColTypes = "";
      first = true;
      for (int k = 0; k < columnSize; k++) {
        String newColName = i + "_VALUE_" + k; // any name, it does not matter.
        if (!first) {
          valueColNames = valueColNames + ",";
          valueColTypes = valueColTypes + ",";
        }
        valueColNames = valueColNames + newColName;
        valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
        first = false;
      }
      newJoinValueTblDesc.set(i,
          Utilities.getTableDesc(valueColNames, valueColTypes));
    }

    joinDescriptor.setSkewKeysValuesTables(tableDescList);
    joinDescriptor.setKeyTableDesc(keyTblDesc);
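
    // One follow-up map-join job per table except the last: the last table
    // can be streamed, so its big keys never need their own job.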
    for (int i = 0; i < numAliases - 1; i++) {
      Byte src = tags[i];

      MapredWork newPlan = PlanUtils.getMapRedWork();

      // This config lookup was added only for testing.
      boolean mapperCannotSpanPartns =
          parseCtx.getConf().getBoolVar(
              HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
      newPlan.setMapperCannotSpanPartns(mapperCannotSpanPartns);
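
      // Deep-copy the current plan by round-tripping it through its XML
      // serialization; the clone's reducer (the original JoinOperator) is
      // used below to find the operators downstream of the join.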
      MapredWork clonePlan = null;
      try {
        String xmlPlan = currPlan.toXML();
        StringBuilder sb = new StringBuilder(xmlPlan);
        ByteArrayInputStream bis;
        bis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8"));
        clonePlan = Utilities.deserializeMapRedWork(bis, parseCtx.getConf());
      } catch (UnsupportedEncodingException e) {
        throw new SemanticException(e);
      }
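
      // One TableScanOperator per alias, reading the spilled-table schema;
      // these become the parents of the new map-join operator.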
      Operator<? extends Serializable>[] parentOps = new TableScanOperator[tags.length];
      for (int k = 0; k < tags.length; k++) {
        Operator<? extends Serializable> ts = OperatorFactory.get(
            TableScanDesc.class, (RowSchema) null);
        ((TableScanOperator) ts).setTableDesc(tableDescList.get((byte) k));
        parentOps[k] = ts;
      }
      Operator<? extends Serializable> tblScan_op = parentOps[i];

      ArrayList<String> aliases = new ArrayList<String>();
      String alias = src.toString();
      aliases.add(alias);
      String bigKeyDirPath = bigKeysDirMap.get(src);
      newPlan.getPathToAliases().put(bigKeyDirPath, aliases);
      newPlan.getAliasToWork().put(alias, tblScan_op);
      PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
      newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part);
      newPlan.getAliasToPartnInfo().put(alias, part);

      Operator<? extends Serializable> reducer = clonePlan.getReducer();
      assert reducer instanceof JoinOperator;
      JoinOperator cloneJoinOp = (JoinOperator) reducer;

      MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
          newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,
          joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin());
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
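
      // The small tables are not read through the mapper; a MapredLocalWork
      // fetches each alias's small-keys directory so the map-join can load
      // them into its in-memory tables.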
      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, String> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends Serializable> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = new Path(smallTblDirs.get(small_alias));
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir.toString(), tableDescList.get(small_alias)));
      }
      newPlan.setMapLocalWork(localPlan);

      // Construct a map-join and set it as the child operator of tblScan_op.
      MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory
          .getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps);

      // Change the children of the original join operator to point to the
      // map-join operator.
      List<Operator<? extends Serializable>> childOps = cloneJoinOp
          .getChildOperators();
      for (Operator<? extends Serializable> childOp : childOps) {
        childOp.replaceParent(cloneJoinOp, mapJoinOp);
      }
      mapJoinOp.setChildOperators(childOps);

      HiveConf jc = new HiveConf(parseCtx.getConf(),
          GenMRSkewJoinProcessor.class);
      newPlan.setNumMapTasks(HiveConf
          .getIntVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
      newPlan
          .setMinSplitSize(HiveConf.getLongVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
      newPlan.setInputformat(HiveInputFormat.class.getName());
      Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(
          newPlan, jc);
      bigKeysDirToTaskMap.put(bigKeyDirPath, skewJoinMapJoinTask);
      listWorks.add(skewJoinMapJoinTask.getWork());
      listTasks.add(skewJoinMapJoinTask);
    }
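
    // Wrap all the skew-handling jobs in a ConditionalTask spliced between
    // currTask and its old children. At runtime the ConditionalResolverSkewJoin
    // inspects which big-key directories actually received rows and launches
    // only the map-join tasks that are needed.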
    ConditionalWork cndWork = new ConditionalWork(listWorks);
    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork,
        parseCtx.getConf());
    cndTsk.setListTasks(listTasks);
    cndTsk.setResolver(new ConditionalResolverSkewJoin());
    cndTsk
        .setResolverCtx(new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(
            bigKeysDirToTaskMap));

    List<Task<? extends Serializable>> oldChildTasks = currTask.getChildTasks();
    currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
    currTask.addDependentTask(cndTsk);

    if (oldChildTasks != null) {
      for (Task<? extends Serializable> tsk : cndTsk.getListTasks()) {
        for (Task<? extends Serializable> oldChild : oldChildTasks) {
          tsk.addDependentTask(oldChild);
        }
      }
    }
  }
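
  /**
   * Skew join handling applies only when HIVESKEWJOIN is enabled, the join
   * contains no outer joins, and the tag order is the identity sequence
   * 0, 1, 2, ...
   */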
  public static boolean skewJoinEnabled(HiveConf conf, JoinOperator joinOp) {
    if (conf != null && !conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
      return false;
    }

    if (!joinOp.getConf().isNoOuterJoin()) {
      return false;
    }

    byte pos = 0;
    for (Byte tag : joinOp.getConf().getTagOrder()) {
      if (tag != pos) {
        return false;
      }
      pos++;
    }

    return true;
  }

  private static final String skewJoinPrefix = "hive_skew_join";
  private static final String UNDERLINE = "_";
  private static final String BIGKEYS = "bigkeys";
  private static final String SMALLKEYS = "smallkeys";
  private static final String RESULTS = "results";
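
  // Directory-name helpers. For example, with tag 0 these produce paths
  // ending in hive_skew_join_bigkeys_0, hive_skew_join_bigkeys_results_0,
  // and hive_skew_join_smallkeys_0_1 (big table 0, small table 1).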
  static String getBigKeysDir(String baseDir, Byte srcTbl) {
    return baseDir + File.separator + skewJoinPrefix + UNDERLINE + BIGKEYS
        + UNDERLINE + srcTbl;
  }

  static String getBigKeysSkewJoinResultDir(String baseDir, Byte srcTbl) {
    return baseDir + File.separator + skewJoinPrefix + UNDERLINE + BIGKEYS
        + UNDERLINE + RESULTS + UNDERLINE + srcTbl;
  }

  static String getSmallKeysDir(String baseDir, Byte srcTblBigTbl,
      Byte srcTblSmallTbl) {
    return baseDir + File.separator + skewJoinPrefix + UNDERLINE + SMALLKEYS
        + UNDERLINE + srcTblBigTbl + UNDERLINE + srcTblSmallTbl;
  }
}