
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java

Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
/**
 * This transformation performs the bucket map join optimization: when the
 * tables of a map join are bucketed on the join keys and their bucket counts
 * are compatible, each mapper only needs to load the matching buckets of the
 * small tables instead of the whole tables.
 */
public class BucketMapJoinOptimizer implements Transform {

  private static final Log LOG = LogFactory.getLog(BucketMapJoinOptimizer.class
      .getName());

  public BucketMapJoinOptimizer() {
  }
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    BucketMapjoinOptProcCtx bucketMapJoinOptimizeCtx = new BucketMapjoinOptProcCtx();

    // process map joins with no reducers pattern
    opRules.put(new RuleRegExp("R1", "MAPJOIN%"), getBucketMapjoinProc(pctx));
    opRules.put(new RuleRegExp("R2", "RS%.*MAPJOIN"), getBucketMapjoinRejectProc(pctx));
    opRules.put(new RuleRegExp("R3", "UNION%.*MAPJOIN%"),
        getBucketMapjoinRejectProc(pctx));
    opRules.put(new RuleRegExp("R4", "MAPJOIN%.*MAPJOIN%"),
        getBucketMapjoinRejectProc(pctx));
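    // R1 tries to convert a map join into a bucket map join; R2-R4 mark map
    // joins as rejected when a ReduceSink, Union, or another MapJoin appears
    // on the walked path above them, since the optimization needs the map join
    // to read its inputs directly from the root table scans in the same mapper.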
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules,
        bucketMapJoinOptimizeCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);

    return pctx;
  }
  private NodeProcessor getBucketMapjoinRejectProc(ParseContext pctx) {
    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack,
          NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {
        MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
        BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx;
        context.listOfRejectedMapjoins.add(mapJoinOp);
        return null;
      }
    };
  }

  private NodeProcessor getBucketMapjoinProc(ParseContext pctx) {
    return new BucketMapjoinOptProc(pctx);
  }

  private NodeProcessor getDefaultProc() {
    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack,
          NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {
        return null;
      }
    };
  }
  class BucketMapjoinOptProc implements NodeProcessor {

    protected ParseContext pGraphContext;

    public BucketMapjoinOptProc(ParseContext pGraphContext) {
      super();
      this.pGraphContext = pGraphContext;
    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx;

      if (context.getListOfRejectedMapjoins().contains(mapJoinOp)) {
        return null;
      }

      QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext().get(mapJoinOp);
      if (joinCxt == null) {
        return null;
      }

      List<String> joinAliases = new ArrayList<String>();
      String[] srcs = joinCxt.getBaseSrc();
      String[] left = joinCxt.getLeftAliases();
      List<String> mapAlias = joinCxt.getMapAliases();
      String baseBigAlias = null;
      for (String s : left) {
        if (s != null && !joinAliases.contains(s)) {
          joinAliases.add(s);
          if (!mapAlias.contains(s)) {
            baseBigAlias = s;
          }
        }
      }
      for (String s : srcs) {
        if (s != null && !joinAliases.contains(s)) {
          joinAliases.add(s);
          if (!mapAlias.contains(s)) {
            baseBigAlias = s;
          }
        }
      }
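      // baseBigAlias is now the one join alias that is not a map-side (small)
      // alias, i.e. the big table that will be streamed through the mappers.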
      MapJoinDesc mjDecs = mapJoinOp.getConf();
      LinkedHashMap<String, Integer> aliasToBucketNumberMapping = new LinkedHashMap<String, Integer>();
      LinkedHashMap<String, List<String>> aliasToBucketFileNamesMapping = new LinkedHashMap<String, List<String>>();
      // right now this code does not work with "a join b on a.key = b.key and
      // a.ds = b.ds", where ds is a partition column. It only works with joins
      // where only one partition is present in each join source table.
      Map<String, Operator<? extends Serializable>> topOps = this.pGraphContext.getTopOps();
      Map<TableScanOperator, Table> topToTable = this.pGraphContext.getTopToTable();

      // (partition to bucket file names) and (partition to bucket number) for
      // the big table
      LinkedHashMap<Partition, List<String>> bigTblPartsToBucketFileNames = new LinkedHashMap<Partition, List<String>>();
      LinkedHashMap<Partition, Integer> bigTblPartsToBucketNumber = new LinkedHashMap<Partition, Integer>();
      for (int index = 0; index < joinAliases.size(); index++) {
        String alias = joinAliases.get(index);
        TableScanOperator tso = (TableScanOperator) topOps.get(alias);
        if (tso == null) {
          return null;
        }
        Table tbl = topToTable.get(tso);
        if (tbl.isPartitioned()) {
          PrunedPartitionList prunedParts = null;
          try {
            prunedParts = pGraphContext.getOpToPartList().get(tso);
            if (prunedParts == null) {
              prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
                  pGraphContext.getPrunedPartitions());
              pGraphContext.getOpToPartList().put(tso, prunedParts);
            }
          } catch (HiveException e) {
            // Has to use full name to make sure it does not conflict with
            // org.apache.commons.lang.StringUtils
            LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
            throw new SemanticException(e.getMessage(), e);
          }
          int partNumber = prunedParts.getConfirmedPartns().size()
              + prunedParts.getUnknownPartns().size();

          if (partNumber > 1) {
            // only allow one partition for small tables
            if (!alias.equals(baseBigAlias)) {
              return null;
            }
            // this is the big table, and it has more than one partition:
            // construct a mapping of (Partition -> bucket file names) and
            // (Partition -> bucket number)
            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
                .iterator();
            while (iter.hasNext()) {
              Partition p = iter.next();
              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
                return null;
              }
              List<String> fileNames = getOnePartitionBucketFileNames(p);
              bigTblPartsToBucketFileNames.put(p, fileNames);
              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
            }
            iter = prunedParts.getUnknownPartns().iterator();
            while (iter.hasNext()) {
              Partition p = iter.next();
              if (!checkBucketColumns(p.getBucketCols(), mjDecs, index)) {
                return null;
              }
              List<String> fileNames = getOnePartitionBucketFileNames(p);
              bigTblPartsToBucketFileNames.put(p, fileNames);
              bigTblPartsToBucketNumber.put(p, p.getBucketCount());
            }
            // If there is more than one partition for the big table,
            // aliasToBucketFileNamesMapping and aliasToBucketNumberMapping will
            // not contain mappings for the big table. Instead, the mappings are
            // kept in bigTblPartsToBucketFileNames and bigTblPartsToBucketNumber.
          } else {
            Partition part = null;
            Iterator<Partition> iter = prunedParts.getConfirmedPartns()
                .iterator();
            if (iter.hasNext()) {
              part = iter.next();
            }
            if (part == null) {
              iter = prunedParts.getUnknownPartns().iterator();
              if (iter.hasNext()) {
                part = iter.next();
              }
            }
            assert part != null;
            Integer num = new Integer(part.getBucketCount());
            aliasToBucketNumberMapping.put(alias, num);
            if (!checkBucketColumns(part.getBucketCols(), mjDecs, index)) {
              return null;
            }
            List<String> fileNames = getOnePartitionBucketFileNames(part);
            aliasToBucketFileNamesMapping.put(alias, fileNames);
            if (alias.equals(baseBigAlias)) {
              bigTblPartsToBucketFileNames.put(part, fileNames);
              bigTblPartsToBucketNumber.put(part, num);
            }
          }
        } else {
          if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index)) {
            return null;
          }
          Integer num = new Integer(tbl.getNumBuckets());
          aliasToBucketNumberMapping.put(alias, num);
          List<String> fileNames = new ArrayList<String>();
          try {
            FileSystem fs = FileSystem.get(tbl.getDataLocation(), this.pGraphContext.getConf());
            FileStatus[] files = fs.listStatus(new Path(tbl.getDataLocation().toString()));
            if (files != null) {
              for (FileStatus file : files) {
                fileNames.add(file.getPath().toString());
              }
            }
          } catch (IOException e) {
            throw new SemanticException(e);
          }
          aliasToBucketFileNamesMapping.put(alias, fileNames);
        }
      }
      // All tables or partitions are bucketed and their bucket counts have been
      // collected; now check that, for the big table and every small table, the
      // larger of the two bucket counts is divisible by the smaller one.
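      // e.g. a big table with 8 buckets can be joined with small tables that
      // have 2, 4, 8 or 16 buckets, but a small table with 3 buckets causes
      // the optimization to be skipped.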
      if (bigTblPartsToBucketNumber.size() > 0) {
        Iterator<Entry<Partition, Integer>> bigTblPartToBucketNumber = bigTblPartsToBucketNumber
            .entrySet().iterator();
        while (bigTblPartToBucketNumber.hasNext()) {
          int bucketNumberInPart = bigTblPartToBucketNumber.next().getValue();
          if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping,
              bucketNumberInPart)) {
            return null;
          }
        }
      } else {
        int bucketNoInBigTbl = aliasToBucketNumberMapping.get(baseBigAlias).intValue();
        if (!checkBucketNumberAgainstBigTable(aliasToBucketNumberMapping,
            bucketNoInBigTbl)) {
          return null;
        }
      }
      MapJoinDesc desc = mapJoinOp.getConf();

      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
          new LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>>();

      // sort bucket names for the big table
      if (bigTblPartsToBucketNumber.size() > 0) {
        Collection<List<String>> bucketNamesAllParts = bigTblPartsToBucketFileNames.values();
        for (List<String> partBucketNames : bucketNamesAllParts) {
          Collections.sort(partBucketNames);
        }
      } else {
        Collections.sort(aliasToBucketFileNamesMapping.get(baseBigAlias));
      }
      // go through all small tables and get the mapping from bucket file name
      // in the big table to bucket file names in small tables.
      for (int j = 0; j < joinAliases.size(); j++) {
        String alias = joinAliases.get(j);
        if (alias.equals(baseBigAlias)) {
          continue;
        }
        Collections.sort(aliasToBucketFileNamesMapping.get(alias));
        LinkedHashMap<String, ArrayList<String>> mapping = new LinkedHashMap<String, ArrayList<String>>();
        aliasBucketFileNameMapping.put(alias, mapping);

        // for each bucket file in big table, get the corresponding bucket file
        // name in the small table.
        if (bigTblPartsToBucketNumber.size() > 0) {
          // more than 1 partition in the big table, do the mapping for each partition
          Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames
              .entrySet().iterator();
          Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum = bigTblPartsToBucketNumber
              .entrySet().iterator();
          while (bigTblPartToBucketNames.hasNext()) {
            assert bigTblPartToBucketNum.hasNext();
            int bigTblBucketNum = bigTblPartToBucketNum.next().getValue().intValue();
            List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
            fillMapping(baseBigAlias, aliasToBucketNumberMapping,
                aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum,
                bigTblBucketNameList, desc.getBucketFileNameMapping());
          }
        } else {
          List<String> bigTblBucketNameList = aliasToBucketFileNamesMapping.get(baseBigAlias);
          int bigTblBucketNum = aliasToBucketNumberMapping.get(baseBigAlias);
          fillMapping(baseBigAlias, aliasToBucketNumberMapping,
              aliasToBucketFileNamesMapping, alias, mapping, bigTblBucketNum,
              bigTblBucketNameList, desc.getBucketFileNameMapping());
        }
      }
      desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
      desc.setBigTableAlias(baseBigAlias);
      return null;
    }
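
    /**
     * For every bucket file of the big table, compute the list of bucket files
     * of the given small table that it has to be joined with, based on the
     * relative bucket counts of the two tables, and record the big-table
     * bucket's position for later use by the map join.
     */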
    private void fillMapping(String baseBigAlias,
        LinkedHashMap<String, Integer> aliasToBucketNumberMapping,
        LinkedHashMap<String, List<String>> aliasToBucketFileNamesMapping,
        String alias, LinkedHashMap<String, ArrayList<String>> mapping,
        int bigTblBucketNum, List<String> bigTblBucketNameList,
        LinkedHashMap<String, Integer> bucketFileNameMapping) {

      for (int index = 0; index < bigTblBucketNameList.size(); index++) {
        String inputBigTBLBucket = bigTblBucketNameList.get(index);
        int smallTblBucketNum = aliasToBucketNumberMapping.get(alias);
        ArrayList<String> resultFileNames = new ArrayList<String>();
        if (bigTblBucketNum >= smallTblBucketNum) {
          // if the big table has more buckets than the current small table,
          // use "MOD" to get small table bucket names. For example, if the big
          // table has 4 buckets and the small table has 2 buckets, then the
          // mapping should be 0->0, 1->1, 2->0, 3->1.
          int toAddSmallIndex = index % smallTblBucketNum;
          if (toAddSmallIndex < aliasToBucketFileNamesMapping.get(alias).size()) {
            resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(toAddSmallIndex));
          }
        } else {
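          // the small table has more buckets than the big table: each big
          // table bucket is joined with every small table bucket whose index
          // is congruent to its own. For example, with a big table of 2
          // buckets and a small table of 4, big bucket 0 maps to small
          // buckets 0 and 2, and big bucket 1 maps to small buckets 1 and 3.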
          int jump = smallTblBucketNum / bigTblBucketNum;
          for (int i = index; i < aliasToBucketFileNamesMapping.get(alias).size(); i = i + jump) {
            if (i <= aliasToBucketFileNamesMapping.get(alias).size()) {
              resultFileNames.add(aliasToBucketFileNamesMapping.get(alias).get(i));
            }
          }
        }
        mapping.put(inputBigTBLBucket, resultFileNames);
        bucketFileNameMapping.put(inputBigTBLBucket, index);
      }
    }
    private boolean checkBucketNumberAgainstBigTable(
        LinkedHashMap<String, Integer> aliasToBucketNumber,
        int bucketNumberInPart) {
      Iterator<Integer> iter = aliasToBucketNumber.values().iterator();
      while (iter.hasNext()) {
        int nxt = iter.next().intValue();
        boolean ok = (nxt >= bucketNumberInPart) ? nxt % bucketNumberInPart == 0
            : bucketNumberInPart % nxt == 0;
        if (!ok) {
          return false;
        }
      }
      return true;
    }
    private List<String> getOnePartitionBucketFileNames(Partition part)
        throws SemanticException {
      List<String> fileNames = new ArrayList<String>();
      try {
        FileSystem fs = FileSystem.get(part.getDataLocation(), this.pGraphContext.getConf());
        FileStatus[] files = fs.listStatus(new Path(part.getDataLocation()
            .toString()));
        if (files != null) {
          for (FileStatus file : files) {
            fileNames.add(file.getPath().toString());
          }
        }
      } catch (IOException e) {
        throw new SemanticException(e);
      }
      return fileNames;
    }
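
    /**
     * Checks that the join keys of the table at the given position in the map
     * join are plain columns (or deterministic functions of columns) and that,
     * taken together, they are exactly the table's bucket columns; otherwise
     * the buckets of the two join sides cannot be matched one-to-one.
     */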
    private boolean checkBucketColumns(List<String> bucketColumns, MapJoinDesc mjDesc, int index) {
      List<ExprNodeDesc> keys = mjDesc.getKeys().get((byte) index);
      if (keys == null || bucketColumns == null || bucketColumns.size() == 0) {
        return false;
      }

      // get all join columns from join keys stored in MapJoinDesc
      List<String> joinCols = new ArrayList<String>();
      List<ExprNodeDesc> joinKeys = new ArrayList<ExprNodeDesc>();
      joinKeys.addAll(keys);
      while (joinKeys.size() > 0) {
        ExprNodeDesc node = joinKeys.remove(0);
        if (node instanceof ExprNodeColumnDesc) {
          joinCols.addAll(node.getCols());
        } else if (node instanceof ExprNodeGenericFuncDesc) {
          ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node);
          GenericUDF udf = udfNode.getGenericUDF();
          if (!FunctionRegistry.isDeterministic(udf)) {
            return false;
          }
          joinKeys.addAll(0, udfNode.getChildExprs());
        } else {
          return false;
        }
      }

      // check whether the join columns from a table are exactly the same as
      // its bucket columns
      if (joinCols.size() == 0 || joinCols.size() != bucketColumns.size()) {
        return false;
      }

      for (String col : joinCols) {
        if (!bucketColumns.contains(col)) {
          return false;
        }
      }
      return true;
    }
  }

  class BucketMapjoinOptProcCtx implements NodeProcessorCtx {
    // we only convert map joins that follow a root table scan in the same
    // mapper. That means there is no reducer between the root table scan and
    // the map join.
    Set<MapJoinOperator> listOfRejectedMapjoins = new HashSet<MapJoinOperator>();

    public Set<MapJoinOperator> getListOfRejectedMapjoins() {
      return listOfRejectedMapjoins;
    }
  }
}
  453. }