
/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.pcr;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Stack;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
/**
 * PcrOpProcFactory contains processors that process the expression trees of
 * filter operators that follow table scan operators. It walks the expression
 * tree of the filter operator to remove partition predicates when possible.
 * If the filter operator becomes redundant, the whole operator is marked for
 * removal later on; otherwise its predicate is rewritten.
 */
public final class PcrOpProcFactory {

  // The log
  private static final Log LOG = LogFactory
      .getLog("hive.ql.optimizer.pcr.OpProcFactory");

  /**
   * Remove partition condition in a filter operator when possible. This is
   * called only when the filter follows a table scan operator.
   */
  public static class FilterPCR implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      PcrOpWalkerCtx owc = (PcrOpWalkerCtx) procCtx;
      FilterOperator fop = (FilterOperator) nd;
      FilterOperator fop2 = null;

      // The stack contains either ... TS, Filter or
      // ... TS, Filter, Filter with the head of the stack being the rightmost
      // symbol. So we just pop out the two elements from the top and if the
      // second one of them is not a table scan then the operator on the top of
      // the stack is the table scan operator.
      Node tmp = stack.pop();
      Node tmp2 = stack.pop();
      TableScanOperator top = null;
      Operator<? extends Serializable> pop = null;
      if (tmp2 instanceof TableScanOperator) {
        top = (TableScanOperator) tmp2;
        pop = top;
      } else {
        top = (TableScanOperator) stack.peek();
        fop2 = (FilterOperator) tmp2;
        pop = fop2;
      }
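      // Restore the stack to its original state before any early return, so
      // the graph walker sees it unchanged.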
      stack.push(tmp2);
      stack.push(tmp);
      // If fop2 exists (i.e. this is not the top-level filter) and fop2 is
      // not a sampling filter, then we ignore the current filter.
      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
        return null;
      }

      // Skip the current filter if its predicate is a sampling predicate;
      // sampling predicates are not partition conditions.
      if (fop.getConf().getIsSamplingPred()) {
        return null;
      }

      if (fop.getParentOperators().size() > 1) {
        // This is not expected unless there is a bug elsewhere: a filter
        // matched by this rule should have a single parent. If it happens,
        // we have found a wrong filter operator and skip the optimization.
        return null;
      }
      PrunedPartitionList prunedPartList = owc.getParseContext().getOpToPartList().get(top);
      if (prunedPartList == null) {
        // We never pruned the partition. Try to prune it.
        ExprNodeDesc ppr_pred = owc.getParseContext().getOpToPartPruner().get(top);
        if (ppr_pred == null) {
          // no partition predicate found, skip.
          return null;
        }
        try {
          prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
              ppr_pred, owc.getParseContext().getConf(),
              (String) owc.getParseContext().getTopOps().keySet()
                  .toArray()[0], owc.getParseContext().getPrunedPartitions());
          if (prunedPartList != null) {
            owc.getParseContext().getOpToPartList().put(top, prunedPartList);
          }
        } catch (HiveException e) {
          // Rewrap as a SemanticException so the optimizer surfaces the
          // pruning failure as a compile-time error.
          throw new SemanticException(e.getMessage(), e);
        }
      }

      // Otherwise this is not a sampling predicate. We need to process it.
      ExprNodeDesc predicate = fop.getConf().getPredicate();
      String alias = top.getConf().getAlias();
      ArrayList<Partition> partitions = new ArrayList<Partition>();
      if (prunedPartList == null) {
        return null;
      }
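      // Partition condition removal only makes sense for partitioned tables:
      // for an unpartitioned table the pruned list holds the table itself and
      // there is no partition condition to remove, so bail out.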
      for (Partition p : prunedPartList.getConfirmedPartns()) {
        if (!p.getTable().isPartitioned()) {
          return null;
        }
      }
      for (Partition p : prunedPartList.getUnknownPartns()) {
        if (!p.getTable().isPartitioned()) {
          return null;
        }
      }

      partitions.addAll(prunedPartList.getConfirmedPartns());
      partitions.addAll(prunedPartList.getUnknownPartns());

      PcrExprProcFactory.NodeInfoWrapper wrapper = PcrExprProcFactory.walkExprTree(
          alias, partitions, predicate);
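      // Three outcomes are possible. TRUE: the predicate holds for every
      // partition left after pruning, so the filter is redundant and is
      // marked for removal. FALSE: the filter passes no rows; keep the
      // rewritten predicate so the plan still produces an empty result.
      // Anything else: replace the predicate with the partially evaluated
      // expression returned by the walker.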
      if (wrapper.state == PcrExprProcFactory.WalkState.TRUE) {
        owc.getOpToRemove().add(new PcrOpWalkerCtx.OpToDeleteInfo(pop, fop));
      } else if (wrapper.state != PcrExprProcFactory.WalkState.FALSE) {
        fop.getConf().setPredicate(wrapper.outExpr);
      } else {
        LOG.warn("Filter passes no row");
        fop.getConf().setPredicate(wrapper.outExpr);
      }

      return null;
    }
  }
  /**
   * Default processor which does nothing.
   */
  public static class DefaultPCR implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // Nothing needs to be done.
      return null;
    }
  }
  public static NodeProcessor getFilterProc() {
    return new FilterPCR();
  }

  public static NodeProcessor getDefaultProc() {
    return new DefaultPCR();
  }

  private PcrOpProcFactory() {
    // prevent instantiation
  }
}
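
For context, here is a minimal sketch of how these processors are typically wired into Hive's operator-graph walker, modeled on the PartitionConditionRemover transform that drives this factory in this era of the code base. The `pctx` (ParseContext) variable is assumed to come from the surrounding optimizer code, and the exact rule pattern and PcrOpWalkerCtx constructor may differ slightly between Hive versions; this is an illustration, not the canonical implementation.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;

// Inside the optimizer transform (sketch; pctx is the ParseContext):
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();

// Fire FilterPCR only for a filter (or filter-over-filter) directly on a
// table scan -- the two stack shapes FilterPCR.process() expects.
opRules.put(new RuleRegExp("R1", "(TS%FIL%)|(TS%FIL%FIL%)"),
    PcrOpProcFactory.getFilterProc());

// Operators collected here are deleted from the plan after the walk.
List<PcrOpWalkerCtx.OpToDeleteInfo> opToRemove =
    new ArrayList<PcrOpWalkerCtx.OpToDeleteInfo>();

// Every operator that matches no rule falls through to the no-op default.
Dispatcher disp = new DefaultRuleDispatcher(PcrOpProcFactory.getDefaultProc(),
    opRules, new PcrOpWalkerCtx(pctx, opToRemove));
GraphWalker ogw = new DefaultGraphWalker(disp);

// Walk the plan starting from the top (table scan) operators.
ArrayList<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);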