PageRenderTime 47ms CodeModel.GetById 17ms app.highlight 24ms RepoModel.GetById 2ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java

#
Java | 189 lines | 116 code | 25 blank | 48 comment | 25 complexity | 119ff65b58e50fab17c3a406d476bca1 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.optimizer.pcr;
 20
 21import java.io.Serializable;
 22import java.util.ArrayList;
 23import java.util.Stack;
 24
 25import org.apache.commons.logging.Log;
 26import org.apache.commons.logging.LogFactory;
 27import org.apache.hadoop.hive.ql.exec.FilterOperator;
 28import org.apache.hadoop.hive.ql.exec.Operator;
 29import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 30import org.apache.hadoop.hive.ql.lib.Node;
 31import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 32import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 33import org.apache.hadoop.hive.ql.metadata.HiveException;
 34import org.apache.hadoop.hive.ql.metadata.Partition;
 35import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 36import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 37import org.apache.hadoop.hive.ql.parse.SemanticException;
 38import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 39
 40/**
 41 * PcrOpProcFactory contains processors that process the expression trees of filter
 42 * operators that follow table scan operators. It walks the expression tree of a filter
 43 * operator to remove partition predicates when possible. If the filter operator can be
 44 * removed, the whole operator is marked for removal later on; otherwise its predicate
 45 * is changed.
 46 */
 46public final class PcrOpProcFactory {
 47
 48  // The log
 49  private static final Log LOG = LogFactory
 50      .getLog("hive.ql.optimizer.pcr.OpProcFactory");
 51
 52  /**
 53   * Remove partition condition in a filter operator when possible. This is
 54   * called only when the filter follows a table scan operator.
 55   */
 56  public static class FilterPCR implements NodeProcessor {
 57
 58    @Override
 59    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 60        Object... nodeOutputs) throws SemanticException {
 61      PcrOpWalkerCtx owc = (PcrOpWalkerCtx) procCtx;
 62      FilterOperator fop = (FilterOperator) nd;
 63      FilterOperator fop2 = null;
 64
 65      // The stack contains either ... TS, Filter or
 66      // ... TS, Filter, Filter with the head of the stack being the rightmost
 67      // symbol. So we just pop out the two elements from the top and if the
 68      // second one of them is not a table scan then the operator on the top of
 69      // the stack is the Table scan operator.
 70      Node tmp = stack.pop();
 71      Node tmp2 = stack.pop();
 72      TableScanOperator top = null;
 73      Operator<? extends Serializable> pop = null;
 74      if (tmp2 instanceof TableScanOperator) {
 75        top = (TableScanOperator) tmp2;
 76        pop = top;
 77      } else {
 78        top = (TableScanOperator) stack.peek();
 79        fop2 = (FilterOperator) tmp2;
 80        pop = fop2;
 81      }
 82      stack.push(tmp2);
 83      stack.push(tmp);
 84
 85      // If fop2 exists (i.e this is not the top level filter and fop2 is not
 86      // a sampling filter then we ignore the current filter
 87      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
 88        return null;
 89      }
 90
 91      // ignore the predicate in case it is not a sampling predicate
 92      if (fop.getConf().getIsSamplingPred()) {
 93        return null;
 94      }
 95
 96      if (fop.getParentOperators().size() > 1) {
 97        // It's not likely if there is no bug. But in case it happens, we must
 98        // have found a wrong filter operator. We skip the optimization then.
 99        return null;
100      }
101
102
103      PrunedPartitionList prunedPartList = owc.getParseContext().getOpToPartList().get(top);
104      if (prunedPartList == null) {
105        // We never pruned the partition. Try to prune it.
106        ExprNodeDesc ppr_pred = owc.getParseContext().getOpToPartPruner().get(top);
107        if (ppr_pred == null) {
108          // no partition predicate found, skip.
109          return null;
110        }
111        try {
112          prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
113              ppr_pred, owc.getParseContext().getConf(),
114              (String) owc.getParseContext().getTopOps().keySet()
115              .toArray()[0], owc.getParseContext().getPrunedPartitions());
116          if (prunedPartList != null) {
117            owc.getParseContext().getOpToPartList().put(top, prunedPartList);
118          }
119        } catch (HiveException e) {
120          // Has to use full name to make sure it does not conflict with
121          // org.apache.commons.lang.StringUtils
122          throw new SemanticException(e.getMessage(), e);
123        }
124      }
125
126      // Otherwise this is not a sampling predicate. We need to process it.
127      ExprNodeDesc predicate = fop.getConf().getPredicate();
128      String alias = top.getConf().getAlias();
129
130      ArrayList<Partition> partitions = new ArrayList<Partition>();
131      if (prunedPartList == null) {
132        return null;
133      }
134
135      for (Partition p : prunedPartList.getConfirmedPartns()) {
136        if (!p.getTable().isPartitioned()) {
137          return null;
138        }
139      }
140      for (Partition p : prunedPartList.getUnknownPartns()) {
141        if (!p.getTable().isPartitioned()) {
142          return null;
143        }
144      }
145
146      partitions.addAll(prunedPartList.getConfirmedPartns());
147      partitions.addAll(prunedPartList.getUnknownPartns());
148
149      PcrExprProcFactory.NodeInfoWrapper wrapper = PcrExprProcFactory.walkExprTree(
150          alias, partitions, predicate);
151
152      if (wrapper.state == PcrExprProcFactory.WalkState.TRUE) {
153        owc.getOpToRemove().add(new PcrOpWalkerCtx.OpToDeleteInfo(pop, fop));
154      } else if (wrapper.state != PcrExprProcFactory.WalkState.FALSE) {
155        fop.getConf().setPredicate(wrapper.outExpr);
156      } else {
157        LOG.warn("Filter passes no row");
158        fop.getConf().setPredicate(wrapper.outExpr);
159      }
160
161      return null;
162    }
163  }
164
165  /**
166   * Default processor which does nothing
167   */
168  public static class DefaultPCR implements NodeProcessor {
169
170    @Override
171    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
172        Object... nodeOutputs) throws SemanticException {
173      // Nothing needs to be done.
174      return null;
175    }
176  }
177
178  public static NodeProcessor getFilterProc() {
179    return new FilterPCR();
180  }
181
182  public static NodeProcessor getDefaultProc() {
183    return new DefaultPCR();
184  }
185
186  private PcrOpProcFactory() {
187    // prevent instantiation
188  }
189}