PageRenderTime 41ms CodeModel.GetById 13ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java

#
Java | 248 lines | 162 code | 20 blank | 66 comment | 29 complexity | 156fda6ccd09ebc47c832ce26f66ef09 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.ql.index;
 19
 20import java.util.ArrayList;
 21import java.util.HashMap;
 22import java.util.HashSet;
 23import java.util.LinkedHashMap;
 24import java.util.List;
 25import java.util.Map;
 26import java.util.Set;
 27import java.util.Stack;
 28
 29import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 30import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 31import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 32import org.apache.hadoop.hive.ql.lib.Dispatcher;
 33import org.apache.hadoop.hive.ql.lib.GraphWalker;
 34import org.apache.hadoop.hive.ql.lib.Node;
 35import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 36import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 37import org.apache.hadoop.hive.ql.lib.Rule;
 38import org.apache.hadoop.hive.ql.parse.SemanticException;
 39import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 40import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 41import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 42import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 43import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 44import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 45
 46/**
 47 * IndexPredicateAnalyzer decomposes predicates, separating the parts
 48 * which can be satisfied by an index from the parts which cannot.
 49 * Currently, it only supports pure conjunctions over binary expressions
 50 * comparing a column reference with a constant value.  It is assumed
 51 * that all column aliases encountered refer to the same table.
 52 */
 53public class IndexPredicateAnalyzer
 54{
 55  private Set<String> udfNames;
 56
 57  private Set<String> allowedColumnNames;
 58  
 59  public IndexPredicateAnalyzer() {
 60    udfNames = new HashSet<String>();
 61  }
 62
 63  /**
 64   * Registers a comparison operator as one which can be satisfied
 65   * by an index search.  Unless this is called, analyzePredicate
 66   * will never find any indexable conditions.
 67   *
 68   * @param udfName name of comparison operator as returned
 69   * by either {@link GenericUDFBridge#getUdfName} (for simple UDF's)
 70   * or udf.getClass().getName() (for generic UDF's).
 71   */
 72  public void addComparisonOp(String udfName) {
 73    udfNames.add(udfName);
 74  }
 75
 76  /**
 77   * Clears the set of column names allowed in comparisons.  (Initially, all
 78   * column names are allowed.)
 79   */
 80  public void clearAllowedColumnNames() {
 81    allowedColumnNames = new HashSet<String>();
 82  }
 83
 84  /**
 85   * Adds a column name to the set of column names allowed.
 86   *
 87   * @param columnName name of column to be allowed
 88   */
 89  public void allowColumnName(String columnName) {
 90    if (allowedColumnNames == null) {
 91      clearAllowedColumnNames();
 92    }
 93    allowedColumnNames.add(columnName);
 94  }
 95
 96  /**
 97   * Analyzes a predicate.
 98   *
 99   * @param predicate predicate to be analyzed
100   *
101   * @param searchConditions receives conditions produced by analysis
102   *
103   * @return residual predicate which could not be translated to
104   * searchConditions
105   */
106  public ExprNodeDesc analyzePredicate(
107    ExprNodeDesc predicate,
108    final List<IndexSearchCondition> searchConditions) {
109
110    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
111    NodeProcessor nodeProcessor = new NodeProcessor() {
112      @Override
113      public Object process(Node nd, Stack<Node> stack,
114        NodeProcessorCtx procCtx, Object... nodeOutputs)
115        throws SemanticException {
116
117        // We can only push down stuff which appears as part of
118        // a pure conjunction:  reject OR, CASE, etc.
119        for (Node ancestor : stack) {
120          if (nd == ancestor) {
121            break;
122          }
123          if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
124            return nd;
125          }
126        }
127
128        return analyzeExpr((ExprNodeDesc) nd, searchConditions, nodeOutputs);
129      }
130    };
131
132    Dispatcher disp = new DefaultRuleDispatcher(
133      nodeProcessor, opRules, null);
134    GraphWalker ogw = new DefaultGraphWalker(disp);
135    ArrayList<Node> topNodes = new ArrayList<Node>();
136    topNodes.add(predicate);
137    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
138    try {
139      ogw.startWalking(topNodes, nodeOutput);
140    } catch (SemanticException ex) {
141      throw new RuntimeException(ex);
142    }
143    ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
144    return residualPredicate;
145  }
146
147  private ExprNodeDesc analyzeExpr(
148    ExprNodeDesc expr,
149    List<IndexSearchCondition> searchConditions,
150    Object... nodeOutputs) {
151
152    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
153      return expr;
154    }
155    if (FunctionRegistry.isOpAnd(expr)) {
156      assert(nodeOutputs.length == 2);
157      ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
158      ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
159      if (residual1 == null) {
160        return residual2;
161      }
162      if (residual2 == null) {
163        return residual1;
164      }
165      List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
166      residuals.add(residual1);
167      residuals.add(residual2);
168      return new ExprNodeGenericFuncDesc(
169        TypeInfoFactory.booleanTypeInfo,
170        FunctionRegistry.getGenericUDFForAnd(),
171        residuals);
172    }
173
174    String udfName;
175    ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
176    if (funcDesc.getGenericUDF() instanceof GenericUDFBridge) {
177      GenericUDFBridge func = (GenericUDFBridge) funcDesc.getGenericUDF();
178      udfName = func.getUdfName();
179    } else {
180      udfName = funcDesc.getGenericUDF().getClass().getName();
181    }
182    if (!udfNames.contains(udfName)) {
183      return expr;
184    }
185
186    ExprNodeDesc child1 = (ExprNodeDesc) nodeOutputs[0];
187    ExprNodeDesc child2 = (ExprNodeDesc) nodeOutputs[1];
188    ExprNodeColumnDesc columnDesc = null;
189    ExprNodeConstantDesc constantDesc = null;
190    if ((child1 instanceof ExprNodeColumnDesc)
191      && (child2 instanceof ExprNodeConstantDesc)) {
192      // COL <op> CONSTANT
193      columnDesc = (ExprNodeColumnDesc) child1;
194      constantDesc = (ExprNodeConstantDesc) child2;
195    } else if ((child2 instanceof ExprNodeColumnDesc)
196      && (child1 instanceof ExprNodeConstantDesc)) {
197      // CONSTANT <op> COL
198      columnDesc = (ExprNodeColumnDesc) child2;
199      constantDesc = (ExprNodeConstantDesc) child1;
200    }
201    if (columnDesc == null) {
202      return expr;
203    }
204    if (allowedColumnNames != null) {
205      if (!allowedColumnNames.contains(columnDesc.getColumn())) {
206        return expr;
207      }
208    }
209    searchConditions.add(
210      new IndexSearchCondition(
211        columnDesc,
212        udfName,
213        constantDesc,
214        expr));
215
216    // we converted the expression to a search condition, so
217    // remove it from the residual predicate
218    return null;
219  }
220
221  /**
222   * Translates search conditions back to ExprNodeDesc form (as
223   * a left-deep conjunction).
224   *
225   * @param searchConditions (typically produced by analyzePredicate)
226   *
227   * @return ExprNodeDesc form of search conditions
228   */
229  public ExprNodeDesc translateSearchConditions(
230    List<IndexSearchCondition> searchConditions) {
231
232    ExprNodeDesc expr = null;
233    for (IndexSearchCondition searchCondition : searchConditions) {
234      if (expr == null) {
235        expr = searchCondition.getComparisonExpr();
236        continue;
237      }
238      List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
239      children.add(expr);
240      children.add(searchCondition.getComparisonExpr());
241      expr = new ExprNodeGenericFuncDesc(
242        TypeInfoFactory.booleanTypeInfo,
243        FunctionRegistry.getGenericUDFForAnd(),
244        children);
245    }
246    return expr;
247  }
248}