PageRenderTime 42ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java

#
Java | 248 lines | 162 code | 20 blank | 66 comment | 29 complexity | 156fda6ccd09ebc47c832ce26f66ef09 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.index;
  19. import java.util.ArrayList;
  20. import java.util.HashMap;
  21. import java.util.HashSet;
  22. import java.util.LinkedHashMap;
  23. import java.util.List;
  24. import java.util.Map;
  25. import java.util.Set;
  26. import java.util.Stack;
  27. import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
  28. import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
  29. import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
  30. import org.apache.hadoop.hive.ql.lib.Dispatcher;
  31. import org.apache.hadoop.hive.ql.lib.GraphWalker;
  32. import org.apache.hadoop.hive.ql.lib.Node;
  33. import org.apache.hadoop.hive.ql.lib.NodeProcessor;
  34. import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
  35. import org.apache.hadoop.hive.ql.lib.Rule;
  36. import org.apache.hadoop.hive.ql.parse.SemanticException;
  37. import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
  38. import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
  39. import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  40. import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  41. import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
  42. import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
  43. /**
  44. * IndexPredicateAnalyzer decomposes predicates, separating the parts
  45. * which can be satisfied by an index from the parts which cannot.
  46. * Currently, it only supports pure conjunctions over binary expressions
  47. * comparing a column reference with a constant value. It is assumed
  48. * that all column aliases encountered refer to the same table.
  49. */
  50. public class IndexPredicateAnalyzer
  51. {
  52. private Set<String> udfNames;
  53. private Set<String> allowedColumnNames;
  54. public IndexPredicateAnalyzer() {
  55. udfNames = new HashSet<String>();
  56. }
  57. /**
  58. * Registers a comparison operator as one which can be satisfied
  59. * by an index search. Unless this is called, analyzePredicate
  60. * will never find any indexable conditions.
  61. *
  62. * @param udfName name of comparison operator as returned
  63. * by either {@link GenericUDFBridge#getUdfName} (for simple UDF's)
  64. * or udf.getClass().getName() (for generic UDF's).
  65. */
  66. public void addComparisonOp(String udfName) {
  67. udfNames.add(udfName);
  68. }
  69. /**
  70. * Clears the set of column names allowed in comparisons. (Initially, all
  71. * column names are allowed.)
  72. */
  73. public void clearAllowedColumnNames() {
  74. allowedColumnNames = new HashSet<String>();
  75. }
  76. /**
  77. * Adds a column name to the set of column names allowed.
  78. *
  79. * @param columnName name of column to be allowed
  80. */
  81. public void allowColumnName(String columnName) {
  82. if (allowedColumnNames == null) {
  83. clearAllowedColumnNames();
  84. }
  85. allowedColumnNames.add(columnName);
  86. }
  87. /**
  88. * Analyzes a predicate.
  89. *
  90. * @param predicate predicate to be analyzed
  91. *
  92. * @param searchConditions receives conditions produced by analysis
  93. *
  94. * @return residual predicate which could not be translated to
  95. * searchConditions
  96. */
  97. public ExprNodeDesc analyzePredicate(
  98. ExprNodeDesc predicate,
  99. final List<IndexSearchCondition> searchConditions) {
  100. Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  101. NodeProcessor nodeProcessor = new NodeProcessor() {
  102. @Override
  103. public Object process(Node nd, Stack<Node> stack,
  104. NodeProcessorCtx procCtx, Object... nodeOutputs)
  105. throws SemanticException {
  106. // We can only push down stuff which appears as part of
  107. // a pure conjunction: reject OR, CASE, etc.
  108. for (Node ancestor : stack) {
  109. if (nd == ancestor) {
  110. break;
  111. }
  112. if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
  113. return nd;
  114. }
  115. }
  116. return analyzeExpr((ExprNodeDesc) nd, searchConditions, nodeOutputs);
  117. }
  118. };
  119. Dispatcher disp = new DefaultRuleDispatcher(
  120. nodeProcessor, opRules, null);
  121. GraphWalker ogw = new DefaultGraphWalker(disp);
  122. ArrayList<Node> topNodes = new ArrayList<Node>();
  123. topNodes.add(predicate);
  124. HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  125. try {
  126. ogw.startWalking(topNodes, nodeOutput);
  127. } catch (SemanticException ex) {
  128. throw new RuntimeException(ex);
  129. }
  130. ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
  131. return residualPredicate;
  132. }
  133. private ExprNodeDesc analyzeExpr(
  134. ExprNodeDesc expr,
  135. List<IndexSearchCondition> searchConditions,
  136. Object... nodeOutputs) {
  137. if (!(expr instanceof ExprNodeGenericFuncDesc)) {
  138. return expr;
  139. }
  140. if (FunctionRegistry.isOpAnd(expr)) {
  141. assert(nodeOutputs.length == 2);
  142. ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
  143. ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
  144. if (residual1 == null) {
  145. return residual2;
  146. }
  147. if (residual2 == null) {
  148. return residual1;
  149. }
  150. List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
  151. residuals.add(residual1);
  152. residuals.add(residual2);
  153. return new ExprNodeGenericFuncDesc(
  154. TypeInfoFactory.booleanTypeInfo,
  155. FunctionRegistry.getGenericUDFForAnd(),
  156. residuals);
  157. }
  158. String udfName;
  159. ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
  160. if (funcDesc.getGenericUDF() instanceof GenericUDFBridge) {
  161. GenericUDFBridge func = (GenericUDFBridge) funcDesc.getGenericUDF();
  162. udfName = func.getUdfName();
  163. } else {
  164. udfName = funcDesc.getGenericUDF().getClass().getName();
  165. }
  166. if (!udfNames.contains(udfName)) {
  167. return expr;
  168. }
  169. ExprNodeDesc child1 = (ExprNodeDesc) nodeOutputs[0];
  170. ExprNodeDesc child2 = (ExprNodeDesc) nodeOutputs[1];
  171. ExprNodeColumnDesc columnDesc = null;
  172. ExprNodeConstantDesc constantDesc = null;
  173. if ((child1 instanceof ExprNodeColumnDesc)
  174. && (child2 instanceof ExprNodeConstantDesc)) {
  175. // COL <op> CONSTANT
  176. columnDesc = (ExprNodeColumnDesc) child1;
  177. constantDesc = (ExprNodeConstantDesc) child2;
  178. } else if ((child2 instanceof ExprNodeColumnDesc)
  179. && (child1 instanceof ExprNodeConstantDesc)) {
  180. // CONSTANT <op> COL
  181. columnDesc = (ExprNodeColumnDesc) child2;
  182. constantDesc = (ExprNodeConstantDesc) child1;
  183. }
  184. if (columnDesc == null) {
  185. return expr;
  186. }
  187. if (allowedColumnNames != null) {
  188. if (!allowedColumnNames.contains(columnDesc.getColumn())) {
  189. return expr;
  190. }
  191. }
  192. searchConditions.add(
  193. new IndexSearchCondition(
  194. columnDesc,
  195. udfName,
  196. constantDesc,
  197. expr));
  198. // we converted the expression to a search condition, so
  199. // remove it from the residual predicate
  200. return null;
  201. }
  202. /**
  203. * Translates search conditions back to ExprNodeDesc form (as
  204. * a left-deep conjunction).
  205. *
  206. * @param searchConditions (typically produced by analyzePredicate)
  207. *
  208. * @return ExprNodeDesc form of search conditions
  209. */
  210. public ExprNodeDesc translateSearchConditions(
  211. List<IndexSearchCondition> searchConditions) {
  212. ExprNodeDesc expr = null;
  213. for (IndexSearchCondition searchCondition : searchConditions) {
  214. if (expr == null) {
  215. expr = searchCondition.getComparisonExpr();
  216. continue;
  217. }
  218. List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  219. children.add(expr);
  220. children.add(searchCondition.getComparisonExpr());
  221. expr = new ExprNodeGenericFuncDesc(
  222. TypeInfoFactory.booleanTypeInfo,
  223. FunctionRegistry.getGenericUDFForAnd(),
  224. children);
  225. }
  226. return expr;
  227. }
  228. }