PageRenderTime 50ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java

#
Java | 208 lines | 118 code | 35 blank | 55 comment | 8 complexity | 412a2e7b05ee2d7b09ef03407b6a2ad2 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.optimizer.lineage;
  19. import java.io.Serializable;
  20. import java.util.ArrayList;
  21. import java.util.HashMap;
  22. import java.util.LinkedHashMap;
  23. import java.util.LinkedHashSet;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.Stack;
  27. import org.apache.hadoop.hive.ql.exec.ColumnInfo;
  28. import org.apache.hadoop.hive.ql.exec.Operator;
  29. import org.apache.hadoop.hive.ql.hooks.LineageInfo;
  30. import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
  31. import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
  32. import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
  33. import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
  34. import org.apache.hadoop.hive.ql.lib.Dispatcher;
  35. import org.apache.hadoop.hive.ql.lib.GraphWalker;
  36. import org.apache.hadoop.hive.ql.lib.Node;
  37. import org.apache.hadoop.hive.ql.lib.NodeProcessor;
  38. import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
  39. import org.apache.hadoop.hive.ql.lib.Rule;
  40. import org.apache.hadoop.hive.ql.lib.RuleRegExp;
  41. import org.apache.hadoop.hive.ql.parse.SemanticException;
  42. import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
  43. import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
  44. import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  45. import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
  46. import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  47. import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
  48. /**
  49. * Expression processor factory for lineage. Each processor is responsible for
  50. * creating the leaf level column info objects that the expression depends upon
  51. * and also for generating a string representation of the expression.
  52. */
  53. public class ExprProcFactory {
  54. /**
  55. * Processor for column expressions.
  56. */
  57. public static class ColumnExprProcessor implements NodeProcessor {
  58. @Override
  59. public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
  60. Object... nodeOutputs) throws SemanticException {
  61. ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
  62. ExprProcCtx epc = (ExprProcCtx) procCtx;
  63. // assert that the input operator is not null as there are no
  64. // exprs associated with table scans.
  65. assert (epc.getInputOperator() != null);
  66. ColumnInfo inp_ci = null;
  67. for (ColumnInfo tmp_ci : epc.getInputOperator().getSchema()
  68. .getSignature()) {
  69. if (tmp_ci.getInternalName().equals(cd.getColumn())) {
  70. inp_ci = tmp_ci;
  71. break;
  72. }
  73. }
  74. // Insert the dependencies of inp_ci to that of the current operator, ci
  75. LineageCtx lc = epc.getLineageCtx();
  76. Dependency dep = lc.getIndex().getDependency(epc.getInputOperator(), inp_ci);
  77. return dep;
  78. }
  79. }
  80. /**
  81. * Processor for any function or field expression.
  82. */
  83. public static class GenericExprProcessor implements NodeProcessor {
  84. @Override
  85. public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
  86. Object... nodeOutputs) throws SemanticException {
  87. assert (nd instanceof ExprNodeGenericFuncDesc || nd instanceof ExprNodeFieldDesc);
  88. // Concatenate the dependencies of all the children to compute the new
  89. // dependency.
  90. Dependency dep = new Dependency();
  91. LinkedHashSet<BaseColumnInfo> bci_set = new LinkedHashSet<BaseColumnInfo>();
  92. LineageInfo.DependencyType new_type = LineageInfo.DependencyType.EXPRESSION;
  93. for (Object child : nodeOutputs) {
  94. if (child == null) {
  95. continue;
  96. }
  97. Dependency child_dep = (Dependency) child;
  98. new_type = LineageCtx.getNewDependencyType(child_dep.getType(), new_type);
  99. bci_set.addAll(child_dep.getBaseCols());
  100. }
  101. dep.setBaseCols(new ArrayList<BaseColumnInfo>(bci_set));
  102. dep.setType(new_type);
  103. return dep;
  104. }
  105. }
  106. /**
  107. * Processor for constants and null expressions. For such expressions the
  108. * processor simply returns a null dependency vector.
  109. */
  110. public static class DefaultExprProcessor implements NodeProcessor {
  111. @Override
  112. public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
  113. Object... nodeOutputs) throws SemanticException {
  114. assert (nd instanceof ExprNodeConstantDesc || nd instanceof ExprNodeNullDesc);
  115. // Create a dependency that has no basecols
  116. Dependency dep = new Dependency();
  117. dep.setType(LineageInfo.DependencyType.SIMPLE);
  118. dep.setBaseCols(new ArrayList<BaseColumnInfo>());
  119. return dep;
  120. }
  121. }
  122. public static NodeProcessor getDefaultExprProcessor() {
  123. return new DefaultExprProcessor();
  124. }
  125. public static NodeProcessor getGenericFuncProcessor() {
  126. return new GenericExprProcessor();
  127. }
  128. public static NodeProcessor getFieldProcessor() {
  129. return new GenericExprProcessor();
  130. }
  131. public static NodeProcessor getColumnProcessor() {
  132. return new ColumnExprProcessor();
  133. }
  134. /**
  135. * Gets the expression dependencies for the expression.
  136. *
  137. * @param lctx
  138. * The lineage context containing the input operators dependencies.
  139. * @param inpOp
  140. * The input operator to the current operator.
  141. * @param expr
  142. * The expression that is being processed.
  143. * @throws SemanticException
  144. */
  145. public static Dependency getExprDependency(LineageCtx lctx,
  146. Operator<? extends Serializable> inpOp, ExprNodeDesc expr)
  147. throws SemanticException {
  148. // Create the walker, the rules dispatcher and the context.
  149. ExprProcCtx exprCtx = new ExprProcCtx(lctx, inpOp);
  150. // create a walker which walks the tree in a DFS manner while maintaining
  151. // the operator stack. The dispatcher
  152. // generates the plan from the operator tree
  153. Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
  154. exprRules.put(
  155. new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
  156. getColumnProcessor());
  157. exprRules.put(
  158. new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
  159. getFieldProcessor());
  160. exprRules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName()
  161. + "%"), getGenericFuncProcessor());
  162. // The dispatcher fires the processor corresponding to the closest matching
  163. // rule and passes the context along
  164. Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
  165. exprRules, exprCtx);
  166. GraphWalker egw = new DefaultGraphWalker(disp);
  167. List<Node> startNodes = new ArrayList<Node>();
  168. startNodes.add(expr);
  169. HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
  170. egw.startWalking(startNodes, outputMap);
  171. return (Dependency)outputMap.get(expr);
  172. }
  173. }