PageRenderTime 51ms CodeModel.GetById 14ms app.highlight 31ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java

#
Java | 208 lines | 118 code | 35 blank | 55 comment | 8 complexity | 412a2e7b05ee2d7b09ef03407b6a2ad2 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.optimizer.lineage;
 20
 21import java.io.Serializable;
 22import java.util.ArrayList;
 23import java.util.HashMap;
 24import java.util.LinkedHashMap;
 25import java.util.LinkedHashSet;
 26import java.util.List;
 27import java.util.Map;
 28import java.util.Stack;
 29
 30import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 31import org.apache.hadoop.hive.ql.exec.Operator;
 32import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 33import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
 34import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
 35import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 36import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 37import org.apache.hadoop.hive.ql.lib.Dispatcher;
 38import org.apache.hadoop.hive.ql.lib.GraphWalker;
 39import org.apache.hadoop.hive.ql.lib.Node;
 40import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 41import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 42import org.apache.hadoop.hive.ql.lib.Rule;
 43import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 44import org.apache.hadoop.hive.ql.parse.SemanticException;
 45import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 46import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 47import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 48import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 49import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 50import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 51
/**
 * Expression processor factory for lineage. Each processor is responsible for
 * creating the leaf level column info objects that the expression depends upon
 * and also for generating a string representation of the expression.
 */
 57public class ExprProcFactory {
 58
 59  /**
 60   * Processor for column expressions.
 61   */
 62  public static class ColumnExprProcessor implements NodeProcessor {
 63
 64    @Override
 65    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 66        Object... nodeOutputs) throws SemanticException {
 67
 68      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
 69      ExprProcCtx epc = (ExprProcCtx) procCtx;
 70
 71      // assert that the input operator is not null as there are no
 72      // exprs associated with table scans.
 73      assert (epc.getInputOperator() != null);
 74
 75      ColumnInfo inp_ci = null;
 76      for (ColumnInfo tmp_ci : epc.getInputOperator().getSchema()
 77          .getSignature()) {
 78        if (tmp_ci.getInternalName().equals(cd.getColumn())) {
 79          inp_ci = tmp_ci;
 80          break;
 81        }
 82      }
 83
 84      // Insert the dependencies of inp_ci to that of the current operator, ci
 85      LineageCtx lc = epc.getLineageCtx();
 86      Dependency dep = lc.getIndex().getDependency(epc.getInputOperator(), inp_ci);
 87
 88      return dep;
 89    }
 90
 91  }
 92
 93  /**
 94   * Processor for any function or field expression.
 95   */
 96  public static class GenericExprProcessor implements NodeProcessor {
 97
 98    @Override
 99    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
100        Object... nodeOutputs) throws SemanticException {
101
102      assert (nd instanceof ExprNodeGenericFuncDesc || nd instanceof ExprNodeFieldDesc);
103
104      // Concatenate the dependencies of all the children to compute the new
105      // dependency.
106      Dependency dep = new Dependency();
107
108      LinkedHashSet<BaseColumnInfo> bci_set = new LinkedHashSet<BaseColumnInfo>();
109      LineageInfo.DependencyType new_type = LineageInfo.DependencyType.EXPRESSION;
110
111      for (Object child : nodeOutputs) {
112        if (child == null) {
113          continue;
114        }
115
116        Dependency child_dep = (Dependency) child;
117        new_type = LineageCtx.getNewDependencyType(child_dep.getType(), new_type);
118        bci_set.addAll(child_dep.getBaseCols());
119      }
120
121      dep.setBaseCols(new ArrayList<BaseColumnInfo>(bci_set));
122      dep.setType(new_type);
123
124      return dep;
125    }
126
127  }
128
129  /**
130   * Processor for constants and null expressions. For such expressions the
131   * processor simply returns a null dependency vector.
132   */
133  public static class DefaultExprProcessor implements NodeProcessor {
134
135    @Override
136    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
137        Object... nodeOutputs) throws SemanticException {
138      assert (nd instanceof ExprNodeConstantDesc || nd instanceof ExprNodeNullDesc);
139
140      // Create a dependency that has no basecols
141      Dependency dep = new Dependency();
142      dep.setType(LineageInfo.DependencyType.SIMPLE);
143      dep.setBaseCols(new ArrayList<BaseColumnInfo>());
144      return dep;
145    }
146  }
147
148  public static NodeProcessor getDefaultExprProcessor() {
149    return new DefaultExprProcessor();
150  }
151
152  public static NodeProcessor getGenericFuncProcessor() {
153    return new GenericExprProcessor();
154  }
155
156  public static NodeProcessor getFieldProcessor() {
157    return new GenericExprProcessor();
158  }
159
160  public static NodeProcessor getColumnProcessor() {
161    return new ColumnExprProcessor();
162  }
163
164  /**
165   * Gets the expression dependencies for the expression.
166   *
167   * @param lctx
168   *          The lineage context containing the input operators dependencies.
169   * @param inpOp
170   *          The input operator to the current operator.
171   * @param expr
172   *          The expression that is being processed.
173   * @throws SemanticException
174   */
175  public static Dependency getExprDependency(LineageCtx lctx,
176      Operator<? extends Serializable> inpOp, ExprNodeDesc expr)
177      throws SemanticException {
178
179    // Create the walker, the rules dispatcher and the context.
180    ExprProcCtx exprCtx = new ExprProcCtx(lctx, inpOp);
181
182    // create a walker which walks the tree in a DFS manner while maintaining
183    // the operator stack. The dispatcher
184    // generates the plan from the operator tree
185    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
186    exprRules.put(
187        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
188        getColumnProcessor());
189    exprRules.put(
190        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
191        getFieldProcessor());
192    exprRules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName()
193        + "%"), getGenericFuncProcessor());
194
195    // The dispatcher fires the processor corresponding to the closest matching
196    // rule and passes the context along
197    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
198        exprRules, exprCtx);
199    GraphWalker egw = new DefaultGraphWalker(disp);
200
201    List<Node> startNodes = new ArrayList<Node>();
202    startNodes.add(expr);
203
204    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
205    egw.startWalking(startNodes, outputMap);
206    return (Dependency)outputMap.get(expr);
207  }
208}