PageRenderTime 51ms CodeModel.GetById 11ms app.highlight 34ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java

#
Java | 543 lines | 368 code | 47 blank | 128 comment | 77 complexity | 9664a05a34c0860216b1ea9e462bebf4 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.ql.ppd;
 19
 20import java.io.Serializable;
 21import java.util.ArrayList;
 22import java.util.HashSet;
 23import java.util.Iterator;
 24import java.util.List;
 25import java.util.Map;
 26import java.util.Set;
 27import java.util.Stack;
 28import java.util.Map.Entry;
 29
 30import org.apache.commons.logging.Log;
 31import org.apache.commons.logging.LogFactory;
 32import org.apache.hadoop.hive.conf.HiveConf;
 33import org.apache.hadoop.hive.ql.exec.FilterOperator;
 34import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 35import org.apache.hadoop.hive.ql.exec.JoinOperator;
 36import org.apache.hadoop.hive.ql.exec.Operator;
 37import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 38import org.apache.hadoop.hive.ql.exec.RowSchema;
 39import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 40import org.apache.hadoop.hive.ql.exec.Utilities;
 41import org.apache.hadoop.hive.ql.lib.Node;
 42import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 43import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 44import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 45import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
 46import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 47import org.apache.hadoop.hive.ql.metadata.Table;
 48import org.apache.hadoop.hive.ql.parse.OpParseContext;
 49import org.apache.hadoop.hive.ql.parse.RowResolver;
 50import org.apache.hadoop.hive.ql.parse.SemanticException;
 51import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 52import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 53import org.apache.hadoop.hive.ql.plan.FilterDesc;
 54import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 55import org.apache.hadoop.hive.ql.plan.JoinDesc;
 56import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 57import org.apache.hadoop.hive.serde2.Deserializer;
 58import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 59import org.apache.hadoop.mapred.JobConf;
 60
 61/**
 62 * Operator factory for predicate pushdown processing of operator graph Each
 63 * operator determines the pushdown predicates by walking the expression tree.
 64 * Each operator merges its own pushdown predicates with those of its children
 65 * Finally the TableScan operator gathers all the predicates and inserts a
 66 * filter operator after itself. TODO: Further optimizations 1) Multi-insert
 67 * case 2) Create a filter operator for those predicates that couldn't be pushed
 68 * to the previous operators in the data flow 3) Merge multiple sequential
 69 * filter predicates into so that plans are more readable 4) Remove predicates
 70 * from filter operators that have been pushed. Currently these pushed
 71 * predicates are evaluated twice.
 72 */
 73public final class OpProcFactory {
 74
 75  protected static final Log LOG = LogFactory.getLog(OpProcFactory.class
 76    .getName());
 77
 78  /**
 79   * Processor for Script Operator Prevents any predicates being pushed.
 80   */
 81  public static class ScriptPPD extends DefaultPPD implements NodeProcessor {
 82
 83    @Override
 84    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 85        Object... nodeOutputs) throws SemanticException {
 86      LOG.info("Processing for " + nd.getName() + "("
 87          + ((Operator) nd).getIdentifier() + ")");
 88      // script operator is a black-box to hive so no optimization here
 89      // assuming that nothing can be pushed above the script op
 90      // same with LIMIT op
 91      return null;
 92    }
 93
 94  }
 95
 96  public static class LateralViewForwardPPD extends DefaultPPD implements NodeProcessor {
 97
 98    @Override
 99    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
100        Object... nodeOutputs) throws SemanticException {
101      LOG.info("Processing for " + nd.getName() + "("
102          + ((Operator) nd).getIdentifier() + ")");
103      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
104
105      ExprWalkerInfo childPreds = owi
106      .getPrunedPreds((Operator<? extends Serializable>) nd.getChildren()
107      .get(0));
108
109      owi.putPrunedPreds((Operator<? extends Serializable>) nd, childPreds);
110      return null;
111    }
112
113  }
114
115  /**
116   * Combines predicates of its child into a single expression and adds a filter
117   * op as new child.
118   */
119  public static class TableScanPPD extends DefaultPPD implements NodeProcessor {
120
121    @Override
122    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
123        Object... nodeOutputs) throws SemanticException {
124      LOG.info("Processing for " + nd.getName() + "("
125          + ((Operator) nd).getIdentifier() + ")");
126      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
127      TableScanOperator tsOp = (TableScanOperator) nd;
128      mergeWithChildrenPred(tsOp, owi, null, null, false);
129      ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
130      return createFilter(tsOp, pushDownPreds, owi);
131    }
132
133  }
134
135  /**
136   * Determines the push down predicates in its where expression and then
137   * combines it with the push down predicates that are passed from its children.
138   */
139  public static class FilterPPD extends DefaultPPD implements NodeProcessor {
140
141    @Override
142    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
143        Object... nodeOutputs) throws SemanticException {
144      LOG.info("Processing for " + nd.getName() + "("
145          + ((Operator) nd).getIdentifier() + ")");
146      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
147      Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
148      ExprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate();
149      // get pushdown predicates for this operator's predicate
150      ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op,
151          predicate);
152      if (!ewi.isDeterministic()) {
153        /* predicate is not deterministic */
154        if (op.getChildren() != null && op.getChildren().size() == 1) {
155          createFilter(op, owi
156              .getPrunedPreds((Operator<? extends Serializable>) (op
157              .getChildren().get(0))), owi);
158        }
159
160        return null;
161      }
162
163      logExpr(nd, ewi);
164      owi.putPrunedPreds(op, ewi);
165      // merge it with children predicates
166      mergeWithChildrenPred(op, owi, ewi, null, false);
167
168      return null;
169    }
170  }
171
172  /**
173   * Determines predicates for which alias can be pushed to it's parents. See
174   * the comments for getQualifiedAliases function.
175   */
176  public static class JoinPPD extends DefaultPPD implements NodeProcessor {
177    @Override
178    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
179        Object... nodeOutputs) throws SemanticException {
180      LOG.info("Processing for " + nd.getName() + "("
181          + ((Operator) nd).getIdentifier() + ")");
182      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
183      Set<String> aliases = getQualifiedAliases((JoinOperator) nd, owi
184          .getRowResolver(nd));
185      mergeWithChildrenPred(nd, owi, null, aliases, false);
186      return null;
187    }
188
189    /**
190     * Figures out the aliases for whom it is safe to push predicates based on
191     * ANSI SQL semantics For inner join, all predicates for all aliases can be
192     * pushed For full outer join, none of the predicates can be pushed as that
193     * would limit the number of rows for join For left outer join, all the
194     * predicates on the left side aliases can be pushed up For right outer
195     * join, all the predicates on the right side aliases can be pushed up Joins
196     * chain containing both left and right outer joins are treated as full
197     * outer join. TODO: further optimization opportunity for the case a.c1 =
198     * b.c1 and b.c2 = c.c2 a and b are first joined and then the result with c.
199     * But the second join op currently treats a and b as separate aliases and
200     * thus disallowing predicate expr containing both tables a and b (such as
201     * a.c3 + a.c4 > 20). Such predicates also can be pushed just above the
202     * second join and below the first join
203     *
204     * @param op
205     *          Join Operator
206     * @param rr
207     *          Row resolver
208     * @return set of qualified aliases
209     */
210    private Set<String> getQualifiedAliases(JoinOperator op, RowResolver rr) {
211      Set<String> aliases = new HashSet<String>();
212      int loj = Integer.MAX_VALUE;
213      int roj = -1;
214      boolean oj = false;
215      JoinCondDesc[] conds = op.getConf().getConds();
216      Map<Integer, Set<String>> posToAliasMap = op.getPosToAliasMap();
217      for (JoinCondDesc jc : conds) {
218        if (jc.getType() == JoinDesc.FULL_OUTER_JOIN) {
219          oj = true;
220          break;
221        } else if (jc.getType() == JoinDesc.LEFT_OUTER_JOIN) {
222          if (jc.getLeft() < loj) {
223            loj = jc.getLeft();
224          }
225        } else if (jc.getType() == JoinDesc.RIGHT_OUTER_JOIN) {
226          if (jc.getRight() > roj) {
227            roj = jc.getRight();
228          }
229        }
230      }
231      if (oj || (loj != Integer.MAX_VALUE && roj != -1)) {
232        return aliases;
233      }
234      for (Entry<Integer, Set<String>> pa : posToAliasMap.entrySet()) {
235        if (loj != Integer.MAX_VALUE) {
236          if (pa.getKey() <= loj) {
237            aliases.addAll(pa.getValue());
238          }
239        } else if (roj != -1) {
240          if (pa.getKey() >= roj) {
241            aliases.addAll(pa.getValue());
242          }
243        } else {
244          aliases.addAll(pa.getValue());
245        }
246      }
247      Set<String> aliases2 = rr.getTableNames();
248      aliases.retainAll(aliases2);
249      return aliases;
250    }
251  }
252
253  /**
254   * Processor for ReduceSink operator.
255   *
256   */
257  public static class ReduceSinkPPD extends DefaultPPD implements NodeProcessor {
258    @Override
259    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
260        Object... nodeOutputs) throws SemanticException {
261      LOG.info("Processing for " + nd.getName() + "("
262          + ((Operator) nd).getIdentifier() + ")");
263      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
264      Set<String> aliases = owi.getRowResolver(nd).getTableNames();
265      boolean ignoreAliases = false;
266      if (aliases.size() == 1 && aliases.contains("")) {
267        // Reduce sink of group by operator
268        ignoreAliases = true;
269      }
270      mergeWithChildrenPred(nd, owi, null, aliases, ignoreAliases);
271      return null;
272    }
273
274  }
275
276  /**
277   * Default processor which just merges its children.
278   */
279  public static class DefaultPPD implements NodeProcessor {
280
281    @Override
282    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
283        Object... nodeOutputs) throws SemanticException {
284      LOG.info("Processing for " + nd.getName() + "("
285          + ((Operator) nd).getIdentifier() + ")");
286      mergeWithChildrenPred(nd, (OpWalkerInfo) procCtx, null, null, false);
287      return null;
288    }
289
290    /**
291     * @param nd
292     * @param ewi
293     */
294    protected void logExpr(Node nd, ExprWalkerInfo ewi) {
295      for (Entry<String, List<ExprNodeDesc>> e : ewi.getFinalCandidates()
296          .entrySet()) {
297        LOG.info("Pushdown Predicates of " + nd.getName() + " For Alias : "
298            + e.getKey());
299        for (ExprNodeDesc n : e.getValue()) {
300          LOG.info("\t" + n.getExprString());
301        }
302      }
303    }
304
305    /**
306     * Take current operators pushdown predicates and merges them with
307     * children's pushdown predicates.
308     *
309     * @param nd
310     *          current operator
311     * @param owi
312     *          operator context during this walk
313     * @param ewi
314     *          pushdown predicates (part of expression walker info)
315     * @param aliases
316     *          aliases that this operator can pushdown. null means that all
317     *          aliases can be pushed down
318     * @param ignoreAliases
319     * @throws SemanticException
320     */
321    protected void mergeWithChildrenPred(Node nd, OpWalkerInfo owi,
322        ExprWalkerInfo ewi, Set<String> aliases, boolean ignoreAliases)
323        throws SemanticException {
324      if (nd.getChildren() == null || nd.getChildren().size() > 1) {
325        // ppd for multi-insert query is not yet implemented
326        // no-op for leafs
327        return;
328      }
329      Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
330      ExprWalkerInfo childPreds = owi
331          .getPrunedPreds((Operator<? extends Serializable>) nd.getChildren()
332          .get(0));
333      if (childPreds == null) {
334        return;
335      }
336      if (ewi == null) {
337        ewi = new ExprWalkerInfo();
338      }
339      for (Entry<String, List<ExprNodeDesc>> e : childPreds
340          .getFinalCandidates().entrySet()) {
341        if (ignoreAliases || aliases == null || aliases.contains(e.getKey())
342            || e.getKey() == null) {
343          // e.getKey() (alias) can be null in case of constant expressions. see
344          // input8.q
345          ExprWalkerInfo extractPushdownPreds = ExprWalkerProcFactory
346              .extractPushdownPreds(owi, op, e.getValue());
347          ewi.merge(extractPushdownPreds);
348          logExpr(nd, extractPushdownPreds);
349        }
350      }
351      owi.putPrunedPreds((Operator<? extends Serializable>) nd, ewi);
352    }
353  }
354
355  protected static Object createFilter(Operator op,
356      ExprWalkerInfo pushDownPreds, OpWalkerInfo owi) {
357    if (pushDownPreds == null || pushDownPreds.getFinalCandidates() == null
358        || pushDownPreds.getFinalCandidates().size() == 0) {
359      return null;
360    }
361
362    RowResolver inputRR = owi.getRowResolver(op);
363
364    // combine all predicates into a single expression
365    List<ExprNodeDesc> preds = null;
366    ExprNodeDesc condn = null;
367    Iterator<List<ExprNodeDesc>> iterator = pushDownPreds.getFinalCandidates()
368        .values().iterator();
369    while (iterator.hasNext()) {
370      preds = iterator.next();
371      int i = 0;
372      if (condn == null) {
373        condn = preds.get(0);
374        i++;
375      }
376
377      for (; i < preds.size(); i++) {
378        List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(2);
379        children.add(condn);
380        children.add(preds.get(i));
381        condn = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
382            FunctionRegistry.getGenericUDFForAnd(), children);
383      }
384    }
385
386    if (condn == null) {
387      return null;
388    }
389    
390    if (op instanceof TableScanOperator) {
391      boolean pushFilterToStorage;
392      HiveConf hiveConf = owi.getParseContext().getConf();
393      pushFilterToStorage =
394        hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
395      if (pushFilterToStorage) {
396        condn = pushFilterToStorageHandler(
397          (TableScanOperator) op,
398          condn,
399          owi,
400          hiveConf);
401        if (condn == null) {
402          // we pushed the whole thing down
403          return null;
404        }
405      }
406    }
407
408    // add new filter op
409    List<Operator<? extends Serializable>> originalChilren = op
410        .getChildOperators();
411    op.setChildOperators(null);
412    Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(
413        new FilterDesc(condn, false), new RowSchema(inputRR.getColumnInfos()),
414        op);
415    output.setChildOperators(originalChilren);
416    for (Operator<? extends Serializable> ch : originalChilren) {
417      List<Operator<? extends Serializable>> parentOperators = ch
418          .getParentOperators();
419      int pos = parentOperators.indexOf(op);
420      assert pos != -1;
421      parentOperators.remove(pos);
422      parentOperators.add(pos, output); // add the new op as the old
423    }
424    OpParseContext ctx = new OpParseContext(inputRR);
425    owi.put(output, ctx);
426    return output;
427  }
428
429  /**
430   * Attempts to push a predicate down into a storage handler.  For
431   * native tables, this is a no-op.
432   *
433   * @param tableScanOp table scan against which predicate applies
434   *
435   * @param originalPredicate predicate to be pushed down
436   *
437   * @param owi object walk info
438   *
439   * @param hiveConf Hive configuration
440   *
441   * @return portion of predicate which needs to be evaluated
442   * by Hive as a post-filter, or null if it was possible
443   * to push down the entire predicate
444   */
445  private static ExprNodeDesc pushFilterToStorageHandler(
446    TableScanOperator tableScanOp,
447    ExprNodeDesc originalPredicate,
448    OpWalkerInfo owi,
449    HiveConf hiveConf) {
450
451    TableScanDesc tableScanDesc = tableScanOp.getConf();
452    Table tbl = owi.getParseContext().getTopToTable().get(tableScanOp);
453    if (!tbl.isNonNative()) {
454      return originalPredicate;
455    }
456    HiveStorageHandler storageHandler = tbl.getStorageHandler();
457    if (!(storageHandler instanceof HiveStoragePredicateHandler)) {
458      // The storage handler does not provide predicate decomposition
459      // support, so we'll implement the entire filter in Hive.  However,
460      // we still provide the full predicate to the storage handler in
461      // case it wants to do any of its own prefiltering.
462      tableScanDesc.setFilterExpr(originalPredicate);
463      return originalPredicate;
464    }
465    HiveStoragePredicateHandler predicateHandler =
466      (HiveStoragePredicateHandler) storageHandler;
467    JobConf jobConf = new JobConf(owi.getParseContext().getConf());
468    Utilities.setColumnNameList(jobConf, tableScanOp);
469    Utilities.copyTableJobPropertiesToConf(
470      Utilities.getTableDesc(tbl),
471      jobConf);
472    Deserializer deserializer = tbl.getDeserializer();
473    HiveStoragePredicateHandler.DecomposedPredicate decomposed =
474      predicateHandler.decomposePredicate(
475        jobConf,
476        deserializer,
477        originalPredicate);
478    if (decomposed == null) {
479      // not able to push anything down
480      if (LOG.isDebugEnabled()) {
481        LOG.debug("No pushdown possible for predicate:  "
482          + originalPredicate.getExprString());
483      }
484      return originalPredicate;
485    }
486    if (LOG.isDebugEnabled()) {
487      LOG.debug("Original predicate:  "
488        + originalPredicate.getExprString());
489      if (decomposed.pushedPredicate != null) {
490        LOG.debug(
491          "Pushed predicate:  "
492          + decomposed.pushedPredicate.getExprString());
493      }
494      if (decomposed.residualPredicate != null) {
495        LOG.debug(
496          "Residual predicate:  "
497          + decomposed.residualPredicate.getExprString());
498      }
499    }
500    tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
501    return decomposed.residualPredicate;
502  }
503  
  /** @return processor for Filter operators */
  public static NodeProcessor getFilterProc() {
    return new FilterPPD();
  }

  /** @return processor for Join operators */
  public static NodeProcessor getJoinProc() {
    return new JoinPPD();
  }

  /** @return processor for ReduceSink operators */
  public static NodeProcessor getRSProc() {
    return new ReduceSinkPPD();
  }

  /** @return processor for TableScan operators */
  public static NodeProcessor getTSProc() {
    return new TableScanPPD();
  }

  /** @return fallback processor that merely merges child predicates */
  public static NodeProcessor getDefaultProc() {
    return new DefaultPPD();
  }

  /** @return processor for Script operators (blocks all pushdown) */
  public static NodeProcessor getSCRProc() {
    return new ScriptPPD();
  }

  /** @return processor for Limit operators; reuses ScriptPPD to block pushdown */
  public static NodeProcessor getLIMProc() {
    return new ScriptPPD();
  }

  /** @return processor for UDTF operators; reuses ScriptPPD to block pushdown */
  public static NodeProcessor getUDTFProc() {
    return new ScriptPPD();
  }

  /** @return processor for LateralViewForward operators */
  public static NodeProcessor getLVFProc() {
    return new LateralViewForwardPPD();
  }

  private OpProcFactory() {
    // prevent instantiation
  }
543}