PageRenderTime 85ms CodeModel.GetById 15ms app.highlight 63ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java

#
Java | 917 lines | 674 code | 118 blank | 125 comment | 159 complexity | d6fd62d3562776f7b3c7c429521ca302 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.exec;
 20
 21import java.io.Serializable;
 22import java.util.ArrayList;
 23import java.util.HashMap;
 24import java.util.Iterator;
 25import java.util.List;
 26import java.util.Map;
 27import java.util.Set;
 28
 29import org.apache.commons.logging.Log;
 30import org.apache.commons.logging.LogFactory;
 31import org.apache.hadoop.conf.Configuration;
 32import org.apache.hadoop.hive.conf.HiveConf;
 33import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
 34import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
 35import org.apache.hadoop.hive.ql.metadata.HiveException;
 36import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 37import org.apache.hadoop.hive.ql.plan.JoinDesc;
 38import org.apache.hadoop.hive.ql.plan.TableDesc;
 39import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 40import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 41import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 42import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 43import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 44import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 45import org.apache.hadoop.io.BooleanWritable;
 46
 47/**
 48 * Join operator implementation.
 49 */
 50public abstract class CommonJoinOperator<T extends JoinDesc> extends
 51    Operator<T> implements Serializable {
 52  private static final long serialVersionUID = 1L;
 53  protected static final Log LOG = LogFactory.getLog(CommonJoinOperator.class
 54      .getName());
 55
 56  /**
 57   * IntermediateObject.
 58   *
 59   */
 60  public static class IntermediateObject {
 61    ArrayList<Object>[] objs;
 62    int curSize;
 63
 64    public IntermediateObject(ArrayList<Object>[] objs, int curSize) {
 65      this.objs = objs;
 66      this.curSize = curSize;
 67    }
 68
 69    public ArrayList<Object>[] getObjs() {
 70      return objs;
 71    }
 72
 73    public int getCurSize() {
 74      return curSize;
 75    }
 76
 77    public void pushObj(ArrayList<Object> newObj) {
 78      objs[curSize++] = newObj;
 79    }
 80
 81    public void popObj() {
 82      curSize--;
 83    }
 84
 85    public Object topObj() {
 86      return objs[curSize - 1];
 87    }
 88  }
 89
 90  protected transient int numAliases; // number of aliases
 91  /**
 92   * The expressions for join inputs.
 93   */
 94  protected transient Map<Byte, List<ExprNodeEvaluator>> joinValues;
 95
 96  /**
 97   * The filters for join
 98   */
 99  protected transient Map<Byte, List<ExprNodeEvaluator>> joinFilters;
100
101  /**
102   * The ObjectInspectors for the join inputs.
103   */
104  protected transient Map<Byte, List<ObjectInspector>> joinValuesObjectInspectors;
105
106  /**
107   * The ObjectInspectors for join filters.
108   */
109  protected transient
110    Map<Byte, List<ObjectInspector>> joinFilterObjectInspectors;
111  /**
112   * The standard ObjectInspectors for the join inputs.
113   */
114  protected transient Map<Byte, List<ObjectInspector>> joinValuesStandardObjectInspectors;
115  /**
116   * The standard ObjectInspectors for the row container.
117   */
118  protected transient
119    Map<Byte, List<ObjectInspector>> rowContainerStandardObjectInspectors;
120
121  protected transient Byte[] order; // order in which the results should
122  // be output
123  protected transient JoinCondDesc[] condn;
124  public transient boolean noOuterJoin;
125  protected transient Object[] dummyObj; // for outer joins, contains the
126  // potential nulls for the concerned
127  // aliases
128  protected transient RowContainer<ArrayList<Object>>[] dummyObjVectors; // empty
129  // rows
130  // for
131  // each
132  // table
133  protected transient int totalSz; // total size of the composite object
134
135  // keys are the column names. basically this maps the position of the column
136  // in
137  // the output of the CommonJoinOperator to the input columnInfo.
138  private transient Map<Integer, Set<String>> posToAliasMap;
139
140  transient LazyBinarySerDe[] spillTableSerDe;
141  protected transient Map<Byte, TableDesc> spillTableDesc; // spill tables are
142  // used if the join
143  // input is too large
144  // to fit in memory
145
146  HashMap<Byte, AbstractRowContainer<ArrayList<Object>>> storage; // map b/w table alias
147  // to RowContainer
148  int joinEmitInterval = -1;
149  int joinCacheSize = 0;
150  int nextSz = 0;
151  transient Byte lastAlias = null;
152
153  transient boolean handleSkewJoin = false;
154
155  protected transient int countAfterReport;
156  protected transient int heartbeatInterval;
157  protected static final int NOTSKIPBIGTABLE = -1;
158
159  public CommonJoinOperator() {
160  }
161
162  public CommonJoinOperator(CommonJoinOperator<T> clone) {
163    this.joinEmitInterval = clone.joinEmitInterval;
164    this.joinCacheSize = clone.joinCacheSize;
165    this.nextSz = clone.nextSz;
166    this.childOperators = clone.childOperators;
167    this.parentOperators = clone.parentOperators;
168    this.counterNames = clone.counterNames;
169    this.counterNameToEnum = clone.counterNameToEnum;
170    this.done = clone.done;
171    this.operatorId = clone.operatorId;
172    this.storage = clone.storage;
173    this.condn = clone.condn;
174    this.conf = clone.getConf();
175    this.setSchema(clone.getSchema());
176    this.alias = clone.alias;
177    this.beginTime = clone.beginTime;
178    this.inputRows = clone.inputRows;
179    this.childOperatorsArray = clone.childOperatorsArray;
180    this.childOperatorsTag = clone.childOperatorsTag;
181    this.colExprMap = clone.colExprMap;
182    this.counters = clone.counters;
183    this.dummyObj = clone.dummyObj;
184    this.dummyObjVectors = clone.dummyObjVectors;
185    this.forwardCache = clone.forwardCache;
186    this.groupKeyObject = clone.groupKeyObject;
187    this.handleSkewJoin = clone.handleSkewJoin;
188    this.hconf = clone.hconf;
189    this.id = clone.id;
190    this.inputObjInspectors = clone.inputObjInspectors;
191    this.inputRows = clone.inputRows;
192    this.noOuterJoin = clone.noOuterJoin;
193    this.numAliases = clone.numAliases;
194    this.operatorId = clone.operatorId;
195    this.posToAliasMap = clone.posToAliasMap;
196    this.spillTableDesc = clone.spillTableDesc;
197    this.statsMap = clone.statsMap;
198    this.joinFilters = clone.joinFilters;
199    this.joinFilterObjectInspectors = clone.joinFilterObjectInspectors;
200  }
201
202
203  protected static <T extends JoinDesc> ObjectInspector getJoinOutputObjectInspector(
204      Byte[] order, Map<Byte, List<ObjectInspector>> aliasToObjectInspectors,
205      T conf) {
206    ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
207    for (Byte alias : order) {
208      List<ObjectInspector> oiList = aliasToObjectInspectors.get(alias);
209      structFieldObjectInspectors.addAll(oiList);
210    }
211
212    StructObjectInspector joinOutputObjectInspector = ObjectInspectorFactory
213        .getStandardStructObjectInspector(conf.getOutputColumnNames(),
214        structFieldObjectInspectors);
215    return joinOutputObjectInspector;
216  }
217
218  Configuration hconf;
219
220  @Override
221  protected void initializeOp(Configuration hconf) throws HiveException {
222    this.handleSkewJoin = conf.getHandleSkewJoin();
223    this.hconf = hconf;
224
225    heartbeatInterval = HiveConf.getIntVar(hconf,
226        HiveConf.ConfVars.HIVESENDHEARTBEAT);
227    countAfterReport = 0;
228
229    totalSz = 0;
230    // Map that contains the rows for each alias
231    storage = new HashMap<Byte, AbstractRowContainer<ArrayList<Object>>>();
232
233    numAliases = conf.getExprs().size();
234
235    joinValues = new HashMap<Byte, List<ExprNodeEvaluator>>();
236
237    joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>();
238
239    order = conf.getTagOrder();
240    condn = conf.getConds();
241    noOuterJoin = conf.isNoOuterJoin();
242
243    totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(),
244        order,NOTSKIPBIGTABLE);
245
246    //process join filters
247    joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>();
248    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE);
249
250
251    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,
252        inputObjInspectors,NOTSKIPBIGTABLE);
253    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters,
254        inputObjInspectors,NOTSKIPBIGTABLE);
255    joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(
256        joinValuesObjectInspectors,NOTSKIPBIGTABLE);
257
258    if (noOuterJoin) {
259      rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
260    } else {
261      Map<Byte, List<ObjectInspector>> rowContainerObjectInspectors =
262        new HashMap<Byte, List<ObjectInspector>>();
263      for (Byte alias : order) {
264        ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>();
265        rcOIs.addAll(joinValuesObjectInspectors.get(alias));
266        // for each alias, add object inspector for boolean as the last element
267        rcOIs.add(
268            PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
269        rowContainerObjectInspectors.put(alias, rcOIs);
270      }
271      rowContainerStandardObjectInspectors =
272        JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE);
273    }
274
275
276
277
278    dummyObj = new Object[numAliases];
279    dummyObjVectors = new RowContainer[numAliases];
280
281    joinEmitInterval = HiveConf.getIntVar(hconf,
282        HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
283    joinCacheSize = HiveConf.getIntVar(hconf,
284        HiveConf.ConfVars.HIVEJOINCACHESIZE);
285
286    // construct dummy null row (indicating empty table) and
287    // construct spill table serde which is used if input is too
288    // large to fit into main memory.
289    byte pos = 0;
290    for (Byte alias : order) {
291      int sz = conf.getExprs().get(alias).size();
292      ArrayList<Object> nr = new ArrayList<Object>(sz);
293
294      for (int j = 0; j < sz; j++) {
295        nr.add(null);
296      }
297
298      if (!noOuterJoin) {
299        // add whether the row is filtered or not
300        // this value does not matter for the dummyObj
301        // because the join values are already null
302        nr.add(new BooleanWritable(false));
303      }
304      dummyObj[pos] = nr;
305      // there should be only 1 dummy object in the RowContainer
306      RowContainer<ArrayList<Object>> values = JoinUtil.getRowContainer(hconf,
307          rowContainerStandardObjectInspectors.get((byte)pos),
308          alias, 1, spillTableDesc, conf, noOuterJoin);
309
310      values.add((ArrayList<Object>) dummyObj[pos]);
311      dummyObjVectors[pos] = values;
312
313      // if serde is null, the input doesn't need to be spilled out
314      // e.g., the output columns does not contains the input table
315      RowContainer rc = JoinUtil.getRowContainer(hconf,
316          rowContainerStandardObjectInspectors.get((byte)pos),
317          alias, joinCacheSize,spillTableDesc, conf,noOuterJoin);
318      storage.put(pos, rc);
319
320      pos++;
321    }
322
323    forwardCache = new Object[totalSz];
324
325    outputObjInspector = getJoinOutputObjectInspector(order,
326        joinValuesStandardObjectInspectors, conf);
327    LOG.info("JOIN "
328        + ((StructObjectInspector) outputObjInspector).getTypeName()
329        + " totalsz = " + totalSz);
330
331  }
332
333
334
335
336transient boolean newGroupStarted = false;
337  @Override
338  public void startGroup() throws HiveException {
339    LOG.trace("Join: Starting new group");
340    newGroupStarted = true;
341    for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) {
342      alw.clear();
343    }
344  }
345
346  protected int getNextSize(int sz) {
347    // A very simple counter to keep track of join entries for a key
348    if (sz >= 100000) {
349      return sz + 100000;
350    }
351
352    return 2 * sz;
353  }
354
355  protected transient Byte alias;
356
357  transient Object[] forwardCache;
358
359  private void createForwardJoinObject(IntermediateObject intObj,
360      boolean[] nullsArr) throws HiveException {
361    int p = 0;
362    for (int i = 0; i < numAliases; i++) {
363      Byte alias = order[i];
364      int sz = joinValues.get(alias).size();
365      if (nullsArr[i]) {
366        for (int j = 0; j < sz; j++) {
367          forwardCache[p++] = null;
368        }
369      } else {
370        ArrayList<Object> obj = intObj.getObjs()[i];
371        for (int j = 0; j < sz; j++) {
372          forwardCache[p++] = obj.get(j);
373        }
374      }
375    }
376
377    forward(forwardCache, outputObjInspector);
378    countAfterReport = 0;
379  }
380
381  private void copyOldArray(boolean[] src, boolean[] dest) {
382    for (int i = 0; i < src.length; i++) {
383      dest[i] = src[i];
384    }
385  }
386
387  private ArrayList<boolean[]> joinObjectsInnerJoin(
388      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
389      ArrayList<Object> newObj, IntermediateObject intObj, int left,
390      boolean newObjNull) {
391    if (newObjNull) {
392      return resNulls;
393    }
394    Iterator<boolean[]> nullsIter = inputNulls.iterator();
395    while (nullsIter.hasNext()) {
396      boolean[] oldNulls = nullsIter.next();
397      boolean oldObjNull = oldNulls[left];
398      if (!oldObjNull) {
399        boolean[] newNulls = new boolean[intObj.getCurSize()];
400        copyOldArray(oldNulls, newNulls);
401        newNulls[oldNulls.length] = false;
402        resNulls.add(newNulls);
403      }
404    }
405    return resNulls;
406  }
407
408  /**
409   * Implement semi join operator.
410   */
411  private ArrayList<boolean[]> joinObjectsLeftSemiJoin(
412      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
413      ArrayList<Object> newObj, IntermediateObject intObj, int left,
414      boolean newObjNull) {
415    if (newObjNull) {
416      return resNulls;
417    }
418    Iterator<boolean[]> nullsIter = inputNulls.iterator();
419    while (nullsIter.hasNext()) {
420      boolean[] oldNulls = nullsIter.next();
421      boolean oldObjNull = oldNulls[left];
422      if (!oldObjNull) {
423        boolean[] newNulls = new boolean[intObj.getCurSize()];
424        copyOldArray(oldNulls, newNulls);
425        newNulls[oldNulls.length] = false;
426        resNulls.add(newNulls);
427      }
428    }
429    return resNulls;
430  }
431
432  private ArrayList<boolean[]> joinObjectsLeftOuterJoin(
433      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
434      ArrayList<Object> newObj, IntermediateObject intObj, int left,
435      boolean newObjNull) {
436    // newObj is null if is already null or
437    // if the row corresponding to the left alias does not pass through filter
438    int filterIndex = joinValues.get(order[left]).size();
439    if(filterIndex < intObj.getObjs()[left].size()) {
440      newObjNull = newObjNull || ((BooleanWritable) (intObj.getObjs()[left].get(filterIndex))).get();
441    }
442
443    Iterator<boolean[]> nullsIter = inputNulls.iterator();
444    while (nullsIter.hasNext()) {
445      boolean[] oldNulls = nullsIter.next();
446      boolean oldObjNull = oldNulls[left];
447      boolean[] newNulls = new boolean[intObj.getCurSize()];
448      copyOldArray(oldNulls, newNulls);
449      if (oldObjNull) {
450        newNulls[oldNulls.length] = true;
451      } else {
452        newNulls[oldNulls.length] = newObjNull;
453      }
454      resNulls.add(newNulls);
455    }
456    return resNulls;
457  }
458
459  private ArrayList<boolean[]> joinObjectsRightOuterJoin(
460      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
461      ArrayList<Object> newObj, IntermediateObject intObj, int left,
462      boolean newObjNull, boolean firstRow) {
463    if (newObjNull) {
464      return resNulls;
465    }
466
467    if (inputNulls.isEmpty() && firstRow) {
468      boolean[] newNulls = new boolean[intObj.getCurSize()];
469      for (int i = 0; i < intObj.getCurSize() - 1; i++) {
470        newNulls[i] = true;
471      }
472      newNulls[intObj.getCurSize() - 1] = newObjNull;
473      resNulls.add(newNulls);
474      return resNulls;
475    }
476
477    boolean allOldObjsNull = firstRow;
478
479    Iterator<boolean[]> nullsIter = inputNulls.iterator();
480    while (nullsIter.hasNext()) {
481      boolean[] oldNulls = nullsIter.next();
482      if (!oldNulls[left]) {
483        allOldObjsNull = false;
484        break;
485      }
486    }
487
488    // if the row does not pass through filter, all old Objects are null
489    if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) {
490      allOldObjsNull = true;
491    }
492    nullsIter = inputNulls.iterator();
493    while (nullsIter.hasNext()) {
494      boolean[] oldNulls = nullsIter.next();
495      boolean oldObjNull = oldNulls[left] || allOldObjsNull;
496
497      if (!oldObjNull) {
498        boolean[] newNulls = new boolean[intObj.getCurSize()];
499        copyOldArray(oldNulls, newNulls);
500        newNulls[oldNulls.length] = newObjNull;
501        resNulls.add(newNulls);
502      } else if (allOldObjsNull) {
503        boolean[] newNulls = new boolean[intObj.getCurSize()];
504        for (int i = 0; i < intObj.getCurSize() - 1; i++) {
505          newNulls[i] = true;
506        }
507        newNulls[oldNulls.length] = newObjNull;
508        resNulls.add(newNulls);
509        return resNulls;
510      }
511    }
512    return resNulls;
513  }
514
515  private ArrayList<boolean[]> joinObjectsFullOuterJoin(
516      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
517      ArrayList<Object> newObj, IntermediateObject intObj, int left,
518      boolean newObjNull, boolean firstRow) {
519    if (newObjNull) {
520      Iterator<boolean[]> nullsIter = inputNulls.iterator();
521      while (nullsIter.hasNext()) {
522        boolean[] oldNulls = nullsIter.next();
523        boolean[] newNulls = new boolean[intObj.getCurSize()];
524        copyOldArray(oldNulls, newNulls);
525        newNulls[oldNulls.length] = newObjNull;
526        resNulls.add(newNulls);
527      }
528      return resNulls;
529    }
530
531    if (inputNulls.isEmpty() && firstRow) {
532      boolean[] newNulls = new boolean[intObj.getCurSize()];
533      for (int i = 0; i < intObj.getCurSize() - 1; i++) {
534        newNulls[i] = true;
535      }
536      newNulls[intObj.getCurSize() - 1] = newObjNull;
537      resNulls.add(newNulls);
538      return resNulls;
539    }
540
541    boolean allOldObjsNull = firstRow;
542
543    Iterator<boolean[]> nullsIter = inputNulls.iterator();
544    while (nullsIter.hasNext()) {
545      boolean[] oldNulls = nullsIter.next();
546      if (!oldNulls[left]) {
547        allOldObjsNull = false;
548        break;
549      }
550    }
551
552    // if the row does not pass through filter, all old Objects are null
553    if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) {
554      allOldObjsNull = true;
555    }
556    boolean rhsPreserved = false;
557
558    nullsIter = inputNulls.iterator();
559    while (nullsIter.hasNext()) {
560      boolean[] oldNulls = nullsIter.next();
561      // old obj is null even if the row corresponding to the left alias
562      // does not pass through filter
563      boolean oldObjNull = oldNulls[left] || ((BooleanWritable)
564        (intObj.getObjs()[left].get(joinValues.get(order[left]).size()))).get()
565        || allOldObjsNull;
566      if (!oldObjNull) {
567        boolean[] newNulls = new boolean[intObj.getCurSize()];
568        copyOldArray(oldNulls, newNulls);
569        newNulls[oldNulls.length] = newObjNull;
570        resNulls.add(newNulls);
571      } else if (oldObjNull) {
572        boolean[] newNulls = new boolean[intObj.getCurSize()];
573        copyOldArray(oldNulls, newNulls);
574        newNulls[oldNulls.length] = true;
575        resNulls.add(newNulls);
576
577        if (allOldObjsNull && !rhsPreserved) {
578          newNulls = new boolean[intObj.getCurSize()];
579          for (int i = 0; i < oldNulls.length; i++) {
580            newNulls[i] = true;
581          }
582          newNulls[oldNulls.length] = false;
583          resNulls.add(newNulls);
584          rhsPreserved = true;
585        }
586      }
587    }
588    return resNulls;
589  }
590
591  /*
592   * The new input is added to the list of existing inputs. Each entry in the
593   * array of inputNulls denotes the entries in the intermediate object to be
594   * used. The intermediate object is augmented with the new object, and list of
595   * nulls is changed appropriately. The list will contain all non-nulls for a
596   * inner join. The outer joins are processed appropriately.
597   */
598  private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls,
599      ArrayList<Object> newObj, IntermediateObject intObj, int joinPos,
600      boolean firstRow) {
601    ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>();
602    boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
603    if (joinPos == 0) {
604      if (newObjNull) {
605        return null;
606      }
607      boolean[] nulls = new boolean[1];
608      nulls[0] = newObjNull;
609      resNulls.add(nulls);
610      return resNulls;
611    }
612
613    int left = condn[joinPos - 1].getLeft();
614    int type = condn[joinPos - 1].getType();
615
616    // process all nulls for RIGHT and FULL OUTER JOINS
617    if (((type == JoinDesc.RIGHT_OUTER_JOIN) || (type == JoinDesc.FULL_OUTER_JOIN))
618        && !newObjNull && (inputNulls == null) && firstRow) {
619      boolean[] newNulls = new boolean[intObj.getCurSize()];
620      for (int i = 0; i < newNulls.length - 1; i++) {
621        newNulls[i] = true;
622      }
623      newNulls[newNulls.length - 1] = false;
624      resNulls.add(newNulls);
625      return resNulls;
626    }
627
628    if (inputNulls == null) {
629      return null;
630    }
631
632    if (type == JoinDesc.INNER_JOIN) {
633      return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left,
634          newObjNull);
635    } else if (type == JoinDesc.LEFT_OUTER_JOIN) {
636      return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj,
637          left, newObjNull);
638    } else if (type == JoinDesc.RIGHT_OUTER_JOIN) {
639      return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
640          left, newObjNull, firstRow);
641    } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
642      return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj,
643          left, newObjNull);
644    }
645
646    assert (type == JoinDesc.FULL_OUTER_JOIN);
647    return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left,
648        newObjNull, firstRow);
649  }
650
651  /*
652   * genObject is a recursive function. For the inputs, a array of bitvectors is
653   * maintained (inputNulls) where each entry denotes whether the element is to
654   * be used or not (whether it is null or not). The size of the bitvector is
655   * same as the number of inputs under consideration currently. When all inputs
656   * are accounted for, the output is forwarded appropriately.
657   */
658  private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum,
659      IntermediateObject intObj, boolean firstRow) throws HiveException {
660    boolean childFirstRow = firstRow;
661    boolean skipping = false;
662
663    if (aliasNum < numAliases) {
664
665      // search for match in the rhs table
666      AbstractRowContainer<ArrayList<Object>> aliasRes = storage.get(order[aliasNum]);
667
668      for (ArrayList<Object> newObj = aliasRes.first(); newObj != null; newObj = aliasRes
669          .next()) {
670
671        // check for skipping in case of left semi join
672        if (aliasNum > 0
673            && condn[aliasNum - 1].getType() == JoinDesc.LEFT_SEMI_JOIN
674            && newObj != dummyObj[aliasNum]) { // successful match
675          skipping = true;
676        }
677
678        intObj.pushObj(newObj);
679
680        // execute the actual join algorithm
681        ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
682            aliasNum, childFirstRow);
683
684        // recursively call the join the other rhs tables
685        genObject(newNulls, aliasNum + 1, intObj, firstRow);
686
687        intObj.popObj();
688        firstRow = false;
689
690        // if left-semi-join found a match, skipping the rest of the rows in the
691        // rhs table of the semijoin
692        if (skipping) {
693          break;
694        }
695      }
696    } else {
697      if (inputNulls == null) {
698        return;
699      }
700      Iterator<boolean[]> nullsIter = inputNulls.iterator();
701      while (nullsIter.hasNext()) {
702        boolean[] nullsVec = nullsIter.next();
703        createForwardJoinObject(intObj, nullsVec);
704      }
705    }
706  }
707
708  /**
709   * Forward a record of join results.
710   *
711   * @throws HiveException
712   */
713  @Override
714  public void endGroup() throws HiveException {
715    LOG.trace("Join Op: endGroup called: numValues=" + numAliases);
716
717    checkAndGenObject();
718  }
719
720  private void genUniqueJoinObject(int aliasNum, int forwardCachePos)
721      throws HiveException {
722    AbstractRowContainer<ArrayList<Object>> alias = storage.get(order[aliasNum]);
723    for (ArrayList<Object> row = alias.first(); row != null; row = alias.next()) {
724      int sz = joinValues.get(order[aliasNum]).size();
725      int p = forwardCachePos;
726      for (int j = 0; j < sz; j++) {
727        forwardCache[p++] = row.get(j);
728      }
729      if (aliasNum == numAliases - 1) {
730        forward(forwardCache, outputObjInspector);
731        countAfterReport = 0;
732      } else {
733        genUniqueJoinObject(aliasNum + 1, p);
734      }
735    }
736  }
737
738  private void genAllOneUniqueJoinObject()
739      throws HiveException {
740    int p = 0;
741    for (int i = 0; i < numAliases; i++) {
742      int sz = joinValues.get(order[i]).size();
743      ArrayList<Object> obj = storage.get(order[i]).first();
744      for (int j = 0; j < sz; j++) {
745        forwardCache[p++] = obj.get(j);
746      }
747    }
748
749    forward(forwardCache, outputObjInspector);
750    countAfterReport = 0;
751  }
752
753  protected void checkAndGenObject() throws HiveException {
754    if (condn[0].getType() == JoinDesc.UNIQUE_JOIN) {
755      new IntermediateObject(new ArrayList[numAliases], 0);
756
757      // Check if results need to be emitted.
758      // Results only need to be emitted if there is a non-null entry in a table
759      // that is preserved or if there are no non-null entries
760      boolean preserve = false; // Will be true if there is a non-null entry
761      // in a preserved table
762      boolean hasNulls = false; // Will be true if there are null entries
763      boolean allOne = true;
764      for (int i = 0; i < numAliases; i++) {
765        Byte alias = order[i];
766        AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias);
767
768        if (alw.size() != 1) {
769          allOne = false;
770        }
771
772        if (alw.size() == 0) {
773          alw.add((ArrayList<Object>) dummyObj[i]);
774          hasNulls = true;
775        } else if (condn[i].getPreserved()) {
776          preserve = true;
777        }
778      }
779
780      if (hasNulls && !preserve) {
781        return;
782      }
783
784      if (allOne) {
785        LOG.info("calling genAllOneUniqueJoinObject");
786        genAllOneUniqueJoinObject();
787        LOG.info("called genAllOneUniqueJoinObject");
788      } else {
789        LOG.trace("calling genUniqueJoinObject");
790        genUniqueJoinObject(0, 0);
791        LOG.trace("called genUniqueJoinObject");
792      }
793    } else {
794      // does any result need to be emitted
795      boolean mayHasMoreThanOne = false;
796      boolean hasEmpty = false;
797      for (int i = 0; i < numAliases; i++) {
798        Byte alias = order[i];
799        AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias);
800
801        if (noOuterJoin) {
802          if (alw.size() == 0) {
803            LOG.trace("No data for alias=" + i);
804            return;
805          } else if (alw.size() > 1) {
806            mayHasMoreThanOne = true;
807          }
808        } else {
809          if (alw.size() == 0) {
810            hasEmpty = true;
811            alw.add((ArrayList<Object>) dummyObj[i]);
812          } else if (!hasEmpty && alw.size() == 1) {
813            ArrayList<Object> row = alw.first();
814            int numValues = joinValues.get(alias).size();
815            if (row == dummyObj[alias]
816                || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) {
817              hasEmpty = true;
818            }
819          } else {
820            mayHasMoreThanOne = true;
821            if (!hasEmpty) {
822              int numValues = joinValues.get(alias).size();
823              for (ArrayList<Object> row = alw.first(); row != null; row = alw.next()) {
824                if (row == dummyObj[alias]
825                    || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) {
826                  hasEmpty = true;
827                  break;
828                }
829              }
830            }
831          }
832        }
833      }
834
835      if (!hasEmpty && !mayHasMoreThanOne) {
836        LOG.trace("calling genAllOneUniqueJoinObject");
837        genAllOneUniqueJoinObject();
838        LOG.trace("called genAllOneUniqueJoinObject");
839      } else if (!hasEmpty) {
840        LOG.trace("calling genUniqueJoinObject");
841        genUniqueJoinObject(0, 0);
842        LOG.trace("called genUniqueJoinObject");
843      } else {
844        LOG.trace("calling genObject");
845        genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0),
846            true);
847        LOG.trace("called genObject");
848      }
849    }
850  }
851
852  protected void reportProgress() {
853    // Send some status periodically
854    countAfterReport++;
855
856    if ((countAfterReport % heartbeatInterval) == 0
857        && (reporter != null)) {
858      reporter.progress();
859      countAfterReport = 0;
860    }
861  }
862
863  /**
864   * Returns true if the row does not pass through filters.
865   */
866  protected static Boolean isFiltered(Object row,
867      List<ExprNodeEvaluator> filters, List<ObjectInspector> ois)
868      throws HiveException {
869    // apply join filters on the row.
870    Boolean ret = false;
871    for (int j = 0; j < filters.size(); j++) {
872      Object condition = filters.get(j).evaluate(row);
873      ret = (Boolean) ((PrimitiveObjectInspector)
874          ois.get(j)).getPrimitiveJavaObject(condition);
875      if (ret == null || !ret) {
876        return true;
877      }
878    }
879    return false;
880  }
881
882  /**
883   * All done.
884   *
885   */
886  @Override
887  public void closeOp(boolean abort) throws HiveException {
888    LOG.trace("Join Op close");
889    for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) {
890      if (alw != null) {
891        alw.clear(); // clean up the temp files
892      }
893    }
894    storage.clear();
895  }
896
897  @Override
898  public String getName() {
899    return "JOIN";
900  }
901
902  /**
903   * @return the posToAliasMap
904   */
905  public Map<Integer, Set<String>> getPosToAliasMap() {
906    return posToAliasMap;
907  }
908
909  /**
910   * @param posToAliasMap
911   *          the posToAliasMap to set
912   */
913  public void setPosToAliasMap(Map<Integer, Set<String>> posToAliasMap) {
914    this.posToAliasMap = posToAliasMap;
915  }
916
917}