/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
Java | 917 lines | 674 code | 118 blank | 125 comment | 159 complexity | d6fd62d3562776f7b3c7c429521ca302 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.hive.ql.exec;
- import java.io.Serializable;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hive.conf.HiveConf;
- import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
- import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
- import org.apache.hadoop.hive.ql.metadata.HiveException;
- import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
- import org.apache.hadoop.hive.ql.plan.JoinDesc;
- import org.apache.hadoop.hive.ql.plan.TableDesc;
- import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
- import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
- import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
- import org.apache.hadoop.io.BooleanWritable;
- /**
- * Join operator implementation.
- */
- public abstract class CommonJoinOperator<T extends JoinDesc> extends
- Operator<T> implements Serializable {
- private static final long serialVersionUID = 1L;
- protected static final Log LOG = LogFactory.getLog(CommonJoinOperator.class
- .getName());
- /**
- * IntermediateObject.
- *
- */
- public static class IntermediateObject {
- ArrayList<Object>[] objs;
- int curSize;
- public IntermediateObject(ArrayList<Object>[] objs, int curSize) {
- this.objs = objs;
- this.curSize = curSize;
- }
- public ArrayList<Object>[] getObjs() {
- return objs;
- }
- public int getCurSize() {
- return curSize;
- }
- public void pushObj(ArrayList<Object> newObj) {
- objs[curSize++] = newObj;
- }
- public void popObj() {
- curSize--;
- }
- public Object topObj() {
- return objs[curSize - 1];
- }
- }
- protected transient int numAliases; // number of aliases
- /**
- * The expressions for join inputs.
- */
- protected transient Map<Byte, List<ExprNodeEvaluator>> joinValues;
- /**
- * The filters for join
- */
- protected transient Map<Byte, List<ExprNodeEvaluator>> joinFilters;
- /**
- * The ObjectInspectors for the join inputs.
- */
- protected transient Map<Byte, List<ObjectInspector>> joinValuesObjectInspectors;
- /**
- * The ObjectInspectors for join filters.
- */
- protected transient
- Map<Byte, List<ObjectInspector>> joinFilterObjectInspectors;
- /**
- * The standard ObjectInspectors for the join inputs.
- */
- protected transient Map<Byte, List<ObjectInspector>> joinValuesStandardObjectInspectors;
- /**
- * The standard ObjectInspectors for the row container.
- */
- protected transient
- Map<Byte, List<ObjectInspector>> rowContainerStandardObjectInspectors;
- protected transient Byte[] order; // order in which the results should
- // be output
- protected transient JoinCondDesc[] condn;
- public transient boolean noOuterJoin;
- protected transient Object[] dummyObj; // for outer joins, contains the
- // potential nulls for the concerned
- // aliases
- protected transient RowContainer<ArrayList<Object>>[] dummyObjVectors; // empty
- // rows
- // for
- // each
- // table
- protected transient int totalSz; // total size of the composite object
- // keys are the column names. basically this maps the position of the column
- // in
- // the output of the CommonJoinOperator to the input columnInfo.
- private transient Map<Integer, Set<String>> posToAliasMap;
- transient LazyBinarySerDe[] spillTableSerDe;
- protected transient Map<Byte, TableDesc> spillTableDesc; // spill tables are
- // used if the join
- // input is too large
- // to fit in memory
- HashMap<Byte, AbstractRowContainer<ArrayList<Object>>> storage; // map b/w table alias
- // to RowContainer
- int joinEmitInterval = -1;
- int joinCacheSize = 0;
- int nextSz = 0;
- transient Byte lastAlias = null;
- transient boolean handleSkewJoin = false;
- protected transient int countAfterReport;
- protected transient int heartbeatInterval;
- protected static final int NOTSKIPBIGTABLE = -1;
- public CommonJoinOperator() {
- }
/**
 * Copy constructor: makes this operator share the state of {@code clone}.
 *
 * <p>The copy is shallow - collections, arrays and row containers are shared
 * by reference with the source operator, not duplicated. Fields that are not
 * listed below (for example {@code joinValues}, {@code order} and the
 * value/row-container object inspectors) keep their default values.
 *
 * @param clone operator whose state is copied
 */
public CommonJoinOperator(CommonJoinOperator<T> clone) {
  // Each field is assigned exactly once; the original assigned operatorId
  // and inputRows twice.
  this.joinEmitInterval = clone.joinEmitInterval;
  this.joinCacheSize = clone.joinCacheSize;
  this.nextSz = clone.nextSz;
  this.childOperators = clone.childOperators;
  this.parentOperators = clone.parentOperators;
  this.counterNames = clone.counterNames;
  this.counterNameToEnum = clone.counterNameToEnum;
  this.done = clone.done;
  this.operatorId = clone.operatorId;
  this.storage = clone.storage;
  this.condn = clone.condn;
  this.conf = clone.getConf();
  this.setSchema(clone.getSchema());
  this.alias = clone.alias;
  this.beginTime = clone.beginTime;
  this.inputRows = clone.inputRows;
  this.childOperatorsArray = clone.childOperatorsArray;
  this.childOperatorsTag = clone.childOperatorsTag;
  this.colExprMap = clone.colExprMap;
  this.counters = clone.counters;
  this.dummyObj = clone.dummyObj;
  this.dummyObjVectors = clone.dummyObjVectors;
  this.forwardCache = clone.forwardCache;
  this.groupKeyObject = clone.groupKeyObject;
  this.handleSkewJoin = clone.handleSkewJoin;
  this.hconf = clone.hconf;
  this.id = clone.id;
  this.inputObjInspectors = clone.inputObjInspectors;
  this.noOuterJoin = clone.noOuterJoin;
  this.numAliases = clone.numAliases;
  this.posToAliasMap = clone.posToAliasMap;
  this.spillTableDesc = clone.spillTableDesc;
  this.statsMap = clone.statsMap;
  this.joinFilters = clone.joinFilters;
  this.joinFilterObjectInspectors = clone.joinFilterObjectInspectors;
}
- protected static <T extends JoinDesc> ObjectInspector getJoinOutputObjectInspector(
- Byte[] order, Map<Byte, List<ObjectInspector>> aliasToObjectInspectors,
- T conf) {
- ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
- for (Byte alias : order) {
- List<ObjectInspector> oiList = aliasToObjectInspectors.get(alias);
- structFieldObjectInspectors.addAll(oiList);
- }
- StructObjectInspector joinOutputObjectInspector = ObjectInspectorFactory
- .getStandardStructObjectInspector(conf.getOutputColumnNames(),
- structFieldObjectInspectors);
- return joinOutputObjectInspector;
- }
- Configuration hconf;
- @Override
- protected void initializeOp(Configuration hconf) throws HiveException {
- this.handleSkewJoin = conf.getHandleSkewJoin();
- this.hconf = hconf;
- heartbeatInterval = HiveConf.getIntVar(hconf,
- HiveConf.ConfVars.HIVESENDHEARTBEAT);
- countAfterReport = 0;
- totalSz = 0;
- // Map that contains the rows for each alias
- storage = new HashMap<Byte, AbstractRowContainer<ArrayList<Object>>>();
- numAliases = conf.getExprs().size();
- joinValues = new HashMap<Byte, List<ExprNodeEvaluator>>();
- joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>();
- order = conf.getTagOrder();
- condn = conf.getConds();
- noOuterJoin = conf.isNoOuterJoin();
- totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(),
- order,NOTSKIPBIGTABLE);
- //process join filters
- joinFilters = new HashMap<Byte, List<ExprNodeEvaluator>>();
- JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE);
- joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,
- inputObjInspectors,NOTSKIPBIGTABLE);
- joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters,
- inputObjInspectors,NOTSKIPBIGTABLE);
- joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(
- joinValuesObjectInspectors,NOTSKIPBIGTABLE);
- if (noOuterJoin) {
- rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
- } else {
- Map<Byte, List<ObjectInspector>> rowContainerObjectInspectors =
- new HashMap<Byte, List<ObjectInspector>>();
- for (Byte alias : order) {
- ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>();
- rcOIs.addAll(joinValuesObjectInspectors.get(alias));
- // for each alias, add object inspector for boolean as the last element
- rcOIs.add(
- PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
- rowContainerObjectInspectors.put(alias, rcOIs);
- }
- rowContainerStandardObjectInspectors =
- JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE);
- }
- dummyObj = new Object[numAliases];
- dummyObjVectors = new RowContainer[numAliases];
- joinEmitInterval = HiveConf.getIntVar(hconf,
- HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
- joinCacheSize = HiveConf.getIntVar(hconf,
- HiveConf.ConfVars.HIVEJOINCACHESIZE);
- // construct dummy null row (indicating empty table) and
- // construct spill table serde which is used if input is too
- // large to fit into main memory.
- byte pos = 0;
- for (Byte alias : order) {
- int sz = conf.getExprs().get(alias).size();
- ArrayList<Object> nr = new ArrayList<Object>(sz);
- for (int j = 0; j < sz; j++) {
- nr.add(null);
- }
- if (!noOuterJoin) {
- // add whether the row is filtered or not
- // this value does not matter for the dummyObj
- // because the join values are already null
- nr.add(new BooleanWritable(false));
- }
- dummyObj[pos] = nr;
- // there should be only 1 dummy object in the RowContainer
- RowContainer<ArrayList<Object>> values = JoinUtil.getRowContainer(hconf,
- rowContainerStandardObjectInspectors.get((byte)pos),
- alias, 1, spillTableDesc, conf, noOuterJoin);
- values.add((ArrayList<Object>) dummyObj[pos]);
- dummyObjVectors[pos] = values;
- // if serde is null, the input doesn't need to be spilled out
- // e.g., the output columns does not contains the input table
- RowContainer rc = JoinUtil.getRowContainer(hconf,
- rowContainerStandardObjectInspectors.get((byte)pos),
- alias, joinCacheSize,spillTableDesc, conf,noOuterJoin);
- storage.put(pos, rc);
- pos++;
- }
- forwardCache = new Object[totalSz];
- outputObjInspector = getJoinOutputObjectInspector(order,
- joinValuesStandardObjectInspectors, conf);
- LOG.info("JOIN "
- + ((StructObjectInspector) outputObjInspector).getTypeName()
- + " totalsz = " + totalSz);
- }
- transient boolean newGroupStarted = false;
- @Override
- public void startGroup() throws HiveException {
- LOG.trace("Join: Starting new group");
- newGroupStarted = true;
- for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) {
- alw.clear();
- }
- }
- protected int getNextSize(int sz) {
- // A very simple counter to keep track of join entries for a key
- if (sz >= 100000) {
- return sz + 100000;
- }
- return 2 * sz;
- }
- protected transient Byte alias;
- transient Object[] forwardCache;
- private void createForwardJoinObject(IntermediateObject intObj,
- boolean[] nullsArr) throws HiveException {
- int p = 0;
- for (int i = 0; i < numAliases; i++) {
- Byte alias = order[i];
- int sz = joinValues.get(alias).size();
- if (nullsArr[i]) {
- for (int j = 0; j < sz; j++) {
- forwardCache[p++] = null;
- }
- } else {
- ArrayList<Object> obj = intObj.getObjs()[i];
- for (int j = 0; j < sz; j++) {
- forwardCache[p++] = obj.get(j);
- }
- }
- }
- forward(forwardCache, outputObjInspector);
- countAfterReport = 0;
- }
- private void copyOldArray(boolean[] src, boolean[] dest) {
- for (int i = 0; i < src.length; i++) {
- dest[i] = src[i];
- }
- }
- private ArrayList<boolean[]> joinObjectsInnerJoin(
- ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int left,
- boolean newObjNull) {
- if (newObjNull) {
- return resNulls;
- }
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- boolean oldObjNull = oldNulls[left];
- if (!oldObjNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = false;
- resNulls.add(newNulls);
- }
- }
- return resNulls;
- }
- /**
- * Implement semi join operator.
- */
- private ArrayList<boolean[]> joinObjectsLeftSemiJoin(
- ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int left,
- boolean newObjNull) {
- if (newObjNull) {
- return resNulls;
- }
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- boolean oldObjNull = oldNulls[left];
- if (!oldObjNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = false;
- resNulls.add(newNulls);
- }
- }
- return resNulls;
- }
- private ArrayList<boolean[]> joinObjectsLeftOuterJoin(
- ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int left,
- boolean newObjNull) {
- // newObj is null if is already null or
- // if the row corresponding to the left alias does not pass through filter
- int filterIndex = joinValues.get(order[left]).size();
- if(filterIndex < intObj.getObjs()[left].size()) {
- newObjNull = newObjNull || ((BooleanWritable) (intObj.getObjs()[left].get(filterIndex))).get();
- }
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- boolean oldObjNull = oldNulls[left];
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- if (oldObjNull) {
- newNulls[oldNulls.length] = true;
- } else {
- newNulls[oldNulls.length] = newObjNull;
- }
- resNulls.add(newNulls);
- }
- return resNulls;
- }
- private ArrayList<boolean[]> joinObjectsRightOuterJoin(
- ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int left,
- boolean newObjNull, boolean firstRow) {
- if (newObjNull) {
- return resNulls;
- }
- if (inputNulls.isEmpty() && firstRow) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- for (int i = 0; i < intObj.getCurSize() - 1; i++) {
- newNulls[i] = true;
- }
- newNulls[intObj.getCurSize() - 1] = newObjNull;
- resNulls.add(newNulls);
- return resNulls;
- }
- boolean allOldObjsNull = firstRow;
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- if (!oldNulls[left]) {
- allOldObjsNull = false;
- break;
- }
- }
- // if the row does not pass through filter, all old Objects are null
- if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) {
- allOldObjsNull = true;
- }
- nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- boolean oldObjNull = oldNulls[left] || allOldObjsNull;
- if (!oldObjNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = newObjNull;
- resNulls.add(newNulls);
- } else if (allOldObjsNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- for (int i = 0; i < intObj.getCurSize() - 1; i++) {
- newNulls[i] = true;
- }
- newNulls[oldNulls.length] = newObjNull;
- resNulls.add(newNulls);
- return resNulls;
- }
- }
- return resNulls;
- }
- private ArrayList<boolean[]> joinObjectsFullOuterJoin(
- ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int left,
- boolean newObjNull, boolean firstRow) {
- if (newObjNull) {
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = newObjNull;
- resNulls.add(newNulls);
- }
- return resNulls;
- }
- if (inputNulls.isEmpty() && firstRow) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- for (int i = 0; i < intObj.getCurSize() - 1; i++) {
- newNulls[i] = true;
- }
- newNulls[intObj.getCurSize() - 1] = newObjNull;
- resNulls.add(newNulls);
- return resNulls;
- }
- boolean allOldObjsNull = firstRow;
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- if (!oldNulls[left]) {
- allOldObjsNull = false;
- break;
- }
- }
- // if the row does not pass through filter, all old Objects are null
- if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) {
- allOldObjsNull = true;
- }
- boolean rhsPreserved = false;
- nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] oldNulls = nullsIter.next();
- // old obj is null even if the row corresponding to the left alias
- // does not pass through filter
- boolean oldObjNull = oldNulls[left] || ((BooleanWritable)
- (intObj.getObjs()[left].get(joinValues.get(order[left]).size()))).get()
- || allOldObjsNull;
- if (!oldObjNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = newObjNull;
- resNulls.add(newNulls);
- } else if (oldObjNull) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- copyOldArray(oldNulls, newNulls);
- newNulls[oldNulls.length] = true;
- resNulls.add(newNulls);
- if (allOldObjsNull && !rhsPreserved) {
- newNulls = new boolean[intObj.getCurSize()];
- for (int i = 0; i < oldNulls.length; i++) {
- newNulls[i] = true;
- }
- newNulls[oldNulls.length] = false;
- resNulls.add(newNulls);
- rhsPreserved = true;
- }
- }
- }
- return resNulls;
- }
- /*
- * The new input is added to the list of existing inputs. Each entry in the
- * array of inputNulls denotes the entries in the intermediate object to be
- * used. The intermediate object is augmented with the new object, and list of
- * nulls is changed appropriately. The list will contain all non-nulls for a
- * inner join. The outer joins are processed appropriately.
- */
- private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls,
- ArrayList<Object> newObj, IntermediateObject intObj, int joinPos,
- boolean firstRow) {
- ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>();
- boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
- if (joinPos == 0) {
- if (newObjNull) {
- return null;
- }
- boolean[] nulls = new boolean[1];
- nulls[0] = newObjNull;
- resNulls.add(nulls);
- return resNulls;
- }
- int left = condn[joinPos - 1].getLeft();
- int type = condn[joinPos - 1].getType();
- // process all nulls for RIGHT and FULL OUTER JOINS
- if (((type == JoinDesc.RIGHT_OUTER_JOIN) || (type == JoinDesc.FULL_OUTER_JOIN))
- && !newObjNull && (inputNulls == null) && firstRow) {
- boolean[] newNulls = new boolean[intObj.getCurSize()];
- for (int i = 0; i < newNulls.length - 1; i++) {
- newNulls[i] = true;
- }
- newNulls[newNulls.length - 1] = false;
- resNulls.add(newNulls);
- return resNulls;
- }
- if (inputNulls == null) {
- return null;
- }
- if (type == JoinDesc.INNER_JOIN) {
- return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left,
- newObjNull);
- } else if (type == JoinDesc.LEFT_OUTER_JOIN) {
- return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj,
- left, newObjNull);
- } else if (type == JoinDesc.RIGHT_OUTER_JOIN) {
- return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
- left, newObjNull, firstRow);
- } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
- return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj,
- left, newObjNull);
- }
- assert (type == JoinDesc.FULL_OUTER_JOIN);
- return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left,
- newObjNull, firstRow);
- }
- /*
- * genObject is a recursive function. For the inputs, a array of bitvectors is
- * maintained (inputNulls) where each entry denotes whether the element is to
- * be used or not (whether it is null or not). The size of the bitvector is
- * same as the number of inputs under consideration currently. When all inputs
- * are accounted for, the output is forwarded appropriately.
- */
- private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum,
- IntermediateObject intObj, boolean firstRow) throws HiveException {
- boolean childFirstRow = firstRow;
- boolean skipping = false;
- if (aliasNum < numAliases) {
- // search for match in the rhs table
- AbstractRowContainer<ArrayList<Object>> aliasRes = storage.get(order[aliasNum]);
- for (ArrayList<Object> newObj = aliasRes.first(); newObj != null; newObj = aliasRes
- .next()) {
- // check for skipping in case of left semi join
- if (aliasNum > 0
- && condn[aliasNum - 1].getType() == JoinDesc.LEFT_SEMI_JOIN
- && newObj != dummyObj[aliasNum]) { // successful match
- skipping = true;
- }
- intObj.pushObj(newObj);
- // execute the actual join algorithm
- ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
- aliasNum, childFirstRow);
- // recursively call the join the other rhs tables
- genObject(newNulls, aliasNum + 1, intObj, firstRow);
- intObj.popObj();
- firstRow = false;
- // if left-semi-join found a match, skipping the rest of the rows in the
- // rhs table of the semijoin
- if (skipping) {
- break;
- }
- }
- } else {
- if (inputNulls == null) {
- return;
- }
- Iterator<boolean[]> nullsIter = inputNulls.iterator();
- while (nullsIter.hasNext()) {
- boolean[] nullsVec = nullsIter.next();
- createForwardJoinObject(intObj, nullsVec);
- }
- }
- }
- /**
- * Forward a record of join results.
- *
- * @throws HiveException
- */
- @Override
- public void endGroup() throws HiveException {
- LOG.trace("Join Op: endGroup called: numValues=" + numAliases);
- checkAndGenObject();
- }
- private void genUniqueJoinObject(int aliasNum, int forwardCachePos)
- throws HiveException {
- AbstractRowContainer<ArrayList<Object>> alias = storage.get(order[aliasNum]);
- for (ArrayList<Object> row = alias.first(); row != null; row = alias.next()) {
- int sz = joinValues.get(order[aliasNum]).size();
- int p = forwardCachePos;
- for (int j = 0; j < sz; j++) {
- forwardCache[p++] = row.get(j);
- }
- if (aliasNum == numAliases - 1) {
- forward(forwardCache, outputObjInspector);
- countAfterReport = 0;
- } else {
- genUniqueJoinObject(aliasNum + 1, p);
- }
- }
- }
- private void genAllOneUniqueJoinObject()
- throws HiveException {
- int p = 0;
- for (int i = 0; i < numAliases; i++) {
- int sz = joinValues.get(order[i]).size();
- ArrayList<Object> obj = storage.get(order[i]).first();
- for (int j = 0; j < sz; j++) {
- forwardCache[p++] = obj.get(j);
- }
- }
- forward(forwardCache, outputObjInspector);
- countAfterReport = 0;
- }
- protected void checkAndGenObject() throws HiveException {
- if (condn[0].getType() == JoinDesc.UNIQUE_JOIN) {
- new IntermediateObject(new ArrayList[numAliases], 0);
- // Check if results need to be emitted.
- // Results only need to be emitted if there is a non-null entry in a table
- // that is preserved or if there are no non-null entries
- boolean preserve = false; // Will be true if there is a non-null entry
- // in a preserved table
- boolean hasNulls = false; // Will be true if there are null entries
- boolean allOne = true;
- for (int i = 0; i < numAliases; i++) {
- Byte alias = order[i];
- AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias);
- if (alw.size() != 1) {
- allOne = false;
- }
- if (alw.size() == 0) {
- alw.add((ArrayList<Object>) dummyObj[i]);
- hasNulls = true;
- } else if (condn[i].getPreserved()) {
- preserve = true;
- }
- }
- if (hasNulls && !preserve) {
- return;
- }
- if (allOne) {
- LOG.info("calling genAllOneUniqueJoinObject");
- genAllOneUniqueJoinObject();
- LOG.info("called genAllOneUniqueJoinObject");
- } else {
- LOG.trace("calling genUniqueJoinObject");
- genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
- }
- } else {
- // does any result need to be emitted
- boolean mayHasMoreThanOne = false;
- boolean hasEmpty = false;
- for (int i = 0; i < numAliases; i++) {
- Byte alias = order[i];
- AbstractRowContainer<ArrayList<Object>> alw = storage.get(alias);
- if (noOuterJoin) {
- if (alw.size() == 0) {
- LOG.trace("No data for alias=" + i);
- return;
- } else if (alw.size() > 1) {
- mayHasMoreThanOne = true;
- }
- } else {
- if (alw.size() == 0) {
- hasEmpty = true;
- alw.add((ArrayList<Object>) dummyObj[i]);
- } else if (!hasEmpty && alw.size() == 1) {
- ArrayList<Object> row = alw.first();
- int numValues = joinValues.get(alias).size();
- if (row == dummyObj[alias]
- || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) {
- hasEmpty = true;
- }
- } else {
- mayHasMoreThanOne = true;
- if (!hasEmpty) {
- int numValues = joinValues.get(alias).size();
- for (ArrayList<Object> row = alw.first(); row != null; row = alw.next()) {
- if (row == dummyObj[alias]
- || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) {
- hasEmpty = true;
- break;
- }
- }
- }
- }
- }
- }
- if (!hasEmpty && !mayHasMoreThanOne) {
- LOG.trace("calling genAllOneUniqueJoinObject");
- genAllOneUniqueJoinObject();
- LOG.trace("called genAllOneUniqueJoinObject");
- } else if (!hasEmpty) {
- LOG.trace("calling genUniqueJoinObject");
- genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
- } else {
- LOG.trace("calling genObject");
- genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0),
- true);
- LOG.trace("called genObject");
- }
- }
- }
- protected void reportProgress() {
- // Send some status periodically
- countAfterReport++;
- if ((countAfterReport % heartbeatInterval) == 0
- && (reporter != null)) {
- reporter.progress();
- countAfterReport = 0;
- }
- }
- /**
- * Returns true if the row does not pass through filters.
- */
- protected static Boolean isFiltered(Object row,
- List<ExprNodeEvaluator> filters, List<ObjectInspector> ois)
- throws HiveException {
- // apply join filters on the row.
- Boolean ret = false;
- for (int j = 0; j < filters.size(); j++) {
- Object condition = filters.get(j).evaluate(row);
- ret = (Boolean) ((PrimitiveObjectInspector)
- ois.get(j)).getPrimitiveJavaObject(condition);
- if (ret == null || !ret) {
- return true;
- }
- }
- return false;
- }
- /**
- * All done.
- *
- */
- @Override
- public void closeOp(boolean abort) throws HiveException {
- LOG.trace("Join Op close");
- for (AbstractRowContainer<ArrayList<Object>> alw : storage.values()) {
- if (alw != null) {
- alw.clear(); // clean up the temp files
- }
- }
- storage.clear();
- }
- @Override
- public String getName() {
- return "JOIN";
- }
- /**
- * @return the posToAliasMap
- */
- public Map<Integer, Set<String>> getPosToAliasMap() {
- return posToAliasMap;
- }
- /**
- * @param posToAliasMap
- * the posToAliasMap to set
- */
- public void setPosToAliasMap(Map<Integer, Set<String>> posToAliasMap) {
- this.posToAliasMap = posToAliasMap;
- }
- }