PageRenderTime 79ms CodeModel.GetById 17ms app.highlight 56ms RepoModel.GetById 2ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java

#
Java | 601 lines | 476 code | 63 blank | 62 comment | 126 complexity | 963c5d1aa35d22db119bcb983661a813 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements. See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership. The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License. You may obtain a copy of the License at
  9 *
 10 * http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hadoop.hive.ql.exec;
 19
 20import java.io.Serializable;
 21import java.util.ArrayList;
 22import java.util.HashMap;
 23import java.util.Iterator;
 24import java.util.List;
 25import java.util.Map;
 26
 27import org.apache.commons.logging.Log;
 28import org.apache.commons.logging.LogFactory;
 29import org.apache.hadoop.conf.Configuration;
 30import org.apache.hadoop.fs.Path;
 31import org.apache.hadoop.hive.conf.HiveConf;
 32import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
 33import org.apache.hadoop.hive.ql.metadata.HiveException;
 34import org.apache.hadoop.hive.ql.plan.FetchWork;
 35import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 36import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
 37import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 38import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;
 39import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 40import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 41import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 42import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 43import org.apache.hadoop.io.WritableComparable;
 44import org.apache.hadoop.io.WritableComparator;
 45import org.apache.hadoop.mapred.JobConf;
 46import org.apache.hadoop.util.ReflectionUtils;
 47
 48/**
 49 * Sorted Merge Map Join Operator.
 50 */
 51public class SMBMapJoinOperator extends AbstractMapJoinOperator<SMBJoinDesc> implements
 52    Serializable {
 53
  54  private static final long serialVersionUID = 1L;
  55
  56  private static final Log LOG = LogFactory.getLog(SMBMapJoinOperator.class
  57      .getName());
  58
  // Small-table ("map local") work and one fetch operator per small-table
  // alias; both are populated by initializeMapredLocalWork().
  59  private MapredLocalWork localWork = null;
  60  private Map<String, FetchOperator> fetchOperators;
  // Per-tag current join key, and the first key of the following group
  // (stashed by processKey() when a new key is seen).
  61  transient ArrayList<Object>[] keyWritables;
  62  transient ArrayList<Object>[] nextKeyWritables;
  // Per-tag row buffers: rows belonging to the next key group, and rows of
  // the current (candidate) key group being joined.
  63  RowContainer<ArrayList<Object>>[] nextGroupStorage;
  64  RowContainer<ArrayList<Object>>[] candidateStorage;
  65
  // Maps a join tag to its table alias (from the SMB join descriptor).
  66  transient Map<Byte, String> tagToAlias;
  // Per-tag flags: fetch operator exhausted / a new key group was observed.
  67  private transient boolean[] fetchOpDone;
  68  private transient boolean[] foundNextKeyGroup;
  // True once the small tables' first groups have been fetched.
  69  transient boolean firstFetchHappened = false;
  // Set by cleanUpInputFileChangedOp(); handled lazily in processOp/closeOp.
  70  private transient boolean inputFileChanged = false;
  71  transient boolean localWorkInited = false;
 72
 73  public SMBMapJoinOperator() {
 74  }
 75
 76  public SMBMapJoinOperator(AbstractMapJoinOperator<? extends MapJoinDesc> mapJoinOp) {
 77    super(mapJoinOp);
 78  }
 79
 80  @Override
 81  protected void initializeOp(Configuration hconf) throws HiveException {
 82    super.initializeOp(hconf);
 83
 84    firstRow = true;
 85
 86    closeCalled = false;
 87
 88    this.firstFetchHappened = false;
 89    this.inputFileChanged = false;
 90
 91    // get the largest table alias from order
 92    int maxAlias = 0;
 93    for (Byte alias: order) {
 94      if (alias > maxAlias) {
 95        maxAlias = alias;
 96      }
 97    }
 98    maxAlias += 1;
 99
100    nextGroupStorage = new RowContainer[maxAlias];
101    candidateStorage = new RowContainer[maxAlias];
102    keyWritables = new ArrayList[maxAlias];
103    nextKeyWritables = new ArrayList[maxAlias];
104    fetchOpDone = new boolean[maxAlias];
105    foundNextKeyGroup = new boolean[maxAlias];
106
107    int bucketSize = HiveConf.getIntVar(hconf,
108        HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
109    byte storePos = (byte) 0;
110    for (Byte alias : order) {
111      RowContainer rc = JoinUtil.getRowContainer(hconf,
112          rowContainerStandardObjectInspectors.get(storePos),
113          alias, bucketSize,spillTableDesc, conf,noOuterJoin);
114      nextGroupStorage[storePos] = rc;
115      RowContainer candidateRC = JoinUtil.getRowContainer(hconf,
116          rowContainerStandardObjectInspectors.get((byte)storePos),
117          alias,bucketSize,spillTableDesc, conf,noOuterJoin);
118      candidateStorage[alias] = candidateRC;
119      storePos++;
120    }
121    tagToAlias = conf.getTagToAlias();
122
123    for (Byte alias : order) {
124      if(alias != (byte) posBigTable) {
125        fetchOpDone[alias] = false;
126      }
127      foundNextKeyGroup[alias] = false;
128    }
129  }
130
131  @Override
132  public void initializeLocalWork(Configuration hconf) throws HiveException {
133    initializeMapredLocalWork(this.getConf(), hconf, this.getConf().getLocalWork(), LOG);
134    super.initializeLocalWork(hconf);
135  }
136
137  public void initializeMapredLocalWork(MapJoinDesc conf, Configuration hconf,
138      MapredLocalWork localWork, Log l4j) throws HiveException {
139    if (localWork == null || localWorkInited) {
140      return;
141    }
142    localWorkInited = true;
143    this.localWork = localWork;
144    fetchOperators = new HashMap<String, FetchOperator>();
145
146    Map<FetchOperator, JobConf> fetchOpJobConfMap = new HashMap<FetchOperator, JobConf>();
147    // create map local operators
148    for (Map.Entry<String, FetchWork> entry : localWork.getAliasToFetchWork()
149        .entrySet()) {
150      JobConf jobClone = new JobConf(hconf);
151      Operator<? extends Serializable> tableScan = localWork.getAliasToWork()
152      .get(entry.getKey());
153      if(tableScan instanceof TableScanOperator) {
154        ArrayList<Integer> list = ((TableScanOperator)tableScan).getNeededColumnIDs();
155        if (list != null) {
156          ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
157        }
158      } else {
159        ColumnProjectionUtils.setFullyReadColumns(jobClone);
160      }
161      FetchOperator fetchOp = new FetchOperator(entry.getValue(),jobClone);
162      fetchOpJobConfMap.put(fetchOp, jobClone);
163      fetchOperators.put(entry.getKey(), fetchOp);
164      l4j.info("fetchoperator for " + entry.getKey() + " created");
165    }
166
167    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
168      Operator<? extends Serializable> forwardOp = localWork.getAliasToWork()
169          .get(entry.getKey());
170      // All the operators need to be initialized before process
171      forwardOp.setExecContext(this.getExecContext());
172      FetchOperator fetchOp = entry.getValue();
173      JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
174      if (jobConf == null) {
175        jobConf = this.getExecContext().getJc();
176      }
177      forwardOp.initialize(jobConf, new ObjectInspector[] {fetchOp.getOutputObjectInspector()});
178      l4j.info("fetchoperator for " + entry.getKey() + " initialized");
179    }
180  }
181
182  // The input file has changed - load the correct hash bucket
183  @Override
184  public void cleanUpInputFileChangedOp() throws HiveException {
185    inputFileChanged = true;
186  }
187
188  @Override
189  public void processOp(Object row, int tag) throws HiveException {
190
191    if (tag == posBigTable) {
192      if (inputFileChanged) {
193        if (firstFetchHappened) {
194          // we need to first join and flush out data left by the previous file.
195          joinFinalLeftData();
196        }
197        // set up the fetch operator for the new input file.
198        for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
199          String alias = entry.getKey();
200          FetchOperator fetchOp = entry.getValue();
201          fetchOp.clearFetchContext();
202          setUpFetchOpContext(fetchOp, alias);
203        }
204        firstFetchHappened = false;
205        inputFileChanged = false;
206      }
207    }
208
209    if (!firstFetchHappened) {
210      firstFetchHappened = true;
211      // fetch the first group for all small table aliases
212      for (Byte t : order) {
213        if(t != (byte)posBigTable) {
214          fetchNextGroup(t);
215        }
216      }
217    }
218
219    byte alias = (byte) tag;
220    // compute keys and values as StandardObjects
221
222    // compute keys and values as StandardObjects
223    ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys.get(alias),
224        joinKeysObjectInspectors.get(alias));
225    ArrayList<Object> value = JoinUtil.computeValues(row, joinValues.get(alias),
226        joinValuesObjectInspectors.get(alias), joinFilters.get(alias),
227        joinFilterObjectInspectors.get(alias), noOuterJoin);
228
229
230    //have we reached a new key group?
231    boolean nextKeyGroup = processKey(alias, key);
232    if (nextKeyGroup) {
233      //assert this.nextGroupStorage.get(alias).size() == 0;
234      this.nextGroupStorage[alias].add(value);
235      foundNextKeyGroup[tag] = true;
236      if (tag != posBigTable) {
237        return;
238      }
239    }
240
241    reportProgress();
242    numMapRowsRead++;
243
244    // the big table has reached a new key group. try to let the small tables
245    // catch up with the big table.
246    if (nextKeyGroup) {
247      assert tag == (byte)posBigTable;
248      List<Byte> smallestPos = null;
249      do {
250        smallestPos = joinOneGroup();
251        //jump out the loop if we need input from the big table
252      } while (smallestPos != null && smallestPos.size() > 0
253          && !smallestPos.contains((byte)this.posBigTable));
254
255      return;
256    }
257
258    assert !nextKeyGroup;
259    candidateStorage[tag].add(value);
260  }
261
262  /*
263   * this happens either when the input file of the big table is changed or in
264   * closeop. It needs to fetch all the left data from the small tables and try
265   * to join them.
266   */
267  private void joinFinalLeftData() throws HiveException {
268    RowContainer bigTblRowContainer = this.candidateStorage[this.posBigTable];
269
270    boolean allFetchOpDone = allFetchOpDone();
271    // if all left data in small tables are less than and equal to the left data
272    // in big table, let's them catch up
273    while (bigTblRowContainer != null && bigTblRowContainer.size() > 0
274        && !allFetchOpDone) {
275      joinOneGroup();
276      bigTblRowContainer = this.candidateStorage[this.posBigTable];
277      allFetchOpDone = allFetchOpDone();
278    }
279
280    while (!allFetchOpDone) {
281      List<Byte> ret = joinOneGroup();
282      if (ret == null || ret.size() == 0) {
283        break;
284      }
285      reportProgress();
286      numMapRowsRead++;
287      allFetchOpDone = allFetchOpDone();
288    }
289
290    boolean dataInCache = true;
291    while (dataInCache) {
292      for (byte t : order) {
293        if (this.foundNextKeyGroup[t]
294            && this.nextKeyWritables[t] != null) {
295          promoteNextGroupToCandidate(t);
296        }
297      }
298      joinOneGroup();
299      dataInCache = false;
300      for (byte r : order) {
301        if (this.candidateStorage[r].size() > 0) {
302          dataInCache = true;
303          break;
304        }
305      }
306    }
307  }
308
309  private boolean allFetchOpDone() {
310    boolean allFetchOpDone = true;
311    for (Byte tag : order) {
312      if(tag == (byte) posBigTable) {
313        continue;
314      }
315      allFetchOpDone = allFetchOpDone && fetchOpDone[tag];
316    }
317    return allFetchOpDone;
318  }
319
320  private List<Byte> joinOneGroup() throws HiveException {
321    int smallestPos = -1;
322    smallestPos = findSmallestKey();
323    List<Byte> listOfNeedFetchNext = null;
324    if(smallestPos >= 0) {
325      listOfNeedFetchNext = joinObject(smallestPos);
326      if (listOfNeedFetchNext.size() > 0) {
327        // listOfNeedFetchNext contains all tables that we have joined data in their
328        // candidateStorage, and we need to clear candidate storage and promote their
329        // nextGroupStorage to candidateStorage and fetch data until we reach a
330        // new group.
331        for (Byte b : listOfNeedFetchNext) {
332          fetchNextGroup(b);
333        }
334      }
335    }
336    return listOfNeedFetchNext;
337  }
338
339  private List<Byte> joinObject(int smallestPos) throws HiveException {
340    List<Byte> needFetchList = new ArrayList<Byte>();
341    ArrayList<Object> smallKey = keyWritables[smallestPos];
342    needFetchList.add((byte)smallestPos);
343    this.storage.put((byte) smallestPos, this.candidateStorage[smallestPos]);
344    for (Byte i : order) {
345      if ((byte) smallestPos == i) {
346        continue;
347      }
348      ArrayList<Object> key = keyWritables[i];
349      if (key == null) {
350        putDummyOrEmpty(i);
351      } else {
352        int cmp = compareKeys(key, smallKey);
353        if (cmp == 0) {
354          this.storage.put((byte) i, this.candidateStorage[i]);
355          needFetchList.add(i);
356          continue;
357        } else {
358          putDummyOrEmpty(i);
359        }
360      }
361    }
362    checkAndGenObject();
363    for (Byte pos : needFetchList) {
364      this.candidateStorage[pos].clear();
365      this.keyWritables[pos] = null;
366    }
367    return needFetchList;
368  }
369
370  private void fetchNextGroup(Byte t) throws HiveException {
371    if (foundNextKeyGroup[t]) {
372      // first promote the next group to be the current group if we reached a
373      // new group in the previous fetch
374      if (this.nextKeyWritables[t] != null) {
375        promoteNextGroupToCandidate(t);
376      } else {
377        this.keyWritables[t] = null;
378        this.candidateStorage[t] = null;
379        this.nextGroupStorage[t] = null;
380      }
381      foundNextKeyGroup[t] = false;
382    }
383    //for the big table, we only need to promote the next group to the current group.
384    if(t == (byte)posBigTable) {
385      return;
386    }
387
388    //for tables other than the big table, we need to fetch more data until reach a new group or done.
389    while (!foundNextKeyGroup[t]) {
390      if (fetchOpDone[t]) {
391        break;
392      }
393      fetchOneRow(t);
394    }
395    if (!foundNextKeyGroup[t] && fetchOpDone[t]) {
396      this.nextKeyWritables[t] = null;
397    }
398  }
399
400  private void promoteNextGroupToCandidate(Byte t) throws HiveException {
401    this.keyWritables[t] = this.nextKeyWritables[t];
402    this.nextKeyWritables[t] = null;
403    RowContainer<ArrayList<Object>> oldRowContainer = this.candidateStorage[t];
404    oldRowContainer.clear();
405    this.candidateStorage[t] = this.nextGroupStorage[t];
406    this.nextGroupStorage[t] = oldRowContainer;
407  }
408
409  private int compareKeys (ArrayList<Object> k1, ArrayList<Object> k2) {
410    int ret = 0;
411
412   // join keys have difference sizes?
413    ret = k1.size() - k2.size();
414    if (ret != 0) {
415      return ret;
416    }
417
418    for (int i = 0; i < k1.size(); i++) {
419      WritableComparable key_1 = (WritableComparable) k1.get(i);
420      WritableComparable key_2 = (WritableComparable) k2.get(i);
421      if (key_1 == null && key_2 == null) {
422        return -1; // just return k1 is smaller than k2
423      } else if (key_1 == null) {
424        return -1;
425      } else if (key_2 == null) {
426        return 1;
427      }
428      ret = WritableComparator.get(key_1.getClass()).compare(key_1, key_2);
429      if(ret != 0) {
430        return ret;
431      }
432    }
433    return ret;
434  }
435
436  private void putDummyOrEmpty(Byte i) {
437    // put a empty list or null
438    if (noOuterJoin) {
439      storage.put(i, emptyList);
440    } else {
441      storage.put(i, dummyObjVectors[i.intValue()]);
442    }
443  }
444
445  private int findSmallestKey() {
446    byte index = -1;
447    ArrayList<Object> smallestOne = null;
448
449    for (byte i : order) {
450      ArrayList<Object> key = keyWritables[i];
451      if (key == null) {
452        continue;
453      }
454      if (smallestOne == null) {
455        smallestOne = key;
456        index = i;
457        continue;
458      }
459      int cmp = compareKeys(key, smallestOne);
460      if (cmp < 0) {
461        smallestOne = key;
462        index = i;
463        continue;
464      }
465    }
466    return index;
467  }
468
469  private boolean processKey(byte alias, ArrayList<Object> key)
470      throws HiveException {
471    ArrayList<Object> keyWritable = keyWritables[alias];
472    if (keyWritable == null) {
473      //the first group.
474      keyWritables[alias] = key;
475      return false;
476    } else {
477      int cmp = compareKeys(key, keyWritable);
478      if (cmp != 0) {
479        nextKeyWritables[alias] = key;
480        return true;
481      }
482      return false;
483    }
484  }
485
486  private void setUpFetchOpContext(FetchOperator fetchOp, String alias) {
487    String currentInputFile = this.getExecContext().getCurrentInputFile();
488    BucketMapJoinContext bucketMatcherCxt = this.localWork
489        .getBucketMapjoinContext();
490    Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt
491        .getBucketMatcherClass();
492    BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(
493        bucketMatcherCls, null);
494    this.getExecContext().setFileId(bucketMatcherCxt.getBucketFileNameMapping().get(currentInputFile));
495    LOG.info("set task id: " + this.getExecContext().getFileId());
496
497    bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt
498        .getAliasBucketFileNameMapping());
499    List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile,
500        bucketMatcherCxt.getMapJoinBigTableAlias(), alias);
501    Iterator<Path> iter = aliasFiles.iterator();
502    fetchOp.setupContext(iter, null);
503  }
504
505  private void fetchOneRow(byte tag) {
506    if (fetchOperators != null) {
507      String tble = this.tagToAlias.get(tag);
508      FetchOperator fetchOp = fetchOperators.get(tble);
509
510      Operator<? extends Serializable> forwardOp = localWork.getAliasToWork()
511          .get(tble);
512      try {
513        InspectableObject row = fetchOp.getNextRow();
514        if (row == null) {
515          this.fetchOpDone[tag] = true;
516          return;
517        }
518        forwardOp.process(row.o, 0);
519        // check if any operator had a fatal error or early exit during
520        // execution
521        if (forwardOp.getDone()) {
522          this.fetchOpDone[tag] = true;
523        }
524      } catch (Throwable e) {
525        if (e instanceof OutOfMemoryError) {
526          // Don't create a new object if we are already out of memory
527          throw (OutOfMemoryError) e;
528        } else {
529          throw new RuntimeException("Map local work failed", e);
530        }
531      }
532    }
533  }
534
535  transient boolean closeCalled = false;
536  @Override
537  public void closeOp(boolean abort) throws HiveException {
538    if(closeCalled) {
539      return;
540    }
541    closeCalled = true;
542
543    if (inputFileChanged || !firstFetchHappened) {
544      //set up the fetch operator for the new input file.
545      for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
546        String alias = entry.getKey();
547        FetchOperator fetchOp = entry.getValue();
548        fetchOp.clearFetchContext();
549        setUpFetchOpContext(fetchOp, alias);
550      }
551      firstFetchHappened = true;
552      for (Byte t : order) {
553        if(t != (byte)posBigTable) {
554          fetchNextGroup(t);
555        }
556      }
557      inputFileChanged = false;
558    }
559
560    joinFinalLeftData();
561
562    //clean up
563    for (Byte alias : order) {
564      if(alias != (byte) posBigTable) {
565        fetchOpDone[alias] = false;
566      }
567      foundNextKeyGroup[alias] = false;
568    }
569
570    localWorkInited = false;
571
572    super.closeOp(abort);
573    if (fetchOperators != null) {
574      for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
575        Operator<? extends Serializable> forwardOp = localWork
576            .getAliasToWork().get(entry.getKey());
577        forwardOp.close(abort);
578      }
579    }
580  }
581
582  @Override
583  protected boolean allInitializedParentsAreClosed() {
584    return true;
585  }
586
587  /**
588   * Implements the getName function for the Node Interface.
589   *
590   * @return the name of the operator
591   */
592  @Override
593  public String getName() {
594    return "MAPJOIN";
595  }
596
597  @Override
598  public OperatorType getType() {
599    return OperatorType.MAPJOIN;
600  }
601}