
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
import org.apache.hadoop.hive.ql.exec.persistence.AbstractMapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Map-side join operator implementation. Holds each small table in memory as a
 * hash table (loaded from the dump files written by {@link HashTableSinkOperator})
 * and streams the rows of the big table through to probe those hash tables.
 */
public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implements Serializable {
  private static final long serialVersionUID = 1L;
  private static final Log LOG = LogFactory.getLog(MapJoinOperator.class.getName());

  protected transient Map<Byte, HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>> mapJoinTables;

  private static final transient String[] FATAL_ERR_MSG = {
      null, // counter value 0 means no error
      "Mapside join size exceeds hive.mapjoin.maxsize. "
          + "Please increase that or remove the mapjoin hint."};

  protected transient Map<Byte, MapJoinRowContainer<ArrayList<Object>>> rowContainerMap;
  transient int metadataKeyTag;
  transient int[] metadataValueTag;
  transient int maxMapJoinSize;
  private int bigTableAlias;

  public MapJoinOperator() {
  }

  public MapJoinOperator(AbstractMapJoinOperator<? extends MapJoinDesc> mjop) {
    super(mjop);
  }

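  /**
   * Sets up one in-memory hash table ({@link HashMapWrapper}) and one row
   * container per small-table alias; the big table at posBigTable is streamed
   * through processOp and therefore gets neither.
   */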
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {

    super.initializeOp(hconf);

    maxMapJoinSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAXMAPJOINSIZE);

    metadataValueTag = new int[numAliases];
    for (int pos = 0; pos < numAliases; pos++) {
      metadataValueTag[pos] = -1;
    }

    metadataKeyTag = -1;
    bigTableAlias = order[posBigTable];

    mapJoinTables = new HashMap<Byte, HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>>();
    rowContainerMap = new HashMap<Byte, MapJoinRowContainer<ArrayList<Object>>>();
    // initialize the hash tables for the other (small) tables
    for (int pos = 0; pos < numAliases; pos++) {
      if (pos == posBigTable) {
        continue;
      }

      HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashTable = new HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>();

      mapJoinTables.put(Byte.valueOf((byte) pos), hashTable);
      MapJoinRowContainer<ArrayList<Object>> rowContainer = new MapJoinRowContainer<ArrayList<Object>>();
      rowContainerMap.put(Byte.valueOf((byte) pos), rowContainer);
    }
  }

  @Override
  protected void fatalErrorMessage(StringBuilder errMsg, long counterCode) {
    errMsg.append("Operator " + getOperatorId() + " (id=" + id + "): "
        + FATAL_ERR_MSG[(int) counterCode]);
  }

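  /**
   * Registers, in MapJoinMetaData, a deserialization context (a standard
   * WRITABLE-copied ObjectInspector plus a SerDe) for the join key (tag -1)
   * and for the values of every small-table alias, so that hash table entries
   * read back from disk can be decoded.
   */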
  public void generateMapMetaData() throws HiveException, SerDeException {
    // generate the metadata for the key; the index for the key is -1
    TableDesc keyTableDesc = conf.getKeyTblDesc();
    SerDe keySerializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(),
        null);
    keySerializer.initialize(null, keyTableDesc.getProperties());
    MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
        ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
            ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, hconf));

    // the index for the values is just the alias
    for (int tag = 0; tag < order.length; tag++) {
      int alias = (int) order[tag];

      if (alias == this.bigTableAlias) {
        continue;
      }

      TableDesc valueTableDesc = conf.getValueTblDescs().get(tag);
      SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(),
          null);
      valueSerDe.initialize(null, valueTableDesc.getProperties());

      MapJoinMetaData.put(Integer.valueOf(alias), new HashTableSinkObjectCtx(ObjectInspectorUtils
          .getStandardObjectInspector(valueSerDe.getObjectInspector(),
              ObjectInspectorCopyOption.WRITABLE), valueSerDe, valueTableDesc, hconf));
    }
  }

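  /**
   * Locates the directory holding the hash table dump files (the local work's
   * tmp dir when running with a local JobTracker, otherwise the stage archive
   * unpacked from the DistributedCache) and re-initializes each small table's
   * HashMapWrapper from its per-alias file.
   */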
  private void loadHashTable() throws HiveException {
    boolean localMode = HiveConf.getVar(hconf, HiveConf.ConfVars.HADOOPJT).equals("local");
    String baseDir = null;
    HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashtable;
    Byte pos;

    String currentInputFile = HiveConf.getVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME);
    LOG.info("******* Load from HashTable File: input : " + currentInputFile);

    String currentFileName;

    if (this.getExecContext().getLocalWork().getInputFileChangeSensitive()) {
      currentFileName = this.getFileName(currentInputFile);
    } else {
      currentFileName = "-";
    }

    try {
      if (localMode) {
        baseDir = this.getExecContext().getLocalWork().getTmpFileURI();
      } else {
        Path[] localArchives;
        String stageID = this.getExecContext().getLocalWork().getStageID();
        String suffix = Utilities.generateTarFileName(stageID);
        FileSystem localFs = FileSystem.getLocal(hconf);
        localArchives = DistributedCache.getLocalCacheArchives(this.hconf);
        Path archive;
        for (int j = 0; j < localArchives.length; j++) {
          archive = localArchives[j];
          if (!archive.getName().endsWith(suffix)) {
            continue;
          }
          Path archiveLocalLink = archive.makeQualified(localFs);
          baseDir = archiveLocalLink.toUri().getPath();
        }
      }
      for (Map.Entry<Byte, HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>> entry : mapJoinTables
          .entrySet()) {
        pos = entry.getKey();
        hashtable = entry.getValue();
        String filePath = Utilities.generatePath(baseDir, pos, currentFileName);
        Path path = new Path(filePath);
        LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path.toString());
        hashtable.initilizePersistentHash(path.toUri().getPath());
      }
    } catch (Exception e) {
      LOG.error("Load Distributed Cache Error");
      throw new HiveException(e.getMessage());
    }
  }

  // Reload the hash tables whenever the mapper's input file changes.
  @Override
  public void cleanUpInputFileChangedOp() throws HiveException {
    try {
      if (firstRow) {
        // generate the map join metadata once, on the first row
        generateMapMetaData();
        firstRow = false;
      }

      loadHashTable();
    } catch (SerDeException e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }

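  /**
   * Probe phase: for each row of the streamed (big) table, computes the join
   * key and value as standard objects, looks the key up in every small table's
   * hash table, stages the matching rows (or an empty/dummy row list for outer
   * joins) in 'storage', and emits the joined output via checkAndGenObject().
   */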
  @Override
  public void processOp(Object row, int tag) throws HiveException {

    try {
      if (firstRow) {
        // generate the map metadata
        generateMapMetaData();
        firstRow = false;
      }

      // get the alias
      alias = order[tag];

      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      // compute keys and values as StandardObjects
      AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys(row, joinKeys.get(alias),
          joinKeysObjectInspectors.get(alias));
      ArrayList<Object> value = JoinUtil.computeValues(row, joinValues.get(alias),
          joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors
              .get(alias), noOuterJoin);

      // add the value to the ArrayList
      storage.get((byte) tag).add(value);

      for (Byte pos : order) {
        if (pos.intValue() != tag) {
          MapJoinObjectValue o = mapJoinTables.get(pos).get(key);
          MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos);

          // no match for this key, or the join key contains nulls
          if (o == null || key.hasAnyNulls()) {
            if (noOuterJoin) {
              storage.put(pos, emptyList);
            } else {
              storage.put(pos, dummyObjVectors[pos.intValue()]);
            }
          } else {
            rowContainer.reset(o.getObj());
            storage.put(pos, rowContainer);
          }
        }
      }

      // generate the output records
      checkAndGenObject();

      // done with the row
      storage.get((byte) tag).clear();

      for (Byte pos : order) {
        if (pos.intValue() != tag) {
          storage.put(pos, null);
        }
      }

    } catch (SerDeException e) {
      e.printStackTrace();
      throw new HiveException(e);
    }
  }

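  /**
   * Returns the last component of the given path; used to select the
   * per-input-file hash table dump when the plan is input-file sensitive.
   */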
  private String getFileName(String path) {
    if (path == null || path.length() == 0) {
      return null;
    }

    int lastSeparator = path.lastIndexOf(Path.SEPARATOR) + 1;
    String fileName = path.substring(lastSeparator);
    return fileName;
  }

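  /**
   * Closes every small-table hash table, releasing its resources, before the
   * normal operator shutdown.
   */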
  @Override
  public void closeOp(boolean abort) throws HiveException {
    if (mapJoinTables != null) {
      for (HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashTable : mapJoinTables.values()) {
        hashTable.close();
      }
    }
    super.closeOp(abort);
  }

  /**
   * Implements the getName function for the Node interface.
   *
   * @return the name of the operator
   */
  @Override
  public String getName() {
    return "MAPJOIN";
  }

  @Override
  public OperatorType getType() {
    return OperatorType.MAPJOIN;
  }
}