
/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * At runtime in Join, we output the big keys of one table into one
 * corresponding directory, and the rows with the same keys in the other
 * tables into different dirs (one for each table). The directories will
 * look like:
 * <ul>
 * <li>
 * dir-T1-bigkeys (containing big keys in T1), dir-T2-keys (containing keys
 * which are big in T1), dir-T3-keys (containing keys which are big in T1), ...
 * <li>
 * dir-T1-keys (containing keys which are big in T2), dir-T2-bigkeys
 * (containing big keys in T2), dir-T3-keys (containing keys which are big in
 * T2), ...
 * <li>
 * dir-T1-keys (containing keys which are big in T3), dir-T2-keys (containing
 * keys which are big in T3), dir-T3-bigkeys (containing big keys in T3), ...
 * </ul>
 *
 * <p>
 * For each skew key, we first write all values to a local tmp file. When the
 * current group ends, the local tmp file is uploaded to HDFS. Right now, we
 * use one file per skew key.
 *
 * <p>
 * For more info, please see https://issues.apache.org/jira/browse/HIVE-964.
 *
 */
public class SkewJoinHandler {

  protected static final Log LOG = LogFactory.getLog(SkewJoinHandler.class
      .getName());

  public int currBigKeyTag = -1;

  private int rowNumber = 0;
  private int currTag = -1;

  private int skewKeyDefinition = -1;
  private Map<Byte, StructObjectInspector> skewKeysTableObjectInspector = null;
  private Map<Byte, SerDe> tblSerializers = null;
  private Map<Byte, TableDesc> tblDesc = null;

  private Map<Byte, Boolean> bigKeysExistingMap = null;

  private LongWritable skewjoinFollowupJobs;

  private final boolean noOuterJoin;
  Configuration hconf = null;
  List<Object> dummyKey = null;
  String taskId;

  private final CommonJoinOperator<? extends Serializable> joinOp;
  private final int numAliases;
  private final JoinDesc conf;

  public SkewJoinHandler(CommonJoinOperator<? extends Serializable> joinOp) {
    this.joinOp = joinOp;
    numAliases = joinOp.numAliases;
    conf = joinOp.getConf();
    noOuterJoin = joinOp.noOuterJoin;
  }
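
  /**
   * Prepares skew-join handling: for each table alias, builds the key/value
   * object inspectors and instantiates the SerDe used to spill rows of a
   * skewed key, then points the join operator's row containers at them. If
   * any SerDe cannot be initialized, skew-join handling is disabled for this
   * operator.
   */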
  public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(
        numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, SerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);

    for (int i = 0; i < numAliases; i++) {
      Byte alias = conf.getTagOrder()[i];

      List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
      StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
          .toString());
      List<? extends StructField> keyFields = ((StructObjectInspector) sf
          .getFieldObjectInspector()).getAllStructFieldRefs();
      int keyFieldSize = keyFields.size();
      for (int k = 0; k < keyFieldSize; k++) {
        skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
      }
      TableDesc joinKeyDesc = desc.getKeyTableDesc();
      List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc
          .getProperties());
      StructObjectInspector structTblKeyInpector = ObjectInspectorFactory
          .getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);

      try {
        SerDe serializer = (SerDe) ReflectionUtils.newInstance(tblDesc.get(
            alias).getDeserializerClass(), null);
        serializer.initialize(null, tblDesc.get(alias).getProperties());
        tblSerializers.put((byte) i, serializer);
      } catch (SerDeException e) {
        LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
        joinOp.handleSkewJoin = false;
        break;
      }

      TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias,
          joinOp.spillTableDesc, conf, noOuterJoin);
      List<String> valColNames = new ArrayList<String>();
      if (valTblDesc != null) {
        valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
      }
      StructObjectInspector structTblValInpector = ObjectInspectorFactory
          .getStandardStructObjectInspector(valColNames,
          joinOp.joinValuesStandardObjectInspectors.get((byte) i));

      StructObjectInspector structTblInpector = ObjectInspectorFactory
          .getUnionStructObjectInspector(Arrays
          .asList(new StructObjectInspector[] {structTblValInpector, structTblKeyInpector}));
      skewKeysTableObjectInspector.put((byte) i, structTblInpector);
    }

    // reset rowcontainer's serde, objectinspector, and tableDesc.
    for (int i = 0; i < numAliases; i++) {
      Byte alias = conf.getTagOrder()[i];
      RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage.get(Byte
          .valueOf((byte) i));
      if (rc != null) {
        rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector
            .get((byte) i));
        rc.setTableDesc(tblDesc.get(alias));
      }
    }
  }
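
  /**
   * Flushes the current group if it contained a skewed key: the buffered
   * rows of the big key are copied to the big-key directory of the table
   * that produced them, and the matching rows of every other table go to
   * the corresponding small-key directories.
   */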
  void endGroup() throws IOException, HiveException {
    if (skewKeyInCurrentGroup) {

      String specPath = conf.getBigKeysDirMap().get((byte) currBigKeyTag);
      RowContainer<ArrayList<Object>> bigKey = (RowContainer) joinOp.storage.get(Byte
          .valueOf((byte) currBigKeyTag));
      Path outputPath = getOperatorOutputPath(specPath);
      FileSystem destFs = outputPath.getFileSystem(hconf);
      bigKey.copyToDFSDirecory(destFs, outputPath);

      for (int i = 0; i < numAliases; i++) {
        if (((byte) i) == currBigKeyTag) {
          continue;
        }
        RowContainer<ArrayList<Object>> values = (RowContainer) joinOp.storage.get(Byte
            .valueOf((byte) i));
        if (values != null) {
          specPath = conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get(
              (byte) i);
          values.copyToDFSDirecory(destFs, getOperatorOutputPath(specPath));
        }
      }
    }
    skewKeyInCurrentGroup = false;
  }

  boolean skewKeyInCurrentGroup = false;
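
  /**
   * Called once per row. Resets the per-tag row counter on group or tag
   * changes and, once the row count for a non-streamed table reaches the
   * configured skew threshold, marks the current key as a big key to be
   * spilled and processed by a follow-up job.
   */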
  public void handleSkew(int tag) throws HiveException {

    if (joinOp.newGroupStarted || tag != currTag) {
      rowNumber = 0;
      currTag = tag;
    }

    if (joinOp.newGroupStarted) {
      currBigKeyTag = -1;
      joinOp.newGroupStarted = false;
      dummyKey = (List<Object>) joinOp.getGroupKeyObject();
      skewKeyInCurrentGroup = false;

      for (int i = 0; i < numAliases; i++) {
        RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage.get(Byte
            .valueOf((byte) i));
        if (rc != null) {
          rc.setKeyObject(dummyKey);
        }
      }
    }

    rowNumber++;
    if (currBigKeyTag == -1 && (tag < numAliases - 1)
        && rowNumber >= skewKeyDefinition) {
      // the first time we see a big key. If this key is not in the last
      // table (the last table can always be streamed), we define that we
      // have a skew key now.
      currBigKeyTag = tag;
      updateSkewJoinJobCounter(tag);
      // right now we assume that the group key is an ArrayList object. It
      // may change in the future.
      if (!(dummyKey instanceof List)) {
        throw new RuntimeException("Bug in handling skew keys in a separate job.");
      }
      skewKeyInCurrentGroup = true;
      bigKeysExistingMap.put(Byte.valueOf((byte) currBigKeyTag), Boolean.TRUE);
    }
  }
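
  /**
   * On a normal close, flushes the last group and commits the spilled files
   * to their final paths; on abort, deletes whatever skew-key output this
   * task has written so far.
   */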
  public void close(boolean abort) throws HiveException {
    if (!abort) {
      try {
        endGroup();
        commit();
      } catch (IOException e) {
        throw new HiveException(e);
      }
    } else {
      for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) {

        // if we did not see a skew key in this table, continue to the next
        // table (guard against null to avoid an unboxing NPE for tags that
        // never recorded a big key)
        Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl));
        if (existing == null || !existing) {
          continue;
        }

        try {
          String specPath = conf.getBigKeysDirMap().get((byte) bigKeyTbl);
          Path bigKeyPath = getOperatorOutputPath(specPath);
          FileSystem fs = bigKeyPath.getFileSystem(hconf);
          delete(bigKeyPath, fs);
          for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) {
            if (((byte) smallKeyTbl) == bigKeyTbl) {
              continue;
            }
            specPath = conf.getSmallKeysDirMap().get((byte) bigKeyTbl).get(
                (byte) smallKeyTbl);
            delete(getOperatorOutputPath(specPath), fs);
          }
        } catch (IOException e) {
          throw new HiveException(e);
        }
      }
    }
  }
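
  /** Best-effort recursive delete used on abort; failures are only logged. */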
  private void delete(Path operatorOutputPath, FileSystem fs) {
    try {
      fs.delete(operatorOutputPath, true);
    } catch (IOException e) {
      LOG.error(e);
    }
  }
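
  /**
   * Moves every spilled big-key and small-key file that this task produced
   * from its temporary output path to its final path.
   */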
  private void commit() throws IOException {
    for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) {

      // if we did not see a skew key in this table, continue to the next
      // table; we are trying to avoid an extra call of FileSystem.exists()
      Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl));
      if (existing == null || !existing) {
        continue;
      }

      String specPath = conf.getBigKeysDirMap().get(
          Byte.valueOf((byte) bigKeyTbl));
      commitOutputPathToFinalPath(specPath, false);
      for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) {
        if (smallKeyTbl == bigKeyTbl) {
          continue;
        }
        specPath = conf.getSmallKeysDirMap()
            .get(Byte.valueOf((byte) bigKeyTbl)).get(
            Byte.valueOf((byte) smallKeyTbl));
        // the file may not exist, and we just ignore this
        commitOutputPathToFinalPath(specPath, true);
      }
    }
  }
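
  /**
   * Renames the temporary output file for specPath to its final name. A
   * missing source is an error unless ignoreNonExisting is set.
   */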
  private void commitOutputPathToFinalPath(String specPath,
      boolean ignoreNonExisting) throws IOException {
    Path outPath = getOperatorOutputPath(specPath);
    Path finalPath = getOperatorFinalPath(specPath);
    FileSystem fs = outPath.getFileSystem(hconf);
    // for the local file system in Hadoop-0.17.2.1, rename throws an
    // IOException when the file does not exist
    try {
      if (!fs.rename(outPath, finalPath)) {
        throw new IOException("Unable to rename output to: " + finalPath);
      }
    } catch (FileNotFoundException e) {
      if (!ignoreNonExisting) {
        throw e;
      }
    } catch (IOException e) {
      if (!fs.exists(outPath) && ignoreNonExisting) {
        return;
      }
      throw e;
    }
  }
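
  /**
   * Both paths live under the temp version of specPath: while running, the
   * task writes to a temp version of the task id; commit renames that file
   * to the plain task id in the same directory.
   */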
  private Path getOperatorOutputPath(String specPath) throws IOException {
    Path tmpPath = Utilities.toTempPath(specPath);
    return new Path(tmpPath, Utilities.toTempPath(taskId));
  }

  private Path getOperatorFinalPath(String specPath) throws IOException {
    Path tmpPath = Utilities.toTempPath(specPath);
    return new Path(tmpPath, taskId);
  }
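
  /** Wires in the counter that reports how many skew-join follow-up jobs are needed. */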
  public void setSkewJoinJobCounter(LongWritable skewjoinFollowupJobs) {
    this.skewjoinFollowupJobs = skewjoinFollowupJobs;
  }
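
  /** Increments the follow-up-jobs counter; the tag argument is currently unused. */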
  public void updateSkewJoinJobCounter(int tag) {
    this.skewjoinFollowupJobs.set(this.skewjoinFollowupJobs.get() + 1);
  }

}