PageRenderTime 40ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 1ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java

#
Java | 601 lines | 476 code | 63 blank | 62 comment | 126 complexity | 963c5d1aa35d22db119bcb983661a813 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec;
  19. import java.io.Serializable;
  20. import java.util.ArrayList;
  21. import java.util.HashMap;
  22. import java.util.Iterator;
  23. import java.util.List;
  24. import java.util.Map;
  25. import org.apache.commons.logging.Log;
  26. import org.apache.commons.logging.LogFactory;
  27. import org.apache.hadoop.conf.Configuration;
  28. import org.apache.hadoop.fs.Path;
  29. import org.apache.hadoop.hive.conf.HiveConf;
  30. import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
  31. import org.apache.hadoop.hive.ql.metadata.HiveException;
  32. import org.apache.hadoop.hive.ql.plan.FetchWork;
  33. import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
  34. import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
  35. import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
  36. import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;
  37. import org.apache.hadoop.hive.ql.plan.api.OperatorType;
  38. import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
  39. import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
  40. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  41. import org.apache.hadoop.io.WritableComparable;
  42. import org.apache.hadoop.io.WritableComparator;
  43. import org.apache.hadoop.mapred.JobConf;
  44. import org.apache.hadoop.util.ReflectionUtils;
  45. /**
  46. * Sorted Merge Map Join Operator.
  47. */
  48. public class SMBMapJoinOperator extends AbstractMapJoinOperator<SMBJoinDesc> implements
  49. Serializable {
  // Serialization version identifier for this Serializable operator.
  50. private static final long serialVersionUID = 1L;
  // Class-wide logger; also handed to initializeMapredLocalWork by initializeLocalWork.
  51. private static final Log LOG = LogFactory.getLog(SMBMapJoinOperator.class
  52. .getName());
  // Local work captured by initializeMapredLocalWork; consulted for the
  // alias -> operator map and the bucket map-join context.
  53. private MapredLocalWork localWork = null;
  // Alias -> fetch operator, populated by initializeMapredLocalWork.
  54. private Map<String, FetchOperator> fetchOperators;
  // Per-alias join key of the group currently held in candidateStorage.
  55. transient ArrayList<Object>[] keyWritables;
  // Per-alias join key of the following group, recorded by processKey when a
  // row's key differs from the current one.
  56. transient ArrayList<Object>[] nextKeyWritables;
  // Per-alias rows belonging to the following key group.
  57. RowContainer<ArrayList<Object>>[] nextGroupStorage;
  // Per-alias rows belonging to the current key group; handed to the join
  // storage in joinObject.
  58. RowContainer<ArrayList<Object>>[] candidateStorage;
  // Tag -> alias name, read from the operator descriptor in initializeOp and
  // used by fetchOneRow to locate the fetch operator for a tag.
  59. transient Map<Byte, String> tagToAlias;
  // Per-alias flag set by fetchOneRow once the fetch operator returns no more
  // rows or the alias's operator reports it is done.
  60. private transient boolean[] fetchOpDone;
  // Per-alias flag set by processOp when a row starting a new key group is seen.
  61. private transient boolean[] foundNextKeyGroup;
  // True once the first group of every non-big-table alias has been fetched
  // for the current input file.
  62. transient boolean firstFetchHappened = false;
  // Set by cleanUpInputFileChangedOp; cleared by processOp/closeOp after the
  // fetch operators have been re-pointed.
  63. private transient boolean inputFileChanged = false;
  // Prevents initializeMapredLocalWork from running twice; reset in closeOp.
  64. transient boolean localWorkInited = false;
  /**
   * Creates an operator with no state beyond the field defaults.
   */
  public SMBMapJoinOperator() {
    super();
  }

  /**
   * Creates an operator from an existing map-join operator by delegating to
   * the superclass constructor.
   *
   * @param mapJoinOp the map-join operator handed to the superclass
   */
  public SMBMapJoinOperator(AbstractMapJoinOperator<? extends MapJoinDesc> mapJoinOp) {
    super(mapJoinOp);
  }
  70. @Override
  71. protected void initializeOp(Configuration hconf) throws HiveException {
  72. super.initializeOp(hconf);
  73. firstRow = true;
  74. closeCalled = false;
  75. this.firstFetchHappened = false;
  76. this.inputFileChanged = false;
  77. // get the largest table alias from order
  78. int maxAlias = 0;
  79. for (Byte alias: order) {
  80. if (alias > maxAlias) {
  81. maxAlias = alias;
  82. }
  83. }
  84. maxAlias += 1;
  85. nextGroupStorage = new RowContainer[maxAlias];
  86. candidateStorage = new RowContainer[maxAlias];
  87. keyWritables = new ArrayList[maxAlias];
  88. nextKeyWritables = new ArrayList[maxAlias];
  89. fetchOpDone = new boolean[maxAlias];
  90. foundNextKeyGroup = new boolean[maxAlias];
  91. int bucketSize = HiveConf.getIntVar(hconf,
  92. HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE);
  93. byte storePos = (byte) 0;
  94. for (Byte alias : order) {
  95. RowContainer rc = JoinUtil.getRowContainer(hconf,
  96. rowContainerStandardObjectInspectors.get(storePos),
  97. alias, bucketSize,spillTableDesc, conf,noOuterJoin);
  98. nextGroupStorage[storePos] = rc;
  99. RowContainer candidateRC = JoinUtil.getRowContainer(hconf,
  100. rowContainerStandardObjectInspectors.get((byte)storePos),
  101. alias,bucketSize,spillTableDesc, conf,noOuterJoin);
  102. candidateStorage[alias] = candidateRC;
  103. storePos++;
  104. }
  105. tagToAlias = conf.getTagToAlias();
  106. for (Byte alias : order) {
  107. if(alias != (byte) posBigTable) {
  108. fetchOpDone[alias] = false;
  109. }
  110. foundNextKeyGroup[alias] = false;
  111. }
  112. }
  113. @Override
  114. public void initializeLocalWork(Configuration hconf) throws HiveException {
  115. initializeMapredLocalWork(this.getConf(), hconf, this.getConf().getLocalWork(), LOG);
  116. super.initializeLocalWork(hconf);
  117. }
  118. public void initializeMapredLocalWork(MapJoinDesc conf, Configuration hconf,
  119. MapredLocalWork localWork, Log l4j) throws HiveException {
  120. if (localWork == null || localWorkInited) {
  121. return;
  122. }
  123. localWorkInited = true;
  124. this.localWork = localWork;
  125. fetchOperators = new HashMap<String, FetchOperator>();
  126. Map<FetchOperator, JobConf> fetchOpJobConfMap = new HashMap<FetchOperator, JobConf>();
  127. // create map local operators
  128. for (Map.Entry<String, FetchWork> entry : localWork.getAliasToFetchWork()
  129. .entrySet()) {
  130. JobConf jobClone = new JobConf(hconf);
  131. Operator<? extends Serializable> tableScan = localWork.getAliasToWork()
  132. .get(entry.getKey());
  133. if(tableScan instanceof TableScanOperator) {
  134. ArrayList<Integer> list = ((TableScanOperator)tableScan).getNeededColumnIDs();
  135. if (list != null) {
  136. ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
  137. }
  138. } else {
  139. ColumnProjectionUtils.setFullyReadColumns(jobClone);
  140. }
  141. FetchOperator fetchOp = new FetchOperator(entry.getValue(),jobClone);
  142. fetchOpJobConfMap.put(fetchOp, jobClone);
  143. fetchOperators.put(entry.getKey(), fetchOp);
  144. l4j.info("fetchoperator for " + entry.getKey() + " created");
  145. }
  146. for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  147. Operator<? extends Serializable> forwardOp = localWork.getAliasToWork()
  148. .get(entry.getKey());
  149. // All the operators need to be initialized before process
  150. forwardOp.setExecContext(this.getExecContext());
  151. FetchOperator fetchOp = entry.getValue();
  152. JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
  153. if (jobConf == null) {
  154. jobConf = this.getExecContext().getJc();
  155. }
  156. forwardOp.initialize(jobConf, new ObjectInspector[] {fetchOp.getOutputObjectInspector()});
  157. l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  158. }
  159. }
  160. // The input file has changed - load the correct hash bucket
  161. @Override
  162. public void cleanUpInputFileChangedOp() throws HiveException {
  163. inputFileChanged = true;
  164. }
  165. @Override
  166. public void processOp(Object row, int tag) throws HiveException {
  167. if (tag == posBigTable) {
  168. if (inputFileChanged) {
  169. if (firstFetchHappened) {
  170. // we need to first join and flush out data left by the previous file.
  171. joinFinalLeftData();
  172. }
  173. // set up the fetch operator for the new input file.
  174. for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  175. String alias = entry.getKey();
  176. FetchOperator fetchOp = entry.getValue();
  177. fetchOp.clearFetchContext();
  178. setUpFetchOpContext(fetchOp, alias);
  179. }
  180. firstFetchHappened = false;
  181. inputFileChanged = false;
  182. }
  183. }
  184. if (!firstFetchHappened) {
  185. firstFetchHappened = true;
  186. // fetch the first group for all small table aliases
  187. for (Byte t : order) {
  188. if(t != (byte)posBigTable) {
  189. fetchNextGroup(t);
  190. }
  191. }
  192. }
  193. byte alias = (byte) tag;
  194. // compute keys and values as StandardObjects
  195. // compute keys and values as StandardObjects
  196. ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys.get(alias),
  197. joinKeysObjectInspectors.get(alias));
  198. ArrayList<Object> value = JoinUtil.computeValues(row, joinValues.get(alias),
  199. joinValuesObjectInspectors.get(alias), joinFilters.get(alias),
  200. joinFilterObjectInspectors.get(alias), noOuterJoin);
  201. //have we reached a new key group?
  202. boolean nextKeyGroup = processKey(alias, key);
  203. if (nextKeyGroup) {
  204. //assert this.nextGroupStorage.get(alias).size() == 0;
  205. this.nextGroupStorage[alias].add(value);
  206. foundNextKeyGroup[tag] = true;
  207. if (tag != posBigTable) {
  208. return;
  209. }
  210. }
  211. reportProgress();
  212. numMapRowsRead++;
  213. // the big table has reached a new key group. try to let the small tables
  214. // catch up with the big table.
  215. if (nextKeyGroup) {
  216. assert tag == (byte)posBigTable;
  217. List<Byte> smallestPos = null;
  218. do {
  219. smallestPos = joinOneGroup();
  220. //jump out the loop if we need input from the big table
  221. } while (smallestPos != null && smallestPos.size() > 0
  222. && !smallestPos.contains((byte)this.posBigTable));
  223. return;
  224. }
  225. assert !nextKeyGroup;
  226. candidateStorage[tag].add(value);
  227. }
  228. /*
  229. * this happens either when the input file of the big table is changed or in
  230. * closeop. It needs to fetch all the left data from the small tables and try
  231. * to join them.
  232. */
  233. private void joinFinalLeftData() throws HiveException {
  234. RowContainer bigTblRowContainer = this.candidateStorage[this.posBigTable];
  235. boolean allFetchOpDone = allFetchOpDone();
  236. // if all left data in small tables are less than and equal to the left data
  237. // in big table, let's them catch up
  238. while (bigTblRowContainer != null && bigTblRowContainer.size() > 0
  239. && !allFetchOpDone) {
  240. joinOneGroup();
  241. bigTblRowContainer = this.candidateStorage[this.posBigTable];
  242. allFetchOpDone = allFetchOpDone();
  243. }
  244. while (!allFetchOpDone) {
  245. List<Byte> ret = joinOneGroup();
  246. if (ret == null || ret.size() == 0) {
  247. break;
  248. }
  249. reportProgress();
  250. numMapRowsRead++;
  251. allFetchOpDone = allFetchOpDone();
  252. }
  253. boolean dataInCache = true;
  254. while (dataInCache) {
  255. for (byte t : order) {
  256. if (this.foundNextKeyGroup[t]
  257. && this.nextKeyWritables[t] != null) {
  258. promoteNextGroupToCandidate(t);
  259. }
  260. }
  261. joinOneGroup();
  262. dataInCache = false;
  263. for (byte r : order) {
  264. if (this.candidateStorage[r].size() > 0) {
  265. dataInCache = true;
  266. break;
  267. }
  268. }
  269. }
  270. }
  271. private boolean allFetchOpDone() {
  272. boolean allFetchOpDone = true;
  273. for (Byte tag : order) {
  274. if(tag == (byte) posBigTable) {
  275. continue;
  276. }
  277. allFetchOpDone = allFetchOpDone && fetchOpDone[tag];
  278. }
  279. return allFetchOpDone;
  280. }
  281. private List<Byte> joinOneGroup() throws HiveException {
  282. int smallestPos = -1;
  283. smallestPos = findSmallestKey();
  284. List<Byte> listOfNeedFetchNext = null;
  285. if(smallestPos >= 0) {
  286. listOfNeedFetchNext = joinObject(smallestPos);
  287. if (listOfNeedFetchNext.size() > 0) {
  288. // listOfNeedFetchNext contains all tables that we have joined data in their
  289. // candidateStorage, and we need to clear candidate storage and promote their
  290. // nextGroupStorage to candidateStorage and fetch data until we reach a
  291. // new group.
  292. for (Byte b : listOfNeedFetchNext) {
  293. fetchNextGroup(b);
  294. }
  295. }
  296. }
  297. return listOfNeedFetchNext;
  298. }
  299. private List<Byte> joinObject(int smallestPos) throws HiveException {
  300. List<Byte> needFetchList = new ArrayList<Byte>();
  301. ArrayList<Object> smallKey = keyWritables[smallestPos];
  302. needFetchList.add((byte)smallestPos);
  303. this.storage.put((byte) smallestPos, this.candidateStorage[smallestPos]);
  304. for (Byte i : order) {
  305. if ((byte) smallestPos == i) {
  306. continue;
  307. }
  308. ArrayList<Object> key = keyWritables[i];
  309. if (key == null) {
  310. putDummyOrEmpty(i);
  311. } else {
  312. int cmp = compareKeys(key, smallKey);
  313. if (cmp == 0) {
  314. this.storage.put((byte) i, this.candidateStorage[i]);
  315. needFetchList.add(i);
  316. continue;
  317. } else {
  318. putDummyOrEmpty(i);
  319. }
  320. }
  321. }
  322. checkAndGenObject();
  323. for (Byte pos : needFetchList) {
  324. this.candidateStorage[pos].clear();
  325. this.keyWritables[pos] = null;
  326. }
  327. return needFetchList;
  328. }
  329. private void fetchNextGroup(Byte t) throws HiveException {
  330. if (foundNextKeyGroup[t]) {
  331. // first promote the next group to be the current group if we reached a
  332. // new group in the previous fetch
  333. if (this.nextKeyWritables[t] != null) {
  334. promoteNextGroupToCandidate(t);
  335. } else {
  336. this.keyWritables[t] = null;
  337. this.candidateStorage[t] = null;
  338. this.nextGroupStorage[t] = null;
  339. }
  340. foundNextKeyGroup[t] = false;
  341. }
  342. //for the big table, we only need to promote the next group to the current group.
  343. if(t == (byte)posBigTable) {
  344. return;
  345. }
  346. //for tables other than the big table, we need to fetch more data until reach a new group or done.
  347. while (!foundNextKeyGroup[t]) {
  348. if (fetchOpDone[t]) {
  349. break;
  350. }
  351. fetchOneRow(t);
  352. }
  353. if (!foundNextKeyGroup[t] && fetchOpDone[t]) {
  354. this.nextKeyWritables[t] = null;
  355. }
  356. }
  357. private void promoteNextGroupToCandidate(Byte t) throws HiveException {
  358. this.keyWritables[t] = this.nextKeyWritables[t];
  359. this.nextKeyWritables[t] = null;
  360. RowContainer<ArrayList<Object>> oldRowContainer = this.candidateStorage[t];
  361. oldRowContainer.clear();
  362. this.candidateStorage[t] = this.nextGroupStorage[t];
  363. this.nextGroupStorage[t] = oldRowContainer;
  364. }
  365. private int compareKeys (ArrayList<Object> k1, ArrayList<Object> k2) {
  366. int ret = 0;
  367. // join keys have difference sizes?
  368. ret = k1.size() - k2.size();
  369. if (ret != 0) {
  370. return ret;
  371. }
  372. for (int i = 0; i < k1.size(); i++) {
  373. WritableComparable key_1 = (WritableComparable) k1.get(i);
  374. WritableComparable key_2 = (WritableComparable) k2.get(i);
  375. if (key_1 == null && key_2 == null) {
  376. return -1; // just return k1 is smaller than k2
  377. } else if (key_1 == null) {
  378. return -1;
  379. } else if (key_2 == null) {
  380. return 1;
  381. }
  382. ret = WritableComparator.get(key_1.getClass()).compare(key_1, key_2);
  383. if(ret != 0) {
  384. return ret;
  385. }
  386. }
  387. return ret;
  388. }
  389. private void putDummyOrEmpty(Byte i) {
  390. // put a empty list or null
  391. if (noOuterJoin) {
  392. storage.put(i, emptyList);
  393. } else {
  394. storage.put(i, dummyObjVectors[i.intValue()]);
  395. }
  396. }
  397. private int findSmallestKey() {
  398. byte index = -1;
  399. ArrayList<Object> smallestOne = null;
  400. for (byte i : order) {
  401. ArrayList<Object> key = keyWritables[i];
  402. if (key == null) {
  403. continue;
  404. }
  405. if (smallestOne == null) {
  406. smallestOne = key;
  407. index = i;
  408. continue;
  409. }
  410. int cmp = compareKeys(key, smallestOne);
  411. if (cmp < 0) {
  412. smallestOne = key;
  413. index = i;
  414. continue;
  415. }
  416. }
  417. return index;
  418. }
  419. private boolean processKey(byte alias, ArrayList<Object> key)
  420. throws HiveException {
  421. ArrayList<Object> keyWritable = keyWritables[alias];
  422. if (keyWritable == null) {
  423. //the first group.
  424. keyWritables[alias] = key;
  425. return false;
  426. } else {
  427. int cmp = compareKeys(key, keyWritable);
  428. if (cmp != 0) {
  429. nextKeyWritables[alias] = key;
  430. return true;
  431. }
  432. return false;
  433. }
  434. }
  435. private void setUpFetchOpContext(FetchOperator fetchOp, String alias) {
  436. String currentInputFile = this.getExecContext().getCurrentInputFile();
  437. BucketMapJoinContext bucketMatcherCxt = this.localWork
  438. .getBucketMapjoinContext();
  439. Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt
  440. .getBucketMatcherClass();
  441. BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(
  442. bucketMatcherCls, null);
  443. this.getExecContext().setFileId(bucketMatcherCxt.getBucketFileNameMapping().get(currentInputFile));
  444. LOG.info("set task id: " + this.getExecContext().getFileId());
  445. bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt
  446. .getAliasBucketFileNameMapping());
  447. List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile,
  448. bucketMatcherCxt.getMapJoinBigTableAlias(), alias);
  449. Iterator<Path> iter = aliasFiles.iterator();
  450. fetchOp.setupContext(iter, null);
  451. }
  452. private void fetchOneRow(byte tag) {
  453. if (fetchOperators != null) {
  454. String tble = this.tagToAlias.get(tag);
  455. FetchOperator fetchOp = fetchOperators.get(tble);
  456. Operator<? extends Serializable> forwardOp = localWork.getAliasToWork()
  457. .get(tble);
  458. try {
  459. InspectableObject row = fetchOp.getNextRow();
  460. if (row == null) {
  461. this.fetchOpDone[tag] = true;
  462. return;
  463. }
  464. forwardOp.process(row.o, 0);
  465. // check if any operator had a fatal error or early exit during
  466. // execution
  467. if (forwardOp.getDone()) {
  468. this.fetchOpDone[tag] = true;
  469. }
  470. } catch (Throwable e) {
  471. if (e instanceof OutOfMemoryError) {
  472. // Don't create a new object if we are already out of memory
  473. throw (OutOfMemoryError) e;
  474. } else {
  475. throw new RuntimeException("Map local work failed", e);
  476. }
  477. }
  478. }
  479. }
  480. transient boolean closeCalled = false;
  481. @Override
  482. public void closeOp(boolean abort) throws HiveException {
  483. if(closeCalled) {
  484. return;
  485. }
  486. closeCalled = true;
  487. if (inputFileChanged || !firstFetchHappened) {
  488. //set up the fetch operator for the new input file.
  489. for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  490. String alias = entry.getKey();
  491. FetchOperator fetchOp = entry.getValue();
  492. fetchOp.clearFetchContext();
  493. setUpFetchOpContext(fetchOp, alias);
  494. }
  495. firstFetchHappened = true;
  496. for (Byte t : order) {
  497. if(t != (byte)posBigTable) {
  498. fetchNextGroup(t);
  499. }
  500. }
  501. inputFileChanged = false;
  502. }
  503. joinFinalLeftData();
  504. //clean up
  505. for (Byte alias : order) {
  506. if(alias != (byte) posBigTable) {
  507. fetchOpDone[alias] = false;
  508. }
  509. foundNextKeyGroup[alias] = false;
  510. }
  511. localWorkInited = false;
  512. super.closeOp(abort);
  513. if (fetchOperators != null) {
  514. for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
  515. Operator<? extends Serializable> forwardOp = localWork
  516. .getAliasToWork().get(entry.getKey());
  517. forwardOp.close(abort);
  518. }
  519. }
  520. }
  521. @Override
  522. protected boolean allInitializedParentsAreClosed() {
  523. return true;
  524. }
  525. /**
  526. * Implements the getName function for the Node Interface.
  527. *
  528. * @return the name of the operator
  529. */
  530. @Override
  531. public String getName() {
  532. return "MAPJOIN";
  533. }
  534. @Override
  535. public OperatorType getType() {
  536. return OperatorType.MAPJOIN;
  537. }
  538. }