/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java

http://github.com/apache/hive
Java | 921 lines | 664 code | 129 blank | 128 comment | 139 complexity
Possible License(s): Apache-2.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.ptf;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.PTFPartition;
import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
import org.apache.hadoop.hive.ql.exec.PTFRollingPartition;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@SuppressWarnings("deprecation")
public class WindowingTableFunction extends TableFunctionEvaluator {

  public static final Logger LOG = LoggerFactory.getLogger(WindowingTableFunction.class.getName());

  static class WindowingFunctionInfoHelper {
    private boolean supportsWindow;

    WindowingFunctionInfoHelper() {
    }

    public WindowingFunctionInfoHelper(boolean supportsWindow) {
      this.supportsWindow = supportsWindow;
    }

    public boolean isSupportsWindow() {
      return supportsWindow;
    }

    public void setSupportsWindow(boolean supportsWindow) {
      this.supportsWindow = supportsWindow;
    }
  }

  StreamingState streamingState;
  RankLimit rnkLimitDef;
  // There is some information about the windowing functions that needs to be initialized
  // during query compilation and made available during the map/reduce tasks via
  // plan serialization.
  Map<String, WindowingFunctionInfoHelper> windowingFunctionHelpers = null;
  public Map<String, WindowingFunctionInfoHelper> getWindowingFunctionHelpers() {
    return windowingFunctionHelpers;
  }

  public void setWindowingFunctionHelpers(
      Map<String, WindowingFunctionInfoHelper> windowingFunctionHelpers) {
    this.windowingFunctionHelpers = windowingFunctionHelpers;
  }

  @SuppressWarnings({ "unchecked", "rawtypes" })
  @Override
  public void execute(PTFPartitionIterator<Object> pItr, PTFPartition outP) throws HiveException {
    ArrayList<List<?>> oColumns = new ArrayList<List<?>>();
    PTFPartition iPart = pItr.getPartition();
    StructObjectInspector inputOI = iPart.getOutputOI();

    WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
    for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
      boolean processWindow = processWindow(wFn.getWindowFrame());
      pItr.reset();
      if (!processWindow) {
        Object out = evaluateFunctionOnPartition(wFn, iPart);
        if (!wFn.isPivotResult()) {
          out = new SameList(iPart.size(), out);
        }
        oColumns.add((List<?>) out);
      } else {
        oColumns.add(executeFnwithWindow(wFn, iPart));
      }
    }

    /*
     * Output Columns in the following order
     * - the columns representing the output from Window Fns
     * - the input Rows columns
     */
    for (int i = 0; i < iPart.size(); i++) {
      ArrayList oRow = new ArrayList();
      Object iRow = iPart.getAt(i);
      for (int j = 0; j < oColumns.size(); j++) {
        oRow.add(oColumns.get(j).get(i));
      }
      for (StructField f : inputOI.getAllStructFieldRefs()) {
        oRow.add(inputOI.getStructFieldData(iRow, f));
      }
      outP.append(oRow);
    }
  }
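  // Illustrative note (not in the original source): for a query such as
  //   SELECT rank() OVER w, avg(sal) OVER w, name, sal FROM emp WINDOW w AS (...)
  // execute() above emits each output row laid out as
  //   [rankResult, avgResult, name, sal]
  // i.e. the window-function columns first, followed by the input row's columns.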
  // Evaluate the result given a partition and the row number to process
  private Object evaluateWindowFunction(WindowFunctionDef wFn, int rowToProcess, PTFPartition partition)
      throws HiveException {
    BasePartitionEvaluator partitionEval = wFn.getWFnEval()
        .getPartitionWindowingEvaluator(wFn.getWindowFrame(), partition, wFn.getArgs(), wFn.getOI(), nullsLast);
    return partitionEval.iterate(rowToProcess, ptfDesc.getLlInfo());
  }

  // Evaluate the result given a partition
  private Object evaluateFunctionOnPartition(WindowFunctionDef wFn,
      PTFPartition partition) throws HiveException {
    BasePartitionEvaluator partitionEval = wFn.getWFnEval()
        .getPartitionWindowingEvaluator(wFn.getWindowFrame(), partition, wFn.getArgs(), wFn.getOI(), nullsLast);
    return partitionEval.getPartitionAgg();
  }

  // Evaluate the function result for each row in the partition
  ArrayList<Object> executeFnwithWindow(
      WindowFunctionDef wFnDef,
      PTFPartition iPart)
      throws HiveException {
    ArrayList<Object> vals = new ArrayList<Object>();
    for (int i = 0; i < iPart.size(); i++) {
      Object out = evaluateWindowFunction(wFnDef, i, iPart);
      vals.add(out);
    }
    return vals;
  }

  private static boolean processWindow(WindowFrameDef frame) {
    if (frame == null) {
      return false;
    }
    if (frame.getStart().getAmt() == BoundarySpec.UNBOUNDED_AMOUNT &&
        frame.getEnd().getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
      return false;
    }
    return true;
  }
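  // Illustrative note (not in the original source): a frame of
  //   ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  // makes processWindow() return false, so the function is evaluated once over
  // the whole partition; a bounded frame such as
  //   ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
  // returns true, and the function is re-evaluated for each row's window.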
  private boolean streamingPossible(Configuration cfg, WindowFunctionDef wFnDef)
      throws HiveException {
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFnDef.getName());

    if (!wFnInfo.isSupportsWindow()) {
      return true;
    }

    BoundaryDef start = wdwFrame.getStart();
    BoundaryDef end = wdwFrame.getEnd();

    /*
     * Currently we are not handling dynamic sized windows implied by range
     * based windows.
     */
    if (wdwFrame.getWindowType() == WindowType.RANGE) {
      return false;
    }

    /*
     * Windows that are unbounded following don't benefit from Streaming.
     */
    if (end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
      return false;
    }

    /*
     * let function decide if it can handle this special case.
     */
    if (start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT) {
      return true;
    }

    int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
    if (windowLimit < (start.getAmt() + end.getAmt() + 1)) {
      return false;
    }

    return true;
  }
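  // Illustrative note (not in the original source): the last check caps the
  // window size at hive.join.cache.size. Assuming its default of 25000, a frame
  //   ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
  // needs only 2 + 2 + 1 = 5 buffered rows, well under the limit, so
  // streamingPossible() returns true for it.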
  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#canAcceptInputAsStream
   * ()
   *
   * WindowTableFunction supports streaming if every function meets one of
   * these conditions:
   * 1. The function implements ISupportStreamingModeForWindowing.
   * 2. Or getWindowingEvaluator returns a non-null object that implements
   *    ISupportStreamingModeForWindowing.
   * 3. Or it is an invocation on a 'fixed' window, i.e. no Unbounded
   *    Preceding or Following.
   */
  @SuppressWarnings("resource")
  private int[] setCanAcceptInputAsStream(Configuration cfg) throws HiveException {
    canAcceptInputAsStream = false;

    if (ptfDesc.getLlInfo().getLeadLagExprs() != null) {
      return null;
    }

    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    int startPos = Integer.MAX_VALUE;
    int endPos = Integer.MIN_VALUE;

    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
      WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
      WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
      GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
      boolean streamingPossible = streamingPossible(cfg, wFnDef);
      GenericUDAFEvaluator streamingEval = streamingPossible ? fnEval
          .getWindowingEvaluator(wdwFrame) : null;
      if (streamingEval != null
          && streamingEval instanceof ISupportStreamingModeForWindowing) {
        continue;
      }
      BoundaryDef start = wdwFrame.getStart();
      BoundaryDef end = wdwFrame.getEnd();
      if (wdwFrame.getWindowType() == WindowType.ROWS) {
        if (!end.isUnbounded() && !start.isUnbounded()) {
          startPos = Math.min(startPos, wdwFrame.getStart().getRelativeOffset());
          endPos = Math.max(endPos, wdwFrame.getEnd().getRelativeOffset());
          continue;
        }
      }
      return null;
    }

    int windowLimit = HiveConf.getIntVar(cfg, ConfVars.HIVEJOINCACHESIZE);
    if (windowLimit < (endPos - startPos + 1)) {
      return null;
    }

    canAcceptInputAsStream = true;
    return new int[] {startPos, endPos};
  }
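  // Illustrative note (not in the original source): the returned span is the
  // union of all fixed ROWS frames. For two functions with frames
  //   ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING  (relative offsets -2 .. +1)
  //   ROWS BETWEEN 5 PRECEDING AND 3 FOLLOWING  (relative offsets -5 .. +3)
  // the method would return {-5, +3}: the rolling buffer must retain 5 rows
  // behind and 3 rows ahead of the row being processed.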
  private void initializeWindowingFunctionInfoHelpers() throws SemanticException {
    // getWindowFunctionInfo() cannot be called during map/reduce tasks. So cache necessary
    // values during query compilation, and rely on plan serialization to bring this info
    // to the object during the map/reduce tasks.
    if (windowingFunctionHelpers != null) {
      return;
    }

    windowingFunctionHelpers = new HashMap<String, WindowingFunctionInfoHelper>();
    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
      WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
      WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getName());
      boolean supportsWindow = wFnInfo.isSupportsWindow();
      windowingFunctionHelpers.put(wFn.getName(), new WindowingFunctionInfoHelper(supportsWindow));
    }
  }

  @Override
  protected void setOutputOI(StructObjectInspector outputOI) {
    super.setOutputOI(outputOI);
    // Call here because at this point the WindowTableFunctionDef has been set
    try {
      initializeWindowingFunctionInfoHelpers();
    } catch (SemanticException err) {
      throw new RuntimeException("Unexpected error while setting up windowing function", err);
    }
  }

  private WindowingFunctionInfoHelper getWindowingFunctionInfoHelper(String fnName) {
    WindowingFunctionInfoHelper wFnInfoHelper = windowingFunctionHelpers.get(fnName);
    if (wFnInfoHelper == null) {
      // Should not happen
      throw new RuntimeException("No cached WindowingFunctionInfoHelper for " + fnName);
    }
    return wFnInfoHelper;
  }

  @Override
  public void initializeStreaming(Configuration cfg,
      StructObjectInspector inputOI, boolean isMapSide) throws HiveException {
    int[] span = setCanAcceptInputAsStream(cfg);
    if (!canAcceptInputAsStream) {
      return;
    }

    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
      WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
      WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
      GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
      GenericUDAFEvaluator streamingEval = fnEval
          .getWindowingEvaluator(wdwFrame);
      if (streamingEval != null) {
        wFnDef.setWFnEval(streamingEval);
        if (wFnDef.isPivotResult()) {
          ListObjectInspector listOI = (ListObjectInspector) wFnDef.getOI();
          wFnDef.setOI(listOI.getListElementObjectInspector());
        }
      }
    }

    if (tabDef.getRankLimit() != -1) {
      rnkLimitDef = new RankLimit(tabDef.getRankLimit(),
          tabDef.getRankLimitFunction(), tabDef.getWindowFunctions());
    }

    streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef,
        span[0], span[1]);
  }
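  // Illustrative note (not in the original source): a 'pivot result' function
  // (e.g. rank()) returns all of its results for a partition at once as a List,
  // so its OI is a ListObjectInspector. When switched to a streaming evaluator
  // it produces one value per row instead, which is why the OI is unwrapped to
  // the list's element OI above.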
  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#startPartition()
   */
  @Override
  public void startPartition() throws HiveException {
    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    streamingState.reset(tabDef);
  }
  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#processRow(java
   * .lang.Object)
   *
   * - hand the row to each function, provided there are enough rows for the
   *   function's window
   * - call getNextResult on each function
   * - output as many rows as possible, based on the minimum size of the
   *   output lists
   */
  @Override
  public List<Object> processRow(Object row) throws HiveException {
    /*
     * Once enough rows have been output, there is no need to process input rows.
     */
    if (streamingState.rankLimitReached()) {
      return null;
    }

    streamingState.rollingPart.append(row);
    // Get back converted row
    row = streamingState.rollingPart.getAt(streamingState.rollingPart.size() - 1);

    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) tableDef;
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
      WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
      GenericUDAFEvaluator fnEval = wFn.getWFnEval();

      int a = 0;
      if (wFn.getArgs() != null) {
        for (PTFExpressionDef arg : wFn.getArgs()) {
          streamingState.funcArgs[i][a++] = arg.getExprEvaluator().evaluate(row);
        }
      }

      if (fnEval != null &&
          fnEval instanceof ISupportStreamingModeForWindowing) {
        fnEval.aggregate(streamingState.aggBuffers[i], streamingState.funcArgs[i]);
        Object out = ((ISupportStreamingModeForWindowing) fnEval)
            .getNextResult(streamingState.aggBuffers[i]);
        if (out != null) {
          streamingState.fnOutputs[i]
              .add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null
                  : out);
        }
      } else {
        int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame());
        if (rowToProcess >= 0) {
          Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
          streamingState.fnOutputs[i].add(out);
        }
      }
    }

    List<Object> oRows = new ArrayList<Object>();
    while (true) {
      boolean hasRow = streamingState.hasOutputRow();
      if (!hasRow) {
        break;
      }
      oRows.add(streamingState.nextOutputRow());
    }

    return oRows.size() == 0 ? null : oRows;
  }
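  // Illustrative note (not in the original source): with a frame of
  //   ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
  // the result for input row i can only be computed once row i + 2 has been
  // appended, so processRow() lags the input by two rows; the trailing results
  // are flushed by finishPartition() below.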
  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#finishPartition()
   *
   * For functions that are not ISupportStreamingModeForWindowing, evaluate the
   * remaining rows (rows whose window extended beyond the end of the
   * partition); for the rest of the functions, invoke terminate.
   *
   * While numOutputRows < numInputRows, fetch the next output from each
   * function that doesn't have enough output; if there is no output, flag
   * this as an error.
   */
  @Override
  public List<Object> finishPartition() throws HiveException {
    /*
     * Once enough rows have been output, there is no need to generate more output.
     */
    if (streamingState.rankLimitReached()) {
      return null;
    }

    WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
    for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
      WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
      GenericUDAFEvaluator fnEval = wFn.getWFnEval();

      int numRowsRemaining = wFn.getWindowFrame().getEnd().getRelativeOffset();
      if (fnEval != null &&
          fnEval instanceof ISupportStreamingModeForWindowing) {
        fnEval.terminate(streamingState.aggBuffers[i]);

        WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName());
        if (!wFnInfo.isSupportsWindow()) {
          numRowsRemaining = ((ISupportStreamingModeForWindowing) fnEval)
              .getRowsRemainingAfterTerminate();
        }

        if (numRowsRemaining != BoundarySpec.UNBOUNDED_AMOUNT) {
          while (numRowsRemaining > 0) {
            Object out = ((ISupportStreamingModeForWindowing) fnEval)
                .getNextResult(streamingState.aggBuffers[i]);
            if (out != null) {
              streamingState.fnOutputs[i]
                  .add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null
                      : out);
            }
            numRowsRemaining--;
          }
        }
      } else {
        while (numRowsRemaining > 0) {
          int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining;
          if (rowToProcess >= 0) {
            Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
            streamingState.fnOutputs[i].add(out);
          }
          numRowsRemaining--;
        }
      }
    }

    List<Object> oRows = new ArrayList<Object>();
    while (!streamingState.rollingPart.processedAllRows() &&
        !streamingState.rankLimitReached()) {
      boolean hasRow = streamingState.hasOutputRow();
      if (!hasRow && !streamingState.rankLimitReached()) {
        throw new HiveException(
            "Internal Error: cannot generate all output rows for a Partition");
      }
      if (hasRow) {
        oRows.add(streamingState.nextOutputRow());
      }
    }

    return oRows.size() == 0 ? null : oRows;
  }
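  // Illustrative note (not in the original source): continuing the
  // 2-FOLLOWING example above, when the partition ends the last two rows still
  // have no results; getEnd().getRelativeOffset() is 2 there, so the loop
  // above evaluates those two trailing windows against the now-complete
  // rolling partition.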
  @Override
  public boolean canIterateOutput() {
    return true;
  }

  @SuppressWarnings("rawtypes")
  @Override
  public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
    WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
    ArrayList<Object> output = new ArrayList<Object>();
    List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
    ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
    PTFPartition iPart = pItr.getPartition();

    int i = 0;
    for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
      boolean processWindow = processWindow(wFn.getWindowFrame());
      pItr.reset();
      if (!processWindow && !wFn.isPivotResult()) {
        Object out = evaluateFunctionOnPartition(wFn, iPart);
        output.add(out);
      } else if (wFn.isPivotResult()) {
        GenericUDAFEvaluator streamingEval = wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
        if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
          ISupportStreamingModeForWindowing strEval = (ISupportStreamingModeForWindowing) streamingEval;
          if (strEval.getRowsRemainingAfterTerminate() == 0) {
            wFn.setWFnEval(streamingEval);
            if (wFn.getOI() instanceof ListObjectInspector) {
              ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
              wFn.setOI(listOI.getListElementObjectInspector());
            }
            output.add(null);
            wFnsWithWindows.add(i);
          } else {
            outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
            output.add(null);
          }
        } else {
          outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
          output.add(null);
        }
      } else {
        output.add(null);
        wFnsWithWindows.add(i);
      }
      i++;
    }

    for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
      output.add(null);
    }

    if (wTFnDef.getRankLimit() != -1) {
      rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(),
          wTFnDef.getRankLimitFunction(), wTFnDef.getWindowFunctions());
    }

    return new WindowingIterator(iPart, output, outputFromPivotFunctions,
        ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
  }

  public static class WindowingTableFunctionResolver extends TableFunctionResolver {
    /*
     * OI of object constructed from output of Wdw Fns; before it is put
     * in the Wdw Processing Partition. Set by Translator/Deserializer.
     */
    private transient StructObjectInspector wdwProcessingOutputOI;

    public StructObjectInspector getWdwProcessingOutputOI() {
      return wdwProcessingOutputOI;
    }

    public void setWdwProcessingOutputOI(StructObjectInspector wdwProcessingOutputOI) {
      this.wdwProcessingOutputOI = wdwProcessingOutputOI;
    }

    @Override
    protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef) {
      return new WindowingTableFunction();
    }

    @Override
    public void setupOutputOI() throws SemanticException {
      setOutputOI(wdwProcessingOutputOI);
    }

    /*
     * Setup the OI based on the:
     * - Input TableDef's columns
     * - the Window Functions.
     */
    @Override
    public void initializeOutputOI() throws HiveException {
      setupOutputOI();
    }

    @Override
    public boolean transformsRawInput() {
      return false;
    }

    /*
     * (non-Javadoc)
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver#carryForwardNames()
     * Setting to true is correct only for special internal Functions.
     */
    @Override
    public boolean carryForwardNames() {
      return true;
    }

    /*
     * (non-Javadoc)
     * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver#getOutputNames()
     * Set to null only because carryForwardNames is true.
     */
    @Override
    public ArrayList<String> getOutputColumnNames() {
      return null;
    }
  }

  public static class SameList<E> extends AbstractList<E> {
    int sz;
    E val;

    public SameList(int sz, E val) {
      this.sz = sz;
      this.val = val;
    }

    @Override
    public E get(int index) {
      return val;
    }

    @Override
    public int size() {
      return sz;
    }
  }
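  // Illustrative note (not in the original source): SameList(4, v) behaves
  // like [v, v, v, v] without allocating four slots. execute() uses it to
  // replicate a single partition-level aggregate (e.g. an unbounded sum)
  // across every output row of the partition.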
  public class WindowingIterator implements Iterator<Object> {

    ArrayList<Object> output;
    List<?>[] outputFromPivotFunctions;
    int currIdx;
    PTFPartition iPart;
    /*
     * these are the functions that have a Window.
     * Fns w/o a Window have already been processed.
     */
    int[] wFnsToProcess;
    WindowTableFunctionDef wTFnDef;
    PTFDesc ptfDesc;
    StructObjectInspector inputOI;
    AggregationBuffer[] aggBuffers;
    Object[][] args;
    RankLimit rnkLimit;

    WindowingIterator(PTFPartition iPart, ArrayList<Object> output,
        List<?>[] outputFromPivotFunctions, int[] wFnsToProcess) {
      this.iPart = iPart;
      this.output = output;
      this.outputFromPivotFunctions = outputFromPivotFunctions;
      this.wFnsToProcess = wFnsToProcess;
      this.currIdx = 0;
      wTFnDef = (WindowTableFunctionDef) getTableDef();
      ptfDesc = getQueryDef();
      inputOI = iPart.getOutputOI();

      aggBuffers = new AggregationBuffer[wTFnDef.getWindowFunctions().size()];
      args = new Object[wTFnDef.getWindowFunctions().size()][];
      try {
        for (int j : wFnsToProcess) {
          WindowFunctionDef wFn = wTFnDef.getWindowFunctions().get(j);
          aggBuffers[j] = wFn.getWFnEval().getNewAggregationBuffer();
          args[j] = new Object[wFn.getArgs() == null ? 0 : wFn.getArgs().size()];
        }
      } catch (HiveException he) {
        throw new RuntimeException(he);
      }

      if (WindowingTableFunction.this.rnkLimitDef != null) {
        rnkLimit = new RankLimit(WindowingTableFunction.this.rnkLimitDef);
      }
    }

    @Override
    public boolean hasNext() {
      if (rnkLimit != null && rnkLimit.limitReached()) {
        return false;
      }
      return currIdx < iPart.size();
    }

    // Given the data in a partition, evaluate the result for the next row for
    // streaming and batch mode
    @Override
    public Object next() {
      int i;

      for (i = 0; i < outputFromPivotFunctions.length; i++) {
        if (outputFromPivotFunctions[i] != null) {
          output.set(i, outputFromPivotFunctions[i].get(currIdx));
        }
      }

      try {
        for (int j : wFnsToProcess) {
          WindowFunctionDef wFn = wTFnDef.getWindowFunctions().get(j);
          if (wFn.getWFnEval() instanceof ISupportStreamingModeForWindowing) {
            Object iRow = iPart.getAt(currIdx);
            int a = 0;
            if (wFn.getArgs() != null) {
              for (PTFExpressionDef arg : wFn.getArgs()) {
                args[j][a++] = arg.getExprEvaluator().evaluate(iRow);
              }
            }
            wFn.getWFnEval().aggregate(aggBuffers[j], args[j]);
            Object out = ((ISupportStreamingModeForWindowing) wFn.getWFnEval())
                .getNextResult(aggBuffers[j]);
            if (out != null) {
              if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
                out = null;
              } else {
                out = ObjectInspectorUtils.copyToStandardObject(out, wFn.getOI());
              }
            }
            output.set(j, out);
          } else {
            Object out = evaluateWindowFunction(wFn, currIdx, iPart);
            output.set(j, out);
          }
        }

        Object iRow = iPart.getAt(currIdx);
        i = wTFnDef.getWindowFunctions().size();
        for (StructField f : inputOI.getAllStructFieldRefs()) {
          output.set(i++, inputOI.getStructFieldData(iRow, f));
        }
      } catch (HiveException he) {
        throw new RuntimeException(he);
      }

      if (rnkLimit != null) {
        rnkLimit.updateRank(output);
      }
      currIdx++;
      return output;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  class StreamingState {
    PTFRollingPartition rollingPart;
    List<Object>[] fnOutputs;
    AggregationBuffer[] aggBuffers;
    Object[][] funcArgs;
    RankLimit rnkLimit;

    @SuppressWarnings("unchecked")
    StreamingState(Configuration cfg, StructObjectInspector inputOI,
        boolean isMapSide, WindowTableFunctionDef tabDef, int precedingSpan,
        int followingSpan) throws HiveException {
      AbstractSerDe serde = isMapSide ? tabDef.getInput().getOutputShape().getSerde()
          : tabDef.getRawInputShape().getSerde();
      StructObjectInspector outputOI = isMapSide ? tabDef.getInput()
          .getOutputShape().getOI() : tabDef.getRawInputShape().getOI();
      rollingPart = PTFPartition.createRolling(cfg, serde, inputOI, outputOI,
          precedingSpan, followingSpan);

      int numFns = tabDef.getWindowFunctions().size();
      fnOutputs = new ArrayList[numFns];
      aggBuffers = new AggregationBuffer[numFns];
      funcArgs = new Object[numFns][];
      for (int i = 0; i < numFns; i++) {
        fnOutputs[i] = new ArrayList<Object>();
        WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
        funcArgs[i] = new Object[wFn.getArgs() == null ? 0 : wFn.getArgs().size()];
        aggBuffers[i] = wFn.getWFnEval().getNewAggregationBuffer();
      }
      if (WindowingTableFunction.this.rnkLimitDef != null) {
        rnkLimit = new RankLimit(WindowingTableFunction.this.rnkLimitDef);
      }
    }

    void reset(WindowTableFunctionDef tabDef) throws HiveException {
      int numFns = tabDef.getWindowFunctions().size();
      rollingPart.reset();
      for (int i = 0; i < fnOutputs.length; i++) {
        fnOutputs[i].clear();
      }
      for (int i = 0; i < numFns; i++) {
        WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
        aggBuffers[i] = wFn.getWFnEval().getNewAggregationBuffer();
      }
      if (rnkLimit != null) {
        rnkLimit.reset();
      }
    }

    boolean hasOutputRow() {
      if (rankLimitReached()) {
        return false;
      }
      for (int i = 0; i < fnOutputs.length; i++) {
        if (fnOutputs[i].size() == 0) {
          return false;
        }
      }
      return true;
    }

    private List<Object> nextOutputRow() throws HiveException {
      List<Object> oRow = new ArrayList<Object>();
      Object iRow = rollingPart.nextOutputRow();
      int i = 0;
      for (; i < fnOutputs.length; i++) {
        oRow.add(fnOutputs[i].remove(0));
      }
      for (StructField f : rollingPart.getOutputOI().getAllStructFieldRefs()) {
        oRow.add(rollingPart.getOutputOI().getStructFieldData(iRow, f));
      }
      if (rnkLimit != null) {
        rnkLimit.updateRank(oRow);
      }
      return oRow;
    }

    boolean rankLimitReached() {
      return rnkLimit != null && rnkLimit.limitReached();
    }
  }

  static class RankLimit {
    /*
     * Rows with a rank <= rankLimit are output.
     * Only the first row with rank = rankLimit is output.
     */
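    // Illustrative note (not in the original source): with rankLimit = 3 and
    // rank values 1, 2, 2, 3, 3, 4 ..., rows are output through the first
    // rank-3 row; updateRank() then makes limitReached() true and the
    // remaining rows are skipped.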
    final int rankLimit;
    /*
     * the rankValue of the last row output.
     */
    int currentRank;
    /*
     * index of Rank function.
     */
    final int rankFnIdx;
    final PrimitiveObjectInspector fnOutOI;

    RankLimit(int rankLimit, int rankFnIdx, List<WindowFunctionDef> wdwFnDefs) {
      this.rankLimit = rankLimit;
      this.rankFnIdx = rankFnIdx;
      this.fnOutOI = (PrimitiveObjectInspector) wdwFnDefs.get(rankFnIdx).getOI();
      this.currentRank = -1;
    }

    RankLimit(RankLimit rl) {
      this.rankLimit = rl.rankLimit;
      this.rankFnIdx = rl.rankFnIdx;
      this.fnOutOI = rl.fnOutOI;
      this.currentRank = -1;
    }

    void reset() {
      this.currentRank = -1;
    }

    void updateRank(List<Object> oRow) {
      int r = (Integer) fnOutOI.getPrimitiveJavaObject(oRow.get(rankFnIdx));
      if (r > currentRank) {
        currentRank = r;
      }
    }

    boolean limitReached() {
      return currentRank >= rankLimit;
    }
  }
}