PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java

http://github.com/apache/hive
Java | 938 lines | 705 code | 119 blank | 114 comment | 62 complexity | 7616ef53f5da096c685d2a48be29b6c7 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.udf.ptf;
  19. import java.util.ArrayList;
  20. import java.util.HashMap;
  21. import java.util.List;
  22. import java.util.Map;
  23. import org.apache.hadoop.hive.ql.exec.ColumnInfo;
  24. import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
  25. import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
  26. import org.apache.hadoop.hive.ql.exec.PTFPartition;
  27. import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
  28. import org.apache.hadoop.hive.ql.exec.Utilities;
  29. import org.apache.hadoop.hive.ql.metadata.HiveException;
  30. import org.apache.hadoop.hive.ql.parse.ASTNode;
  31. import org.apache.hadoop.hive.ql.parse.PTFTranslator;
  32. import org.apache.hadoop.hive.ql.parse.RowResolver;
  33. import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
  34. import org.apache.hadoop.hive.ql.parse.SemanticException;
  35. import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
  36. import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx;
  37. import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
  38. import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
  39. import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  40. import org.apache.hadoop.hive.ql.plan.PTFDesc;
  41. import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
  42. import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
  43. import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
  44. import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
  45. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  46. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
  47. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
  48. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
  49. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
  50. import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
  51. import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
  52. import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  53. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  54. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
  55. import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  56. /**
  57. * return rows that meet a specified pattern. Use symbols to specify a list of expressions
  58. * to match.
  59. * Pattern is used to specify a Path. The results list can contain expressions based on
  60. * the input columns and also the matched Path.
  61. * <ol>
  62. * <li><b>pattern:</b> pattern for the Path. Path is 'dot' separated list of symbols.
  63. * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with
  64. * the usual meaning of zero or more, one or more respectively. For example,
  65. * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights
  66. * where the first occurrence was LATE, followed by zero or more EARLY flights,
  67. * followed by a ONTIME or EARLY flight.
  68. * <li><b>symbols</b> specify a list of name, expression pairs. For example,
  69. * 'LATE', arrival_delay &gt; 0, 'EARLY', arrival_delay &lt; 0 , 'ONTIME', arrival_delay == 0.
  70. * These symbols can be used in the Pattern defined above.
  71. * <li><b>resultSelectList</b> specified as a select list.
  72. * The expressions in the selectList are evaluated in the context where all the
  73. * input columns are available, plus the attribute
  74. * "tpath" is available. Path is a collection of rows that represents the matching Path.
  75. * </ol>
  76. */
  77. public class MatchPath extends TableFunctionEvaluator
  78. {
  79. private transient String patternStr;
  80. private transient SymbolsInfo symInfo;
  81. private transient String resultExprStr;
  82. private transient SymbolFunction syFn;
  83. private ResultExprInfo resultExprInfo;
  84. /*
  85. * the names of the Columns of the input to MatchPath. Used to setup the tpath Struct column.
  86. */
  87. private HashMap<String,String> inputColumnNamesMap;
  88. @Override
  89. public void execute(PTFPartitionIterator<Object> pItr, PTFPartition outP) throws HiveException
  90. {
  91. while (pItr.hasNext())
  92. {
  93. Object iRow = pItr.next();
  94. SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr);
  95. if (syFnRes.matches )
  96. {
  97. int sz = syFnRes.nextRow - (pItr.getIndex() - 1);
  98. Object selectListInput = MatchPath.getSelectListInput(iRow,
  99. tableDef.getInput().getOutputShape().getOI(), pItr, sz);
  100. ArrayList<Object> oRow = new ArrayList<Object>();
  101. for(ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals)
  102. {
  103. oRow.add(resExprEval.evaluate(selectListInput));
  104. }
  105. outP.append(oRow);
  106. }
  107. }
  108. }
  109. static void throwErrorWithSignature(String message) throws SemanticException
  110. {
  111. throw new SemanticException(String.format(
  112. "MatchPath signature is: SymbolPattern, one or more SymbolName, " +
  113. "expression pairs, the result expression as a select list. Error %s",
  114. message));
  115. }
  116. public HashMap<String,String> getInputColumnNames() {
  117. return inputColumnNamesMap;
  118. }
  119. public void setInputColumnNames(HashMap<String,String> inputColumnNamesMap) {
  120. this.inputColumnNamesMap = inputColumnNamesMap;
  121. }
  122. public static class MatchPathResolver extends TableFunctionResolver
  123. {
  124. @Override
  125. protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc,
  126. PartitionedTableFunctionDef tDef)
  127. {
  128. return new MatchPath();
  129. }
  130. /**
  131. * <ul>
  132. * <li> check structure of Arguments:
  133. * <ol>
  134. * <li> First arg should be a String
  135. * <li> then there should be an even number of Arguments:
  136. * String, expression; expression should be Convertible to Boolean.
  137. * <li> finally there should be a String.
  138. * </ol>
  139. * <li> convert pattern into a NNode chain.
  140. * <li> convert symbol args into a Symbol Map.
  141. * <li> parse selectList into SelectList struct. The inputOI used to translate
  142. * these expressions should be based on the
  143. * columns in the Input, the 'path.attr'
  144. * </ul>
  145. */
  146. @Override
  147. public void setupOutputOI() throws SemanticException
  148. {
  149. MatchPath evaluator = (MatchPath) getEvaluator();
  150. PartitionedTableFunctionDef tDef = evaluator.getTableDef();
  151. List<PTFExpressionDef> args = tDef.getArgs();
  152. int argsNum = args == null ? 0 : args.size();
  153. if ( argsNum < 4 )
  154. {
  155. throwErrorWithSignature("at least 4 arguments required");
  156. }
  157. validateAndSetupPatternStr(evaluator, args);
  158. validateAndSetupSymbolInfo(evaluator, args, argsNum);
  159. validateAndSetupResultExprStr(evaluator, args, argsNum);
  160. setupSymbolFunctionChain(evaluator);
  161. /*
  162. * setup OI for input to resultExpr select list
  163. */
  164. RowResolver selectListInputRR = MatchPath.createSelectListRR(evaluator, tDef.getInput());
  165. /*
  166. * parse ResultExpr Str and setup OI.
  167. */
  168. ResultExpressionParser resultExprParser =
  169. new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR);
  170. try {
  171. resultExprParser.translate();
  172. }
  173. catch(HiveException he) {
  174. throw new SemanticException(he);
  175. }
  176. evaluator.resultExprInfo = resultExprParser.getResultExprInfo();
  177. StructObjectInspector OI = evaluator.resultExprInfo.resultOI;
  178. setOutputOI(OI);
  179. }
  180. @Override
  181. public List<String> getReferencedColumns() throws SemanticException {
  182. MatchPath matchPath = (MatchPath) evaluator;
  183. List<String> columns = new ArrayList<>();
  184. for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
  185. Utilities.mergeUniqElems(columns, exprNode.getCols());
  186. }
  187. for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
  188. Utilities.mergeUniqElems(columns, exprNode.getCols());
  189. }
  190. return columns;
  191. }
  192. /*
  193. * validate and setup patternStr
  194. */
  195. private void validateAndSetupPatternStr(MatchPath evaluator,
  196. List<PTFExpressionDef> args) throws SemanticException {
  197. PTFExpressionDef symboPatternArg = args.get(0);
  198. ObjectInspector symbolPatternArgOI = symboPatternArg.getOI();
  199. if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolPatternArgOI) ||
  200. (symbolPatternArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) ||
  201. ((PrimitiveObjectInspector)symbolPatternArgOI).getPrimitiveCategory() !=
  202. PrimitiveObjectInspector.PrimitiveCategory.STRING )
  203. {
  204. throwErrorWithSignature("Currently the symbol Pattern must be a Constant String.");
  205. }
  206. evaluator.patternStr = ((ConstantObjectInspector)symbolPatternArgOI).
  207. getWritableConstantValue().toString();
  208. }
  209. /*
  210. * validate and setup SymbolInfo
  211. */
  212. private void validateAndSetupSymbolInfo(MatchPath evaluator,
  213. List<PTFExpressionDef> args,
  214. int argsNum) throws SemanticException {
  215. int symbolArgsSz = argsNum - 2;
  216. if ( symbolArgsSz % 2 != 0)
  217. {
  218. throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " +
  219. "there are odd number of symbol args");
  220. }
  221. evaluator.symInfo = new SymbolsInfo(symbolArgsSz/2);
  222. for(int i=1; i <= symbolArgsSz; i += 2)
  223. {
  224. PTFExpressionDef symbolNameArg = args.get(i);
  225. ObjectInspector symbolNameArgOI = symbolNameArg.getOI();
  226. if ( !ObjectInspectorUtils.isConstantObjectInspector(symbolNameArgOI) ||
  227. (symbolNameArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) ||
  228. ((PrimitiveObjectInspector)symbolNameArgOI).getPrimitiveCategory() !=
  229. PrimitiveObjectInspector.PrimitiveCategory.STRING )
  230. {
  231. throwErrorWithSignature(
  232. String.format("Currently a Symbol Name(%s) must be a Constant String",
  233. symbolNameArg.getExpressionTreeString()));
  234. }
  235. String symbolName = ((ConstantObjectInspector)symbolNameArgOI).
  236. getWritableConstantValue().toString();
  237. PTFExpressionDef symolExprArg = args.get(i+1);
  238. ObjectInspector symolExprArgOI = symolExprArg.getOI();
  239. if ( (symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) ||
  240. ((PrimitiveObjectInspector)symolExprArgOI).getPrimitiveCategory() !=
  241. PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN )
  242. {
  243. throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " +
  244. "must be a boolean expression", symolExprArg.getExpressionTreeString()));
  245. }
  246. evaluator.symInfo.add(symbolName, symolExprArg);
  247. }
  248. }
  249. /*
  250. * validate and setup resultExprStr
  251. */
  252. private void validateAndSetupResultExprStr(MatchPath evaluator,
  253. List<PTFExpressionDef> args,
  254. int argsNum) throws SemanticException {
  255. PTFExpressionDef resultExprArg = args.get(argsNum - 1);
  256. ObjectInspector resultExprArgOI = resultExprArg.getOI();
  257. if ( !ObjectInspectorUtils.isConstantObjectInspector(resultExprArgOI) ||
  258. (resultExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) ||
  259. ((PrimitiveObjectInspector)resultExprArgOI).getPrimitiveCategory() !=
  260. PrimitiveObjectInspector.PrimitiveCategory.STRING )
  261. {
  262. throwErrorWithSignature("Currently the result Expr parameter must be a Constant String.");
  263. }
  264. evaluator.resultExprStr = ((ConstantObjectInspector)resultExprArgOI).
  265. getWritableConstantValue().toString();
  266. }
  267. /*
  268. * setup SymbolFunction chain.
  269. */
  270. private void setupSymbolFunctionChain(MatchPath evaluator) throws SemanticException {
  271. SymbolParser syP = new SymbolParser(evaluator.patternStr,
  272. evaluator.symInfo.symbolExprsNames,
  273. evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs);
  274. syP.parse();
  275. evaluator.syFn = syP.getSymbolFunction();
  276. }
  277. @Override
  278. public boolean transformsRawInput()
  279. {
  280. return false;
  281. }
  282. @Override
  283. public void initializeOutputOI() throws HiveException {
  284. try {
  285. MatchPath evaluator = (MatchPath) getEvaluator();
  286. PartitionedTableFunctionDef tDef = evaluator.getTableDef();
  287. List<PTFExpressionDef> args = tDef.getArgs();
  288. int argsNum = args.size();
  289. validateAndSetupPatternStr(evaluator, args);
  290. validateAndSetupSymbolInfo(evaluator, args, argsNum);
  291. validateAndSetupResultExprStr(evaluator, args, argsNum);
  292. setupSymbolFunctionChain(evaluator);
  293. /*
  294. * setup OI for input to resultExpr select list
  295. */
  296. StructObjectInspector selectListInputOI = MatchPath.createSelectListOI( evaluator,
  297. tDef.getInput());
  298. ResultExprInfo resultExprInfo = evaluator.resultExprInfo;
  299. ArrayList<ObjectInspector> selectListExprOIs = new ArrayList<ObjectInspector>();
  300. resultExprInfo.resultExprEvals = new ArrayList<ExprNodeEvaluator>();
  301. for(int i=0 ; i < resultExprInfo.resultExprNodes.size(); i++) {
  302. ExprNodeDesc selectColumnExprNode =resultExprInfo.resultExprNodes.get(i);
  303. ExprNodeEvaluator selectColumnExprEval =
  304. ExprNodeEvaluatorFactory.get(selectColumnExprNode);
  305. ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI);
  306. resultExprInfo.resultExprEvals.add(selectColumnExprEval);
  307. selectListExprOIs.add(selectColumnOI);
  308. }
  309. resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector(
  310. resultExprInfo.resultExprNames, selectListExprOIs);
  311. setOutputOI(resultExprInfo.resultOI);
  312. }
  313. catch(SemanticException se) {
  314. throw new HiveException(se);
  315. }
  316. }
  317. @Override
  318. public ArrayList<String> getOutputColumnNames() {
  319. MatchPath evaluator = (MatchPath) getEvaluator();
  320. return evaluator.resultExprInfo.getResultExprNames();
  321. }
  322. }
  323. public ResultExprInfo getResultExprInfo() {
  324. return resultExprInfo;
  325. }
  326. public void setResultExprInfo(ResultExprInfo resultExprInfo) {
  327. this.resultExprInfo = resultExprInfo;
  328. }
  329. static class SymbolsInfo {
  330. int sz;
  331. ArrayList<ExprNodeDesc> symbolExprsDecs;
  332. ArrayList<ExprNodeEvaluator> symbolExprsEvaluators;
  333. ArrayList<ObjectInspector> symbolExprsOIs;
  334. ArrayList<String> symbolExprsNames;
  335. SymbolsInfo(int sz)
  336. {
  337. this.sz = sz;
  338. symbolExprsEvaluators = new ArrayList<ExprNodeEvaluator>(sz);
  339. symbolExprsOIs = new ArrayList<ObjectInspector>(sz);
  340. symbolExprsNames = new ArrayList<String>(sz);
  341. symbolExprsDecs = new ArrayList<>(sz);
  342. }
  343. void add(String name, PTFExpressionDef arg)
  344. {
  345. symbolExprsNames.add(name);
  346. symbolExprsEvaluators.add(arg.getExprEvaluator());
  347. symbolExprsOIs.add(arg.getOI());
  348. symbolExprsDecs.add(arg.getExprNode());
  349. }
  350. }
  351. public static class ResultExprInfo {
  352. ArrayList<String> resultExprNames;
  353. ArrayList<ExprNodeDesc> resultExprNodes;
  354. private transient ArrayList<ExprNodeEvaluator> resultExprEvals;
  355. private transient StructObjectInspector resultOI;
  356. public ArrayList<String> getResultExprNames() {
  357. return resultExprNames;
  358. }
  359. public void setResultExprNames(ArrayList<String> resultExprNames) {
  360. this.resultExprNames = resultExprNames;
  361. }
  362. public ArrayList<ExprNodeDesc> getResultExprNodes() {
  363. return resultExprNodes;
  364. }
  365. public void setResultExprNodes(ArrayList<ExprNodeDesc> resultExprNodes) {
  366. this.resultExprNodes = resultExprNodes;
  367. }
  368. }
  369. public static abstract class SymbolFunction
  370. {
  371. SymbolFunctionResult result;
  372. public SymbolFunction()
  373. {
  374. result = new SymbolFunctionResult();
  375. }
  376. public static SymbolFunctionResult match(SymbolFunction syFn, Object row,
  377. PTFPartitionIterator<Object> pItr) throws HiveException
  378. {
  379. int resetToIdx = pItr.getIndex() - 1;
  380. try
  381. {
  382. return syFn.match(row, pItr);
  383. } finally
  384. {
  385. pItr.resetToIndex(resetToIdx);
  386. }
  387. }
  388. protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
  389. throws HiveException;
  390. protected abstract boolean isOptional();
  391. }
  392. public static class Symbol extends SymbolFunction {
  393. ExprNodeEvaluator symbolExprEval;
  394. Converter converter;
  395. public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI)
  396. {
  397. this.symbolExprEval = symbolExprEval;
  398. converter = ObjectInspectorConverters.getConverter(
  399. symbolOI,
  400. PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
  401. }
  402. @Override
  403. protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
  404. throws HiveException
  405. {
  406. Object val = null;
  407. val = symbolExprEval.evaluate(row);
  408. val = converter.convert(val);
  409. result.matches = ((Boolean) val).booleanValue();
  410. result.nextRow = pItr.getIndex();
  411. return result;
  412. }
  413. @Override
  414. protected boolean isOptional()
  415. {
  416. return false;
  417. }
  418. }
  419. public static class Star extends SymbolFunction {
  420. SymbolFunction symbolFn;
  421. public Star(SymbolFunction symbolFn)
  422. {
  423. this.symbolFn = symbolFn;
  424. }
  425. @Override
  426. protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
  427. throws HiveException
  428. {
  429. result.matches = true;
  430. SymbolFunctionResult rowResult = symbolFn.match(row, pItr);
  431. while (rowResult.matches && pItr.hasNext())
  432. {
  433. row = pItr.next();
  434. rowResult = symbolFn.match(row, pItr);
  435. }
  436. result.nextRow = pItr.getIndex();
  437. if(pItr.hasNext()) {
  438. result.nextRow -= 1;
  439. }
  440. return result;
  441. }
  442. @Override
  443. protected boolean isOptional()
  444. {
  445. return true;
  446. }
  447. }
  448. public static class Plus extends SymbolFunction {
  449. SymbolFunction symbolFn;
  450. public Plus(SymbolFunction symbolFn)
  451. {
  452. this.symbolFn = symbolFn;
  453. }
  454. @Override
  455. protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
  456. throws HiveException
  457. {
  458. SymbolFunctionResult rowResult = symbolFn.match(row, pItr);
  459. if (!rowResult.matches)
  460. {
  461. result.matches = false;
  462. result.nextRow = pItr.getIndex() - 1;
  463. return result;
  464. }
  465. result.matches = true;
  466. while (rowResult.matches && pItr.hasNext())
  467. {
  468. row = pItr.next();
  469. rowResult = symbolFn.match(row, pItr);
  470. }
  471. result.nextRow = pItr.getIndex() - 1;
  472. return result;
  473. }
  474. @Override
  475. protected boolean isOptional()
  476. {
  477. return false;
  478. }
  479. }
  480. public static class Chain extends SymbolFunction
  481. {
  482. ArrayList<SymbolFunction> components;
  483. public Chain(ArrayList<SymbolFunction> components)
  484. {
  485. this.components = components;
  486. }
  487. /*
  488. * Iterate over the Symbol Functions in the Chain:
  489. * - If we are not at the end of the Iterator (i.e. row != null )
  490. * - match the current componentFn
  491. * - if it returns false, then return false
  492. * - otherwise set row to the next row from the Iterator.
  493. * - if we are at the end of the Iterator
  494. * - skip any optional Symbol Fns (star patterns) at the end.
  495. * - but if we come to a non optional Symbol Fn, return false.
  496. * - if we match all Fns in the chain return true.
  497. */
  498. @Override
  499. protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
  500. throws HiveException
  501. {
  502. SymbolFunctionResult componentResult = null;
  503. for (SymbolFunction sFn : components)
  504. {
  505. if (row != null)
  506. {
  507. componentResult = sFn.match(row, pItr);
  508. if (!componentResult.matches)
  509. {
  510. result.matches = false;
  511. result.nextRow = componentResult.nextRow;
  512. return result;
  513. }
  514. row = pItr.resetToIndex(componentResult.nextRow);
  515. }
  516. else
  517. {
  518. if (!sFn.isOptional())
  519. {
  520. result.matches = false;
  521. result.nextRow = componentResult.nextRow;
  522. return result;
  523. }
  524. }
  525. }
  526. result.matches = true;
  527. result.nextRow = componentResult.nextRow;
  528. return result;
  529. }
  530. @Override
  531. protected boolean isOptional()
  532. {
  533. return false;
  534. }
  535. }
  536. public static class SymbolFunctionResult
  537. {
  538. /*
  539. * does the row match the pattern represented by this SymbolFunction
  540. */
  541. public boolean matches;
  542. /*
  543. * what is the index of the row beyond the set of rows that match this pattern.
  544. */
  545. public int nextRow;
  546. }
  547. public static class SymbolParser
  548. {
  549. String patternStr;
  550. String[] symbols;
  551. HashMap<String, Object[]> symbolExprEvalMap;
  552. ArrayList<SymbolFunction> symbolFunctions;
  553. Chain symbolFnChain;
  554. public SymbolParser(String patternStr, ArrayList<String> symbolNames,
  555. ArrayList<ExprNodeEvaluator> symbolExprEvals, ArrayList<ObjectInspector> symbolExprOIs)
  556. {
  557. super();
  558. this.patternStr = patternStr;
  559. symbolExprEvalMap = new HashMap<String, Object[]>();
  560. int sz = symbolNames.size();
  561. for(int i=0; i < sz; i++)
  562. {
  563. String symbolName = symbolNames.get(i);
  564. ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i);
  565. ObjectInspector symbolExprOI = symbolExprOIs.get(i);
  566. symbolExprEvalMap.put(symbolName.toLowerCase(),
  567. new Object[] {symbolExprEval, symbolExprOI});
  568. }
  569. }
  570. public SymbolFunction getSymbolFunction()
  571. {
  572. return symbolFnChain;
  573. }
  574. public void parse() throws SemanticException
  575. {
  576. symbols = patternStr.split("\\.");
  577. symbolFunctions = new ArrayList<SymbolFunction>();
  578. for(String symbol : symbols)
  579. {
  580. boolean isStar = symbol.endsWith("*");
  581. boolean isPlus = symbol.endsWith("+");
  582. symbol = (isStar || isPlus) ? symbol.substring(0, symbol.length() - 1) : symbol;
  583. Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase());
  584. if ( symbolDetails == null )
  585. {
  586. throw new SemanticException(String.format("Unknown Symbol %s", symbol));
  587. }
  588. ExprNodeEvaluator symbolExprEval = (ExprNodeEvaluator) symbolDetails[0];
  589. ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1];
  590. SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI);
  591. if ( isStar )
  592. {
  593. sFn = new Star(sFn);
  594. }
  595. else if ( isPlus )
  596. {
  597. sFn = new Plus(sFn);
  598. }
  599. symbolFunctions.add(sFn);
  600. }
  601. symbolFnChain = new Chain(symbolFunctions);
  602. }
  603. }
  604. /*
  605. * ResultExpression is a Select List with the following variation:
  606. * - the select keyword is optional. The parser checks if the expression doesn't start with
  607. * select; if not it prefixes it.
  608. * - Window Fn clauses are not permitted.
  609. * - expressions can operate on the input columns plus the pseudo column 'tpath'
  610. * which is array of
  611. * structs. The shape of the struct is
  612. * the same as the input.
  613. */
  614. public static class ResultExpressionParser {
  615. String resultExprString;
  616. RowResolver selectListInputRowResolver;
  617. TypeCheckCtx selectListInputTypeCheckCtx;
  618. StructObjectInspector selectListInputOI;
  619. List<WindowExpressionSpec> selectSpec;
  620. ResultExprInfo resultExprInfo;
  621. public ResultExpressionParser(String resultExprString,
  622. RowResolver selectListInputRowResolver)
  623. {
  624. this.resultExprString = resultExprString;
  625. this.selectListInputRowResolver = selectListInputRowResolver;
  626. }
  627. public void translate() throws SemanticException, HiveException
  628. {
  629. setupSelectListInputInfo();
  630. fixResultExprString();
  631. parse();
  632. validateSelectExpr();
  633. buildSelectListEvaluators();
  634. }
  635. public ResultExprInfo getResultExprInfo() {
  636. return resultExprInfo;
  637. }
  638. private void buildSelectListEvaluators() throws SemanticException, HiveException
  639. {
  640. resultExprInfo = new ResultExprInfo();
  641. resultExprInfo.resultExprEvals = new ArrayList<ExprNodeEvaluator>();
  642. resultExprInfo.resultExprNames = new ArrayList<String>();
  643. resultExprInfo.resultExprNodes = new ArrayList<ExprNodeDesc>();
  644. //result
  645. ArrayList<ObjectInspector> selectListExprOIs = new ArrayList<ObjectInspector>();
  646. int i = 0;
  647. for(WindowExpressionSpec expr : selectSpec)
  648. {
  649. String selectColName = expr.getAlias();
  650. ASTNode selectColumnNode = expr.getExpression();
  651. ExprNodeDesc selectColumnExprNode =
  652. ResultExpressionParser.buildExprNode(selectColumnNode,
  653. selectListInputTypeCheckCtx);
  654. ExprNodeEvaluator selectColumnExprEval =
  655. ExprNodeEvaluatorFactory.get(selectColumnExprNode);
  656. ObjectInspector selectColumnOI = null;
  657. selectColumnOI = selectColumnExprEval.initialize(selectListInputOI);
  658. selectColName = getColumnName(selectColName, selectColumnExprNode, i);
  659. resultExprInfo.resultExprEvals.add(selectColumnExprEval);
  660. selectListExprOIs.add(selectColumnOI);
  661. resultExprInfo.resultExprNodes.add(selectColumnExprNode);
  662. resultExprInfo.resultExprNames.add(selectColName);
  663. i++;
  664. }
  665. resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector(
  666. resultExprInfo.resultExprNames, selectListExprOIs);
  667. }
  668. private void setupSelectListInputInfo() throws SemanticException
  669. {
  670. selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver);
  671. selectListInputTypeCheckCtx.setUnparseTranslator(null);
  672. /*
  673. * create SelectListOI
  674. */
  675. selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver);
  676. }
  677. private void fixResultExprString()
  678. {
  679. String r = resultExprString.trim();
  680. if (r.length()<6 || !r.substring(0, 6).toLowerCase().equals("select"))
  681. {
  682. r = "select " + r;
  683. }
  684. resultExprString = r;
  685. }
  686. private void parse() throws SemanticException
  687. {
  688. selectSpec = SemanticAnalyzer.parseSelect(resultExprString);
  689. }
  690. private void validateSelectExpr() throws SemanticException
  691. {
  692. for (WindowExpressionSpec expr : selectSpec)
  693. {
  694. PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression());
  695. }
  696. }
  697. private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx)
  698. {
  699. if (alias != null)
  700. {
  701. return alias;
  702. }
  703. else if (exprNode instanceof ExprNodeColumnDesc)
  704. {
  705. ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode;
  706. return colDesc.getColumn();
  707. }
  708. return "matchpath_col_" + colIdx;
  709. }
  710. public static ExprNodeDesc buildExprNode(ASTNode expr,
  711. TypeCheckCtx typeCheckCtx) throws SemanticException
  712. {
  713. // todo: use SemanticAnalyzer::genExprNodeDesc
  714. // currently SA not available to PTFTranslator.
  715. Map<ASTNode, ExprNodeDesc> map = ExprNodeTypeCheck
  716. .genExprNode(expr, typeCheckCtx);
  717. ExprNodeDesc desc = map.get(expr);
  718. if (desc == null) {
  719. String errMsg = typeCheckCtx.getError();
  720. if ( errMsg == null) {
  721. errMsg = "Error in parsing ";
  722. }
  723. throw new SemanticException(errMsg);
  724. }
  725. return desc;
  726. }
  727. }
  728. public static final String PATHATTR_NAME = "tpath";
  729. /*
  730. * add array<struct> to the list of columns
  731. */
  732. protected static RowResolver createSelectListRR(MatchPath evaluator,
  733. PTFInputDef inpDef) throws SemanticException {
  734. RowResolver rr = new RowResolver();
  735. RowResolver inputRR = inpDef.getOutputShape().getRr();
  736. evaluator.inputColumnNamesMap = new HashMap<String,String>();
  737. ArrayList<String> inputColumnNames = new ArrayList<String>();
  738. ArrayList<ObjectInspector> inpColOIs = new ArrayList<ObjectInspector>();
  739. for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) {
  740. ColumnInfo cInfo = new ColumnInfo(inpCInfo);
  741. String colAlias = cInfo.getAlias();
  742. String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName());
  743. if (tabColAlias != null) {
  744. colAlias = tabColAlias[1];
  745. }
  746. ASTNode inExpr = null;
  747. inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR);
  748. if ( inExpr != null ) {
  749. rr.putExpression(inExpr, cInfo);
  750. colAlias = inExpr.toStringTree().toLowerCase();
  751. }
  752. else {
  753. colAlias = colAlias == null ? cInfo.getInternalName() : colAlias;
  754. rr.put(cInfo.getTabAlias(), colAlias, cInfo);
  755. }
  756. evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias);
  757. inputColumnNames.add(colAlias);
  758. inpColOIs.add(cInfo.getObjectInspector());
  759. }
  760. StandardListObjectInspector pathAttrOI =
  761. ObjectInspectorFactory.getStandardListObjectInspector(
  762. ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames,
  763. inpColOIs));
  764. ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME,
  765. TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI),
  766. null,
  767. false, false);
  768. rr.put(null, PATHATTR_NAME, pathColumn);
  769. return rr;
  770. }
  771. protected static StructObjectInspector createSelectListOI(MatchPath evaluator, PTFInputDef inpDef) {
  772. StructObjectInspector inOI = inpDef.getOutputShape().getOI();
  773. ArrayList<String> inputColumnNames = new ArrayList<String>();
  774. ArrayList<String> selectListNames = new ArrayList<String>();
  775. ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  776. for(StructField f : inOI.getAllStructFieldRefs()) {
  777. String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName());
  778. if ( inputColName != null ) {
  779. inputColumnNames.add(inputColName);
  780. selectListNames.add(f.getFieldName());
  781. fieldOIs.add(f.getFieldObjectInspector());
  782. }
  783. }
  784. StandardListObjectInspector pathAttrOI =
  785. ObjectInspectorFactory.getStandardListObjectInspector(
  786. ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames,
  787. fieldOIs));
  788. ArrayList<ObjectInspector> selectFieldOIs = new ArrayList<ObjectInspector>();
  789. selectFieldOIs.addAll(fieldOIs);
  790. selectFieldOIs.add(pathAttrOI);
  791. selectListNames.add(MatchPath.PATHATTR_NAME);
  792. return ObjectInspectorFactory.getStandardStructObjectInspector(
  793. selectListNames, selectFieldOIs);
  794. }
  795. public static Object getSelectListInput(Object currRow, ObjectInspector rowOI,
  796. PTFPartitionIterator<Object> pItr, int sz) throws HiveException {
  797. ArrayList<Object> oRow = new ArrayList<Object>();
  798. List<?> currRowAsStdObject = (List<?>) ObjectInspectorUtils
  799. .copyToStandardObject(currRow, rowOI);
  800. oRow.addAll(currRowAsStdObject);
  801. oRow.add(getPath(currRow, rowOI, pItr, sz));
  802. return oRow;
  803. }
  804. public static ArrayList<Object> getPath(Object currRow, ObjectInspector rowOI,
  805. PTFPartitionIterator<Object> pItr, int sz) throws HiveException {
  806. int idx = pItr.getIndex() - 1;
  807. ArrayList<Object> path = new ArrayList<Object>();
  808. path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI));
  809. int pSz = 1;
  810. while (pSz < sz && pItr.hasNext())
  811. {
  812. currRow = pItr.next();
  813. path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI));
  814. pSz++;
  815. }
  816. pItr.resetToIndex(idx);
  817. return path;
  818. }
  819. }