PageRenderTime 56ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java

http://github.com/apache/hive
Java | 709 lines | 514 code | 58 blank | 137 comment | 152 complexity | 19ef92f886bd5ab2a9665743fcbe096f MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec;
  19. import java.util.ArrayDeque;
  20. import java.util.ArrayList;
  21. import java.util.Collection;
  22. import java.util.Collections;
  23. import java.util.Deque;
  24. import java.util.HashSet;
  25. import java.util.LinkedHashSet;
  26. import java.util.List;
  27. import java.util.Map;
  28. import java.util.Set;
  29. import java.util.Stack;
  30. import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
  31. import org.apache.hadoop.hive.ql.parse.SemanticException;
  32. import org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo;
  33. import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
  34. import org.apache.hadoop.hive.ql.plan.BaseWork;
  35. import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  36. import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
  37. import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
  38. import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
  39. import org.apache.hadoop.hive.ql.plan.MapWork;
  40. import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  41. import org.apache.hadoop.mapred.OutputCollector;
  42. import org.slf4j.Logger;
  43. import org.slf4j.LoggerFactory;
  44. import com.google.common.collect.ImmutableMultimap;
  45. import com.google.common.collect.Lists;
  46. import com.google.common.collect.Multimap;
  47. public class OperatorUtils {
  48. private static final Logger LOG = LoggerFactory.getLogger(OperatorUtils.class);
  49. /**
  50. * Return the ancestor of the specified operator at the provided path or null if the path is invalid.
  51. *
  52. * The method is equivalent to following code:
  53. * <pre>{@code
  54. * op.getParentOperators().get(path[0])
  55. * .getParentOperators().get(path[1])
  56. * ...
  57. * .getParentOperators().get(path[n])
  58. * }</pre>
  59. * with additional checks about the validity of the provided path and the type of the ancestor.
  60. *
  61. * @param op the operator for which we
  62. * @param clazz the class of the ancestor operator
  63. * @param path the path leading to the desired ancestor
  64. * @param <T> the type of the ancestor
  65. * @return the ancestor of the specified operator at the provided path or null if the path is invalid.
  66. */
  67. public static <T> T ancestor(Operator<?> op, Class<T> clazz, int... path) {
  68. Operator<?> target = op;
  69. for (int i = 0; i < path.length; i++) {
  70. if (target.getParentOperators() == null || path[i] > target.getParentOperators().size()) {
  71. return null;
  72. }
  73. target = target.getParentOperators().get(path[i]);
  74. }
  75. return clazz.isInstance(target) ? clazz.cast(target) : null;
  76. }
  77. public static <T> Set<T> findOperators(Operator<?> start, Class<T> clazz) {
  78. return findOperators(start, clazz, new HashSet<T>());
  79. }
  80. public static <T> T findSingleOperator(Operator<?> start, Class<T> clazz) {
  81. Set<T> found = findOperators(start, clazz, new HashSet<T>());
  82. return found.size() == 1 ? found.iterator().next() : null;
  83. }
  84. public static <T> Set<T> findOperators(Collection<Operator<?>> starts, Class<T> clazz) {
  85. Set<T> found = new HashSet<T>();
  86. for (Operator<?> start : starts) {
  87. if (start == null) {
  88. continue;
  89. }
  90. findOperators(start, clazz, found);
  91. }
  92. return found;
  93. }
  94. @SuppressWarnings("unchecked")
  95. private static <T> Set<T> findOperators(Operator<?> start, Class<T> clazz, Set<T> found) {
  96. if (clazz.isInstance(start)) {
  97. found.add((T) start);
  98. }
  99. if (start.getChildOperators() != null) {
  100. for (Operator<?> child : start.getChildOperators()) {
  101. findOperators(child, clazz, found);
  102. }
  103. }
  104. return found;
  105. }
  106. public static <T> Set<T> findOperatorsUpstream(Operator<?> start, Class<T> clazz) {
  107. return findOperatorsUpstream(start, clazz, new HashSet<T>());
  108. }
  109. public static <T> T findSingleOperatorUpstream(Operator<?> start, Class<T> clazz) {
  110. Set<T> found = findOperatorsUpstream(start, clazz, new HashSet<T>());
  111. return found.size() == 1 ? found.iterator().next() : null;
  112. }
  113. public static <T> T findSingleOperatorUpstreamJoinAccounted(Operator<?> start, Class<T> clazz) {
  114. Set<T> found = findOperatorsUpstreamJoinAccounted(start, clazz, new HashSet<T>());
  115. return found.size() >= 1 ? found.iterator().next(): null;
  116. }
  117. public static <T> Set<T> findOperatorsUpstream(Collection<Operator<?>> starts, Class<T> clazz) {
  118. Set<T> found = new HashSet<T>();
  119. for (Operator<?> start : starts) {
  120. findOperatorsUpstream(start, clazz, found);
  121. }
  122. return found;
  123. }
  124. @SuppressWarnings("unchecked")
  125. private static <T> Set<T> findOperatorsUpstream(Operator<?> start, Class<T> clazz, Set<T> found) {
  126. if (clazz.isInstance(start)) {
  127. found.add((T) start);
  128. }
  129. if (start.getParentOperators() != null) {
  130. for (Operator<?> parent : start.getParentOperators()) {
  131. findOperatorsUpstream(parent, clazz, found);
  132. }
  133. }
  134. return found;
  135. }
  136. public static <T> Set<T> findOperatorsUpstreamJoinAccounted(Operator<?> start, Class<T> clazz,
  137. Set<T> found) {
  138. if (clazz.isInstance(start)) {
  139. found.add((T) start);
  140. }
  141. int onlyIncludeIndex = -1;
  142. if (start instanceof AbstractMapJoinOperator) {
  143. AbstractMapJoinOperator mapJoinOp = (AbstractMapJoinOperator) start;
  144. MapJoinDesc desc = (MapJoinDesc) mapJoinOp.getConf();
  145. onlyIncludeIndex = desc.getPosBigTable();
  146. }
  147. if (start.getParentOperators() != null) {
  148. int i = 0;
  149. for (Operator<?> parent : start.getParentOperators()) {
  150. if (onlyIncludeIndex >= 0) {
  151. if (onlyIncludeIndex == i) {
  152. findOperatorsUpstreamJoinAccounted(parent, clazz, found);
  153. }
  154. } else {
  155. findOperatorsUpstreamJoinAccounted(parent, clazz, found);
  156. }
  157. i++;
  158. }
  159. }
  160. return found;
  161. }
  162. public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
  163. if (childOperators == null) {
  164. return;
  165. }
  166. for (Operator<? extends OperatorDesc> op : childOperators) {
  167. if (op.getName().equals(ReduceSinkOperator.getOperatorName())) {
  168. op.setOutputCollector(out);
  169. } else {
  170. setChildrenCollector(op.getChildOperators(), out);
  171. }
  172. }
  173. }
  174. public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, Map<String, OutputCollector> outMap) {
  175. if (childOperators == null) {
  176. return;
  177. }
  178. for (Operator<? extends OperatorDesc> op : childOperators) {
  179. if (op.getIsReduceSink()) {
  180. String outputName = op.getReduceOutputName();
  181. if (outMap.containsKey(outputName)) {
  182. LOG.info("Setting output collector: " + op + " --> " + outputName);
  183. op.setOutputCollector(outMap.get(outputName));
  184. }
  185. } else {
  186. setChildrenCollector(op.getChildOperators(), outMap);
  187. }
  188. }
  189. }
  190. /**
  191. * Starting at the input operator, finds the last operator in the stream that
  192. * is an instance of the input class.
  193. *
  194. * @param op the starting operator
  195. * @param clazz the class that the operator that we are looking for instantiates
  196. * @return null if no such operator exists or multiple branches are found in
  197. * the stream, the last operator otherwise
  198. */
  199. @SuppressWarnings("unchecked")
  200. public static <T> T findLastOperator(Operator<?> op, Class<T> clazz) {
  201. Operator<?> currentOp = op;
  202. T lastOp = null;
  203. while (currentOp != null) {
  204. if (clazz.isInstance(currentOp)) {
  205. lastOp = (T) currentOp;
  206. }
  207. if (currentOp.getChildOperators().size() == 1) {
  208. currentOp = currentOp.getChildOperators().get(0);
  209. }
  210. else {
  211. currentOp = null;
  212. }
  213. }
  214. return lastOp;
  215. }
  216. public static void iterateParents(Operator<?> operator, Function<Operator<?>> function) {
  217. iterateParents(operator, function, new HashSet<Operator<?>>());
  218. }
  219. private static void iterateParents(Operator<?> operator, Function<Operator<?>> function, Set<Operator<?>> visited) {
  220. if (!visited.add(operator)) {
  221. return;
  222. }
  223. function.apply(operator);
  224. if (operator.getNumParent() > 0) {
  225. for (Operator<?> parent : operator.getParentOperators()) {
  226. iterateParents(parent, function, visited);
  227. }
  228. }
  229. }
  230. /**
  231. * Given an operator and a set of classes, it classifies the operators it finds
  232. * in the stream depending on the classes they instantiate.
  233. *
  234. * If a given operator object is an instance of more than one of the input classes,
  235. * e.g. the operator instantiates one of the classes in the input set that is a
  236. * subclass of another class in the set, the operator will be associated to both
  237. * classes in the output map.
  238. *
  239. * @param start the start operator
  240. * @param classes the set of classes
  241. * @return a multimap from each of the classes to the operators that instantiate
  242. * them
  243. */
  244. public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperators(
  245. Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
  246. ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
  247. new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
  248. List<Operator<?>> ops = new ArrayList<Operator<?>>();
  249. ops.add(start);
  250. while (!ops.isEmpty()) {
  251. List<Operator<?>> allChildren = new ArrayList<Operator<?>>();
  252. for (Operator<?> op: ops) {
  253. for (Class<? extends Operator<?>> clazz: classes) {
  254. if (clazz.isInstance(op)) {
  255. resultMap.put(clazz, op);
  256. }
  257. }
  258. allChildren.addAll(op.getChildOperators());
  259. }
  260. ops = allChildren;
  261. }
  262. return resultMap.build();
  263. }
  264. /**
  265. * Given an operator and a set of classes, it classifies the operators it finds
  266. * upstream depending on the classes it instantiates.
  267. *
  268. * If a given operator object is an instance of more than one of the input classes,
  269. * e.g. the operator instantiates one of the classes in the input set that is a
  270. * subclass of another class in the set, the operator will be associated to both
  271. * classes in the output map.
  272. *
  273. * @param start the start operator
  274. * @param classes the set of classes
  275. * @return a multimap from each of the classes to the operators that instantiate
  276. * them
  277. */
  278. public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperatorsUpstream(
  279. Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
  280. ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
  281. new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
  282. List<Operator<?>> ops = new ArrayList<Operator<?>>();
  283. ops.add(start);
  284. while (!ops.isEmpty()) {
  285. List<Operator<?>> allParent = new ArrayList<Operator<?>>();
  286. for (Operator<?> op: ops) {
  287. for (Class<? extends Operator<?>> clazz: classes) {
  288. if (clazz.isInstance(op)) {
  289. resultMap.put(clazz, op);
  290. }
  291. }
  292. if (op.getParentOperators() != null) {
  293. allParent.addAll(op.getParentOperators());
  294. }
  295. }
  296. ops = allParent;
  297. }
  298. return resultMap.build();
  299. }
  300. /**
  301. * Given an operator and a set of classes, it returns the number of operators it finds
  302. * upstream that instantiate any of the given classes.
  303. *
  304. * @param start the start operator
  305. * @param classes the set of classes
  306. * @return the number of operators
  307. */
  308. public static int countOperatorsUpstream(Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
  309. Multimap<Class<? extends Operator<?>>, Operator<?>> ops = classifyOperatorsUpstream(start, classes);
  310. int numberOperators = 0;
  311. Set<Operator<?>> uniqueOperators = new HashSet<Operator<?>>();
  312. for (Operator<?> op : ops.values()) {
  313. if (uniqueOperators.add(op)) {
  314. numberOperators++;
  315. }
  316. }
  317. return numberOperators;
  318. }
  319. public static void setMemoryAvailable(final List<Operator<? extends OperatorDesc>> operators,
  320. final long memoryAvailableToTask) {
  321. if (operators == null) {
  322. return;
  323. }
  324. for (Operator<? extends OperatorDesc> op : operators) {
  325. if (op.getConf() != null) {
  326. op.getConf().setMaxMemoryAvailable(memoryAvailableToTask);
  327. }
  328. if (op.getChildOperators() != null && !op.getChildOperators().isEmpty()) {
  329. setMemoryAvailable(op.getChildOperators(), memoryAvailableToTask);
  330. }
  331. }
  332. }
  333. /**
  334. * Given the input operator 'op', walk up the operator tree from 'op', and collect all the
  335. * roots that can be reached from it. The results are stored in 'roots'.
  336. */
  337. public static void findRoots(Operator<?> op, Collection<Operator<?>> roots) {
  338. List<Operator<?>> parents = op.getParentOperators();
  339. if (parents == null || parents.isEmpty()) {
  340. roots.add(op);
  341. return;
  342. }
  343. for (Operator<?> p : parents) {
  344. findRoots(p, roots);
  345. }
  346. }
  347. /**
  348. * Remove the branch that contains the specified operator. Do nothing if there's no branching,
  349. * i.e. all the upstream operators have only one child.
  350. */
  351. public static void removeBranch(SparkPartitionPruningSinkOperator op) {
  352. Operator<?> child = op;
  353. Operator<?> curr = op;
  354. while (curr.getChildOperators().size() <= 1) {
  355. child = curr;
  356. if (curr.getParentOperators() == null || curr.getParentOperators().isEmpty()) {
  357. return;
  358. }
  359. curr = curr.getParentOperators().get(0);
  360. }
  361. curr.removeChild(child);
  362. }
  363. /**
  364. * Remove operator from the tree, disconnecting it from its
  365. * parents and children.
  366. */
  367. public static void removeOperator(Operator<?> op) {
  368. if (op.getNumParent() != 0) {
  369. List<Operator<? extends OperatorDesc>> allParent =
  370. Lists.newArrayList(op.getParentOperators());
  371. for (Operator<?> parentOp : allParent) {
  372. parentOp.removeChild(op);
  373. }
  374. }
  375. if (op.getNumChild() != 0) {
  376. List<Operator<? extends OperatorDesc>> allChildren =
  377. Lists.newArrayList(op.getChildOperators());
  378. for (Operator<?> childOp : allChildren) {
  379. childOp.removeParent(op);
  380. }
  381. }
  382. }
  383. public static String getOpNamePretty(Operator<?> op) {
  384. if (op instanceof TableScanOperator) {
  385. return op.toString() + " (" + ((TableScanOperator) op).getConf().getAlias() + ")";
  386. }
  387. return op.toString();
  388. }
  389. /**
  390. * Return true if contain branch otherwise return false
  391. */
  392. public static boolean isInBranch(SparkPartitionPruningSinkOperator op) {
  393. Operator<?> curr = op;
  394. while (curr.getChildOperators().size() <= 1) {
  395. if (curr.getParentOperators() == null || curr.getParentOperators().isEmpty()) {
  396. return false;
  397. }
  398. curr = curr.getParentOperators().get(0);
  399. }
  400. return true;
  401. }
  402. public static Set<Operator<?>> getOp(BaseWork work, Class<?> clazz) {
  403. Set<Operator<?>> ops = new HashSet<Operator<?>>();
  404. if (work instanceof MapWork) {
  405. Collection<Operator<?>> opSet = ((MapWork) work).getAliasToWork().values();
  406. Stack<Operator<?>> opStack = new Stack<Operator<?>>();
  407. opStack.addAll(opSet);
  408. while (!opStack.empty()) {
  409. Operator<?> op = opStack.pop();
  410. ops.add(op);
  411. if (op.getChildOperators() != null) {
  412. opStack.addAll(op.getChildOperators());
  413. }
  414. }
  415. } else {
  416. ops.addAll(work.getAllOperators());
  417. }
  418. Set<Operator<? extends OperatorDesc>> matchingOps =
  419. new HashSet<Operator<? extends OperatorDesc>>();
  420. for (Operator<? extends OperatorDesc> op : ops) {
  421. if (clazz.isInstance(op)) {
  422. matchingOps.add(op);
  423. }
  424. }
  425. return matchingOps;
  426. }
  427. public static Operator<?> findOperatorByMarker(Operator<?> start, String marker) {
  428. Deque<Operator<?>> queue = new ArrayDeque<>();
  429. queue.add(start);
  430. while (!queue.isEmpty()) {
  431. Operator<?> op = queue.remove();
  432. if (marker.equals(op.getMarker())) {
  433. return op;
  434. }
  435. if (op.getChildOperators() != null) {
  436. queue.addAll(op.getChildOperators());
  437. }
  438. }
  439. return null;
  440. }
  441. public static Set<Operator<?>>
  442. findWorkOperatorsAndSemiJoinEdges(Operator<?> start,
  443. final Map<ReduceSinkOperator, SemiJoinBranchInfo> rsToSemiJoinBranchInfo,
  444. Set<ReduceSinkOperator> semiJoinOps, Set<TerminalOperator<?>> terminalOps) {
  445. Set<Operator<?>> found = new HashSet<>();
  446. findWorkOperatorsAndSemiJoinEdges(start,
  447. found, rsToSemiJoinBranchInfo, semiJoinOps, terminalOps);
  448. return found;
  449. }
  450. private static void
  451. findWorkOperatorsAndSemiJoinEdges(Operator<?> start, Set<Operator<?>> found,
  452. final Map<ReduceSinkOperator, SemiJoinBranchInfo> rsToSemiJoinBranchInfo,
  453. Set<ReduceSinkOperator> semiJoinOps, Set<TerminalOperator<?>> terminalOps) {
  454. found.add(start);
  455. if (start.getParentOperators() != null) {
  456. for (Operator<?> parent : start.getParentOperators()) {
  457. if (parent instanceof ReduceSinkOperator) {
  458. continue;
  459. }
  460. if (!found.contains(parent)) {
  461. findWorkOperatorsAndSemiJoinEdges(parent, found, rsToSemiJoinBranchInfo, semiJoinOps, terminalOps);
  462. }
  463. }
  464. }
  465. if (start instanceof TerminalOperator) {
  466. // This could be RS1 in semijoin edge which looks like,
  467. // SEL->GBY1->RS1->GBY2->RS2
  468. boolean semiJoin = false;
  469. if (start.getChildOperators().size() == 1) {
  470. Operator<?> gb2 = start.getChildOperators().get(0);
  471. if (gb2 instanceof GroupByOperator && gb2.getChildOperators().size() == 1) {
  472. Operator<?> rs2 = gb2.getChildOperators().get(0);
  473. if (rs2 instanceof ReduceSinkOperator && (rsToSemiJoinBranchInfo.get(rs2) != null)) {
  474. // Semijoin edge found. Add all the operators to the set
  475. found.add(start);
  476. found.add(gb2);
  477. found.add(rs2);
  478. semiJoinOps.add((ReduceSinkOperator)rs2);
  479. semiJoin = true;
  480. }
  481. }
  482. }
  483. if (!semiJoin) {
  484. terminalOps.add((TerminalOperator)start);
  485. }
  486. return;
  487. }
  488. if (start.getChildOperators() != null) {
  489. for (Operator<?> child : start.getChildOperators()) {
  490. if (!found.contains(child)) {
  491. findWorkOperatorsAndSemiJoinEdges(child, found, rsToSemiJoinBranchInfo, semiJoinOps, terminalOps);
  492. }
  493. }
  494. }
  495. return;
  496. }
  497. private static List<ExprNodeDesc> backtrackAll(List<ExprNodeDesc> exprs, Operator<? extends OperatorDesc> start,
  498. Operator<? extends OperatorDesc> terminal) {
  499. List<ExprNodeDesc> backtrackedExprs = new ArrayList<>();
  500. try {
  501. for (ExprNodeDesc expr : exprs) {
  502. ExprNodeDesc backtrackedExpr = ExprNodeDescUtils.backtrack(expr, start, terminal);
  503. if(backtrackedExpr == null) {
  504. return null;
  505. }
  506. backtrackedExprs.add(backtrackedExpr);
  507. }
  508. } catch (SemanticException e) {
  509. return null;
  510. }
  511. return backtrackedExprs;
  512. }
  513. // set of expressions are considered compatible if following are true:
  514. // * they are both same size
  515. // * if the are column expressions their table alias is same as well (this is checked because otherwise
  516. // expressions coming out of multiple RS (e.g. children of JOIN) are ended up same
  517. private static boolean areBacktrackedExprsCompatible(final List<ExprNodeDesc> orgexprs,
  518. final List<ExprNodeDesc> backtrackedExprs) {
  519. if(backtrackedExprs == null || backtrackedExprs.size() != orgexprs.size()) {
  520. return false;
  521. }
  522. for(int i=0; i<orgexprs.size(); i++) {
  523. if(orgexprs.get(i) instanceof ExprNodeColumnDesc && backtrackedExprs.get(i) instanceof ExprNodeColumnDesc) {
  524. ExprNodeColumnDesc orgColExpr = (ExprNodeColumnDesc)orgexprs.get(i);
  525. ExprNodeColumnDesc backExpr = (ExprNodeColumnDesc)backtrackedExprs.get(i);
  526. String orgTabAlias = orgColExpr.getTabAlias();
  527. String backTabAlias = backExpr.getTabAlias();
  528. if(orgTabAlias != null && backTabAlias != null && !orgTabAlias.equals(backTabAlias)) {
  529. return false;
  530. }
  531. }
  532. }
  533. return true;
  534. }
  535. /***
  536. * This method backtracks the given expressions to the source RS. Note that expressions could
  537. * further be backtracked to e.g. table source, but we are interested in RS only because this
  538. * is used to estimate number of rows for group by and estimation will be better at RS since all
  539. * the filters etc will have already been applied
  540. * @param start
  541. * @param exprs
  542. * @return null if RS is not found
  543. */
  544. public static Operator<? extends OperatorDesc> findSourceRS(Operator<?> start, List<ExprNodeDesc> exprs) {
  545. Operator currRS = null; //keep track of the RS
  546. if (start instanceof ReduceSinkOperator) {
  547. currRS = start;
  548. }
  549. if (start instanceof UnionOperator) {
  550. //Union keeps the schema same but can change the cardinality, therefore we don't want to backtrack further
  551. // into Union
  552. return currRS;
  553. }
  554. List<Operator<? extends OperatorDesc>> parents = start.getParentOperators();
  555. if (parents == null | parents.isEmpty()) {
  556. // reached end e.g. TS operator
  557. return null;
  558. }
  559. Operator<? extends OperatorDesc> nextOp = null;
  560. List<ExprNodeDesc> backtrackedExprs = null;
  561. for (int i = 0; i < parents.size(); i++) {
  562. backtrackedExprs = backtrackAll(exprs, start, parents.get(i));
  563. if (areBacktrackedExprsCompatible(exprs, backtrackedExprs)) {
  564. nextOp = parents.get(i);
  565. break;
  566. }
  567. }
  568. if (nextOp != null) {
  569. Operator<? extends OperatorDesc> nextRS = findSourceRS(nextOp, backtrackedExprs);
  570. if (nextRS != null) {
  571. currRS = nextRS;
  572. }
  573. }
  574. return currRS;
  575. }
  576. /***
  577. * Given group by operator on reduce side, this tries to get to the group by on map side (partial/merge).
  578. * @param reduceSideGbOp Make sure this is group by side reducer
  579. * @return map side gb if any, else null
  580. */
  581. public static GroupByOperator findMapSideGb(final GroupByOperator reduceSideGbOp) {
  582. Operator<? extends OperatorDesc> parentOp = reduceSideGbOp;
  583. while(parentOp.getParentOperators() != null && parentOp.getParentOperators().size() > 0) {
  584. if(parentOp.getParentOperators().size() > 1) {
  585. return null;
  586. }
  587. parentOp = parentOp.getParentOperators().get(0);
  588. if(parentOp instanceof GroupByOperator) {
  589. return (GroupByOperator)parentOp;
  590. }
  591. }
  592. return null;
  593. }
  594. /**
  595. * Determines if the two trees are using independent inputs.
  596. */
  597. public static boolean treesWithIndependentInputs(Operator<?> tree1, Operator<?> tree2) {
  598. Set<String> tables1 = signaturesOf(OperatorUtils.findOperatorsUpstream(tree1, TableScanOperator.class));
  599. Set<String> tables2 = signaturesOf(OperatorUtils.findOperatorsUpstream(tree2, TableScanOperator.class));
  600. tables1.retainAll(tables2);
  601. return tables1.isEmpty();
  602. }
  603. private static Set<String> signaturesOf(Set<TableScanOperator> ops) {
  604. Set<String> ret = new HashSet<>();
  605. for (TableScanOperator o : ops) {
  606. ret.add(o.getConf().getQualifiedTable());
  607. }
  608. return ret;
  609. }
  610. /**
  611. * Given start, end operators and a colName, look for the original TS colExpr
  612. * that it originates from. This method checks column Expr mappings (aliases)
  613. * from the start operator and its parents up to the end operator.
  614. * It then returns the unwrapped Expr found within a ExprNodeGenericFuncDesc (if any).
  615. * @param colName colName to backtrack
  616. * @param start start backtracking from this Op
  617. * @param end end backtracking at this Op
  618. * @return the original column name or null if not found
  619. */
  620. public static ExprNodeColumnDesc findTableOriginColExpr(ExprNodeColumnDesc colName, Operator<?> start, Operator<?> end)
  621. throws SemanticException {
  622. ExprNodeDesc res = ExprNodeDescUtils.backtrack(colName, start, end, false, true);
  623. return (res != null) ? ExprNodeDescUtils.getColumnExpr(res) : null;
  624. }
  625. public static Set<Operator<?>> getAllOperatorsForSimpleFetch(Set<Operator<?>> opSet) {
  626. Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
  627. Stack<Operator<?>> opStack = new Stack<Operator<?>>();
  628. // add all children
  629. opStack.addAll(opSet);
  630. while (!opStack.empty()) {
  631. Operator<?> op = opStack.pop();
  632. returnSet.add(op);
  633. if (op.getChildOperators() != null) {
  634. opStack.addAll(op.getChildOperators());
  635. }
  636. }
  637. return returnSet;
  638. }
  639. /**
  640. * Given a {@link FetchTask} this returns a set of all the operators within the task
  641. * @param task - Fetch Task
  642. */
  643. public static Set<Operator<?>> getAllFetchOperators(FetchTask task) {
  644. if (task.getWork().getSource() == null) {
  645. return Collections.EMPTY_SET;
  646. }
  647. Set<Operator<?>> operatorList = new HashSet<>();
  648. operatorList.add(task.getWork().getSource());
  649. return getAllOperatorsForSimpleFetch(operatorList);
  650. }
  651. }