PageRenderTime 55ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/test/org/apache/pig/test/TestNewPlanPushDownForeachFlatten.java

https://github.com/zjffdu/pig
Java | 1174 lines | 936 code | 151 blank | 87 comment | 38 complexity | 6cdf17e02386fb5a9b5c6c00d5414861 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.pig.test;
  19. import java.util.ArrayList;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Properties;
  23. import java.util.Set;
  24. import org.apache.pig.ExecType;
  25. import org.apache.pig.FilterFunc;
  26. import org.apache.pig.PigServer;
  27. import org.apache.pig.data.Tuple;
  28. import org.apache.pig.impl.PigContext;
  29. import org.apache.pig.test.utils.Identity;
  30. import org.apache.pig.newplan.Operator;
  31. import org.apache.pig.newplan.OperatorPlan;
  32. import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
  33. import org.apache.pig.newplan.optimizer.PlanOptimizer;
  34. import org.apache.pig.newplan.optimizer.Rule;
  35. import org.apache.pig.newplan.logical.relational.LOCross;
  36. import org.apache.pig.newplan.logical.relational.LOForEach;
  37. import org.apache.pig.newplan.logical.relational.LOJoin;
  38. import org.apache.pig.newplan.logical.relational.LOLimit;
  39. import org.apache.pig.newplan.logical.relational.LOLoad;
  40. import org.apache.pig.newplan.logical.relational.LOSort;
  41. import org.apache.pig.newplan.logical.relational.LogicalPlan;
  42. import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
  43. import org.apache.pig.newplan.logical.rules.OptimizerUtils;
  44. import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten;
  45. import org.junit.Assert;
  46. import org.junit.Test;
  47. import org.junit.Before;
  48. /**
  49. * Test the logical optimizer.
  50. */
  51. public class TestNewPlanPushDownForeachFlatten {
  52. PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
  53. @Before
  54. public void tearDown() {
  55. }
  56. /**
  57. *
  58. * A simple filter UDF for testing
  59. *
  60. */
  61. static public class MyFilterFunc extends FilterFunc {
  62. @Override
  63. public Boolean exec(Tuple input) {
  64. return false;
  65. }
  66. }
  67. /**
  68. * Old plan is empty, so is the optimized new plan.
  69. */
  70. @Test
  71. public void testErrorEmptyInput() throws Exception {
  72. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( "" );
  73. Assert.assertTrue( newLogicalPlan.getOperators().hasNext() == false );
  74. }
  75. /**
  76. * No foreach in the plan, no effect.
  77. */
  78. @Test
  79. public void testErrorNonForeachInput() throws Exception {
  80. String query = "A = load 'myfile' as (name, age, gpa);" +
  81. "store A into 'output';";
  82. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  83. Operator load = newLogicalPlan.getSources().get( 0 );
  84. Assert.assertTrue( load instanceof LOLoad );
  85. List<Operator> nexts = newLogicalPlan.getSuccessors( load );
  86. Assert.assertTrue( nexts != null && nexts.size() == 1 );
  87. }
  88. @Test
  89. public void testForeachNoFlatten() throws Exception {
  90. String query = "A = load 'myfile' as (name, age, gpa);" +
  91. "B = foreach A generate $0, $1, $2;" +
  92. "C = order B by $0, $1;" +
  93. "D = store C into 'dummy';";
  94. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  95. Operator load = newLogicalPlan.getSources().get( 0 );
  96. Assert.assertTrue( load instanceof LOLoad );
  97. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  98. Assert.assertTrue( foreach instanceof LOForEach );
  99. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  100. Assert.assertTrue( foreach instanceof LOForEach );
  101. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  102. Assert.assertTrue( sort instanceof LOSort );
  103. }
  104. @Test
  105. public void testForeachNoSuccessors() throws Exception {
  106. String query = "A = load 'myfile' as (name, age, gpa);" +
  107. "B = foreach A generate flatten($1);" +
  108. "Store B into 'output';";
  109. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  110. Operator load = newLogicalPlan.getSources().get( 0 );
  111. Assert.assertTrue( load instanceof LOLoad );
  112. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  113. Assert.assertTrue( foreach instanceof LOForEach );
  114. }
  115. @Test
  116. public void testForeachStreaming() throws Exception {
  117. String query = "A = load 'myfile' as (name, age, gpa);" +
  118. "B = foreach A generate flatten($1);" +
  119. "C = stream B through `" + "pc -l" + "`;" +
  120. "Store C into 'output';";
  121. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  122. Operator load = newLogicalPlan.getSources().get( 0 );
  123. Assert.assertTrue( load instanceof LOLoad );
  124. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  125. Assert.assertTrue( foreach instanceof LOForEach );
  126. }
  127. @Test
  128. public void testForeachDistinct() throws Exception {
  129. String query = "A = load 'myfile' as (name, age, gpa);" +
  130. "B = foreach A generate flatten($1);" +
  131. "C = distinct B;" +
  132. "store C into 'output';";
  133. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  134. Operator load = newLogicalPlan.getSources().get( 0 );
  135. Assert.assertTrue( load instanceof LOLoad );
  136. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  137. Assert.assertTrue( foreach instanceof LOForEach );
  138. }
  139. @Test
  140. public void testForeachForeach() throws Exception {
  141. String query = "A = load 'myfile' as (name, age, gpa);" +
  142. "B = foreach A generate $0, $1, flatten(1);" +
  143. "C = foreach B generate $0;" +
  144. "store C into 'output';";
  145. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  146. Operator load = newLogicalPlan.getSources().get( 0 );
  147. Assert.assertTrue( load instanceof LOLoad );
  148. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  149. Assert.assertTrue( foreach instanceof LOForEach );
  150. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  151. Assert.assertTrue( foreach instanceof LOForEach );
  152. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  153. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  154. Assert.assertTrue( foreach instanceof LOForEach );
  155. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  156. }
  157. @Test
  158. public void testForeachFilter() throws Exception {
  159. String query = "A = load 'myfile' as (name, age, gpa);" +
  160. "B = foreach A generate $0, $1, flatten($2);" +
  161. "C = filter B by $1 < 18;" +
  162. "store C into 'output';";
  163. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  164. Operator load = newLogicalPlan.getSources().get( 0 );
  165. Assert.assertTrue( load instanceof LOLoad );
  166. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  167. Assert.assertTrue( foreach instanceof LOForEach );
  168. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  169. Assert.assertTrue( foreach instanceof LOForEach );
  170. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  171. }
  172. @Test
  173. public void testForeachSplitOutput() throws Exception {
  174. String query = "A = load 'myfile' as (name, age, gpa);" +
  175. "B = foreach A generate $0, $1, flatten($2);" +
  176. "split B into C if $1 < 18, D if $1 >= 18;" +
  177. "store C into 'output1';" +
  178. "store D into 'output2';";
  179. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  180. Operator load = newLogicalPlan.getSources().get( 0 );
  181. Assert.assertTrue( load instanceof LOLoad );
  182. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  183. Assert.assertTrue( foreach instanceof LOForEach );
  184. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  185. Assert.assertTrue( foreach instanceof LOForEach );
  186. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  187. }
  188. @Test
  189. public void testForeachLimit() throws Exception {
  190. String query = "A = load 'myfile' as (name, age, gpa);" +
  191. "B = foreach A generate $0, $1, flatten($2);" +
  192. "C = limit B 10;" +
  193. "store C into 'output';";
  194. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  195. Operator load = newLogicalPlan.getSources().get( 0 );
  196. Assert.assertTrue( load instanceof LOLoad );
  197. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  198. Assert.assertTrue( foreach instanceof LOForEach );
  199. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  200. Assert.assertTrue( foreach instanceof LOForEach );
  201. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  202. }
  203. @Test
  204. public void testForeachUnion() throws Exception {
  205. String query = "A = load 'myfile' as (name, age, gpa);" +
  206. "B = foreach A generate $0, $1, flatten($2);" +
  207. "C = load 'anotherfile' as (name, age, preference);" +
  208. "D = union B, C;" +
  209. "store D into 'output';";
  210. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  211. List<Operator> loads = newLogicalPlan.getSources();
  212. Assert.assertTrue( loads.size() == 2 );
  213. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  214. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  215. Operator load = null;
  216. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  217. load = loads.get( 0 );
  218. else
  219. load = loads.get( 1 );
  220. Assert.assertTrue( load instanceof LOLoad );
  221. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  222. Assert.assertTrue( foreach instanceof LOForEach );
  223. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  224. Assert.assertTrue( foreach instanceof LOForEach );
  225. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  226. }
  227. @Test
  228. public void testForeachCogroup() throws Exception {
  229. String query = "A = load 'myfile' as (name, age, gpa);" +
  230. "B = foreach A generate $0, $1, flatten($2);" +
  231. "C = load 'anotherfile' as (name, age, preference);" +
  232. "D = cogroup B by $0, C by $0;" +
  233. "store D into 'output';";
  234. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  235. List<Operator> loads = newLogicalPlan.getSources();
  236. Assert.assertTrue( loads.size() == 2 );
  237. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  238. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  239. Operator load = null;
  240. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  241. load = loads.get( 0 );
  242. else
  243. load = loads.get( 1 );
  244. Assert.assertTrue( load instanceof LOLoad );
  245. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  246. Assert.assertTrue( foreach instanceof LOForEach );
  247. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  248. Assert.assertTrue( foreach instanceof LOForEach );
  249. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  250. }
  251. @Test
  252. public void testForeachGroupBy() throws Exception {
  253. String query = "A = load 'myfile' as (name, age, gpa);" +
  254. "B = foreach A generate $0, $1, flatten($2);" +
  255. "C = group B by $0;" +
  256. "store C into 'output';";
  257. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  258. Operator load = newLogicalPlan.getSources().get( 0 );
  259. Assert.assertTrue( load instanceof LOLoad );
  260. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  261. Assert.assertTrue( foreach instanceof LOForEach );
  262. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  263. Assert.assertTrue( foreach instanceof LOForEach );
  264. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  265. }
  266. @Test
  267. public void testForeachSort() throws Exception {
  268. String query = "A = load 'myfile' as (name, age, gpa);" +
  269. "B = foreach A generate $0, $1, flatten($2);" +
  270. "C = order B by $0, $1;" +
  271. "D = store C into 'dummy';";
  272. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  273. Operator load = newLogicalPlan.getSources().get( 0 );
  274. Assert.assertTrue( load instanceof LOLoad );
  275. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  276. Assert.assertTrue( foreach instanceof LOForEach );
  277. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  278. Assert.assertTrue( sort instanceof LOSort );
  279. foreach = newLogicalPlan.getSuccessors( sort ).get( 0 );
  280. Assert.assertTrue( foreach instanceof LOForEach );
  281. }
  282. /**
  283. * Non-pure-projection, not optimizable.
  284. */
  285. @Test
  286. public void testForeachSortNegative1() throws Exception {
  287. String query = "A = load 'myfile' as (name, age, gpa);" +
  288. "B = foreach A generate $0 + 5, $1, flatten($2);" +
  289. "C = order B by $0, $1;" +
  290. "D = store C into 'dummy';";
  291. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  292. Operator load = newLogicalPlan.getSources().get( 0 );
  293. Assert.assertTrue( load instanceof LOLoad );
  294. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  295. Assert.assertTrue( foreach instanceof LOForEach );
  296. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  297. Assert.assertTrue( foreach instanceof LOForEach );
  298. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  299. Assert.assertTrue( sort instanceof LOSort );
  300. }
  301. /**
  302. * If the flattened field is referenced in the sort condition, then no optimization can be done.
  303. */
  304. @Test
  305. public void testForeachSortNegative2() throws Exception {
  306. String query = "A = load 'myfile' as (name, age, gpa:tuple(x,y));" +
  307. "B = foreach A generate $0, $1, flatten($2);" +
  308. "C = order B by $0, $3;" +
  309. "D = store C into 'dummy';";
  310. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  311. Operator load = newLogicalPlan.getSources().get( 0 );
  312. Assert.assertTrue( load instanceof LOLoad );
  313. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  314. Assert.assertTrue( foreach instanceof LOForEach );
  315. Operator foreach1 = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  316. Assert.assertTrue( foreach1 instanceof LOForEach );
  317. Operator sort = newLogicalPlan.getSuccessors( foreach1 ).get( 0 );
  318. Assert.assertTrue( sort instanceof LOSort );
  319. }
  320. @Test
  321. public void testForeachFlattenAddedColumnSort() throws Exception {
  322. String query = "A = load 'myfile' as (name, age, gpa);" +
  323. "B = foreach A generate $0, $1, flatten(1);" +
  324. "C = order B by $0, $1;" +
  325. "store C into 'output';";
  326. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  327. Operator load = newLogicalPlan.getSources().get( 0 );
  328. Assert.assertTrue( load instanceof LOLoad );
  329. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  330. Assert.assertTrue( foreach instanceof LOForEach );
  331. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  332. Assert.assertTrue( foreach instanceof LOForEach );
  333. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  334. Assert.assertTrue( sort instanceof LOSort );
  335. }
  336. @Test
  337. public void testForeachUDFSort() throws Exception {
  338. String query = "A = load 'myfile' as (name, age, gpa);" +
  339. "B = foreach A generate $0, $1, " + Identity.class.getName() + "($2) ;" +
  340. "C = order B by $0, $1;" +
  341. "store C into 'output';";
  342. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  343. Operator load = newLogicalPlan.getSources().get( 0 );
  344. Assert.assertTrue( load instanceof LOLoad );
  345. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  346. Assert.assertTrue( foreach instanceof LOForEach );
  347. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  348. Assert.assertTrue( foreach instanceof LOForEach );
  349. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  350. Assert.assertTrue( sort instanceof LOSort );
  351. }
  352. @Test
  353. public void testForeachCastSort() throws Exception {
  354. String query = "A = load 'myfile' as (name, age, gpa);" +
  355. "B = foreach A generate (chararray)$0, $1, flatten($2);" +
  356. "C = order B by $0, $1;" +
  357. "store C into 'output';";
  358. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  359. Operator load = newLogicalPlan.getSources().get( 0 );
  360. Assert.assertTrue( load instanceof LOLoad );
  361. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  362. Assert.assertTrue( foreach instanceof LOForEach );
  363. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  364. Assert.assertTrue( foreach instanceof LOForEach );
  365. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  366. Assert.assertTrue( sort instanceof LOSort );
  367. }
  368. @Test
  369. public void testForeachCross() throws Exception {
  370. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  371. "B = foreach A generate $0, $1, flatten($2);" +
  372. "C = load 'anotherfile' as (name, age, preference);" +
  373. "D = cross B, C;" +
  374. "E = limit D 10;" +
  375. "store E into 'output';";
  376. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  377. List<Operator> loads = newLogicalPlan.getSources();
  378. Assert.assertTrue( loads.size() == 2 );
  379. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  380. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  381. Operator op = null;
  382. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  383. op = loads.get( 0 );
  384. else
  385. op = loads.get( 1 );
  386. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  387. Assert.assertTrue( op instanceof LOForEach );
  388. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  389. Assert.assertTrue( op instanceof LOForEach );
  390. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  391. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  392. Assert.assertTrue( op instanceof LOCross );
  393. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  394. Assert.assertTrue( op instanceof LOForEach );
  395. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  396. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  397. Assert.assertTrue( op instanceof LOLimit );
  398. }
  399. @Test
  400. public void testForeachCross1() throws Exception {
  401. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  402. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  403. "C = foreach B generate $0, $1, flatten($2);" +
  404. "D = cross A, C;" +
  405. "E = limit D 10;" +
  406. "store E into 'output';";
  407. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  408. List<Operator> loads = newLogicalPlan.getSources();
  409. Assert.assertTrue( loads.size() == 2 );
  410. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  411. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  412. Operator op = null;
  413. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  414. op = loads.get( 0 );
  415. else
  416. op = loads.get( 1 );
  417. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  418. Assert.assertTrue( op instanceof LOForEach );
  419. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  420. Assert.assertTrue( op instanceof LOForEach );
  421. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  422. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  423. Assert.assertTrue( op instanceof LOCross );
  424. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  425. Assert.assertTrue( op instanceof LOForEach );
  426. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  427. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  428. Assert.assertTrue( op instanceof LOLimit );
  429. }
  430. // TODO
  431. // The following test case testForeachCross2 has multiple foreach flatten
  432. // A new rule should optimize this case
  433. @Test
  434. public void testForeachCross2() throws Exception {
  435. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  436. "B = foreach A generate $0, $1, flatten($2);" +
  437. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  438. "D = foreach C generate $0, $1, flatten($2);" +
  439. "E = cross B, D;" +
  440. "F = limit E 10;" +
  441. "store F into 'output';";
  442. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  443. // No optimization about foreach flatten.
  444. Operator store = newLogicalPlan.getSinks().get( 0 );
  445. Operator limit = newLogicalPlan.getPredecessors(store).get(0);
  446. Operator cross = newLogicalPlan.getPredecessors(limit).get(0);
  447. Assert.assertTrue( cross instanceof LOCross );
  448. }
  449. /**
  450. * This actually is a valid case, even though the optimization may not provide any performance benefit. However, detecting
  451. * such a case requires more coding. Thus, we allow optimization to go thru in this case.
  452. */
  453. @Test
  454. public void testForeachFlattenAddedColumnCross() throws Exception {
  455. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  456. "B = foreach A generate $0, $1, flatten(1);" +
  457. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  458. "D = cross B, C;" +
  459. "E = limit D 10;" +
  460. "store E into 'output';";
  461. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  462. List<Operator> loads = newLogicalPlan.getSources();
  463. Assert.assertTrue( loads.size() == 2 );
  464. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  465. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  466. Operator op = null;
  467. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  468. op = loads.get( 0 );
  469. else
  470. op = loads.get( 1 );
  471. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  472. Assert.assertTrue( op instanceof LOForEach );
  473. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  474. Assert.assertTrue( op instanceof LOForEach );
  475. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  476. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  477. Assert.assertTrue( op instanceof LOCross );
  478. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  479. Assert.assertTrue( op instanceof LOForEach );
  480. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  481. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  482. Assert.assertTrue( op instanceof LOLimit );
  483. }
  484. /**
  485. * This is a valid, positive test case. Optimization should go thru.
  486. */
  487. @Test
  488. public void testForeachUDFCross() throws Exception {
  489. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  490. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  491. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  492. "D = cross B, C;" +
  493. "E = limit D 10;" +
  494. "store E into 'output';";
  495. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  496. List<Operator> loads = newLogicalPlan.getSources();
  497. Assert.assertTrue( loads.size() == 2 );
  498. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  499. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  500. Operator op = null;
  501. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  502. op = loads.get( 0 );
  503. else
  504. op = loads.get( 1 );
  505. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  506. Assert.assertTrue( op instanceof LOForEach );
  507. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  508. Assert.assertTrue( op instanceof LOForEach );
  509. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  510. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  511. Assert.assertTrue( op instanceof LOCross );
  512. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  513. Assert.assertTrue( op instanceof LOForEach );
  514. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  515. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  516. Assert.assertTrue( op instanceof LOLimit );
  517. }
  518. /**
  519. * Cast should NOT matter to cross. This is a valid positive test case.
  520. */
  521. @Test
  522. public void testForeachCastCross() throws Exception {
  523. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  524. "B = foreach A generate $0, (int)$1, flatten( $2 );" +
  525. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  526. "D = cross B, C;" +
  527. "E = limit D 10;" +
  528. "store E into 'output';";
  529. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  530. List<Operator> loads = newLogicalPlan.getSources();
  531. Assert.assertTrue( loads.size() == 2 );
  532. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  533. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  534. Operator op = null;
  535. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  536. op = loads.get( 0 );
  537. else
  538. op = loads.get( 1 );
  539. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  540. Assert.assertTrue( op instanceof LOForEach );
  541. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  542. Assert.assertTrue( op instanceof LOForEach );
  543. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  544. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  545. Assert.assertTrue( op instanceof LOCross );
  546. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  547. Assert.assertTrue( op instanceof LOForEach );
  548. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  549. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  550. Assert.assertTrue( op instanceof LOLimit );
  551. }
  552. @Test
  553. public void testForeachFRJoin() throws Exception {
  554. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  555. "B = foreach A generate $0, $1, flatten($2);" +
  556. "C = load 'anotherfile' as (name, age, preference);" +
  557. "D = join B by $0, C by $0 using 'replicated';" +
  558. "E = limit D 10;" +
  559. "store E into 'output';";
  560. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  561. List<Operator> loads = newLogicalPlan.getSources();
  562. Assert.assertTrue( loads.size() == 2 );
  563. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  564. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  565. Operator op = null;
  566. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  567. op = loads.get( 0 );
  568. else
  569. op = loads.get( 1 );
  570. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  571. Assert.assertTrue( op instanceof LOForEach );
  572. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  573. Assert.assertTrue( op instanceof LOForEach );
  574. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  575. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  576. Assert.assertTrue( op instanceof LOJoin );
  577. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  578. Assert.assertTrue( op instanceof LOForEach );
  579. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  580. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  581. Assert.assertTrue( op instanceof LOLimit );
  582. }
  583. @Test
  584. public void testForeachFRJoin1() throws Exception {
  585. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  586. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  587. "C = foreach B generate $0, $1, flatten($2);" +
  588. "D = join A by $0, C by $0 using 'replicated';" +
  589. "E = limit D 10;" +
  590. "store E into 'output';";
  591. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  592. List<Operator> loads = newLogicalPlan.getSources();
  593. Assert.assertTrue( loads.size() == 2 );
  594. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  595. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  596. Operator op = null;
  597. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  598. op = loads.get( 0 );
  599. else
  600. op = loads.get( 1 );
  601. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  602. Assert.assertTrue( op instanceof LOForEach );
  603. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  604. Assert.assertTrue( op instanceof LOForEach );
  605. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  606. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  607. Assert.assertTrue( op instanceof LOJoin );
  608. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  609. Assert.assertTrue( op instanceof LOForEach );
  610. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  611. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  612. Assert.assertTrue( op instanceof LOLimit );
  613. }
  614. // TODO
  615. // The following test case testForeachFRJoin2 has multiple foreach flatten
  616. // A new rule should optimize this case
  617. @Test
  618. public void testForeachFRJoin2() throws Exception {
  619. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  620. "B = foreach A generate $0, $1, flatten($2);" +
  621. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  622. "D = foreach C generate $0, $1, flatten($2);" +
  623. "E = join B by $0, D by $0 using 'replicated';" +
  624. "F = limit E 10;" +
  625. "store F into 'output';";
  626. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  627. // No optimization about foreach flatten.
  628. Operator store = newLogicalPlan.getSinks().get( 0 );
  629. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  630. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  631. Assert.assertTrue( join instanceof LOJoin );
  632. }
  633. /**
  634. * Valid positive test case, even though the benefit from the optimization is questionable. However, putting in additinal check for
  635. * this condition requires extra coding.
  636. */
  637. @Test
  638. public void testForeachFlattenAddedColumnFRJoin() throws Exception {
  639. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  640. "B = foreach A generate $0, $1, flatten(1);" +
  641. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  642. "D = join B by $0, C by $0 using 'replicated';" +
  643. "E = limit D 10;" +
  644. "store E into 'output';";
  645. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  646. List<Operator> loads = newLogicalPlan.getSources();
  647. Assert.assertTrue( loads.size() == 2 );
  648. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  649. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  650. Operator op = null;
  651. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  652. op = loads.get( 0 );
  653. else
  654. op = loads.get( 1 );
  655. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  656. Assert.assertTrue( op instanceof LOForEach );
  657. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  658. Assert.assertTrue( op instanceof LOForEach );
  659. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  660. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  661. Assert.assertTrue( op instanceof LOJoin );
  662. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  663. Assert.assertTrue( op instanceof LOForEach );
  664. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  665. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  666. Assert.assertTrue( op instanceof LOLimit );
  667. }
  668. /**
  669. * This is actually a valid, positive test case. UDF doesn't prevent optimization.
  670. * @throws Exception
  671. */
  672. @Test
  673. public void testForeachUDFFRJoin() throws Exception {
  674. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  675. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  676. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  677. "D = join B by $0, C by $0 using 'replicated';" +
  678. "E = limit D 10;" +
  679. "store E into 'output';";
  680. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  681. List<Operator> loads = newLogicalPlan.getSources();
  682. Assert.assertTrue( loads.size() == 2 );
  683. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  684. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  685. Operator op = null;
  686. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  687. op = loads.get( 0 );
  688. else
  689. op = loads.get( 1 );
  690. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  691. Assert.assertTrue( op instanceof LOForEach );
  692. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  693. Assert.assertTrue( op instanceof LOForEach );
  694. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  695. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  696. Assert.assertTrue( op instanceof LOJoin );
  697. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  698. Assert.assertTrue( op instanceof LOForEach );
  699. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  700. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  701. Assert.assertTrue( op instanceof LOLimit );
  702. }
  703. /**
  704. * This is actually a valid, positive test case. Cast doesn't prevent optimization.
  705. * @throws Exception
  706. */
  707. @Test
  708. public void testForeachCastFRJoin() throws Exception {
  709. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  710. "B = foreach A generate $0, (int)$1, flatten($2);" +
  711. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  712. "D = join B by $0, C by $0 using 'replicated';" +
  713. "E = limit D 10;" +
  714. "store E into 'output';";
  715. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  716. List<Operator> loads = newLogicalPlan.getSources();
  717. Assert.assertTrue( loads.size() == 2 );
  718. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  719. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  720. Operator op = null;
  721. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  722. op = loads.get( 0 );
  723. else
  724. op = loads.get( 1 );
  725. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  726. Assert.assertTrue( op instanceof LOForEach );
  727. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  728. Assert.assertTrue( op instanceof LOForEach );
  729. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  730. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  731. Assert.assertTrue( op instanceof LOJoin );
  732. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  733. Assert.assertTrue( op instanceof LOForEach );
  734. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  735. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  736. Assert.assertTrue( op instanceof LOLimit );
  737. }
  738. @Test
  739. public void testForeachInnerJoin() throws Exception {
  740. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  741. "B = foreach A generate $0, $1, flatten($2);" +
  742. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  743. "D = join B by $0, C by $0;" +
  744. "E = limit D 10;" +
  745. "store E into 'output';";
  746. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  747. List<Operator> loads = newLogicalPlan.getSources();
  748. Assert.assertTrue( loads.size() == 2 );
  749. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  750. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  751. Operator op = null;
  752. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  753. op = loads.get( 0 );
  754. else
  755. op = loads.get( 1 );
  756. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  757. Assert.assertTrue( op instanceof LOForEach );
  758. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  759. Assert.assertTrue( op instanceof LOForEach );
  760. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  761. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  762. Assert.assertTrue( op instanceof LOJoin );
  763. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  764. Assert.assertTrue( op instanceof LOForEach );
  765. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  766. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  767. Assert.assertTrue( op instanceof LOLimit );
  768. }
  769. @Test
  770. public void testForeachInnerJoin1() throws Exception {
  771. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  772. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  773. "C = foreach B generate $0, $1, flatten($2);" +
  774. "D = join A by $0, C by $0;" +
  775. "E = limit D 10;" +
  776. "store E into 'output';";
  777. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  778. List<Operator> loads = newLogicalPlan.getSources();
  779. Assert.assertTrue( loads.size() == 2 );
  780. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  781. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  782. Operator op = null;
  783. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  784. op = loads.get( 0 );
  785. else
  786. op = loads.get( 1 );
  787. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  788. Assert.assertTrue( op instanceof LOForEach );
  789. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  790. Assert.assertTrue( op instanceof LOForEach );
  791. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  792. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  793. Assert.assertTrue( op instanceof LOJoin );
  794. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  795. Assert.assertTrue( op instanceof LOForEach );
  796. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  797. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  798. Assert.assertTrue( op instanceof LOLimit );
  799. }
  800. // TODO
  801. // The following test case testForeachInnerJoin2 has multiple foreach flatten
  802. // A new rule should optimize this case
  803. @Test
  804. public void testForeachInnerJoin2() throws Exception {
  805. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  806. "B = foreach A generate $0, $1, flatten($2);" +
  807. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  808. "D = foreach C generate $0, $1, flatten($2);" +
  809. "E = join B by $0, D by $0;" +
  810. "F = limit E 10;" +
  811. "store F into 'output';";
  812. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  813. // No optimization about foreach flatten.
  814. Operator store = newLogicalPlan.getSinks().get( 0 );
  815. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  816. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  817. Assert.assertTrue( join instanceof LOJoin );
  818. }
  819. /**
  820. * This is actually a valid positive test case, even though the benefit of such optimization is questionable. However,
  821. * checking for such condition requires additional coding effort.
  822. */
  823. @Test
  824. public void testForeachFlattenAddedColumnInnerJoin() throws Exception {
  825. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  826. "B = foreach A generate $0, $1, flatten(1);" +
  827. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  828. "D = join B by $0, C by $0;" +
  829. "E = limit D 10;" +
  830. "store E into 'output';";
  831. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  832. List<Operator> loads = newLogicalPlan.getSources();
  833. Assert.assertTrue( loads.size() == 2 );
  834. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  835. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  836. Operator op = null;
  837. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  838. op = loads.get( 0 );
  839. else
  840. op = loads.get( 1 );
  841. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  842. Assert.assertTrue( op instanceof LOForEach );
  843. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  844. Assert.assertTrue( op instanceof LOForEach );
  845. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  846. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  847. Assert.assertTrue( op instanceof LOJoin );
  848. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  849. Assert.assertTrue( op instanceof LOForEach );
  850. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  851. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  852. Assert.assertTrue( op instanceof LOLimit );
  853. }
  854. /**
  855. * UDF doesn't prevent optimization.
  856. */
  857. @Test
  858. public void testForeachUDFInnerJoin() throws Exception {
  859. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  860. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  861. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  862. "D = join B by $0, C by $0;" +
  863. "E = limit D 10;" +
  864. "store E into 'output';";
  865. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  866. List<Operator> loads = newLogicalPlan.getSources();
  867. Assert.assertTrue( loads.size() == 2 );
  868. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  869. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  870. Operator op = null;
  871. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  872. op = loads.get( 0 );
  873. else
  874. op = loads.get( 1 );
  875. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  876. Assert.assertTrue( op instanceof LOForEach );
  877. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  878. Assert.assertTrue( op instanceof LOForEach );
  879. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  880. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  881. Assert.assertTrue( op instanceof LOJoin );
  882. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  883. Assert.assertTrue( op instanceof LOForEach );
  884. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  885. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  886. Assert.assertTrue( op instanceof LOLimit );
  887. }
  888. /**
  889. * Cast doesn't prevent optimization.
  890. */
  891. @Test
  892. public void testForeachCastInnerJoin() throws Exception {
  893. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  894. "B = foreach A generate $0, (int)$1, flatten($2);" +
  895. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  896. "D = join B by $0, C by $0;" +
  897. "E = limit D 10;" +
  898. "store E into 'output';";
  899. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  900. List<Operator> loads = newLogicalPlan.getSources();
  901. Assert.assertTrue( loads.size() == 2 );
  902. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  903. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  904. Operator op = null;
  905. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  906. op = loads.get( 0 );
  907. else
  908. op = loads.get( 1 );
  909. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  910. Assert.assertTrue( op instanceof LOForEach );
  911. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  912. Assert.assertTrue( op instanceof LOForEach );
  913. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  914. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  915. Assert.assertTrue( op instanceof LOJoin );
  916. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  917. Assert.assertTrue( op instanceof LOForEach );
  918. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  919. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  920. Assert.assertTrue( op instanceof LOLimit );
  921. }
  922. // See PIG-1172
  923. @Test
  924. public void testForeachJoinRequiredField() throws Exception {
  925. String query = "A = load 'myfile' as (bg:bag{t:tuple(a0,a1)});" +
  926. "B = FOREACH A generate flatten($0);" +
  927. "C = load '3.txt' AS (c0, c1);" +
  928. "D = JOIN B by a1, C by c1;" +
  929. "E = limit D 10;" +
  930. "store E into 'output';";
  931. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  932. // No optimization about foreach flatten.
  933. Operator store = newLogicalPlan.getSinks().get( 0 );
  934. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  935. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  936. Assert.assertTrue( join instanceof LOJoin );
  937. }
  938. // See PIG-1374
  939. @Test
  940. public void testForeachRequiredField() throws Exception {
  941. String query = "A = load 'myfile' as (b:bag{t:tuple(a0:chararray,a1:int)});" +
  942. "B = foreach A generate flatten($0);" +
  943. "C = order B by $1 desc;" +
  944. "store C into 'output';";
  945. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  946. Operator load = newLogicalPlan.getSources().get( 0 );
  947. Assert.assertTrue( load instanceof LOLoad );
  948. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  949. Assert.assertTrue( foreach instanceof LOForEach );
  950. Operator foreach1 = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  951. Assert.assertTrue( foreach1 instanceof LOForEach );
  952. Operator sort = newLogicalPlan.getSuccessors( foreach1 ).get( 0 );
  953. Assert.assertTrue( sort instanceof LOSort );
  954. }
  955. // See PIG-1706
  956. @Test
  957. public void testForeachWithUserDefinedSchema() throws Exception {
  958. String query = "a = load '1.txt' as (a0:int, a1, a2:bag{t:(i1:int, i2:int)});" +
  959. "b = load '2.txt' as (b0:int, b1);" +
  960. "c = foreach a generate a0, flatten(a2) as (q1, q2);" +
  961. "d = join c by a0, b by b0;" +
  962. "store d into 'output';";
  963. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  964. Operator store = newLogicalPlan.getSinks().get( 0 );
  965. LOForEach foreach = (LOForEach)newLogicalPlan.getPredecessors(store).get(0);
  966. Assert.assertTrue(foreach.getSchema().getField(1).alias.equals("q1"));
  967. Assert.assertTrue(foreach.getSchema().getField(2).alias.equals("q2"));
  968. }
  969. // See PIG-1751
  970. @Test
  971. public void testForeachWithUserDefinedSchema2() throws Exception {
  972. String query = "a = load '1.txt' as (a0:chararray);" +
  973. "b = load '2.txt' as (b0:chararray);" +
  974. "c = foreach b generate flatten(STRSPLIT(b0)) as c0;" +
  975. "d = join c by (chararray)c0, a by a0;" +
  976. "store d into 'output';";
  977. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  978. Operator store = newLogicalPlan.getSinks().get( 0 );
  979. Operator op = newLogicalPlan.getPredecessors(store).get(0);
  980. Assert.assertTrue(op instanceof LOJoin);
  981. }
  982. public class MyPlanOptimizer extends LogicalPlanOptimizer {
  983. protected MyPlanOptimizer(OperatorPlan p, int iterations) {
  984. super(p, iterations, new HashSet<String>());
  985. }
  986. protected List<Set<Rule>> buildRuleSets() {
  987. List<Set<Rule>> ls = new ArrayList<Set<Rule>>();

Large files are truncated, but you can click here to view the full file