PageRenderTime 51ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/test/org/apache/pig/test/TestNewPlanPushDownForeachFlatten.java

https://github.com/dorefiend/pig
Java | 1233 lines | 983 code | 161 blank | 89 comment | 39 complexity | 0ed0b10be3725514d871129da1305043 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.pig.test;
  19. import java.util.ArrayList;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Properties;
  23. import java.util.Set;
  24. import org.apache.pig.ExecType;
  25. import org.apache.pig.FilterFunc;
  26. import org.apache.pig.PigServer;
  27. import org.apache.pig.data.Tuple;
  28. import org.apache.pig.impl.PigContext;
  29. import org.apache.pig.test.utils.Identity;
  30. import org.apache.pig.newplan.Operator;
  31. import org.apache.pig.newplan.OperatorPlan;
  32. import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
  33. import org.apache.pig.newplan.optimizer.PlanOptimizer;
  34. import org.apache.pig.newplan.optimizer.Rule;
  35. import org.apache.pig.newplan.logical.optimizer.ProjectionPatcher;
  36. import org.apache.pig.newplan.logical.optimizer.SchemaPatcher;
  37. import org.apache.pig.newplan.logical.relational.LOCross;
  38. import org.apache.pig.newplan.logical.relational.LOForEach;
  39. import org.apache.pig.newplan.logical.relational.LOJoin;
  40. import org.apache.pig.newplan.logical.relational.LOLimit;
  41. import org.apache.pig.newplan.logical.relational.LOLoad;
  42. import org.apache.pig.newplan.logical.relational.LOSort;
  43. import org.apache.pig.newplan.logical.relational.LogicalPlan;
  44. import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
  45. import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
  46. import org.apache.pig.newplan.logical.rules.OptimizerUtils;
  47. import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten;
  48. import org.junit.Assert;
  49. import org.junit.Test;
  50. import org.junit.Before;
  51. /**
  52. * Test the logical optimizer.
  53. */
  54. public class TestNewPlanPushDownForeachFlatten {
  55. PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
  56. @Before
  57. public void tearDown() {
  58. }
  59. /**
  60. *
  61. * A simple filter UDF for testing
  62. *
  63. */
  64. static public class MyFilterFunc extends FilterFunc {
  65. @Override
  66. public Boolean exec(Tuple input) {
  67. return false;
  68. }
  69. }
  70. /**
  71. * Old plan is empty, so is the optimized new plan.
  72. */
  73. @Test
  74. public void testErrorEmptyInput() throws Exception {
  75. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( "" );
  76. Assert.assertTrue( newLogicalPlan.getOperators().hasNext() == false );
  77. }
  78. /**
  79. * No foreach in the plan, no effect.
  80. */
  81. @Test
  82. public void testErrorNonForeachInput() throws Exception {
  83. String query = "A = load 'myfile' as (name, age, gpa);" +
  84. "store A into 'output';";
  85. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  86. Operator load = newLogicalPlan.getSources().get( 0 );
  87. Assert.assertTrue( load instanceof LOLoad );
  88. List<Operator> nexts = newLogicalPlan.getSuccessors( load );
  89. Assert.assertTrue( nexts != null && nexts.size() == 1 );
  90. }
  91. @Test
  92. public void testForeachNoFlatten() throws Exception {
  93. String query = "A = load 'myfile' as (name, age, gpa);" +
  94. "B = foreach A generate $0, $1, $2;" +
  95. "C = order B by $0, $1;" +
  96. "D = store C into 'dummy';";
  97. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  98. Operator load = newLogicalPlan.getSources().get( 0 );
  99. Assert.assertTrue( load instanceof LOLoad );
  100. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  101. Assert.assertTrue( foreach instanceof LOForEach );
  102. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  103. Assert.assertTrue( foreach instanceof LOForEach );
  104. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  105. Assert.assertTrue( sort instanceof LOSort );
  106. }
  107. @Test
  108. public void testForeachNoSuccessors() throws Exception {
  109. String query = "A = load 'myfile' as (name, age, gpa);" +
  110. "B = foreach A generate flatten($1);" +
  111. "Store B into 'output';";
  112. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  113. Operator load = newLogicalPlan.getSources().get( 0 );
  114. Assert.assertTrue( load instanceof LOLoad );
  115. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  116. Assert.assertTrue( foreach instanceof LOForEach );
  117. }
  118. @Test
  119. public void testForeachStreaming() throws Exception {
  120. String query = "A = load 'myfile' as (name, age, gpa);" +
  121. "B = foreach A generate flatten($1);" +
  122. "C = stream B through `" + "pc -l" + "`;" +
  123. "Store C into 'output';";
  124. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  125. Operator load = newLogicalPlan.getSources().get( 0 );
  126. Assert.assertTrue( load instanceof LOLoad );
  127. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  128. Assert.assertTrue( foreach instanceof LOForEach );
  129. }
  130. @Test
  131. public void testForeachDistinct() throws Exception {
  132. String query = "A = load 'myfile' as (name, age, gpa);" +
  133. "B = foreach A generate flatten($1);" +
  134. "C = distinct B;" +
  135. "store C into 'output';";
  136. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  137. Operator load = newLogicalPlan.getSources().get( 0 );
  138. Assert.assertTrue( load instanceof LOLoad );
  139. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  140. Assert.assertTrue( foreach instanceof LOForEach );
  141. }
  142. @Test
  143. public void testForeachForeach() throws Exception {
  144. String query = "A = load 'myfile' as (name, age, gpa);" +
  145. "B = foreach A generate $0, $1, flatten(1);" +
  146. "C = foreach B generate $0;" +
  147. "store C into 'output';";
  148. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  149. Operator load = newLogicalPlan.getSources().get( 0 );
  150. Assert.assertTrue( load instanceof LOLoad );
  151. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  152. Assert.assertTrue( foreach instanceof LOForEach );
  153. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  154. Assert.assertTrue( foreach instanceof LOForEach );
  155. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  156. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  157. Assert.assertTrue( foreach instanceof LOForEach );
  158. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  159. }
  160. @Test
  161. public void testForeachFilter() throws Exception {
  162. String query = "A = load 'myfile' as (name, age, gpa);" +
  163. "B = foreach A generate $0, $1, flatten($2);" +
  164. "C = filter B by $1 < 18;" +
  165. "store C into 'output';";
  166. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  167. Operator load = newLogicalPlan.getSources().get( 0 );
  168. Assert.assertTrue( load instanceof LOLoad );
  169. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  170. Assert.assertTrue( foreach instanceof LOForEach );
  171. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  172. Assert.assertTrue( foreach instanceof LOForEach );
  173. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  174. }
  175. @Test
  176. public void testForeachSplitOutput() throws Exception {
  177. String query = "A = load 'myfile' as (name, age, gpa);" +
  178. "B = foreach A generate $0, $1, flatten($2);" +
  179. "split B into C if $1 < 18, D if $1 >= 18;" +
  180. "store C into 'output1';" +
  181. "store D into 'output2';";
  182. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  183. Operator load = newLogicalPlan.getSources().get( 0 );
  184. Assert.assertTrue( load instanceof LOLoad );
  185. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  186. Assert.assertTrue( foreach instanceof LOForEach );
  187. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  188. Assert.assertTrue( foreach instanceof LOForEach );
  189. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  190. }
  191. @Test
  192. public void testForeachLimit() throws Exception {
  193. String query = "A = load 'myfile' as (name, age, gpa);" +
  194. "B = foreach A generate $0, $1, flatten($2);" +
  195. "C = limit B 10;" +
  196. "store C into 'output';";
  197. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  198. Operator load = newLogicalPlan.getSources().get( 0 );
  199. Assert.assertTrue( load instanceof LOLoad );
  200. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  201. Assert.assertTrue( foreach instanceof LOForEach );
  202. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  203. Assert.assertTrue( foreach instanceof LOForEach );
  204. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  205. }
  206. @Test
  207. public void testForeachUnion() throws Exception {
  208. String query = "A = load 'myfile' as (name, age, gpa);" +
  209. "B = foreach A generate $0, $1, flatten($2);" +
  210. "C = load 'anotherfile' as (name, age, preference);" +
  211. "D = union B, C;" +
  212. "store D into 'output';";
  213. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  214. List<Operator> loads = newLogicalPlan.getSources();
  215. Assert.assertTrue( loads.size() == 2 );
  216. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  217. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  218. Operator load = null;
  219. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  220. load = loads.get( 0 );
  221. else
  222. load = loads.get( 1 );
  223. Assert.assertTrue( load instanceof LOLoad );
  224. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  225. Assert.assertTrue( foreach instanceof LOForEach );
  226. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  227. Assert.assertTrue( foreach instanceof LOForEach );
  228. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  229. }
  230. @Test
  231. public void testForeachCogroup() throws Exception {
  232. String query = "A = load 'myfile' as (name, age, gpa);" +
  233. "B = foreach A generate $0, $1, flatten($2);" +
  234. "C = load 'anotherfile' as (name, age, preference);" +
  235. "D = cogroup B by $0, C by $0;" +
  236. "store D into 'output';";
  237. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  238. List<Operator> loads = newLogicalPlan.getSources();
  239. Assert.assertTrue( loads.size() == 2 );
  240. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  241. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  242. Operator load = null;
  243. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  244. load = loads.get( 0 );
  245. else
  246. load = loads.get( 1 );
  247. Assert.assertTrue( load instanceof LOLoad );
  248. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  249. Assert.assertTrue( foreach instanceof LOForEach );
  250. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  251. Assert.assertTrue( foreach instanceof LOForEach );
  252. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  253. }
  254. @Test
  255. public void testForeachGroupBy() throws Exception {
  256. String query = "A = load 'myfile' as (name, age, gpa);" +
  257. "B = foreach A generate $0, $1, flatten($2);" +
  258. "C = group B by $0;" +
  259. "store C into 'output';";
  260. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  261. Operator load = newLogicalPlan.getSources().get( 0 );
  262. Assert.assertTrue( load instanceof LOLoad );
  263. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  264. Assert.assertTrue( foreach instanceof LOForEach );
  265. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  266. Assert.assertTrue( foreach instanceof LOForEach );
  267. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
  268. }
  269. @Test
  270. public void testForeachSort() throws Exception {
  271. String query = "A = load 'myfile' as (name, age, gpa);" +
  272. "B = foreach A generate $0, $1, flatten($2);" +
  273. "C = order B by $0, $1;" +
  274. "D = store C into 'dummy';";
  275. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  276. Operator load = newLogicalPlan.getSources().get( 0 );
  277. Assert.assertTrue( load instanceof LOLoad );
  278. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  279. Assert.assertTrue( foreach instanceof LOForEach );
  280. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  281. Assert.assertTrue( sort instanceof LOSort );
  282. foreach = newLogicalPlan.getSuccessors( sort ).get( 0 );
  283. Assert.assertTrue( foreach instanceof LOForEach );
  284. }
  285. /**
  286. * Non-pure-projection, not optimizable.
  287. */
  288. @Test
  289. public void testForeachSortNegative1() throws Exception {
  290. String query = "A = load 'myfile' as (name, age, gpa);" +
  291. "B = foreach A generate $0 + 5, $1, flatten($2);" +
  292. "C = order B by $0, $1;" +
  293. "D = store C into 'dummy';";
  294. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  295. Operator load = newLogicalPlan.getSources().get( 0 );
  296. Assert.assertTrue( load instanceof LOLoad );
  297. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  298. Assert.assertTrue( foreach instanceof LOForEach );
  299. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  300. Assert.assertTrue( foreach instanceof LOForEach );
  301. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  302. Assert.assertTrue( sort instanceof LOSort );
  303. }
  304. /**
  305. * If the flattened field is referenced in the sort condition, then no optimization can be done.
  306. */
  307. @Test
  308. public void testForeachSortNegative2() throws Exception {
  309. String query = "A = load 'myfile' as (name, age, gpa:tuple(x,y));" +
  310. "B = foreach A generate $0, $1, flatten($2);" +
  311. "C = order B by $0, $3;" +
  312. "D = store C into 'dummy';";
  313. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  314. Operator load = newLogicalPlan.getSources().get( 0 );
  315. Assert.assertTrue( load instanceof LOLoad );
  316. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  317. Assert.assertTrue( foreach instanceof LOForEach );
  318. Operator foreach1 = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  319. Assert.assertTrue( foreach1 instanceof LOForEach );
  320. Operator sort = newLogicalPlan.getSuccessors( foreach1 ).get( 0 );
  321. Assert.assertTrue( sort instanceof LOSort );
  322. }
  323. @Test
  324. public void testForeachFlattenAddedColumnSort() throws Exception {
  325. String query = "A = load 'myfile' as (name, age, gpa);" +
  326. "B = foreach A generate $0, $1, flatten(1);" +
  327. "C = order B by $0, $1;" +
  328. "store C into 'output';";
  329. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  330. Operator load = newLogicalPlan.getSources().get( 0 );
  331. Assert.assertTrue( load instanceof LOLoad );
  332. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  333. Assert.assertTrue( foreach instanceof LOForEach );
  334. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  335. Assert.assertTrue( foreach instanceof LOForEach );
  336. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  337. Assert.assertTrue( sort instanceof LOSort );
  338. }
  339. @Test
  340. public void testForeachUDFSort() throws Exception {
  341. String query = "A = load 'myfile' as (name, age, gpa);" +
  342. "B = foreach A generate $0, $1, " + Identity.class.getName() + "($2) ;" +
  343. "C = order B by $0, $1;" +
  344. "store C into 'output';";
  345. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  346. Operator load = newLogicalPlan.getSources().get( 0 );
  347. Assert.assertTrue( load instanceof LOLoad );
  348. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  349. Assert.assertTrue( foreach instanceof LOForEach );
  350. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  351. Assert.assertTrue( foreach instanceof LOForEach );
  352. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  353. Assert.assertTrue( sort instanceof LOSort );
  354. }
  355. @Test
  356. public void testForeachCastSort() throws Exception {
  357. String query = "A = load 'myfile' as (name, age, gpa);" +
  358. "B = foreach A generate (chararray)$0, $1, flatten($2);" +
  359. "C = order B by $0, $1;" +
  360. "store C into 'output';";
  361. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  362. Operator load = newLogicalPlan.getSources().get( 0 );
  363. Assert.assertTrue( load instanceof LOLoad );
  364. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  365. Assert.assertTrue( foreach instanceof LOForEach );
  366. foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  367. Assert.assertTrue( foreach instanceof LOForEach );
  368. Operator sort = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  369. Assert.assertTrue( sort instanceof LOSort );
  370. }
  371. @Test
  372. public void testForeachCross() throws Exception {
  373. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  374. "B = foreach A generate $0, $1, flatten($2);" +
  375. "C = load 'anotherfile' as (name, age, preference);" +
  376. "D = cross B, C;" +
  377. "E = limit D 10;" +
  378. "store E into 'output';";
  379. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  380. List<Operator> loads = newLogicalPlan.getSources();
  381. Assert.assertTrue( loads.size() == 2 );
  382. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  383. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  384. Operator op = null;
  385. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  386. op = loads.get( 0 );
  387. else
  388. op = loads.get( 1 );
  389. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  390. Assert.assertTrue( op instanceof LOForEach );
  391. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  392. Assert.assertTrue( op instanceof LOForEach );
  393. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  394. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  395. Assert.assertTrue( op instanceof LOCross );
  396. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  397. Assert.assertTrue( op instanceof LOForEach );
  398. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  399. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  400. Assert.assertTrue( op instanceof LOLimit );
  401. }
  402. @Test
  403. public void testForeachCross1() throws Exception {
  404. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  405. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  406. "C = foreach B generate $0, $1, flatten($2);" +
  407. "D = cross A, C;" +
  408. "E = limit D 10;" +
  409. "store E into 'output';";
  410. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  411. List<Operator> loads = newLogicalPlan.getSources();
  412. Assert.assertTrue( loads.size() == 2 );
  413. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  414. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  415. Operator op = null;
  416. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  417. op = loads.get( 0 );
  418. else
  419. op = loads.get( 1 );
  420. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  421. Assert.assertTrue( op instanceof LOForEach );
  422. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  423. Assert.assertTrue( op instanceof LOForEach );
  424. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  425. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  426. Assert.assertTrue( op instanceof LOCross );
  427. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  428. Assert.assertTrue( op instanceof LOForEach );
  429. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  430. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  431. Assert.assertTrue( op instanceof LOLimit );
  432. }
  433. // TODO
  434. // The following test case testForeachCross2 has multiple foreach flatten
  435. // A new rule should optimize this case
  436. @Test
  437. public void testForeachCross2() throws Exception {
  438. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  439. "B = foreach A generate $0, $1, flatten($2);" +
  440. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  441. "D = foreach C generate $0, $1, flatten($2);" +
  442. "E = cross B, D;" +
  443. "F = limit E 10;" +
  444. "store F into 'output';";
  445. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  446. // No optimization about foreach flatten.
  447. Operator store = newLogicalPlan.getSinks().get( 0 );
  448. Operator limit = newLogicalPlan.getPredecessors(store).get(0);
  449. Operator cross = newLogicalPlan.getPredecessors(limit).get(0);
  450. Assert.assertTrue( cross instanceof LOCross );
  451. }
  452. /**
  453. * This actually is a valid case, even though the optimization may not provide any performance benefit. However, detecting
  454. * such a case requires more coding. Thus, we allow optimization to go thru in this case.
  455. */
  456. @Test
  457. public void testForeachFlattenAddedColumnCross() throws Exception {
  458. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  459. "B = foreach A generate $0, $1, flatten(1);" +
  460. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  461. "D = cross B, C;" +
  462. "E = limit D 10;" +
  463. "store E into 'output';";
  464. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  465. List<Operator> loads = newLogicalPlan.getSources();
  466. Assert.assertTrue( loads.size() == 2 );
  467. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  468. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  469. Operator op = null;
  470. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  471. op = loads.get( 0 );
  472. else
  473. op = loads.get( 1 );
  474. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  475. Assert.assertTrue( op instanceof LOForEach );
  476. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  477. Assert.assertTrue( op instanceof LOForEach );
  478. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  479. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  480. Assert.assertTrue( op instanceof LOCross );
  481. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  482. Assert.assertTrue( op instanceof LOForEach );
  483. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  484. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  485. Assert.assertTrue( op instanceof LOLimit );
  486. }
  487. /**
  488. * This is a valid, positive test case. Optimization should go thru.
  489. */
  490. @Test
  491. public void testForeachUDFCross() throws Exception {
  492. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  493. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  494. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  495. "D = cross B, C;" +
  496. "E = limit D 10;" +
  497. "store E into 'output';";
  498. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  499. List<Operator> loads = newLogicalPlan.getSources();
  500. Assert.assertTrue( loads.size() == 2 );
  501. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  502. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  503. Operator op = null;
  504. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  505. op = loads.get( 0 );
  506. else
  507. op = loads.get( 1 );
  508. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  509. Assert.assertTrue( op instanceof LOForEach );
  510. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  511. Assert.assertTrue( op instanceof LOForEach );
  512. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  513. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  514. Assert.assertTrue( op instanceof LOCross );
  515. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  516. Assert.assertTrue( op instanceof LOForEach );
  517. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  518. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  519. Assert.assertTrue( op instanceof LOLimit );
  520. }
  521. /**
  522. * Cast should NOT matter to cross. This is a valid positive test case.
  523. */
  524. @Test
  525. public void testForeachCastCross() throws Exception {
  526. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  527. "B = foreach A generate $0, (int)$1, flatten( $2 );" +
  528. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  529. "D = cross B, C;" +
  530. "E = limit D 10;" +
  531. "store E into 'output';";
  532. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  533. List<Operator> loads = newLogicalPlan.getSources();
  534. Assert.assertTrue( loads.size() == 2 );
  535. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  536. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  537. Operator op = null;
  538. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  539. op = loads.get( 0 );
  540. else
  541. op = loads.get( 1 );
  542. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  543. Assert.assertTrue( op instanceof LOForEach );
  544. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  545. Assert.assertTrue( op instanceof LOForEach );
  546. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  547. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  548. Assert.assertTrue( op instanceof LOCross );
  549. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  550. Assert.assertTrue( op instanceof LOForEach );
  551. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  552. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  553. Assert.assertTrue( op instanceof LOLimit );
  554. }
  555. @Test
  556. public void testForeachFRJoin() throws Exception {
  557. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  558. "B = foreach A generate $0, $1, flatten($2);" +
  559. "C = load 'anotherfile' as (name, age, preference);" +
  560. "D = join B by $0, C by $0 using 'replicated';" +
  561. "E = limit D 10;" +
  562. "store E into 'output';";
  563. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  564. List<Operator> loads = newLogicalPlan.getSources();
  565. Assert.assertTrue( loads.size() == 2 );
  566. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  567. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  568. Operator op = null;
  569. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  570. op = loads.get( 0 );
  571. else
  572. op = loads.get( 1 );
  573. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  574. Assert.assertTrue( op instanceof LOForEach );
  575. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  576. Assert.assertTrue( op instanceof LOForEach );
  577. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  578. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  579. Assert.assertTrue( op instanceof LOJoin );
  580. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  581. Assert.assertTrue( op instanceof LOForEach );
  582. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  583. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  584. Assert.assertTrue( op instanceof LOLimit );
  585. }
  586. @Test
  587. public void testForeachFRJoin1() throws Exception {
  588. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  589. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  590. "C = foreach B generate $0, $1, flatten($2);" +
  591. "D = join A by $0, C by $0 using 'replicated';" +
  592. "E = limit D 10;" +
  593. "store E into 'output';";
  594. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  595. List<Operator> loads = newLogicalPlan.getSources();
  596. Assert.assertTrue( loads.size() == 2 );
  597. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  598. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  599. Operator op = null;
  600. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  601. op = loads.get( 0 );
  602. else
  603. op = loads.get( 1 );
  604. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  605. Assert.assertTrue( op instanceof LOForEach );
  606. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  607. Assert.assertTrue( op instanceof LOForEach );
  608. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  609. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  610. Assert.assertTrue( op instanceof LOJoin );
  611. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  612. Assert.assertTrue( op instanceof LOForEach );
  613. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  614. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  615. Assert.assertTrue( op instanceof LOLimit );
  616. }
  617. // TODO
  618. // The following test case testForeachFRJoin2 has multiple foreach flatten
  619. // A new rule should optimize this case
  620. @Test
  621. public void testForeachFRJoin2() throws Exception {
  622. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  623. "B = foreach A generate $0, $1, flatten($2);" +
  624. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  625. "D = foreach C generate $0, $1, flatten($2);" +
  626. "E = join B by $0, D by $0 using 'replicated';" +
  627. "F = limit E 10;" +
  628. "store F into 'output';";
  629. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  630. // No optimization about foreach flatten.
  631. Operator store = newLogicalPlan.getSinks().get( 0 );
  632. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  633. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  634. Assert.assertTrue( join instanceof LOJoin );
  635. }
  636. /**
  637. * Valid positive test case, even though the benefit from the optimization is questionable. However, putting in additinal check for
  638. * this condition requires extra coding.
  639. */
  640. @Test
  641. public void testForeachFlattenAddedColumnFRJoin() throws Exception {
  642. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  643. "B = foreach A generate $0, $1, flatten(1);" +
  644. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  645. "D = join B by $0, C by $0 using 'replicated';" +
  646. "E = limit D 10;" +
  647. "store E into 'output';";
  648. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  649. List<Operator> loads = newLogicalPlan.getSources();
  650. Assert.assertTrue( loads.size() == 2 );
  651. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  652. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  653. Operator op = null;
  654. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  655. op = loads.get( 0 );
  656. else
  657. op = loads.get( 1 );
  658. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  659. Assert.assertTrue( op instanceof LOForEach );
  660. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  661. Assert.assertTrue( op instanceof LOForEach );
  662. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  663. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  664. Assert.assertTrue( op instanceof LOJoin );
  665. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  666. Assert.assertTrue( op instanceof LOForEach );
  667. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  668. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  669. Assert.assertTrue( op instanceof LOLimit );
  670. }
  671. /**
  672. * This is actually a valid, positive test case. UDF doesn't prevent optimization.
  673. * @throws Exception
  674. */
  675. @Test
  676. public void testForeachUDFFRJoin() throws Exception {
  677. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  678. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  679. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  680. "D = join B by $0, C by $0 using 'replicated';" +
  681. "E = limit D 10;" +
  682. "store E into 'output';";
  683. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  684. List<Operator> loads = newLogicalPlan.getSources();
  685. Assert.assertTrue( loads.size() == 2 );
  686. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  687. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  688. Operator op = null;
  689. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  690. op = loads.get( 0 );
  691. else
  692. op = loads.get( 1 );
  693. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  694. Assert.assertTrue( op instanceof LOForEach );
  695. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  696. Assert.assertTrue( op instanceof LOForEach );
  697. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  698. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  699. Assert.assertTrue( op instanceof LOJoin );
  700. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  701. Assert.assertTrue( op instanceof LOForEach );
  702. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  703. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  704. Assert.assertTrue( op instanceof LOLimit );
  705. }
  706. /**
  707. * This is actually a valid, positive test case. Cast doesn't prevent optimization.
  708. * @throws Exception
  709. */
  710. @Test
  711. public void testForeachCastFRJoin() throws Exception {
  712. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  713. "B = foreach A generate $0, (int)$1, flatten($2);" +
  714. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  715. "D = join B by $0, C by $0 using 'replicated';" +
  716. "E = limit D 10;" +
  717. "store E into 'output';";
  718. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  719. List<Operator> loads = newLogicalPlan.getSources();
  720. Assert.assertTrue( loads.size() == 2 );
  721. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  722. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  723. Operator op = null;
  724. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  725. op = loads.get( 0 );
  726. else
  727. op = loads.get( 1 );
  728. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  729. Assert.assertTrue( op instanceof LOForEach );
  730. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  731. Assert.assertTrue( op instanceof LOForEach );
  732. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  733. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  734. Assert.assertTrue( op instanceof LOJoin );
  735. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  736. Assert.assertTrue( op instanceof LOForEach );
  737. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  738. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  739. Assert.assertTrue( op instanceof LOLimit );
  740. }
  741. @Test
  742. public void testForeachInnerJoin() throws Exception {
  743. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  744. "B = foreach A generate $0, $1, flatten($2);" +
  745. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  746. "D = join B by $0, C by $0;" +
  747. "E = limit D 10;" +
  748. "store E into 'output';";
  749. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  750. List<Operator> loads = newLogicalPlan.getSources();
  751. Assert.assertTrue( loads.size() == 2 );
  752. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  753. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  754. Operator op = null;
  755. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  756. op = loads.get( 0 );
  757. else
  758. op = loads.get( 1 );
  759. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  760. Assert.assertTrue( op instanceof LOForEach );
  761. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  762. Assert.assertTrue( op instanceof LOForEach );
  763. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  764. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  765. Assert.assertTrue( op instanceof LOJoin );
  766. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  767. Assert.assertTrue( op instanceof LOForEach );
  768. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  769. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  770. Assert.assertTrue( op instanceof LOLimit );
  771. }
  772. @Test
  773. public void testForeachInnerJoin1() throws Exception {
  774. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  775. "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  776. "C = foreach B generate $0, $1, flatten($2);" +
  777. "D = join A by $0, C by $0;" +
  778. "E = limit D 10;" +
  779. "store E into 'output';";
  780. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  781. List<Operator> loads = newLogicalPlan.getSources();
  782. Assert.assertTrue( loads.size() == 2 );
  783. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  784. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  785. Operator op = null;
  786. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
  787. op = loads.get( 0 );
  788. else
  789. op = loads.get( 1 );
  790. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  791. Assert.assertTrue( op instanceof LOForEach );
  792. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  793. Assert.assertTrue( op instanceof LOForEach );
  794. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  795. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  796. Assert.assertTrue( op instanceof LOJoin );
  797. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  798. Assert.assertTrue( op instanceof LOForEach );
  799. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  800. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  801. Assert.assertTrue( op instanceof LOLimit );
  802. }
  803. // TODO
  804. // The following test case testForeachInnerJoin2 has multiple foreach flatten
  805. // A new rule should optimize this case
  806. @Test
  807. public void testForeachInnerJoin2() throws Exception {
  808. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  809. "B = foreach A generate $0, $1, flatten($2);" +
  810. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  811. "D = foreach C generate $0, $1, flatten($2);" +
  812. "E = join B by $0, D by $0;" +
  813. "F = limit E 10;" +
  814. "store F into 'output';";
  815. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  816. // No optimization about foreach flatten.
  817. Operator store = newLogicalPlan.getSinks().get( 0 );
  818. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  819. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  820. Assert.assertTrue( join instanceof LOJoin );
  821. }
  822. /**
  823. * This is actually a valid positive test case, even though the benefit of such optimization is questionable. However,
  824. * checking for such condition requires additional coding effort.
  825. */
  826. @Test
  827. public void testForeachFlattenAddedColumnInnerJoin() throws Exception {
  828. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  829. "B = foreach A generate $0, $1, flatten(1);" +
  830. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  831. "D = join B by $0, C by $0;" +
  832. "E = limit D 10;" +
  833. "store E into 'output';";
  834. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  835. List<Operator> loads = newLogicalPlan.getSources();
  836. Assert.assertTrue( loads.size() == 2 );
  837. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  838. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  839. Operator op = null;
  840. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  841. op = loads.get( 0 );
  842. else
  843. op = loads.get( 1 );
  844. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  845. Assert.assertTrue( op instanceof LOForEach );
  846. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  847. Assert.assertTrue( op instanceof LOForEach );
  848. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  849. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  850. Assert.assertTrue( op instanceof LOJoin );
  851. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  852. Assert.assertTrue( op instanceof LOForEach );
  853. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  854. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  855. Assert.assertTrue( op instanceof LOLimit );
  856. }
  857. /**
  858. * UDF doesn't prevent optimization.
  859. */
  860. @Test
  861. public void testForeachUDFInnerJoin() throws Exception {
  862. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  863. "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
  864. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  865. "D = join B by $0, C by $0;" +
  866. "E = limit D 10;" +
  867. "store E into 'output';";
  868. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  869. List<Operator> loads = newLogicalPlan.getSources();
  870. Assert.assertTrue( loads.size() == 2 );
  871. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  872. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  873. Operator op = null;
  874. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  875. op = loads.get( 0 );
  876. else
  877. op = loads.get( 1 );
  878. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  879. Assert.assertTrue( op instanceof LOForEach );
  880. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  881. Assert.assertTrue( op instanceof LOForEach );
  882. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  883. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  884. Assert.assertTrue( op instanceof LOJoin );
  885. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  886. Assert.assertTrue( op instanceof LOForEach );
  887. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  888. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  889. Assert.assertTrue( op instanceof LOLimit );
  890. }
  891. /**
  892. * Cast doesn't prevent optimization.
  893. */
  894. @Test
  895. public void testForeachCastInnerJoin() throws Exception {
  896. String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
  897. "B = foreach A generate $0, (int)$1, flatten($2);" +
  898. "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
  899. "D = join B by $0, C by $0;" +
  900. "E = limit D 10;" +
  901. "store E into 'output';";
  902. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  903. List<Operator> loads = newLogicalPlan.getSources();
  904. Assert.assertTrue( loads.size() == 2 );
  905. Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
  906. Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
  907. Operator op = null;
  908. if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
  909. op = loads.get( 0 );
  910. else
  911. op = loads.get( 1 );
  912. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  913. Assert.assertTrue( op instanceof LOForEach );
  914. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  915. Assert.assertTrue( op instanceof LOForEach );
  916. Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
  917. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  918. Assert.assertTrue( op instanceof LOJoin );
  919. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  920. Assert.assertTrue( op instanceof LOForEach );
  921. Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
  922. op = newLogicalPlan.getSuccessors( op ).get( 0 );
  923. Assert.assertTrue( op instanceof LOLimit );
  924. }
  925. // See PIG-1172
  926. @Test
  927. public void testForeachJoinRequiredField() throws Exception {
  928. String query = "A = load 'myfile' as (bg:bag{t:tuple(a0,a1)});" +
  929. "B = FOREACH A generate flatten($0);" +
  930. "C = load '3.txt' AS (c0, c1);" +
  931. "D = JOIN B by a1, C by c1;" +
  932. "E = limit D 10;" +
  933. "store E into 'output';";
  934. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  935. // No optimization about foreach flatten.
  936. Operator store = newLogicalPlan.getSinks().get( 0 );
  937. Operator limit = newLogicalPlan.getPredecessors( store ).get( 0 );
  938. Operator join = newLogicalPlan.getPredecessors( limit ).get( 0 );
  939. Assert.assertTrue( join instanceof LOJoin );
  940. }
  941. // See PIG-1374
  942. @Test
  943. public void testForeachRequiredField() throws Exception {
  944. String query = "A = load 'myfile' as (b:bag{t:tuple(a0:chararray,a1:int)});" +
  945. "B = foreach A generate flatten($0);" +
  946. "C = order B by $1 desc;" +
  947. "store C into 'output';";
  948. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  949. Operator load = newLogicalPlan.getSources().get( 0 );
  950. Assert.assertTrue( load instanceof LOLoad );
  951. Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
  952. Assert.assertTrue( foreach instanceof LOForEach );
  953. Operator foreach1 = newLogicalPlan.getSuccessors( foreach ).get( 0 );
  954. Assert.assertTrue( foreach1 instanceof LOForEach );
  955. Operator sort = newLogicalPlan.getSuccessors( foreach1 ).get( 0 );
  956. Assert.assertTrue( sort instanceof LOSort );
  957. }
  958. // See PIG-1706
  959. @Test
  960. public void testForeachWithUserDefinedSchema() throws Exception {
  961. String query = "a = load '1.txt' as (a0:int, a1, a2:bag{t:(i1:int, i2:int)});" +
  962. "b = load '2.txt' as (b0:int, b1);" +
  963. "c = foreach a generate a0, flatten(a2) as (q1, q2);" +
  964. "d = join c by a0, b by b0;" +
  965. "store d into 'output';";
  966. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  967. Operator store = newLogicalPlan.getSinks().get( 0 );
  968. LOForEach foreach = (LOForEach)newLogicalPlan.getPredecessors(store).get(0);
  969. Assert.assertTrue(foreach.getSchema().getField(1).alias.equals("q1"));
  970. Assert.assertTrue(foreach.getSchema().getField(2).alias.equals("q2"));
  971. }
  972. // See PIG-1751
  973. @Test
  974. public void testForeachWithUserDefinedSchema2() throws Exception {
  975. String query = "a = load '1.txt' as (a0:chararray);" +
  976. "b = load '2.txt' as (b0:chararray);" +
  977. "c = foreach b generate flatten(STRSPLIT(b0)) as c0;" +
  978. "d = join c by (chararray)c0, a by a0;" +
  979. "store d into 'output';";
  980. LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
  981. Operator store = newLogicalPlan.getSinks().get( 0 );
  982. Operator op = newLogicalPlan.getPredecessors(store).get(0);
  983. Assert.assertTrue(op instanceof LOJoin);
  984. }
  985. // See PIG-2721
  986. @Test
  987. public void testForeachSortWithUserDefinedSchema() throws Exception {
  988. String query =
  989. "a = load '1.tx…

Large files files are truncated, but you can click here to view the full file