/test/org/apache/pig/test/TestNewPlanPushDownForeachFlatten.java
Java | 1233 lines | 983 code | 161 blank | 89 comment | 39 complexity | 0ed0b10be3725514d871129da1305043 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
Large files are truncated, but you can click here to view the full file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.pig.test;
- import java.util.ArrayList;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Properties;
- import java.util.Set;
- import org.apache.pig.ExecType;
- import org.apache.pig.FilterFunc;
- import org.apache.pig.PigServer;
- import org.apache.pig.data.Tuple;
- import org.apache.pig.impl.PigContext;
- import org.apache.pig.test.utils.Identity;
- import org.apache.pig.newplan.Operator;
- import org.apache.pig.newplan.OperatorPlan;
- import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
- import org.apache.pig.newplan.optimizer.PlanOptimizer;
- import org.apache.pig.newplan.optimizer.Rule;
- import org.apache.pig.newplan.logical.optimizer.ProjectionPatcher;
- import org.apache.pig.newplan.logical.optimizer.SchemaPatcher;
- import org.apache.pig.newplan.logical.relational.LOCross;
- import org.apache.pig.newplan.logical.relational.LOForEach;
- import org.apache.pig.newplan.logical.relational.LOJoin;
- import org.apache.pig.newplan.logical.relational.LOLimit;
- import org.apache.pig.newplan.logical.relational.LOLoad;
- import org.apache.pig.newplan.logical.relational.LOSort;
- import org.apache.pig.newplan.logical.relational.LogicalPlan;
- import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
- import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
- import org.apache.pig.newplan.logical.rules.OptimizerUtils;
- import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten;
- import org.junit.Assert;
- import org.junit.Test;
- import org.junit.Before;
- /**
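- * Tests how the new logical plan optimizer handles foreach statements that
- * contain flatten, checking the operator order of the resulting plan.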
- */
- public class TestNewPlanPushDownForeachFlatten {
- PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
-
- @Before // NOTE(review): annotated @Before yet named tearDown; confirm whether @After or a setUp-style name was intended
- public void tearDown() { // empty body: this hook currently does nothing
- }
- /**
-  * Minimal filter UDF for testing: it rejects every tuple it is given,
-  * without looking at the tuple's contents.
-  */
- public static class MyFilterFunc extends FilterFunc {
-     /**
-      * @param input the tuple under evaluation; never inspected
-      * @return always {@code Boolean.FALSE}
-      */
-     @Override
-     public Boolean exec(Tuple input) {
-         return Boolean.FALSE;
-     }
- }
-
- /**
- * Old plan is empty, so is the optimized new plan.
- */
- @Test
- public void testErrorEmptyInput() throws Exception {
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( "" );
-
- Assert.assertTrue( newLogicalPlan.getOperators().hasNext() == false );
- }
- /**
- * No foreach in the plan, no effect.
- */
- @Test
- public void testErrorNonForeachInput() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa);" +
- "store A into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
- Operator load = newLogicalPlan.getSources().get( 0 );
- Assert.assertTrue( load instanceof LOLoad );
- List<Operator> nexts = newLogicalPlan.getSuccessors( load );
- Assert.assertTrue( nexts != null && nexts.size() == 1 );
- }
-
- /**
-  * Foreach without flatten followed by an order-by: the plan is expected to
-  * read load, foreach, foreach, sort.
-  */
- @Test
- public void testForeachNoFlatten() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, $2;" +
-         "C = order B by $0, $1;" +
-         "D = store C into 'dummy';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
-
- /**
-  * Foreach with flatten that feeds straight into a store: the load must
-  * still be followed by a foreach.
-  */
- @Test
- public void testForeachNoSuccessors() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate flatten($1);" +
-         "Store B into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator afterLoad = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( afterLoad instanceof LOForEach );
- }
-
- /**
-  * Foreach with flatten followed by a stream operator: the load must still
-  * be followed by a foreach.
-  */
- @Test
- public void testForeachStreaming() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate flatten($1);" +
-         "C = stream B through `" + "pc -l" + "`;" +
-         "Store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator afterLoad = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( afterLoad instanceof LOForEach );
- }
-
- /**
-  * Foreach with flatten followed by distinct: the load must still be
-  * followed by a foreach.
-  */
- @Test
- public void testForeachDistinct() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate flatten($1);" +
-         "C = distinct B;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator afterLoad = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( afterLoad instanceof LOForEach );
- }
-
- /**
-  * Foreach with flatten followed by a plain foreach: the plan is expected
-  * to read load, foreach, foreach with flatten, foreach without flatten.
-  */
- @Test
- public void testForeachForeach() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten(1);" +
-         "C = foreach B generate $0;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator firstForeach = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( firstForeach instanceof LOForEach );
-     Operator flattenForeach = plan.getSuccessors( firstForeach ).get( 0 );
-     Assert.assertTrue( flattenForeach instanceof LOForEach );
-     Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)flattenForeach ) );
-     Operator lastForeach = plan.getSuccessors( flattenForeach ).get( 0 );
-     Assert.assertTrue( lastForeach instanceof LOForEach );
-     Assert.assertFalse( OptimizerUtils.hasFlatten( (LOForEach)lastForeach ) );
- }
-
- /**
-  * Foreach with flatten followed by a filter: the plan is expected to start
-  * with load, foreach, foreach with flatten.
-  */
- @Test
- public void testForeachFilter() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = filter B by $1 < 18;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator firstForeach = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( firstForeach instanceof LOForEach );
-     Operator flattenForeach = plan.getSuccessors( firstForeach ).get( 0 );
-     Assert.assertTrue( flattenForeach instanceof LOForEach );
-     Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)flattenForeach ) );
- }
- /**
-  * Foreach with flatten followed by a split: the plan is expected to start
-  * with load, foreach, foreach with flatten.
-  */
- @Test
- public void testForeachSplitOutput() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "split B into C if $1 < 18, D if $1 >= 18;" +
-         "store C into 'output1';" +
-         "store D into 'output2';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator firstForeach = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( firstForeach instanceof LOForEach );
-     Operator flattenForeach = plan.getSuccessors( firstForeach ).get( 0 );
-     Assert.assertTrue( flattenForeach instanceof LOForEach );
-     Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)flattenForeach ) );
- }
- /**
-  * Foreach with flatten followed by a limit: the plan is expected to start
-  * with load, foreach, foreach with flatten.
-  */
- @Test
- public void testForeachLimit() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = limit B 10;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator firstForeach = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( firstForeach instanceof LOForEach );
-     Operator flattenForeach = plan.getSuccessors( firstForeach ).get( 0 );
-     Assert.assertTrue( flattenForeach instanceof LOForEach );
-     Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)flattenForeach ) );
- }
- @Test
- public void testForeachUnion() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa);" +
- "B = foreach A generate $0, $1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference);" +
- "D = union B, C;" +
- "store D into 'output';";
-
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator load = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- load = loads.get( 0 );
- else
- load = loads.get( 1 );
- Assert.assertTrue( load instanceof LOLoad );
- Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
- Assert.assertTrue( foreach instanceof LOForEach );
- foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
- Assert.assertTrue( foreach instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
- }
-
- @Test
- public void testForeachCogroup() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa);" +
- "B = foreach A generate $0, $1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference);" +
- "D = cogroup B by $0, C by $0;" +
- "store D into 'output';";
-
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator load = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- load = loads.get( 0 );
- else
- load = loads.get( 1 );
- Assert.assertTrue( load instanceof LOLoad );
- Operator foreach = newLogicalPlan.getSuccessors( load ).get( 0 );
- Assert.assertTrue( foreach instanceof LOForEach );
- foreach = newLogicalPlan.getSuccessors( foreach ).get( 0 );
- Assert.assertTrue( foreach instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)foreach ) );
- }
-
- /**
-  * Foreach with flatten followed by a group-by: the plan is expected to
-  * start with load, foreach, foreach with flatten.
-  */
- @Test
- public void testForeachGroupBy() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = group B by $0;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     Operator source = plan.getSources().get( 0 );
-     Assert.assertTrue( source instanceof LOLoad );
-     Operator firstForeach = plan.getSuccessors( source ).get( 0 );
-     Assert.assertTrue( firstForeach instanceof LOForEach );
-     Operator flattenForeach = plan.getSuccessors( firstForeach ).get( 0 );
-     Assert.assertTrue( flattenForeach instanceof LOForEach );
-     Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)flattenForeach ) );
- }
-
- /**
-  * Foreach with flatten followed by an order-by on non-flattened columns:
-  * the plan is expected to read load, foreach, sort, foreach.
-  */
- @Test
- public void testForeachSort() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = order B by $0, $1;" +
-         "D = store C into 'dummy';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
- }
-
- /**
-  * The foreach is not a pure projection ({@code $0 + 5}), so it is not
-  * optimizable: the plan is expected to read load, foreach, foreach, sort.
-  */
- @Test
- public void testForeachSortNegative1() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0 + 5, $1, flatten($2);" +
-         "C = order B by $0, $1;" +
-         "D = store C into 'dummy';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
-
-
- /**
-  * The sort condition references a flattened field ({@code $3}), so no
-  * optimization can be done: the plan is expected to read load, foreach,
-  * foreach, sort.
-  */
- @Test
- public void testForeachSortNegative2() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa:tuple(x,y));" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = order B by $0, $3;" +
-         "D = store C into 'dummy';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
- /**
-  * The flatten is applied to a constant ({@code flatten(1)}) and followed by
-  * an order-by: the plan is expected to read load, foreach, foreach, sort.
-  */
- @Test
- public void testForeachFlattenAddedColumnSort() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, flatten(1);" +
-         "C = order B by $0, $1;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
-
- /**
-  * The foreach applies the Identity UDF and has no flatten, followed by an
-  * order-by: the plan is expected to read load, foreach, foreach, sort.
-  */
- @Test
- public void testForeachUDFSort() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate $0, $1, " + Identity.class.getName() + "($2) ;" +
-         "C = order B by $0, $1;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
-
- /**
-  * The foreach casts one column and flattens another, followed by an
-  * order-by: the plan is expected to read load, foreach, foreach, sort.
-  */
- @Test
- public void testForeachCastSort() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa);" +
-         "B = foreach A generate (chararray)$0, $1, flatten($2);" +
-         "C = order B by $0, $1;" +
-         "store C into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // Walk the plan from its source, checking one operator per step.
-     Operator current = plan.getSources().get( 0 );
-     Assert.assertTrue( current instanceof LOLoad );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOForEach );
-     current = plan.getSuccessors( current ).get( 0 );
-     Assert.assertTrue( current instanceof LOSort );
- }
-
- @Test
- public void testForeachCross() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference);" +
- "D = cross B, C;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOCross );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- @Test
- public void testForeachCross1() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "C = foreach B generate $0, $1, flatten($2);" +
- "D = cross A, C;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOCross );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- // TODO
- // testForeachCross2 feeds the cross from two foreach-with-flatten inputs.
- // A new rule should optimize this case.
- /**
-  * Cross whose two inputs both come from a foreach with flatten. Walking
-  * back from the store, the operator two steps upstream must be the cross.
-  */
- @Test
- public void testForeachCross2() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
-         "D = foreach C generate $0, $1, flatten($2);" +
-         "E = cross B, D;" +
-         "F = limit E 10;" +
-         "store F into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // No optimization about foreach flatten.
-     Operator sink = plan.getSinks().get( 0 );
-     Operator sinkInput = plan.getPredecessors( sink ).get( 0 );
-     Operator twoStepsUp = plan.getPredecessors( sinkInput ).get( 0 );
-     Assert.assertTrue( twoStepsUp instanceof LOCross );
- }
-
- /**
- * This actually is a valid case, even though the optimization may not provide any performance benefit. However, detecting
- * such a case requires more coding. Thus, we allow optimization to go thru in this case.
- */
- @Test
- public void testForeachFlattenAddedColumnCross() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten(1);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = cross B, C;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOCross );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- /**
- * This is a valid, positive test case. Optimization should go thru.
- */
- @Test
- public void testForeachUDFCross() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = cross B, C;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOCross );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
-
- /**
- * Cast should NOT matter to cross. This is a valid positive test case.
- */
- @Test
- public void testForeachCastCross() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, (int)$1, flatten( $2 );" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = cross B, C;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOCross );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
-
- @Test
- public void testForeachFRJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference);" +
- "D = join B by $0, C by $0 using 'replicated';" +
- "E = limit D 10;" +
- "store E into 'output';";
-
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- @Test
- public void testForeachFRJoin1() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "C = foreach B generate $0, $1, flatten($2);" +
- "D = join A by $0, C by $0 using 'replicated';" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- // TODO
- // testForeachFRJoin2 feeds the join from two foreach-with-flatten inputs.
- // A new rule should optimize this case.
- /**
-  * Replicated join whose two inputs both come from a foreach with flatten.
-  * Walking back from the store, the operator two steps upstream must be
-  * the join.
-  */
- @Test
- public void testForeachFRJoin2() throws Exception {
-     String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
-         "D = foreach C generate $0, $1, flatten($2);" +
-         "E = join B by $0, D by $0 using 'replicated';" +
-         "F = limit E 10;" +
-         "store F into 'output';";
-     LogicalPlan plan = migrateAndOptimizePlan( query );
- 
-     // No optimization about foreach flatten.
-     Operator sink = plan.getSinks().get( 0 );
-     Operator sinkInput = plan.getPredecessors( sink ).get( 0 );
-     Operator twoStepsUp = plan.getPredecessors( sinkInput ).get( 0 );
-     Assert.assertTrue( twoStepsUp instanceof LOJoin );
- }
-
- /**
- * Valid positive test case, even though the benefit from the optimization is questionable. However, putting in additinal check for
- * this condition requires extra coding.
- */
- @Test
- public void testForeachFlattenAddedColumnFRJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten(1);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0 using 'replicated';" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- /**
- * This is actually a valid, positive test case. UDF doesn't prevent optimization.
- * @throws Exception
- */
- @Test
- public void testForeachUDFFRJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0 using 'replicated';" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- /**
- * This is actually a valid, positive test case. Cast doesn't prevent optimization.
- * @throws Exception
- */
- @Test
- public void testForeachCastFRJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, (int)$1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0 using 'replicated';" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- @Test
- public void testForeachInnerJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
-
- @Test
- public void testForeachInnerJoin1() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "C = foreach B generate $0, $1, flatten($2);" +
- "D = join A by $0, C by $0;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "B" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- // TODO
- // testForeachInnerJoin2 below has a foreach with flatten on both join inputs.
- // A new rule should optimize this case; for now the test only checks that the
- // operator feeding the limit is the join.
- @Test
- public void testForeachInnerJoin2() throws Exception {
-     final String script =
-         "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
-         "B = foreach A generate $0, $1, flatten($2);" +
-         "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
-         "D = foreach C generate $0, $1, flatten($2);" +
-         "E = join B by $0, D by $0;" +
-         "F = limit E 10;" +
-         "store F into 'output';";
-     final LogicalPlan plan = migrateAndOptimizePlan( script );
-
-     // No optimization about foreach flatten: walk backwards store <- limit <- join.
-     final Operator sink = plan.getSinks().get( 0 );
-     final Operator aboveSink = plan.getPredecessors( sink ).get( 0 );
-     final Operator aboveLimit = plan.getPredecessors( aboveSink ).get( 0 );
-     Assert.assertTrue( aboveLimit instanceof LOJoin );
- }
-
- /**
- * This is actually a valid positive test case, even though the benefit of such optimization is questionable. However,
- * checking for such condition requires additional coding effort.
- */
- @Test
- public void testForeachFlattenAddedColumnInnerJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, $1, flatten(1);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- /**
- * UDF doesn't prevent optimization.
- */
- @Test
- public void testForeachUDFInnerJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- /**
- * Cast doesn't prevent optimization.
- */
- @Test
- public void testForeachCastInnerJoin() throws Exception {
- String query = "A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));" +
- "B = foreach A generate $0, (int)$1, flatten($2);" +
- "C = load 'anotherfile' as (name, age, preference:(course_name, instructor));" +
- "D = join B by $0, C by $0;" +
- "E = limit D 10;" +
- "store E into 'output';";
- LogicalPlan newLogicalPlan = migrateAndOptimizePlan( query );
-
- List<Operator> loads = newLogicalPlan.getSources();
- Assert.assertTrue( loads.size() == 2 );
- Assert.assertTrue( loads.get( 0 ) instanceof LOLoad );
- Assert.assertTrue( loads.get( 1 ) instanceof LOLoad );
- Operator op = null;
- if( ((LOLoad)loads.get( 0 )).getAlias().equals( "A" ) )
- op = loads.get( 0 );
- else
- op = loads.get( 1 );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( !OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOJoin );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOForEach );
- Assert.assertTrue( OptimizerUtils.hasFlatten( (LOForEach)op ) );
- op = newLogicalPlan.getSuccessors( op ).get( 0 );
- Assert.assertTrue( op instanceof LOLimit );
- }
- // See PIG-1172
- // The join key a1 comes out of the flattened bag; the test checks that the
- // operator feeding the limit is the join.
- @Test
- public void testForeachJoinRequiredField() throws Exception {
-     final String script =
-         "A = load 'myfile' as (bg:bag{t:tuple(a0,a1)});" +
-         "B = FOREACH A generate flatten($0);" +
-         "C = load '3.txt' AS (c0, c1);" +
-         "D = JOIN B by a1, C by c1;" +
-         "E = limit D 10;" +
-         "store E into 'output';";
-     final LogicalPlan plan = migrateAndOptimizePlan( script );
-
-     // No optimization about foreach flatten: walk backwards store <- limit <- join.
-     final Operator sink = plan.getSinks().get( 0 );
-     final Operator aboveSink = plan.getPredecessors( sink ).get( 0 );
-     final Operator aboveLimit = plan.getPredecessors( aboveSink ).get( 0 );
-     Assert.assertTrue( aboveLimit instanceof LOJoin );
- }
-
- // See PIG-1374
- // The sort key $1 comes out of the flattened bag. Expected chain from the
- // single source: LOLoad, LOForEach, LOForEach, LOSort.
- @Test
- public void testForeachRequiredField() throws Exception {
-     final String script =
-         "A = load 'myfile' as (b:bag{t:tuple(a0:chararray,a1:int)});" +
-         "B = foreach A generate flatten($0);" +
-         "C = order B by $1 desc;" +
-         "store C into 'output';";
-     final LogicalPlan plan = migrateAndOptimizePlan( script );
-
-     // Step through the plan with one cursor, checking the type at every hop.
-     Operator cursor = plan.getSources().get( 0 );
-     Assert.assertTrue( cursor instanceof LOLoad );
-
-     cursor = plan.getSuccessors( cursor ).get( 0 );
-     Assert.assertTrue( cursor instanceof LOForEach );
-
-     cursor = plan.getSuccessors( cursor ).get( 0 );
-     Assert.assertTrue( cursor instanceof LOForEach );
-
-     cursor = plan.getSuccessors( cursor ).get( 0 );
-     Assert.assertTrue( cursor instanceof LOSort );
- }
-
- // See PIG-1706
- // The operator feeding the store must be a foreach whose schema carries the
- // user-supplied aliases q1 and q2 at field positions 1 and 2.
- @Test
- public void testForeachWithUserDefinedSchema() throws Exception {
-     final String script =
-         "a = load '1.txt' as (a0:int, a1, a2:bag{t:(i1:int, i2:int)});" +
-         "b = load '2.txt' as (b0:int, b1);" +
-         "c = foreach a generate a0, flatten(a2) as (q1, q2);" +
-         "d = join c by a0, b by b0;" +
-         "store d into 'output';";
-     final LogicalPlan plan = migrateAndOptimizePlan( script );
-
-     // The cast itself fails the test if the store's predecessor is not a foreach.
-     final LOForEach lastForeach =
-         (LOForEach) plan.getPredecessors( plan.getSinks().get( 0 ) ).get( 0 );
-
-     Assert.assertTrue( lastForeach.getSchema().getField( 1 ).alias.equals( "q1" ) );
-     Assert.assertTrue( lastForeach.getSchema().getField( 2 ).alias.equals( "q2" ) );
- }
-
- // See PIG-1751
- // The flattened STRSPLIT result is aliased c0 and cast in the join key; the
- // test checks that the operator feeding the store is the join.
- @Test
- public void testForeachWithUserDefinedSchema2() throws Exception {
-     final String script =
-         "a = load '1.txt' as (a0:chararray);" +
-         "b = load '2.txt' as (b0:chararray);" +
-         "c = foreach b generate flatten(STRSPLIT(b0)) as c0;" +
-         "d = join c by (chararray)c0, a by a0;" +
-         "store d into 'output';";
-     final LogicalPlan plan = migrateAndOptimizePlan( script );
-
-     final Operator sink = plan.getSinks().get( 0 );
-     final Operator aboveSink = plan.getPredecessors( sink ).get( 0 );
-     Assert.assertTrue( aboveSink instanceof LOJoin );
- }
- // See PIG-2721
- @Test
- public void testForeachSortWithUserDefinedSchema() throws Exception {
- String query =
- "a = load '1.tx…
Large files are truncated, but you can click here to view the full file