/test/org/apache/pig/test/TestNewPlanColumnPrune.java
Java | 416 lines | 308 code | 75 blank | 33 comment | 3 complexity | 5ca0a1fdd257b9003b875492a5fbe801 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.pig.test;
- import java.util.ArrayList;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import java.util.Set;
- import junit.framework.TestCase;
- import org.apache.pig.ExecType;
- import org.apache.pig.PigServer;
- import org.apache.pig.impl.PigContext;
- import org.apache.pig.newplan.Operator;
- import org.apache.pig.newplan.OperatorPlan;
- import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
- import org.apache.pig.newplan.logical.relational.LOLoad;
- import org.apache.pig.newplan.logical.relational.LogicalPlan;
- import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
- import org.apache.pig.newplan.logical.rules.AddForEach;
- import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
- import org.apache.pig.newplan.logical.rules.MapKeysPruneHelper;
- import org.apache.pig.newplan.optimizer.PlanOptimizer;
- import org.apache.pig.newplan.optimizer.Rule;
- public class TestNewPlanColumnPrune extends TestCase {
- LogicalPlan plan = null;
- PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
-
- private LogicalPlan buildPlan(String query) throws Exception{
- PigServer pigServer = new PigServer( pc );
- return Util.buildLp(pigServer, query);
- }
-
-
- public void testNoPrune() throws Exception {
- // no foreach
- String query = "a = load 'd.txt' as (id, v1, v2);" +
- "b = filter a by v1==NULL;" +
- "store b into 'empty';";
- LogicalPlan newLogicalPlan = buildPlan(query);
-
- PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt' as (id, v1, v2);" +
- "b = filter a by v1==NULL;" +
- "store b into 'empty';";
- LogicalPlan expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // no schema
- query = "a = load 'd.txt';" +
- "b = foreach a generate $0, $1;" +
- "store b into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt';"+
- "b = foreach a generate $0, $1;"+
- "store b into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
- }
-
- public void testPrune() throws Exception {
- // only foreach
- String query = "a = load 'd.txt' as (id, v1, v2);" +
- "b = foreach a generate id;"+
- "store b into 'empty';";
- LogicalPlan newLogicalPlan = buildPlan(query);
-
- PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt' as (id);" +
- "b = foreach a generate id;"+
- "store b into 'empty';";
- LogicalPlan expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with filter
- query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);"+
- "b = filter a by v1 != NULL AND (v2+v3)<100;"+
- "c = foreach b generate id;"+
- "store c into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt' as (id, v1, v3, v2);" +
- "b = filter a by v1 != NULL AND (v2+v3)<100;" +
- "c = foreach b generate id;" +
- "store c into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with 2 foreach
- query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v2, v5, v4;" +
- "c = foreach b generate v5, v4;" +
- "store c into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt' as (v5, v4);" +
- "b = foreach a generate v5, v4;" +
- "c = foreach b generate v5, v4;" +
- "store c into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with 2 foreach
- query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate id, v1, v5, v3, v4;" +
- "c = foreach b generate v5, v4;" +
- "store c into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a = load 'd.txt' as (v5, v4);" +
- "b = foreach a generate v5, v4;" +
- "c = foreach b generate v5, v4;" +
- "store c into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with 2 foreach and filter in between
- query = "a =load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v2, v5, v4;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5, v4;" +
- "store d into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (v5, v4, v2);" +
- "b = foreach a generate v2, v5, v4;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5, v4;" +
- "store d into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with 2 foreach after join
- query = "a =load 'd.txt' as (id, v1, v2, v3);" +
- "b = load 'c.txt' as (id, v4, v5, v6);" +
- "c = join a by id, b by id;" +
- "d = foreach c generate a::id, v5, v3, v4;" +
- "store d into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, v3);" +
- "b = load 'c.txt' as (id, v4, v5);" +
- "c = join a by id, b by id;" +
- "d = foreach c generate a::id, v5, v3, v4;" +
- "store d into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with BinStorage, insert foreach after load
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "c = filter a by v2 != NULL;" +
- "d = foreach c generate v5, v4;" +
- "store d into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v5, v4, v2;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5, v4;" +
- "store d into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with BinStorage, not to insert foreach after load if there is already one
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v5, v4, v2;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5;" +
- "store d into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v5, v2;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5;" +
- "store d into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // with BinStorage, not to insert foreach after load if there is already one
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v5, v4, v2, 10;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5;" +
- "store d into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
- "b = foreach a generate v5, v2, 10;" +
- "c = filter b by v2 != NULL;" +
- "d = foreach c generate v5;" +
- "store d into 'empty';";
- expected = buildPlan(query);
- assertTrue(expected.isEqual(newLogicalPlan));
- }
-
- @SuppressWarnings("unchecked")
- public void testPruneWithMapKey() throws Exception {
- // only foreach
- String query = "a =load 'd.txt' as (id, v1, m:map[]);" +
- "b = foreach a generate id, m#'path';" +
- "store b into 'empty';";
- LogicalPlan newLogicalPlan = buildPlan(query);
-
- PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, m:map[]);" +
- "b = foreach a generate id, m#'path';" +
- "store b into 'empty';";
- LogicalPlan expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
-
- LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
- Map<Integer,Set<String>> annotation =
- (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
- assertEquals(annotation.size(), 1);
- Set<String> s = new HashSet<String>();
- s.add("path");
- assertEquals(annotation.get(2), s);
-
- // foreach with join
- query = "a =load 'd.txt' as (id, v1, m:map[]);" +
- "b = load 'd.txt' as (id, v1, m:map[]);" +
- "c = join a by id, b by id;" +
- "d = filter c by a::m#'path' != NULL;" +
- "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
- "store e into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, m:map[]);" +
- "b = load 'd.txt' as (id, m:map[]);" +
- "c = join a by id, b by id;" +
- "d = filter c by a::m#'path' != NULL;" +
- "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
- "store e into 'empty';";
- expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
-
- List<Operator> ll = newLogicalPlan.getSources();
- assertEquals(ll.size(), 2);
- LOLoad loada = null;
- LOLoad loadb = null;
- for(Operator opp: ll) {
- if (((LogicalRelationalOperator)opp).getAlias().equals("a")) {
- loada = (LOLoad)opp;
- continue;
- }
-
- if (((LogicalRelationalOperator)opp).getAlias().equals("b")) {
- loadb = (LOLoad)opp;
- continue;
- }
- }
-
- annotation =
- (Map<Integer, Set<String>>) loada.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
- assertNull(annotation);
-
- annotation =
- (Map<Integer, Set<String>>) loadb.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
- assertEquals(annotation.size(), 1);
-
- s = new HashSet<String>();
- s.add("path");
- assertEquals(annotation.get(2), s);
- }
-
- public void testPruneWithBag() throws Exception {
- // filter above foreach
- String query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
- "b = filter a by id>10;" +
- "c = foreach b generate id, FLATTEN(v);" +
- "d = foreach c generate id, v::s2;" +
- "store d into 'empty';";
- LogicalPlan newLogicalPlan = buildPlan(query);
-
- PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
- "b = filter a by id>10;" +
- "c = foreach b generate id, FLATTEN(v);" +
- "d = foreach c generate id, v::s2;" +
- "store d into 'empty';";
- LogicalPlan expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
- }
-
- public void testAddForeach() throws Exception {
- // filter above foreach
- String query = "a =load 'd.txt' as (id, v1, v2);" +
- "b = filter a by v1>10;" +
- "c = foreach b generate id;" +
- "store c into 'empty';";
- LogicalPlan newLogicalPlan = buildPlan(query);
-
- PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, v1);" +
- "b = filter a by v1>10;" +
- "c = foreach b generate id;" +
- "store c into 'empty';";
- LogicalPlan expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
-
- // join with foreach
- query = "a =load 'd.txt' as (id, v1, v2);" +
- "b = load 'd.txt' as (id, v1, v2);" +
- "c = join a by id, b by id;" +
- "d = filter c by a::v1>b::v1;" +
- "e = foreach d generate a::id;" +
- "store e into 'empty';";
- newLogicalPlan = buildPlan(query);
-
- optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
- optimizer.optimize();
-
- query = "a =load 'd.txt' as (id, v1);" +
- "b = load 'd.txt' as (id, v1);" +
- "c = join a by id, b by id;" +
- "d = foreach c generate a::id, a::v1, b::v1;" +
- "e = filter d by a::v1>b::v1;" +
- "f = foreach e generate a::id;" +
- "store f into 'empty';";
- expected = buildPlan(query);
-
- assertTrue(expected.isEqual(newLogicalPlan));
- }
-
- public class MyPlanOptimizer extends LogicalPlanOptimizer {
- protected MyPlanOptimizer(OperatorPlan p, int iterations) {
- super(p, iterations, null);
- }
-
- protected List<Set<Rule>> buildRuleSets() {
- List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
-
- Rule r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
- Set<Rule> s = new HashSet<Rule>();
- s.add(r);
- ls.add(s);
-
- r = new AddForEach("AddForEach");
- s = new HashSet<Rule>();
- s.add(r);
- ls.add(s);
-
- return ls;
- }
- }
- }