/test/org/apache/pig/test/TestLogicalPlanBuilder.java
Java | 2151 lines | 1657 code | 221 blank | 273 comment | 19 complexity | 982b2ab5858dbef794f71fc44491c377 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
Large files are truncated, but you can click here to view the full file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.pig.test;
- import static org.junit.Assert.assertEquals;
- import java.io.IOException;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.Properties;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.mapreduce.InputFormat;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.RecordReader;
- import org.apache.pig.ExecType;
- import org.apache.pig.FuncSpec;
- import org.apache.pig.LoadCaster;
- import org.apache.pig.LoadFunc;
- import org.apache.pig.PigException;
- import org.apache.pig.PigServer;
- import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
- import org.apache.pig.builtin.PigStorage;
- import org.apache.pig.data.BagFactory;
- import org.apache.pig.data.DataBag;
- import org.apache.pig.data.DataType;
- import org.apache.pig.data.Tuple;
- import org.apache.pig.data.TupleFactory;
- import org.apache.pig.impl.PigContext;
- import org.apache.pig.impl.builtin.GFAny;
- import org.apache.pig.impl.logicalLayer.schema.Schema;
- import org.apache.pig.impl.util.LogUtils;
- import org.apache.pig.impl.util.Utils;
- import org.apache.pig.newplan.Operator;
- import org.apache.pig.newplan.logical.expression.ConstantExpression;
- import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
- import org.apache.pig.newplan.logical.expression.ProjectExpression;
- import org.apache.pig.newplan.logical.relational.LOCogroup;
- import org.apache.pig.newplan.logical.relational.LOForEach;
- import org.apache.pig.newplan.logical.relational.LOGenerate;
- import org.apache.pig.newplan.logical.relational.LOLoad;
- import org.apache.pig.newplan.logical.relational.LOSort;
- import org.apache.pig.newplan.logical.relational.LogicalPlan;
- import org.apache.pig.newplan.logical.relational.LogicalSchema;
- import org.apache.pig.test.utils.Identity;
- import org.junit.Before;
- import org.junit.Test;
- import junit.framework.Assert;
- import junit.framework.AssertionFailedError;
- public class TestLogicalPlanBuilder {
- // Pig context shared by the tests; created for ExecType.LOCAL with an empty Properties object.
- PigContext pigContext = new PigContext(ExecType.LOCAL, new Properties());
- // Pig server handle; starts out null and is assigned in setUp().
- private PigServer pigServer = null;
- /**
- * Runs before each test: wraps the shared {@code pigContext} in a new PigServer
- * and then calls {@code connect()} on the context.
- *
- * @throws Exception if creating the server or connecting the context fails
- */
- @Before
- public void setUp() throws Exception {
- pigServer = new PigServer( pigContext );
- pigContext.connect();
- }
- /** Builds a plan for a FOREACH over an inline LOAD that projects $1 and $2. */
- @Test
- public void testQuery1() throws Exception {
- buildPlan("foreach (load 'a') generate $1,$2;");
- }
- /** Builds a plan for a LOAD that names PigStorage with a ':' argument, projecting a column and a string constant. */
- @Test
- public void testQuery2() throws Exception {
- final String storageClass = PigStorage.class.getName();
- buildPlan("foreach (load 'a' using " + storageClass + "(':')) generate $1, 'aoeuaoeu' ;");
- }
- // TODO FIX Query3 and Query4
- /** Builds a plan that applies the fully qualified builtin AVG to a bag produced by a two-input COGROUP. */
- @Test
- public void testQuery3() throws Exception {
- final String script = "foreach (cogroup (load 'a' as (u:int)) by $0, (load 'b' as (v:int) ) by $0) generate org.apache.pig.builtin.AVG($1);";
- buildPlan(script);
- }
- /** Builds a plan that applies unqualified AVG to a bag-of-double column declared in the load schema. */
- @Test
- public void testQuery4() throws Exception {
- buildPlan("foreach (load 'a' as (u:int, v:bag{T:tuple(t:double)})) generate AVG($1) ;");
- }
- /** Builds a plan for GROUP ... ALL followed by a projection of $1. */
- @Test
- public void testQuery5() throws Exception {
- final String script = "foreach (group (load 'a') ALL) generate $1 ;";
- buildPlan(script);
- }
- /** Builds a plan that groups by $1 and generates the group key plus a string constant. */
- @Test
- public void testQuery6() throws Exception {
- buildPlan("foreach (group (load 'a') by $1) generate group, '1' ;");
- }
- /** Builds a plan for a LOAD that names PigStorage with no constructor arguments. */
- @Test
- public void testQuery7() throws Exception {
- final String storageClass = PigStorage.class.getName();
- buildPlan("foreach (load 'a' using " + storageClass + "()) generate $1 ;");
- }
- /** Builds a plan that projects a column out of each bag of a two-input COGROUP. */
- @Test
- public void testQuery10() throws Exception {
- final String script = "foreach (cogroup (load 'a') by ($1), (load 'b') by ($1)) generate $1.$1, $2.$1 ;";
- buildPlan(script);
- }
- // TODO FIX Query11 and Query12
- /** Builds a plan for a two-input GROUP (int key and long key) that generates the group and AVG($1). */
- @Test
- public void testQuery11() throws Exception {
- buildPlan(" foreach (group (load 'a' as (u:int)) by $0, (load 'b' as (v:long)) by $0) generate group, AVG($1) ;");
- }
- /** Builds a plan for AVG over a bag column declared in an AS clause that follows an explicit PigStorage loader. */
- @Test
- public void testQuery12() throws Exception {
- final String storageClass = PigStorage.class.getName();
- final String script = "foreach (load 'a' using " + storageClass + "() as (v: long, u:bag{T:tuple(t:double)} ) ) generate AVG($1) ;";
- buildPlan(script);
- }
- /** Builds a plan for COGROUP ... ALL that generates only the group key. */
- @Test
- public void testQuery13() throws Exception {
- buildPlan("foreach (cogroup (load 'a') ALL) generate group ;");
- }
- /** Builds a plan that groups on the pair ($6, $7) and flattens the resulting group key. */
- @Test
- public void testQuery14() throws Exception {
- final String script = "foreach (group (load 'a') by ($6, $7)) generate flatten(group) ;";
- buildPlan(script);
- }
- /** Builds a plan that mixes positional projections with a string constant in one GENERATE. */
- @Test
- public void testQuery15() throws Exception {
- buildPlan(" foreach (load 'a') generate $1, 'hello', $3 ;");
- }
- /** Exercises the DEFINE statement syntax. */
- @Test
- public void testQuery100() throws Exception {
- final String defineStatement = "define FUNC ARITY();";
- buildPlan(defineStatement);
- }
- @Test
- public void testQueryFail1() throws Exception {
- String query = " foreach (group (A = load 'a') by $1) generate A.'1' ;";
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail("Test case should fail" );
- }
- @Test
- public void testQueryFail2() throws Exception {
- String query = "foreach group (load 'a') by $1 generate $1.* ;";
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail("Test case should fail" );
- }
- @Test
- public void testQueryFail3() throws Exception {
- String query = "A = generate DISTINCT foreach (load 'a');";
- try {
- LogicalPlan lp = buildPlan(query);
- System.out.println( lp.toString() );
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail("Test case should fail" );
- }
- @Test
- public void testQueryFail4() throws Exception {
- String query = "A = generate [ORDER BY $0][$3, $4] foreach (load 'a');";
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail("Test case should fail" );
- }
- @Test
- public void testQueryFail5() throws Exception {
- String query = "A = generate " + TestApplyFunc.class.getName() + "($2.*) foreach (load 'a');";
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail("Test case should fail" );
- }
- /**
- * Regression test for bug 831620 (marked fixed): user-defined functions used
- * to be required to live in the default package. Builds a plan that calls the
- * nested TestApplyFunc by its fully qualified class name.
- */
- // TODO FIX Query17
- @Test
- public void testQuery17() throws Exception {
- final String udfName = TestApplyFunc.class.getName();
- buildPlan("foreach (load 'A')" + "generate " + udfName + "($1);");
- }
- /** Eval function used by the tests; returns a new tuple built from {@code input.getAll()}. */
- public static class TestApplyFunc extends org.apache.pig.EvalFunc<Tuple> {
- @Override
- public Tuple exec(Tuple input) throws IOException {
- final TupleFactory factory = TupleFactory.getInstance();
- return factory.newTuple(input.getAll());
- }
- }
- /**
- * Validate that parallel is parsed correctly Bug 831714 - fixed
- */
- @Test //PIG-1996
- public void testQuery18() throws Exception {
- String query = "store (FOREACH (group (load 'a') ALL PARALLEL 16) generate group ) into 'y';";
- LogicalPlan lp = buildPlan(query);
- Operator root = lp.getSources().get(0);
- List<Operator> listOp = lp.getSuccessors(root);
- Operator lo = listOp.get(0);
- if (lo instanceof LOCogroup) {
- Assert.assertEquals( 1, ((LOCogroup) lo).getRequestedParallelism() );//Local mode, paraallel = 1
- } else {
- Assert.fail("Error: Unexpected Parse Tree output");
- }
- }
- /** Builds a plan for a FILTER that compares a positional column with a string constant. */
- @Test
- public void testQuery19() throws Exception {
- final String load = "a = load 'a';";
- final String filter = "b = filter a by $1 == '3';";
- buildPlan( load + filter );
- }
- /** Builds a plan for a conditional (bincond) expression inside GENERATE. */
- @Test
- public void testQuery20() throws Exception {
- buildPlan("foreach (load 'a') generate ($1 == '3'? $2 : $3) ;");
- }
- /** Builds a plan that flattens a multi-column projection of one COGROUP bag next to the other bag. */
- @Test
- public void testQuery21() throws Exception {
- final String loads = "A = load 'a';" +
- "B = load 'b';";
- final String foreach = "foreach (cogroup A by ($1), B by ($1)) generate A, flatten(B.($1, $2, $3));";
- buildPlan( loads + foreach );
- }
- /** Builds a plan for a nested FOREACH block that orders one COGROUP bag before generating. */
- @Test
- public void testQuery22() throws Exception {
- final String relations = "A = load 'a';" +
- "B = load 'b';" +
- "C = cogroup A by ($1), B by ($1);";
- final String nestedForeach = "foreach C { " +
- "B = order B by $0; " +
- "generate FLATTEN(A), B.($1, $2, $3) ;" +
- "};";
- buildPlan(relations + nestedForeach);
- }
- /**
- * Groups by (*, $0). If buildPlan raises AssertionFailedError, its message must
- * mention the star-grouping restriction.
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQuery22Fail() throws Exception {
- final String script = "A = load 'a' as (a:int, b: double);" +
- "B = group A by (*, $0);";
- try {
- buildPlan(script);
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Grouping attributes can either be star (*"));
- }
- }
- /** Builds a plan for a nested FOREACH block containing DISTINCT, two FILTERs and an ORDER. */
- @Test
- public void testQuery23() throws Exception {
- final String relations = "A = load 'a';" +
- "B = load 'b';" +
- "C = cogroup A by ($1), B by ($1);";
- final String nestedForeach = "foreach C { " +
- "A = Distinct A; " +
- "B = FILTER A BY $1 < 'z'; " +
- //TODO
- //Consecutive filters inside a foreach translate to a split
- //statement. That currently breaks because a filter supports
- //only a single output, so adding a second output to it fails.
- "C = FILTER A BY $2 == $3;" +
- "B = ORDER B BY $1;" +
- "GENERATE A, FLATTEN(B.$0);" +
- "};";
- buildPlan(relations + nestedForeach);
- }
- @Test
- public void testQuery23Fail() throws Exception {
- String query = "A = load 'a' as (a: int, b:double);" +
- "B = load 'b';" +
- "C = cogroup A by (*, $0), B by ($0, $1);";
- boolean exceptionThrown = false;
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("The arity of cogroup/group by columns " +
- "do not match"));
- exceptionThrown = true;
- }
- Assert.assertTrue(exceptionThrown);
- }
- @Test
- public void testQuery23Fail2() throws Exception {
- String query = "A = load 'a';" +
- "B = load 'b';" +
- "C = cogroup A by (*, $0), B by ($0, $1);";
- boolean exceptionThrown = false;
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- exceptionThrown = true;
- }
- Assert.assertTrue(exceptionThrown);
- }
- @Test
- public void testQuery23Fail3() throws Exception {
- String query = "A = load 'a' as (a: int, b:double);" +
- "B = load 'b' as (a:int);" +
- "C = cogroup A by *, B by *;";
- boolean exceptionThrown = false;
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("The arity of cogroup/group by columns " +
- "do not match"));
- exceptionThrown = true;
- }
- Assert.assertTrue(exceptionThrown);
- }
- /** Builds a plan for a bincond on column equality followed by a plain projection. */
- @Test
- public void testQuery24() throws Exception {
- final String load = "a = load 'a';";
- buildPlan(load + "foreach a generate (($0 == $1) ? 'a' : $2), $4 ;");
- }
- /** Builds a plan for a nested FILTER with an AND of two comparisons over a bag column. */
- @Test
- public void testQuery25() throws Exception {
- final String script = "foreach (load 'a' as (u:bag{}, v, w) ) {" +
- "B = FILTER $0 BY (($1 == $2) AND ('a' < 'b'));" +
- "generate B;" +
- "};";
- buildPlan(script);
- }
- /** Builds a plan for a bincond whose condition combines NOT and OR. */
- @Test
- public void testQuery26() throws Exception {
- buildPlan("foreach (load 'a') generate ((NOT (($1 == $2) OR ('a' < 'b'))) ? 'a' : $2), 'x' ;");
- }
- // TODO FIX Query27 and Query28
- /** Builds a plan for a nested block with DISTINCT on a bag projection and a UDF call taking a column and a bag projection. */
- @Test
- public void testQuery27() throws Exception {
- final String udfName = TestApplyFunc.class.getName();
- final String script = "foreach (load 'a' as (u, v:bag{}, w, x:bag{}, y) ){" +
- "A = DISTINCT $3.$1;" +
- " generate " + udfName + "($2, $1.($1, $4));" +
- "};";
- buildPlan(script);
- }
- /** Builds a plan where one UDF call is nested as an argument of another. */
- @Test
- public void testQuery28() throws Exception {
- final String udfName = TestApplyFunc.class.getName();
- buildPlan("foreach (load 'a') generate " + udfName + "($2, " + udfName + "($2.$3));");
- }
- /** Builds a plan for a LOAD using the nested TestStorageFunc with a one-column AS clause. */
- @Test
- public void testQuery29() throws Exception {
- final String loaderName = TestStorageFunc.class.getName();
- buildPlan("load 'myfile' using " + loaderName + "() as (col1);");
- }
- /** Builds a plan for a LOAD using the nested TestStorageFunc with a two-column AS clause. */
- @Test
- public void testQuery30() throws Exception {
- final String loaderName = TestStorageFunc.class.getName();
- buildPlan("load 'myfile' using " + loaderName + "() as (col1, col2);");
- }
- /**
- * Stub loader referenced by name in LOAD ... USING clauses of the tests.
- * Every method is a no-op: value-returning methods return null and the
- * void methods do nothing.
- */
- public static class TestStorageFunc extends LoadFunc{
- /** Always returns null. */
- @Override
- public Tuple getNext() throws IOException {
- return null;
- }
- /** Always returns null. */
- @Override
- public InputFormat getInputFormat() throws IOException {
- return null;
- }
- /** Always returns null. */
- @Override
- public LoadCaster getLoadCaster() throws IOException {
- return null;
- }
- /** Does nothing. */
- @Override
- public void prepareToRead(RecordReader reader, PigSplit split)
- throws IOException {
- }
- /** Always returns null. */
- @Override
- public String relativeToAbsolutePath(String location, Path curDir)
- throws IOException {
- return null;
- }
- /** Does nothing. */
- @Override
- public void setLocation(String location, Job job) throws IOException {
- }
- }
- /** Builds a plan for a bare LOAD with an untyped two-column schema. */
- @Test
- public void testQuery31() throws Exception {
- buildPlan("load 'myfile' as (col1, col2);");
- }
- /** Builds a plan for a LOAD schema that declares tuple-typed columns, projecting a top-level column. */
- @Test
- public void testQuery32() throws Exception {
- final String script = "foreach (load 'myfile' as (col1, col2 : tuple(sub1, sub2), col3 : tuple(bag1))) generate col1 ;";
- buildPlan(script);
- }
- /** Builds a plan that cogroups by named columns and projects a named column out of a bag. */
- @Test
- public void testQuery33() throws Exception {
- final String relations = "A = load 'a' as (aCol1, aCol2);" +
- "B = load 'b' as (bCol1, bCol2);" +
- "C = cogroup A by (aCol1), B by bCol1;";
- buildPlan(relations + "foreach C generate group, A.aCol1;");
- }
- @Test
- //TODO: per the original note, nested schemas do not work at the moment, probably due to a bug in the new parser.
- /* Builds a plan that redefines alias A with a filter and then projects a tuple-typed column out of a COGROUP bag. */
- public void testQuery34() throws Exception {
- final String relations = "A = load 'a' as (aCol1, aCol2 : tuple(subCol1, subCol2));" +
- "A = filter A by aCol1 == '1';" +
- "B = load 'b' as (bCol1, bCol2);";
- final String foreach = "foreach (cogroup A by (aCol1), B by bCol1 ) generate A.aCol2, B.bCol2 ;";
- buildPlan(relations + foreach);
- }
- /** Builds a plan that projects both declared columns by name. */
- @Test
- public void testQuery35() throws Exception {
- buildPlan("foreach (load 'a' as (col1, col2)) generate col1, col2 ;");
- }
- /** Builds a plan that projects named columns, in swapped order, out of a positional bag reference. */
- @Test
- public void testQuery36() throws Exception {
- final String script = "foreach (cogroup ( load 'a' as (col1, col2)) by col1) generate $1.(col2, col1);";
- buildPlan(script);
- }
- @Test
- public void testQueryFail37() throws Exception {
- String query = "A = load 'a'; asdasdas";
- try{
- buildPlan(query);
- }catch(AssertionFailedError e){
- return;
- }
- Assert.fail( "Query should fail." );
- }
- /** Builds a plan for CROSS over two inline LOADs. */
- @Test
- public void testQuery38() throws Exception {
- buildPlan("c = cross (load 'a'), (load 'b');");
- }
- // TODO FIX Query39 and Query40
- /**
- * Builds a plan for a SUM aliased as totalRank, then builds a second plan from
- * the same script extended with a FILTER and a FOREACH that use that alias.
- */
- @Test
- public void testQuery39() throws Exception{
- final String aggregate = "a = load 'a' as (url, host, ranking:double);" +
- "b = group a by (url,host); " +
- "c = foreach b generate flatten(group.url), SUM(a.ranking) as totalRank;";
- buildPlan(aggregate);
- final String withAliasUse = aggregate +
- "d = filter c by totalRank > 10;" +
- "e = foreach d generate totalRank;";
- buildPlan( withAliasUse );
- }
- /**
- * References "url" after group.url was flattened. If buildPlan raises
- * AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail39() throws Exception{
- final String script = "a = load 'a' as (url, host, ranking);" +
- "b = group a by (url,host); " +
- "c = foreach b generate flatten(group.url), SUM(a.ranking) as totalRank;" +
- "d = filter c by totalRank > '10';" +
- "e = foreach d generate url;";
- try {
- // url has been flattened, hence the expected failure
- buildPlan(script);
- } catch(AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Builds a plan for two FILTER statements that use IsEmpty, the second combined with AND. */
- @Test
- public void testQuery40() throws Exception {
- final String simpleFilter = "a = FILTER (load 'a') BY IsEmpty($2);";
- final String combinedFilter = "a = FILTER (load 'a') BY (IsEmpty($2) AND ($3 == $2));";
- buildPlan( simpleFilter + combinedFilter );
- }
- @Test
- public void testQueryFail41() throws Exception {
- try {
- buildPlan("a = load 'a';" + "b = a as (host,url);");
- } catch (AssertionFailedError e) {
- return;
- }
- // TODO
- // the following statement was earlier present
- // eventually when we do allow assignments of the form
- // above, we should test with the line below
- // uncommented
- //buildPlan("foreach b generate host;");
- Assert.fail( "Query should fail." );
- }
- /** Builds a plan that names columns with AS inside GENERATE and then projects one of those names. */
- @Test
- public void testQuery42() throws Exception {
- final String script = "a = load 'a';" +
- "b = foreach a generate $0 as url, $1 as ranking;" +
- "foreach b generate url;";
- buildPlan( script );
- }
- /** Builds a plan that flattens both COGROUP bags and then references columns with alias::column names. */
- @Test
- public void testQuery43() throws Exception {
- final String cogrouped = "a = load 'a' as (url,hitCount);" +
- "b = load 'a' as (url,ranking);" +
- "c = cogroup a by url, b by url;";
- final String projections = "d = foreach c generate group,flatten(a),flatten(b);" +
- "e = foreach d generate group, a::url, b::url, b::ranking;";
- buildPlan( cogrouped + projections );
- }
- @Test
- public void testQueryFail43() throws Exception {
- String q = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);";
- try {
- String query = q + "c = cogroup a by (name, age), b by (height);";
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail( "Query should fail." );
- }
- /** Builds a plan for a COGROUP whose key includes a map lookup, then projects a field of the group key. */
- @Test
- public void testQuery44() throws Exception {
- final String relations = "a = load 'a' as (url, pagerank);" +
- "b = load 'b' as (url, query, ranking);" +
- "c = cogroup a by (pagerank#'nonspam', url) , b by (ranking, url) ;";
- buildPlan( relations + "foreach c generate group.url;" );
- }
- /**
- * Registers GFAny('tr o 0') under the name myTr on a fresh local PigServer and
- * expects registering a query that nests myTr(myTr(*)) to throw.
- *
- * @throws Throwable if the PigServer cannot be created (the original cause is
- * propagated instead of being replaced by a message-less assertion)
- */
- @Test
- public void testQueryFail44() throws Throwable {
- // Let construction failures surface with their own stack trace.
- PigServer pig = new PigServer("local");
- pig.registerFunction("myTr",
- new FuncSpec(GFAny.class.getName() + "('tr o 0')"));
- try{
- pig.registerQuery("b = foreach (load 'a') generate myTr(myTr(*));");
- }catch(Exception e){
- // Any exception counts as the expected rejection.
- return;
- }
- Assert.fail("registerQuery should have thrown for myTr(myTr(*))");
- }
- /** Builds a plan covering the arithmetic operators +, -, *, /, % and unary minus on typed columns. */
- @Test
- public void testQuery57() throws Exception {
- buildPlan("foreach (load 'a' as (u:int, v:long, w:int)) generate ($1+$2), ($1-$2), ($1*$2), ($1/$2), ($1%$2), -($1) ;");
- }
- /** Builds a plan for a nested block that assigns a bag projection to an alias and generates it. */
- @Test
- public void testQuery58() throws Exception {
- final String grouped = "a = load 'a' as (name, age, gpa);" +
- "b = group a by name;";
- buildPlan(grouped + "foreach b {d = a.name; generate group, d;};");
- }
- /**
- * Projects group.url after grouping by the single column url. If buildPlan
- * raises AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail58() throws Exception{
- final String grouped = "a = load 'a' as (url, host, ranking);" +
- "b = group a by url; ";
- try {
- buildPlan(grouped + "c = foreach b generate group.url;");
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Builds a plan for a JOIN of two relations on a shared column name. */
- @Test
- public void testQuery59() throws Exception {
- final String loads = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);";
- buildPlan(loads + "c = join a by name, b by name;");
- }
- /** Builds a plan for a CROSS of two named relations. */
- @Test
- public void testQuery60() throws Exception {
- final String loads = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);";
- buildPlan(loads + "c = cross a,b;");
- }
- /** Builds a plan for a UNION of two relations with different declared schemas. */
- @Test
- public void testQuery61() throws Exception {
- final String loads = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);";
- buildPlan(loads + "c = union a,b;");
- }
- /** Builds a plan with several ORDER BY forms: qualified names, asc/desc, positional, star, and nested inside FOREACH. */
- @Test
- public void testQuery62() throws Exception {
- final String relations = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);" +
- "c = cross a,b;";
- final String orderings = "d = order c by b::name, height, a::gpa;" +
- "e = order a by name, age, gpa desc;" +
- "f = order a by $0 asc, age, gpa desc;" +
- "g = order a by * asc;";
- final String nestedOrdering = "h = cogroup a by name, b by name;" +
- "i = foreach h {i1 = order a by *; generate i1;};";
- buildPlan(relations + orderings + nestedOrdering);
- }
- /**
- * Orders a cross product by the unqualified column "name", which both inputs
- * declare. If buildPlan raises AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail62() throws Exception {
- final String script = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);" +
- "c = cross a,b;" +
- "d = order c by name, b::name, height, a::gpa;";
- try {
- buildPlan(script);
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Builds a plan that groups by a tuple-typed column and projects a field of the resulting group key. */
- @Test
- public void testQuery63() throws Exception {
- final String grouped = "a = load 'a' as (name, details: tuple(age, gpa));" +
- "b = group a by details;";
- final String projections = "d = foreach b generate group.age;" +
- "e = foreach a generate name, details;";
- buildPlan(grouped + projections);
- }
- /**
- * Declares tuple columns without the "tuple" keyword. If buildPlan raises
- * AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail63() throws Exception {
- final String script = "foreach (load 'myfile' as (col1, col2 : (sub1, sub2), col3 : (bag1))) generate col1 ;";
- try {
- buildPlan(script);
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Builds a plan with load schemas that declare chararray, tuple, map and several nested bag columns. */
- @Test
- public void testQuery64() throws Exception {
- final String typedLoads = "a = load 'a' as (name: chararray, details: tuple(age, gpa), mymap: map[]);" +
- "c = load 'a' as (name, details: bag{mytuple: tuple(age: int, gpa)});";
- final String groupAndProject = "b = group a by details;" +
- "d = foreach b generate group.age;" +
- "e = foreach a generate name, details;";
- final String nestedBagLoad = "f = LOAD 'myfile' AS (garage: bag{tuple1: tuple(num_tools: int)}, links: bag{tuple2: tuple(websites: chararray)}, page: bag{something_stupid: tuple(yeah_double: double)}, coordinates: bag{another_tuple: tuple(ok_float: float, bite_the_array: bytearray, bag_of_unknown: bag{})});";
- buildPlan(typedLoads + groupAndProject + nestedBagLoad);
- }
- @Test
- public void testQueryFail64() throws Exception {
- String query = "foreach (load 'myfile' as (col1, col2 : bag{age: int})) generate col1 ;";
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail( "query should fail" );
- }
- /** Builds a plan that gives distinct aliases to same-named columns projected from both COGROUP bags. */
- @Test
- public void testQuery65() throws Exception {
- final String cogrouped = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);" +
- "c = cogroup a by (name, age), b by (name, height);";
- buildPlan( cogrouped + "d = foreach c generate group.name, a.name, b.name as bname;".replace("a.name, b.name", "a.name as aName, b.name") );
- }
- /**
- * Aliases b.height as "age" while also projecting a.age. If buildPlan raises
- * AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail65() throws Exception {
- final String script = "a = load 'a' as (name, age, gpa);" +
- "b = load 'b' as (name, height);" +
- "c = cogroup a by (name, age), b by (name, height);" +
- "d = foreach c generate group.name, a.name, b.height as age, a.age;";
- try {
- buildPlan( script );
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Builds a plan with long and float literals and a single-tuple bag constant in GENERATE. */
- @Test
- public void testQuery67() throws Exception {
- final String load = " a = load 'input1' as (name, age, gpa);";
- final String generate = " b = foreach a generate age, age * 10L, gpa/0.2f, {(16, 4.0e-2, 'hello')};";
- buildPlan( load + generate );
- }
- /** Builds a plan with an int constant and a two-tuple bag constant, including a lower-case long suffix. */
- @Test
- public void testQuery68() throws Exception {
- final String load = " a = load 'input1';";
- final String generate = " b = foreach a generate 10, {(16, 4.0e-2, 'hello'), (0.5f, 12l, 'another tuple')};";
- buildPlan( load + generate );
- }
- /** Builds a plan with a bag constant whose tuples differ in arity and contain a nested tuple. */
- @Test
- public void testQuery69() throws Exception {
- final String load = " a = load 'input1';";
- final String generate = " b = foreach a generate {(16, 4.0e-2, 'hello'), (0.5f, 'another tuple', 12L, (1))};";
- buildPlan( load + generate );
- }
- /** Builds a plan with a map constant holding mixed value types, followed by a map lookup. */
- @Test
- public void testQuery70() throws Exception {
- final String load = " a = load 'input1';";
- final String mapConstant = " b = foreach a generate ['10'#'hello', '4.0e-2'#10L, '0.5f'#(1), 'world'#42, '42'#{('guide')}] as mymap:map[];";
- final String lookup = " c = foreach b generate mymap#'10';";
- buildPlan( load + mapConstant + lookup );
- }
- @Test
- public void testQueryFail67() throws Exception {
- String q = " a = load 'input1' as (name, age, gpa);" +
- " b = foreach a generate age, age * 10L, gpa/0.2f, {16, 4.0e-2, 'hello'};";
- try {
- buildPlan(q);
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail( "query should fail" );
- }
- @Test
- public void testQueryFail68() throws Exception {
- String q = " a = load 'input1' as (name, age, gpa);";
- try {
- buildPlan( q +
- " b = foreach a generate {(16 L, 4.0e-2, 'hello'), (0.5f, 'another tuple', 12L, {()})};");
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail( "query should fail" );
- }
- /** Builds a plan for SPLIT into two relations with string comparisons, then projects from each. */
- @Test
- public void testQuery71() throws Exception {
- final String split = "split (load 'a') into x if $0 > '7', y if $0 < '7';";
- final String projections = "b = foreach x generate $0;" +
- "c = foreach y generate $1;";
- buildPlan( split + projections );
- }
- /** Builds a plan with casts to int, bag, typed bag, typed tuple, empty tuple and chararray. */
- @Test
- public void testQuery72() throws Exception {
- final String split = "split (load 'a') into x if $0 > 7, y if $0 < 7;";
- final String casts = "b = foreach x generate (int)$0;" +
- "c = foreach y generate (bag{})$1;" +
- "d = foreach y generate (int)($2/2);" +
- "e = foreach y generate (bag{tuple(int, float)})($2);" +
- "f = foreach x generate (tuple(int, float))($3);" +
- "g = foreach x generate (tuple())($4);" +
- "h = foreach x generate (chararray)($5);";
- buildPlan( split + casts );
- }
- /**
- * Expects buildPlan to raise AssertionFailedError for each of three malformed
- * casts: (bag), (bag{int, float}) and (tuple). The casts are tried in that
- * order and the first one that is accepted fails the test.
- */
- @Test
- public void testQueryFail72() throws Exception {
- final String split = "split (load 'a') into x if $0 > '7', y if $0 < '7';";
- final String[] badCasts = {
- "c = foreach y generate (bag)$1;",
- "c = foreach y generate (bag{int, float})$1;",
- "c = foreach y generate (tuple)$1;"
- };
- for (String badCast : badCasts) {
- boolean rejected = false;
- try {
- buildPlan( split + badCast );
- } catch (AssertionFailedError expected) {
- rejected = true;
- }
- Assert.assertTrue( rejected );
- }
- }
- /** Builds a plan that uses the MATCHES operator in a FILTER and inside a bincond. */
- @Test
- public void testQuery73() throws Exception {
- final String split = "split (load 'a') into x if $0 > '7', y if $0 < '7';";
- final String matches = "b = filter x by $0 matches '^fred.*';" +
- "c = foreach y generate $0, ($0 matches 'yuri.*' ? $1 - 10 : $1);";
- buildPlan( split + matches );
- }
- /** Builds a plan that groups two differently typed relations by a column and cogroups by an arithmetic expression. */
- @Test
- public void testQuery74() throws Exception {
- final String loads = "a = load 'a' as (field1: int, field2: long);" +
- "b = load 'a' as (field1: bytearray, field2: double);";
- final String groupings = "c = group a by field1, b by field1;" +
- "d = cogroup a by ((field1+field2)*field1), b by field1;";
- buildPlan( loads + groupings );
- }
- /** Builds a plan for LIMIT with an int literal. */
- @Test
- public void testQuery77() throws Exception {
- final String script = "limit (load 'a') 100;";
- buildPlan(script);
- }
- /** Builds a plan for LIMIT with a long literal (100L). */
- @Test
- public void testLimitWithLong() throws Exception {
- final String script = "limit (load 'a') 100L;";
- buildPlan(script);
- }
- /** Builds a plan for a three-way UNION of inline LOADs followed by a braced FOREACH. */
- @Test
- public void testQuery75() throws Exception {
- final String union = "a = union (load 'a'), (load 'b'), (load 'c');";
- final String foreach = "b = foreach a {generate $0;};";
- buildPlan( union + foreach );
- }
- /** Builds a plan that uses IS NULL / IS NOT NULL in FILTERs and inside bincond expressions. */
- @Test
- public void testQuery76() throws Exception {
- final String split = "split (load 'a') into x if $0 > '7', y if $0 < '7';";
- final String nullFilters = "b = filter x by $0 IS NULL;" +
- "c = filter y by $0 IS NOT NULL;";
- final String nullBinconds = "d = foreach b generate $0, ($1 IS NULL ? 0 : $1 - 7);" +
- "e = foreach c generate $0, ($1 IS NOT NULL ? $1 - 5 : 0);";
- buildPlan( split + nullFilters + nullBinconds );
- }
- /** Builds a plan for a nested block chaining FILTER, projection, DISTINCT and ORDER after grouping by one column. */
- @Test
- public void testQuery80() throws Exception {
- final String grouped = "a = load 'input1' as (name, age, gpa);" +
- "b = filter a by age < '20';" +
- "c = group b by age;";
- final String nestedForeach = "d = foreach c {"
- + "cf = filter b by gpa < '3.0';"
- + "cp = cf.gpa;"
- + "cd = distinct cp;"
- + "co = order cd by gpa;"
- + "generate group, flatten(co);"
- // disabled alternative: + "generate group, flatten(cd);"
- + "};";
- buildPlan(grouped + nestedForeach);
- }
- /** Builds a plan for a three-way SPLIT that uses the word operators lt, gte, lte and gt. */
- @Test
- public void testQuery81() throws Exception {
- final String load = "a = load 'input1' using PigStorage() as (name, age, gpa);";
- final String split = "split a into b if name lt 'f', c if (name gte 'f' and name lte 'h'), d if name gt 'h';";
- buildPlan( load + split );
- }
- @Test
- public void testQueryFail81() throws Exception {
- String q = "a = load 'input1' using PigStorage() as (name, age, gpa);";
- try {
- buildPlan(q + "split a into b if name lt 'f', c if (name ge 'f' and name le 'h'), d if name gt 'h';");
- } catch (AssertionFailedError e) {
- return;
- }
- Assert.fail( "Query should fail." );
- }
- /** Builds a plan for a nested block that orders a positional bag by star and projects $1.$0. */
- @Test
- public void testQuery82() throws Exception {
- final String grouped = "a = load 'myfile';" +
- "b = group a by $0;";
- final String nestedForeach = "c = foreach b {"
- + "c1 = order $1 by *;"
- + "c2 = $1.$0;"
- + "generate flatten(c1), c2;"
- + "};";
- buildPlan(grouped + nestedForeach);
- }
- /**
- * Same nested block as testQuery82 but assigns the whole bag ("c2 = $1;"). If
- * buildPlan raises AssertionFailedError, its message must contain "Exception".
- * NOTE(review): the test also passes when no error is raised - confirm that is intended.
- */
- @Test
- public void testQueryFail82() throws Exception {
- final String grouped = "a = load 'myfile';" +
- "b = group a by $0;";
- final String nestedForeach = "c = foreach b {"
- + "c1 = order $1 by *;"
- + "c2 = $1;"
- + "generate flatten(c1), c2;"
- + "};";
- try {
- buildPlan(grouped + nestedForeach);
- } catch (AssertionFailedError rejection) {
- final String detail = rejection.getMessage();
- Assert.assertTrue(detail.contains("Exception"));
- }
- }
- /** Same nested FILTER/DISTINCT/ORDER chain as testQuery80, but grouped by the pair (name,age). */
- @Test
- public void testQuery83() throws Exception {
- final String grouped = "a = load 'input1' as (name, age, gpa);" +
- "b = filter a by age < '20';" +
- "c = group b by (name,age);";
- final String nestedForeach = "d = foreach c {"
- + "cf = filter b by gpa < '3.0';"
- + "cp = cf.gpa;"
- + "cd = distinct cp;"
- + "co = order cd by gpa;"
- + "generate group, flatten(co);"
- + "};";
- buildPlan(grouped + nestedForeach);
- }
- /** Same as testQuery83, except the nested projection uses a positional reference (cf.$2) before ordering by name. */
- @Test
- public void testQuery84() throws Exception {
- final String grouped = "a = load 'input1' as (name, age, gpa);" +
- "b = filter a by age < '20';" +
- "c = group b by (name,age);";
- final String nestedForeach = "d = foreach c {"
- + "cf = filter b by gpa < '3.0';"
- + "cp = cf.$2;"
- + "cd = distinct cp;"
- + "co = order cd by gpa;"
- + "generate group, flatten(co);"
- + "};";
- buildPlan(grouped + nestedForeach);
- }
- /**
- * Checks the schema strings produced when an untyped relation is grouped on
- * (name, age): first the LOCogroup schema, then the schema of a FOREACH that
- * projects group.name, group.age and COUNT(a.gpa). In both plans the operator
- * under test is the first predecessor of the first sink (the store).
- */
- @Test
- public void testQuery85() throws Exception {
- LogicalPlan lp;
- String query = "a = load 'myfile' as (name, age, gpa);" +
- "b = group a by (name, age);";
- lp = buildPlan( query + "store b into 'output';");
- Operator store = lp.getSinks().get(0);
- LOCogroup cogroup = (LOCogroup) lp.getPredecessors(store).get(0);
- // assertEquals reports expected and actual on mismatch, so no debug print is needed.
- Assert.assertEquals(
- "group:tuple(name:bytearray,age:bytearray),a:bag{:tuple(name:bytearray,age:bytearray,gpa:bytearray)}",
- cogroup.getSchema().toString( false ) );
- lp = buildPlan(query +
- "c = foreach b generate group.name, group.age, COUNT(a.gpa);" +
- "store c into 'output';");
- store = lp.getSinks().get(0);
- LOForEach foreach = (LOForEach) lp.getPredecessors(store).get(0);
- Assert.assertEquals( "name:bytearray,age:bytearray,:long", foreach.getSchema().toString( false ) );
- }
- /**
- * Checks the LOCogroup schema string when a typed relation
- * (name:Chararray, age:Int, gpa:Float) is grouped on (name, age). The cogroup
- * is taken as the first predecessor of the first sink (the store).
- */
- @Test
- public void testQuery86() throws Exception {
- String query = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
- "b = group a by (name, age);" +
- "store b into 'output';";
- LogicalPlan lp = buildPlan( query );
- Operator store = lp.getSinks().get(0);
- LOCogroup cogroup = (LOCogroup) lp.getPredecessors(store).get(0);
- // The expected schema is compared in string form. The Schema objects that
- // used to be built here were never used in any assertion and were removed.
- Assert.assertEquals(
- "group:tuple(name:chararray,age:int),a:bag{:tuple(name:chararray,age:int,gpa:float)}",
- cogroup.getSchema().toString(false));
- }
- /**
- * Builds a plan with an ORDER nested inside FOREACH and checks that the first
- * sort-column plan of the nested LOSort has exactly one sink. The casts double
- * as type checks on the operators found along the way.
- */
- @Test
- public void testQuery87() throws Exception {
- final String script = "a = load 'myfile';" +
- "b = group a by $0;" +
- "c = foreach b {c1 = order $1 by $1; generate flatten(c1); };" +
- "store c into 'output';";
- final LogicalPlan plan = buildPlan( script );
- final Operator storeOp = plan.getSinks().get(0);
- final LOForEach outerForeach = (LOForEach) plan.getPredecessors(storeOp).get(0);
- final LogicalPlan innerPlan = outerForeach.getInnerPlan();
- final LOGenerate generate = (LOGenerate) innerPlan.getSinks().get(0);
- final LOSort innerSort = (LOSort) innerPlan.getPredecessors(generate).get(0);
- final LogicalExpressionPlan firstSortColumn = innerSort.getSortColPlans().get(0);
- final int sinkCount = firstSortColumn.getSinks().size();
- Assert.assertTrue(sinkCount == 1);
- }
- /**
- * Builds a plan that orders a grouped relation by its bag column ($1) and
- * checks that the operator feeding the store is an LOSort.
- */
- @Test
- public void testQuery88() throws Exception {
- String query = "a = load 'myfile';" +
- "b = group a by $0;" +
- "c = order b by $1 ;" +
- "store c into 'output';";
- LogicalPlan lp = buildPlan( query );
- Operator store = lp.getSinks().get(0);
- Operator beforeStore = lp.getPredecessors(store).get(0);
- // Explicit check replaces a cast into a local that was never read.
- Assert.assertTrue( "Expected an LOSort feeding the store", beforeStore instanceof LOSort );
- // TODO: the original check on the sort column's input is disabled (it uses LOProject):
- // LOProject project1 = (LOProject) sort.getSortColPlans().get(0).getSinks().get(0) ;
- // LOCogroup cogroup = (LOCogroup) lp.getPredecessors(sort).get(0) ;
- // assertEquals(project1.getExpression(), cogroup) ;
- }
- @Test
- public void testQuery89() throws Exception {
- String query = "a = load 'myfile';" +
- "b = foreach a generate $0, $100;" +
- "c = load 'myfile' as (i: int);" +
- "d = foreach c generate $0 as zero, i;";
- buildPlan( query );
- }
- @Test
- public void testQueryFail89() throws Exception {
- String q = "c = load 'myfile' as (i: int);";
- try {
- buildPlan(q + "d = foreach c generate $0, $5;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Out of bound access"));
- }
- }
- @Test
- public void testQuery90() throws Exception {
- LogicalPlan lp;
- LOForEach foreach;
- String query = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
- "b = group a by (name, age);";
- //the first and second elements in group, i.e., name and age are renamed as myname and myage
- lp = buildPlan(query +
- "c = foreach b generate flatten(group) as (myname, myage), COUNT(a) as mycount;" +
- "store c into 'output';");
- Operator store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().isEqual(Utils.parseSchema("myname: chararray, age: int, mycount: long")));
- //the schema of group is unchanged
- lp = buildPlan( query +
- "c = foreach b generate flatten(group), COUNT(a) as mycount;" +
- "store c into 'output';" );
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().toString( false ).equals("group::name:chararray,group::age:int,mycount:long"));
- //group is renamed as mygroup
- lp = buildPlan(query +
- "c = foreach b generate group as mygroup, COUNT(a) as mycount;" +
- "store c into 'output';");
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().toString( false ).equals("mygroup:tuple(name:chararray,age:int),mycount:long"));
- //group is renamed as mygroup and the elements are renamed as myname and myage
- lp = buildPlan(query +
- "c = foreach b generate group as mygroup:(myname, myage), COUNT(a) as mycount;" +
- "store c into 'output';");
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().toString( false ).equals("mygroup:tuple(myname:chararray,myage:int),mycount:long"));
- /*
- //setting the schema of flattened bag that has no schema with the user defined schema
- String q = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
- "c = load 'another_file';" +
- "d = cogroup a by $0, c by $0;";
- lp = buildPlan( q + "e = foreach d generate flatten(DIFF(a, c)) as (x, y, z), COUNT(a) as mycount;" + "store e into 'output';" );
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, y: bytearray, z: bytearray, mycount: long")));
- //setting the schema of flattened bag that has no schema with the user defined schema
- q = query +
- "c = load 'another_file';" +
- "d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;";
- lp = buildPlan(q);
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, y: float, z: bytearray, mycount: long")));
- //setting the schema of flattened bag that has no schema with the user defined schema
- q = query +
- "c = load 'another_file';" +
- "d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;";
- lp = buildPlan(q);
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, mycount: long")));
- //setting the schema of flattened bag that has no schema with the user defined schema
- q = query +
- "c = load 'another_file';" +
- "d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;";
- lp = buildPlan(q);
- store = lp.getSinks().get(0);
- foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, mycount: long")));
- */
- }
- @Test
- public void testQueryFail90() throws Exception {
- String query = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
- "b = group a by (name, age);";
- try {
- buildPlan( query + "c = foreach b generate group as mygroup:(myname, myage), COUNT(a) as mycount;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Schema size mismatch"));
- }
- try {
- buildPlan( query + "c = foreach b generate group as mygroup:(myname: int, myage), COUNT(a) as mycount;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Type mismatch"));
- }
- try {
- buildPlan( query + "c = foreach b generate group as mygroup:(myname, myage: chararray), COUNT(a) as mycount;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Type mismatch"));
- }
- try {
- buildPlan( query + "c = foreach b generate group as mygroup:{t: (myname, myage)}, COUNT(a) as mycount;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Incompatable field schema"));
- }
- try {
- buildPlan( query + "c = foreach b generate flatten(group) as (myname, myage, mygpa), COUNT(a) as mycount;");
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Incompatable schema"));
- }
- }
- @Test
- public void testQuery91() throws Exception {
- String query = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
- "b = group a by name;";
- buildPlan(query + "c = foreach b generate SUM(a.age) + SUM(a.gpa);");
- }
- @Test
- public void testQuery92() throws Exception {
- String query = "a = load 'myfile' as (name, age, gpa);" +
- "b = group a by name;" +
- "c = foreach b { "
- + " alias = name#'alias'; "
- + " af = alias#'first'; "
- + " al = alias#'last'; "
- + " generate SUM(a.age) + SUM(a.gpa); "
- + "};";
- buildPlan( query );
- }
- @Test
- public void testQuery93() throws Exception {
- String query = "a = load 'one' as (name, age, gpa);" +
- "b = group a by name;" +
- "c = foreach b generate flatten(a);" +
- "d = foreach c generate name;" +
- // test that we can refer to "name" field and not a::name
- "e = foreach d generate name;";
- buildPlan( query );
- }
- @Test
- public void testQueryFail93() throws Exception {
- String query = "a = load 'one' as (name, age, gpa);" +
- "b = group a by name;"+
- "c = foreach b generate flatten(a);"+
- "d = foreach c generate name;"+
- // test that we can refer to "name" field and a::name
- "e = foreach d generate a::name;";
- buildPlan( query );
- }
- @Test
- public void testQuery94() throws Exception {
- String query = "a = load 'one' as (name, age, gpa);" +
- "b = load 'two' as (name, age, somethingelse);"+
- "c = cogroup a by name, b by name;"+
- "d = foreach c generate flatten(a), flatten(b);"+
- // test that we can refer to "a::name" field and not name
- // test that we can refer to "b::name" field and not name
- "e = foreach d generate a::name, b::name;"+
- // test that we can refer to gpa and somethingelse
- "f = foreach d generate gpa, somethingelse;";
- buildPlan( query );
- }
- @Test
- public void testQueryFail94() throws Exception {
- String query = "a = load 'one' as (name, age, gpa);" +
- "b = load 'two' as (name, age, somethingelse);"+
- "c = cogroup a by name, b by name;"+
- "d = foreach c generate flatten(a), flatten(b);"+
- "e = foreach d generate name;";
- // test that we can refer to "a::name" field and not name
- try {
- buildPlan(query);
- } catch (AssertionFailedError e) {
- Assert.assertTrue(e.getMessage().contains("Invalid field projection. Projected field [name] does not exist"));
- }
- }
- @Test
- public void testQuery95() throws Exception {
- String query = "a = load 'myfile' as (name, age, gpa);" +
- "b = group a by name;" +
- "c = foreach b {d = order a by $1; generate flatten(d), MAX(a.age) as max_age;};" +
- "store c into 'output';";
- LogicalPlan lp = buildPlan(query);
- Operator store = lp.getSinks().get(0);
- LOForEach foreach = (LOForEach)lp.getPredecessors(store).get(0);
- LOCogroup cogroup = (LOCogroup) lp.getPredecessors(foreach).get(0);
- String s = cogroup.getSchema().toString(false);
- Assert.assertTrue( s.equals("group:bytearray,a:bag{:tuple(name:bytearray,age:bytearray,gpa:bytearray)}"));
- s = foreach.getSchema().toString(false);
- Assert.assertTrue( s.equals("d::name:bytearray,d::age:bytearray,d::gpa:bytearray,max_age:double"));
- }
- @Test
- public void testQuery96() throws Exception {
- String query = "a = load 'input' as (name, age, gpa);" +
- "b = filter a by age < 20;" +
- "c = group b by age;" +
- "d = foreach c {"
- + "cf = filter b by gpa < 3.0;"
- + "cd = distinct cf.gpa;"
- + "co = order cd by $0;"
- + "generate group, flatten(co);"
- + "};" +
- "store d into 'output';";
- LogicalPlan lp = buildPlan(query);
- Operator store = lp.getSinks().get(0);
- LOForEach foreach = (LOForEach)lp.getPredecessors(store).get(0);
- LogicalPlan foreachPlans = foreach.getInnerPlan();
- // LogicalPlan flattenPlan = foreachPlans.get(1);
- // LogicalOperator project = flattenPlan.getLeaves().get(0);
- // Assert.assertTrue(project instanceof LOProject);
- // LogicalOperator sort = flattenPlan.getPredecessors(project).get(0);
- // Assert.assertTrue(sort instanceof LOSort);
- // LogicalOperator distinct = flattenPlan.getPredecessors(sort).get(0);
- // Assert.assertTrue(distinct instanceof LODistinct);
- //
- // //testing the presence of the nested foreach
- // LogicalOperator nestedForeach = flattenPlan.getPredecessors(distinct).get(0);
- // Assert.assertTrue(nestedForeach instanceof LOForEach);
- // LogicalPlan nestedForeachPlan = ((LOForEach)nestedForeach).getForEachPlans().get(0);
- // LogicalOperator nestedProject = nestedForeachPlan.getRoots().get(0);
- // Assert.assertTrue(nestedProject instanceof LOProject);
- // Assert.assertTrue(((LOProject)nestedProject).getCol() == 2);
- //
- // //testing the filter inner plan for the absence of the project connected to project
- // LogicalOperator filter = flattenPlan.getPredecessors(nestedForeach).get(0);
- // Assert.assertTrue(filter instanceof LOFilter);
- // LogicalPlan comparisonPlan = ((LOFilter)filter).getComparisonPlan();
- // LOLesserThan lessThan = (LOLesserThan)comparisonPlan.getLeaves().get(0);
- // LOProject filterProject = (LOProject)lessThan.getLhsOperand();
- // Assert.assertTrue(null == comparisonPlan.getPredecessors(filterProject));
- }
- /*
- @Test
- public void testQuery97() throws FrontendException, ParseException {
- LogicalPlan lp;
- LOForEach foreach;
- String query = "a = load 'one' as (name, age, gpa);";
- String store = "store b into 'output';";
- lp = buildPlan(query + "b = foreach a generate 1;" + store);
- foreach = (LOForEach)lp.getPredecessors(op);
- // Assert.assertTrue(Schema.equals(foreach.getSchema(), Util.getSchemaFromSt…
Large files files are truncated, but you can click here to view the full file