/test/e2e/pig/tests/nightly.conf
Perl | 4623 lines | 4186 code | 112 blank | 325 comment | 102 complexity | 735ef9c41158ef2319bd7bea7c6a5f6f MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/env perl
- ############################################################################
- # Licensed to the Apache Software Foundation (ASF) under one or more
- # contributor license agreements. See the NOTICE file distributed with
- # this work for additional information regarding copyright ownership.
- # The ASF licenses this file to You under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- ###############################################################################
- # Nightly tests for pig.
- #
- #
- #PigSetup::setup();
- #my $me = `whoami`;
- #chomp $me;
- $cfg = {
- 'driver' => 'Pig',
- 'nummachines' => 5,
- 'verify_with_pig' => 1,
- 'verify_pig_version' => 'old',
- 'groups' => [
- {
- 'name' => 'Checkin',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- store a into ':OUTPATH:';\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 50;
- d = filter b by age < 50;
- e = cogroup c by (name, age), d by (name, age) ;
- f = foreach e generate flatten(c), flatten(d);
- g = group f by registration;
- h = foreach g generate group, SUM(f.d::contributions);
- i = order h by $1;
- store i into ':OUTPATH:';\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'sortArgs' => ['-t', ' ', '+1', '-2'],
- }
- ]
- },
- {
- 'name' => 'LoaderDefaultDir',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/dir/studenttab10k' as (name, age, gpa);
- store a into ':OUTPATH:';\,
- },
- ]
- },
- {
- 'name' => 'LoaderPigStorageArg',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
- store a into ':OUTPATH:';\,
- },
- {
- # load with control character
- 'num' => 2,
- 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
- store a into ':OUTPATH:';#,
- },
- {
- # load and store with control character
- 'num' => 3,
- 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
- store a into ':OUTPATH:.intermediate' using PigStorage('\\u0001');
- b = load ':OUTPATH:.intermediate' using PigStorage('\\u0001') as (name, age, gpa);
- store b into ':OUTPATH:'; #,
- 'notmq' => 1,
- },
- ]
- },
- {
- # Results are doctored; if you change this query, you need to copy the
- # expected results into test/nightly/benchmarks.
- 'name' => 'LoaderBinStorage',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Swap(name, age), TOKENIZE((chararray)name), org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
- store b into ':OUTPATH:.intermediate' using BinStorage();
- c = load ':OUTPATH:.intermediate' using BinStorage();
- store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- 'notmq' => 1,
- },
- ]
- },
- {
- # Results are doctored; if you change this query, you need to copy the
- # expected results into test/nightly/benchmarks.
- 'name' => 'LoaderTextLoader',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/textdoc' using TextLoader();
- b = foreach a generate TOKENIZE((chararray)$0);
- store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- },
- ]
- },
- {
- 'name' => 'FilterBoolean',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name == 'fred allen' and age > 50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/dir/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name != 'fred allen' or age < 10;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 3,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by not (age == 50);
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 4,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter <= and >= for chararray, int and double
- {
- 'num' => 6,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
- b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter <= and >= for bytearray, long and float
- {
- 'num' => 7,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
- b = filter a by age >= 40 and age <=50 and gpa >= 2.0f and gpa <= 3.0f and name >= 'bob' and name <= 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter < and > for chararray, int and double
- {
- 'num' => 8,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
- b = filter a by age > 40 and age <50 and gpa > 2.0 and gpa < 3.0 and name > 'bob' and name < 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter < and > for bytearray, long and float
- {
- 'num' => 9,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
- b = filter a by age > 40 and age <50 and gpa > 2.0f and gpa < 3.0f and name > 'bob' and name < 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter <= and >= for explicit cast for chararray, int and double
- {
- 'num' => 10,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (int)age >= 40 and (int)age <=50 and (double)gpa >= 2.0 and (double)gpa <= 3.0 and (chararray)name >= 'bob' and (chararray)name <= 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter <= and >= for explicit cast for bytearray, long and float
- {
- 'num' => 11,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (long)age >= 40 and (long)age <=50 and (float)gpa >= 2.0f and (float)gpa <= 3.0f and name >= 'bob' and name <= 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter < and > for explicit cast for chararray, int and double
- {
- 'num' => 12,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test filter < and > for explicit cast for bytearray, long and float
- {
- 'num' => 13,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (long)age > 40 and (long)age <50 and (float)gpa > 2.0f and (float)gpa < 3.0f and name > 'bob' and name < 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test AND with nulls
- {
- 'num' => 14,
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name == 'fred allen' and age > 50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test OR with nulls
- {
- 'num' => 15,
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name != 'fred allen' or age < 10;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test with nulls filter <= and >= for chararray, int and double
- {
- 'num' => 16,
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
- b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test with nulls filter < and > for explicit cast for chararray, int and double
- {
- 'num' => 17,
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 18,
- 'ignore' => 1, # PIG-2593
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == 'true';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 19,
- 'ignore' => 1, # PIG-2593
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by not instate;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == 'false';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 20,
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate is null;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 21,
- 'ignore' => 1, # TODO Need to file a JIRA-2
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == true;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == 'true';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 22,
- 'ignore' => 1, # TODO Need to file a JIRA-2
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == false;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
- b = filter a by instate == 'false';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 23,
- 'ignore' => 1, # TODO Need to file a JIRA-1
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = filter a by instate;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = filter a by instate == 'true';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 24,
- 'ignore' => 1, # TODO Need to file a JIRA-1
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = filter a by not instate;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = filter a by instate == 'false';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 25,
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = filter a by instate is null;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = filter a by instate is null;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 26,
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = filter a by instate == true;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = filter a by instate == 'true';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 27,
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = filter a by instate == false;
- store b into ':OUTPATH:' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = filter a by instate == 'false';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- ],
- },
- {
- 'name' => 'FilterEq',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name == 'alice johnson' and age == 64 and gpa == 3.99;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name > 'fred allen' and age > 40 and gpa > 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 3,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name >= 'fred allen' and age >= 40 and gpa >= 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 4,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name lt 'fred allen' and age < 40 and gpa < 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name lte 'fred allen' and age <= 40 and gpa <= 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 6,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
- b = filter a by $0 neq 'fred allen' and $1 != '40' and $2 != '2.50';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter == for chararray, int and double
- {
- 'num' => 7,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
- b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter == for bytearray, long and float
- {
- 'num' => 8,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
- b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42f;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter != for chararray, int and double
- {
- 'num' => 9,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
- b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter != for bytearray, long and float
- {
- 'num' => 10,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
- b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50f;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter == for explicit casts to chararray, int and double
- {
- 'num' => 11,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by (chararray)name == 'fred allen' and (int)age == 61 and (double)gpa == 1.42;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter == for explicit casts to bytearray, long and float
- {
- 'num' => 12,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name == 'fred allen' and (long)age == 61 and (float)gpa == 1.42f;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter != for explicit casts to chararray, int and double
- {
- 'num' => 13,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
- b = filter a by (chararray)$0 != 'fred allen' and (int)$1 != 40 and (double)$2 != 2.50;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- # test for filter != for explicit casts to bytearray, long and float
- {
- 'num' => 14,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
- b = filter a by $0 != 'fred allen' and (long)$1 != 40 and (float)$2 != 2.50f;
- store b into ':OUTPATH:' using PigStorage;\,
- },
- ]
- },
- {
- 'name' => 'FilterMatches',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = filter a by name matches '^fred.*';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
- b = filter a by not $0 matches '^fred.*';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- # test for filter on matches for chararray (declared and explicit cast)
- 'num' => 3,
- 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
- b = filter a by name matches '^fred.*' and (chararray)registration matches '^dem.*';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 4,
- 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
- b = filter a by name matches 'f.ed' and (chararray)registration matches 'd.m';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
- b = filter a by name matches 'f[^f]ed.*';
- store b into ':OUTPATH:' using PigStorage;\,
- },
- {
- 'num' => 6,
- 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '.*\\\\wan.*';\nstore b into ':OUTPATH:' using PigStorage;",
- },
- {
- 'num' => 7,
- 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '^e.*\\\\sc.*';\nstore b into ':OUTPATH:' using PigStorage;",
- },
- {
- 'num' => 8,
- 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches 'ethan white';\nstore b into ':OUTPATH:' using PigStorage;",
- },
- {
- 'num' => 9,
- 'pig' => "a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);\nb = filter a by gpa matches '\\\\d\\\\.45';\nstore b into ':OUTPATH:' using PigStorage;",
- },
- ]
- },
- {
- 'name' => 'FilterUdf',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\
- a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = cogroup a by (name, age), b by (name, age);
- d = filter c by not IsEmpty(a);
- e = filter d by not IsEmpty(b);
- f = foreach e generate flatten(a), flatten(b);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 50;
- d = filter b by age < 50;
- e = cogroup c by (name, age), d by (name, age);
- f = filter e by COUNT(c)> 0 AND COUNT(d)>0;
- store f into ':OUTPATH:';\,
- 'rc' => 0
- },
- ]
- },
- # TODO: Groups that don't flatten via Agg functions
- {
- 'name' => 'GroupAggFunc',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, COUNT(a.age);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b generate group, COUNT(a.$1);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 3,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by (name, age);
- c = foreach b generate group.name, group.age, COUNT(a.gpa);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a all;
- c = foreach b generate COUNT(a.$0);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 6,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, SUM(a.age);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 7,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, SUM(a.gpa);
- store c into ':OUTPATH:';\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- 'num' => 8,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, AVG(a.age);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 9,
- 'ignore23' => 'I cannot get it right due to float precision, temporarily disable',
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, AVG(a.gpa);
- store c into ':OUTPATH:';\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- 'num' => 10,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, MIN(a.gpa);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 11,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, MAX(a.gpa);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 12,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by (name, age);
- c = foreach b generate flatten(group), SUM(a.gpa);
- store c into ':OUTPATH:';\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- 'num' => 13,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by (name);
- c = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- d = cogroup b by group, c by name;
- e = foreach d generate flatten(group), SUM(c.gpa), COUNT(c.name);
- store e into ':OUTPATH:';\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- 'num' => 14,
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = group a by (name);
- e = foreach b generate COUNT(a.name);
- store e into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = group a by (name);
- e = foreach b generate COUNT(a.name);
- store e into ':OUTPATH:';\,
- }
- ],
- },
- {
- 'name' => 'MapPartialAgg',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, COUNT(a.age);
- store c into ':OUTPATH:';\,
- 'java_params' => ['-Dpig.exec.mapPartAgg=true']
- },
- {
- #multiquery with group in one sub query
- 'num' => 2,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
- b = filter a by age < 22; store b into ':OUTPATH:.1';
- c = group b by age;
- d = foreach c generate group, SUM(b.gpa);
- store d into ':OUTPATH:.2'; #,
- 'java_params' => ['-Dpig.exec.mapPartAgg=true']
-
- },
- {
- #multi query with two group on diff columns
- 'num' => 3,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
- g1 = group a by name;
- f1 = foreach g1 generate group as name, MAX(a.gpa);
- store f1 into ':OUTPATH:.1';
- g2 = group a by age;
- f2 = foreach g2 generate group as age, AVG(a.gpa);
- store f2 into ':OUTPATH:.2'; #,
- 'java_params' => ['-Dpig.exec.mapPartAgg=true']
-
- },
- {
- #multi query with four groups (by name, age%10, age and gpa), one group key being an expression
- 'num' => 4,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
- g1 = group a by name;
- f1 = foreach g1 generate group as name, MAX(a.gpa);
- store f1 into ':OUTPATH:.1';
- g2 = group a by age%10;
- f2 = foreach g2 generate group as age_mod10, AVG(a.gpa);
- store f2 into ':OUTPATH:.2';
- g3 = group a by age;
- f3 = foreach g3 generate group%10, AVG(a.gpa);
- store f3 into ':OUTPATH:.3';
- g4 = group a by gpa;
- f4 = foreach g4 generate group as gpa, COUNT(a);
- store f4 into ':OUTPATH:.4';
-
- #,
- 'java_params' => ['-Dpig.exec.mapPartAgg=true']
-
- },
- {
- #aggregation gets more than one tuple for every tuple from load func
-
- 'num' => 5,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
- b = foreach a generate name, age, gpa, flatten(TOBAG(age,age)) as x;
- c = group b by age;
- d = foreach c generate group, AVG(b.gpa);
- store d into ':OUTPATH:'; #,
- 'java_params' => ['-Dpig.exec.mapPartAgg=true']
-
- },
-
- ],
- },
- {
- 'name' => 'EvalFunc',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by name lt 'b';
- c = foreach b generate ARITY(name, age, gpa);
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
- b = filter a by name lt 'b';
- c = foreach b generate TOKENIZE(name);
- d = foreach c generate flatten($0);
- store d into ':OUTPATH:';\,
- },
- {
- 'num' => 3,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by name lt 'b';
- c = foreach b generate org.apache.pig.test.udf.evalfunc.Swap(name, age);
- store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- },
- {
- 'num' => 4,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by name lt 'b';
- c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
- store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- },
- {
- 'num' => 5,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
- store b into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
- store b into ':OUTPATH:';\,
- }
- ]
- },
- # TODO DIFF
- # TODO User defined grouping function
- {
- 'name' => 'CoGroupFlatten',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = cogroup c by name, d by name;
- f = foreach e generate flatten (c), flatten(d);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 2,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by $1 < 20;
- d = filter b by $1 < 20;
- e = cogroup c by $0, d by $0;
- f = foreach e generate flatten (c), flatten(d);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 3,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = cogroup c by (name, age), d by (name, age);
- f = foreach e generate flatten (c), flatten(d);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 4,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- d = filter b by age < 20;
- e = cogroup a by (name, age) inner, d by (name, age);
- f = foreach e generate flatten (a), flatten(d);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- e = cogroup c by (name, age), b by (name, age) inner;
- f = foreach e generate flatten (c), flatten(b);
- store f into ':OUTPATH:';\,
- },
- {
- 'num' => 6,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- e = cogroup a by (name, age) inner, b by (name, age) inner;
- f = foreach e generate flatten (a), flatten(b);
- store f into ':OUTPATH:';\,
- },
- {
- # Test cogrouping data loaded from two separate loaders. We don't have any data that can join with studenttab that isn't also loaded with PigStorage, so the
- # first step is an intermediate load and store using BinStorage.
- 'num' => 7,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- store a into ':OUTPATH:.intermediate' using BinStorage();
- b = load ':OUTPATH:.intermediate' using BinStorage() as (name, age, gpa);
- c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- e = cogroup b by (name, age) inner, c by (name, age) inner;
- f = foreach e generate flatten (b), flatten(c);
- store f into ':OUTPATH:';\,
- 'notmq' => 1,
- },
-
- ]
- },
- {
- 'name' => 'CoGroup',
- 'tests' => [
- {
- 'num' => 1,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = cogroup a by name, b by name;
- d = foreach c generate flatten(group), COUNT(a) + COUNT(b);
- store d into ':OUTPATH:';\,
- },
- ]
- },
- {
- 'name' => 'Join', # inner joins, fragment-replicate joins and outer joins
- 'tests' => [
- {
- 'num' => 1, # inner join on a named key (name) after filtering both inputs to age < 20
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by name, d by name;
- store e into ':OUTPATH:';\,
- },
- {
- 'num' => 2, # same join as test 1 but with the keys given positionally ($0)
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by $0, d by $0;
- store e into ':OUTPATH:';\,
- },
- {
- 'num' => 3, # join on a composite key (name, age)
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by (name, age), d by (name, age);
- store e into ':OUTPATH:';\,
- },
- # self join with implicit split
- # JIRA PIG-429
- {
- 'num' => 4, # a feeds the join both directly and through the filter b
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = filter a by $1 > 25;
- c = join a by $0, b by $0;
- store c into ':OUTPATH:';\,
- },
- # join with one input having schema and another without
- # JIRA PIG-428
- {
- 'num' => 5, # a declares a typed schema; c is derived from a load that declares none
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray,age:int, gpa:double);
- another = load ':INPATH:/singlefile/studenttab10k';
- c = foreach another generate $0, $1+ 10, $2 + 10.0;
- d = join a by $0, c by $0;
- store d into ':OUTPATH:';\,
- },
- # self join using fragment replicate join
- # no types
- {
- 'num' => 6, # verify_pig_script is the same script minus the using 'repl' clause
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- c = join a by name, b by name using 'repl';
- store c into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- c = join a by name, b by name ;
- store c into ':OUTPATH:';\,
- },
- # self join using fragment replicate join
- # with types and no cast for join key
- {
- 'num' => 7, # both sides declare name as chararray; verify_pig_script is the same script minus the using 'repl' clause
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- c = join a by name, b by name using 'repl';
- store c into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- c = join a by name, b by name ;
- store c into ':OUTPATH:';\,
- },
- # self join using fragment replicate join
- # with types and cast for join key
- {
- 'num' => 8, # a.gpa is declared double while b.gpa is untyped (presumably the source of the cast); verify_pig_script drops the using 'repl' clause
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
- c = join a by gpa, b by gpa using 'repl';
- store c into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
- c = join a by gpa, b by gpa ;
- store c into ':OUTPATH:';\,
- },
- # left outer join
- {
- 'num' => 9, # reads studentnulltab10k and voternulltab10k; NOTE(review): presumably these inputs contain null keys - confirm
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
- c = join a by name left outer, b by name;
- store c into ':OUTPATH:';\,
- },
- # right outer join
- {
- 'num' => 10, # same inputs and schemas as test 9, with right outer instead of left outer
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
- c = join a by name right outer, b by name;
- store c into ':OUTPATH:';\,
- },
- # full outer join
- {
- 'num' => 11, # same inputs and schemas as test 9, with full outer instead of left outer
- 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
- b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
- c = join a by name full outer, b by name;
- store c into ':OUTPATH:';\,
- },
- # see PIG-1209: join package now uses internalcachedBag, so every tuple on the reduce side in this test will be spilled to disk (pig.cachedbag.memusage is set to 0 below).
- {
- 'num' => 12, # the script itself is the same as test 1; only java_params differs
- 'java_params' => ['-Dpig.cachedbag.memusage=0'],
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
- b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by name, d by name;
- store e into ':OUTPATH:';\,
- },
- {
- 'num' => 13, # self join on a boolean key (instate) with parallel 5; verify_pig_script declares instate as chararray instead
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by instate, d by instate parallel 5;
- store e into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- c = filter a by age < 20;
- d = filter b by age < 20;
- e = join c by instate, d by instate parallel 5;
- store e into ':OUTPATH:';\,
- }
- ]
- },
- {
- 'name' => 'Foreach', # foreach/generate tests: projection, nested blocks, flatten and UDF calls
- 'tests' => [
- {
- 'num' => 1, # generate * over a load with a declared schema
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate *;
- store b into ':OUTPATH:';\,
- },
- {
- 'num' => 2, # generate * over a load without a schema
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = foreach a generate *;
- store b into ':OUTPATH:';\,
- },
- {
- 'num' => 3, # project columns by name
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name, age;
- store b into ':OUTPATH:';\,
- },
- {
- 'num' => 4, # project columns by position
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = foreach a generate $0, $2;
- store b into ':OUTPATH:';\,
- },
- {
- # test filter, projection, sort, duplicate elimination inside a nested foreach (filter: gpa < 3.0)
- 'num' => 5,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by age < 20;
- c = group b by age;
- d = foreach c {
- cf = filter b by gpa < 3.0;
- cp = cf.gpa;
- cd = distinct cp;
- co = order cd by $0;
- generate group, flatten(co);
- }
- store d into ':OUTPATH:';\,
- },
- {
- # test flatten for map and scalar
- 'num' => 6,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate flatten(name) as n, flatten(org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, gpa)) as m;
- store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- },
- {
- # test flatten for UDF that returns bag with multiple tuples with multiple columns
- 'num' => 7,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name, flatten(org.apache.pig.test.udf.evalfunc.CreateTupleBag(age, gpa)) as foo;
- store b into ':OUTPATH:';\,
- },
- {
- 'num' => 8, # arithmetic over two aggregates (MAX + MIN of age) per name group
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age: int, gpa);
- c = group a by name;
- d = foreach c generate flatten(group), MAX(a.age) + MIN(a.age);
- store d into ':OUTPATH:';\,
- },
- {
- # test filter, projection, sort, duplicate elimination inside a nested foreach (filter: range check joined with and)
- 'num' => 9,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by age < 20;
- c = group b by age;
- d = foreach c {
- cf = filter b by gpa >= 3.0 and gpa <= 3.5;
- cp = cf.gpa;
- cd = distinct cp;
- co = order cd by $0;
- generate group, flatten(co);
- }
- store d into ':OUTPATH:';\,
- },
- {
- # test filter, projection, sort, duplicate elimination inside a nested foreach (filter: or/and combination plus a string comparison)
- 'num' => 10,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by age < 20;
- c = group b by age;
- d = foreach c {
- cf = filter b by (gpa == 4.0 or gpa != 2.0) and name > 'a';
- cp = cf.gpa;
- cd = distinct cp;
- co = order cd by $0;
- generate group, flatten(co);
- }
- store d into ':OUTPATH:';\,
- },
- {
- # test aliased expressions inside a nested foreach block; exp2 is defined in terms of exp1
- 'num' => 11,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by age < 20;
- c = foreach b {
- exp1 = age + gpa;
- exp2 = exp1 + age;
- generate exp1, exp2;
- }
- store c into ':OUTPATH:';\,
- },
- {
- # test a udf with no args
- 'num' => 12,
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
- store b into ':OUTPATH:';\,
- },
- {
- 'num' => 13, # generate * over a schema with a boolean column; verify_pig_script declares instate as chararray instead
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = foreach a generate *;
- store b into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
- b = foreach a generate *;
- store b into ':OUTPATH:';\,
- }
- ]
- },
- {
- 'name' => 'Order', # order-by tests; NOTE(review): sortArgs look like sort(1) options (-t separator, obsolete +POS1 -POS2 key ranges) that the harness presumably uses to check ordering - confirm
- 'tests' => [
- {
- 'num' => 1, # order a single projected column, referenced by name
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name;
- c = order b by name;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-1'],
- },
- {
- 'num' => 2, # order a single projected column, referenced by position
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = foreach a generate $1;
- c = order b by $0;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-1'],
- },
- {
- 'num' => 3, # order by gpa, which is declared without a type
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate gpa;
- c = order b by gpa;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-1'],
- },
- {
- 'num' => 4, # order by * (all columns)
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = order a by *;
- store b into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' '],
- },
- {
- 'num' => 5, # order by two named columns
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name, age;
- c = order b by name, age;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-2'],
- },
- {
- 'num' => 6, # order the full tuple by $0
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $0;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-1'],
- },
- {
- 'num' => 7, # order the full tuple by $1
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $1;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+1', '-2'],
- },
- {
- 'num' => 8, # order by $0, then $1
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $0, $1;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-2'],
- },
- {
- 'num' => 9, # order by $1, then $0
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $1, $0;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+1', '-2', '+0', '-1'],
- },
- {
- 'num' => 10, # order by * using the OrdDesc UDF; carries an ignore entry with the reason below
- 'ignore' => 'order by UDF is not supported',
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by * using org.apache.pig.test.udf.orderby.OrdDesc;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '-r'],
- },
- {
- 'num' => 11, # order by $0 using the OrdDesc UDF; carries an ignore entry with the reason below
- 'ignore' => 'order by UDF is not supported',
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $0 using org.apache.pig.test.udf.orderby.OrdDesc;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '-r', '+0', '-1'],
- },
- {
- 'num' => 12, # order by $0, $1 using the OrdDesc UDF; carries an ignore entry with the reason below
- 'ignore' => 'order by UDF is not supported',
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k';
- c = order a by $0, $1 using org.apache.pig.test.udf.orderby.OrdDesc;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '-r', '+0', '-2'],
- },
- # ALERT All these tests with inner order bys aren't testing the inner
- # ordering. We need to develop a sorting tool to do that.
- {
- 'num' => 13, # nested order of the grouped bag by $1; no sortArgs entry
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b {c1 = order $1 by $1; generate flatten(c1); };
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 14, # nested order of the grouped bag by *
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b {c1 = order $1 by *; generate flatten(c1); };
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 15, # nested order by * using the OrdDesc UDF
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 16, # nested order by $1 using the OrdDesc UDF
- 'pig' => q\register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b {c1 = order $1 by $1 using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1);};
- store c into ':OUTPATH:';\,
- },
- {
- 'num' => 17, # nested order combined with an aggregate (MAX) in the same generate
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = group a by $0;
- c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };
- store c into ':OUTPATH:';\,
- },
- {
- # test to make sure the weighted range partitioning
- # works correctly when a sort key value repeats across
- # reduce partitions
- 'num' => 18,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
- b = order a by $1 parallel 100;
- store b into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+1', '-2'],
- },
- {
- 'num' => 19, # order by a boolean column (instate); verify_pig_script declares instate as chararray instead
- 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
- b = foreach a generate instate;
- c = order b by instate;
- store c into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
- b = foreach a generate instate;
- c = order b by instate;
- store c into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' ', '+0', '-1'],
- },
- ]
- },
- {
- 'name' => 'Distinct',
- 'tests' => [
- {
- 'num' => 1, # distinct over the single projected name column
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate name;
- c = distinct b;
- store c into ':OUTPATH:';\,
- },
- {
- 'nu…
Large files files are truncated, but you can click here to view the full file