/test/e2e/pig/tests/macro.conf
Unknown | 614 lines | 560 code | 54 blank | 0 comment | 0 complexity | d5906a9e5c81aa6fd0a84b227ad62fcb MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
- # Licensed to the Apache Software Foundation (ASF) under one
- # or more contributor license agreements. See the NOTICE file
- # distributed with this work for additional information
- # regarding copyright ownership. The ASF licenses this file
- # to you under the Apache License, Version 2.0 (the
- # "License"); you may not use this file except in compliance
- # with the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- # KIND, either express or implied. See the License for the
- # specific language governing permissions and limitations
- # under the License.
- ###############################################################################
- # Nightly end-to-end tests for Pig macros (definition, scope, schema, import, errors).
- #
- # Author: Alan F. Gates (gates@)
- # $Header:$
- #
- #use Yahoo::Miners::Test::PigSetup;
- #PigSetup::setup();
- #my $me = `whoami`;
- #chomp $me;
- $cfg = {
- 'driver' => 'Pig',
- 'groups' => [
- {
- 'name' => 'Macro_DefinitionAndInline', # each test pairs a macro script ('pig') with its hand-expanded form ('verify_pig_script')
- 'tests' => [
- {
- # Simple macro: no arguments, returns void.
- 'num' => 1,
- 'pig' => q#define simple_macro() returns void {
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';
- }
- simple_macro();#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';#,
- },{
- # One input argument (the load path), returns void.
- 'num' => 2,
- 'pig' => q#define simple_macro(loadfile) returns void {
- a = load '$loadfile' as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';
- }
- simple_macro(':INPATH:/singlefile/studenttab10k');#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';#,
- },{
- # One input argument and one return alias ($b), stored by the caller.
- 'num' => 3,
- 'pig' => q#define simple_macro(loadfile) returns b {
- a = load '$loadfile' as (name, age, gpa);
- $b = foreach a generate age, name;
- }
- x = simple_macro(':INPATH:/singlefile/studenttab10k');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';#,
- },
- {
- # Relation plus double (min_gpa) and int (max_age) arguments used in a filter; one return alias.
- 'num' => 4,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- },
- {
- # Macro with two inputs (in1, in2) and two return aliases (r1, r2).
- # The caller receives both with "x1, x2 = test(...)" and stores each separately.
- # Query based on FilterEq from nightly.conf.
- 'num' => 5,
- 'pig' => q\define test (in1, in2) returns r1, r2 {
- a = load '$in1' using PigStorage() as (name, age, gpa);
- $r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
- b = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
- $r2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
- }
- x1, x2 = test(':INPATH:/singlefile/studenttab10k', ':INPATH:/singlefile/votertab10k');
- store x1 into ':OUTPATH:.1' using PigStorage;
- store x2 into ':OUTPATH:.2' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
- store a1 into ':OUTPATH:.1' using PigStorage;
- b = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
- b2 = filter b by name matches 'f.ed' and (chararray)registration matches 'd.m';
- store b2 into ':OUTPATH:.2' using PigStorage;\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- # Positional field references ($1, $2) used inside a macro next to named macro parameters.
- 'num' => 6,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- },
- {
- # Nested macros: group_it invokes sum_it and forwards its own parameters.
- 'num' => 7,
- 'pig' => q\define sum_it(in, relation, scol) returns d {
- $d = foreach $in generate group, SUM($relation.$scol);
- }
- define group_it(in_relation, group_key, sum_col) returns c {
- b = group $in_relation by $group_key ;
- $c = sum_it(b, $in_relation, $sum_col);
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = group_it(a, 'name', 'age');
- store x into ':OUTPATH:';\,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, SUM(a.age);
- store c into ':OUTPATH:';#,
- },
- {
- # One macro definition invoked twice, once per loaded relation (a and z), mirroring the two loads in the verify script.
- 'num' => 8,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:.1';
- z = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- y = simple_macro(z, '2.0', '50');
- store y into ':OUTPATH:.2';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:.1';
- d = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- e = filter d by gpa >= 2.0 and age <= 50;
- f = foreach e generate age, name;
- store f into ':OUTPATH:.2';#,
- },
- {
- # Macro argument (sep) passed on as the argument of a load function (PigStorage).
- 'num' => 9,
- 'pig' => q#define simple_macro(loadfile, sep) returns b {
- a = load '$loadfile' using PigStorage('$sep') as (name, age, gpa);
- $b = foreach a generate age, name;
- }
- x = simple_macro(':INPATH:/singlefile/studentcolon10k', ':');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
- b = foreach a generate age, name;
- store b into ':OUTPATH:';#,
- },
- {
- # Two return aliases that both derive from the same loaded relation (a split in the data flow).
- 'num' => 10,
- 'pig' => q\define test (in1) returns r1, r2 {
- a = load '$in1' using PigStorage() as (name, age, gpa);
- $r1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
- $r2 = filter a by name > 'fred';
- }
- x1, x2 = test(':INPATH:/singlefile/studenttab10k');
- store x1 into ':OUTPATH:.1' using PigStorage;
- store x2 into ':OUTPATH:.2' using PigStorage;\,
- 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- a1 = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
- store a1 into ':OUTPATH:.1' using PigStorage;
- a2 = filter a by name > 'fred';
- store a2 into ':OUTPATH:.2' using PigStorage;\,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- },
- {
- # Top-level parameter substitution ($loadfile, supplied through 'pig_params') combined with macro parameters.
- 'num' => 11,
- 'pig_params' => ['-p', qq(loadfile='singlefile/studenttab10k')],
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/$loadfile' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- }
- ]
- },
- {
- 'name' => 'Macro_Scope', # alias and parameter name collisions between macros and the enclosing script
- 'tests' => [
- {
- # Alias b is the macro's return alias and is also assigned at the top level of the script.
- 'num' => 1,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns b {
- $b = filter $in_relation by $2 >= $min_gpa and $1 <= $max_age;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- b = foreach x generate age, name;
- store b into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- },
- {
- # Parent macro (group_it) and child macro (sum_it) share parameter names (in_relation, sum_col) and the return alias c.
- 'num' => 2,
- 'pig' => q\define sum_it(in_relation, relation, sum_col) returns c {
- b = foreach $in_relation generate group, SUM($relation.$sum_col);
- $c = order b by $1;
- }
- define group_it(in_relation, group_key, sum_col) returns c {
- b = group $in_relation by $group_key ;
- $c = sum_it(b, $in_relation, $sum_col);
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = group_it(a, name, age);
- store x into ':OUTPATH:';\,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = group a by name;
- c = foreach b generate group, SUM(a.age);
- store c into ':OUTPATH:';#,
- },
- {
- # The script assigns the macro result to alias c, the same name as the macro's return alias.
- 'num' => 3,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- c = simple_macro(a, '3.0', '40');
- store c into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- }
- ]
- },
- {
- 'name' => 'Macro_Schema', # runs "describe" on a macro result and matches the printed schema
- 'tests' => [
- {
- # Macro that only filters, so the output schema equals the input schema.
- 'num' => 1,
- 'pig' => q\define test(in) returns a {
- $a = filter $in by age > 30;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = test(a);
- describe b;\,
- 'rc' => 0,
- 'expected_out_regex'=> "b: \\{name: chararray,age: int,gpa: double\\}" # braces escaped so they are literal in a pattern; presumably the driver matches this as a regex - confirm
- },
- {
- # Macro that projects a single column, so the output schema is narrower than the input.
- 'num' => 2,
- 'pig' => q\define test(in) returns a {
- $a = foreach $in generate name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
- b = test(a);
- describe b;\,
- 'rc' => 0,
- 'expected_out_regex'=> "b: \\{name: chararray\\}" # braces escaped so they are literal in a pattern; presumably the driver matches this as a regex - confirm
- }
- ]
- },
- {
- 'name' => 'Macro_Misc', # macros combined with other script features: comments, register, streaming define
- 'tests' => [
- {
- # A "--" comment inside the macro body.
- 'num' => 1,
- 'pig' => q#define simple_macro(in_relation, min_gpa, max_age) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- -- add a comment
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- },
- {
- # Macro body calls a UDF from a jar that the script registers after the macro definition.
- 'num' => 2,
- 'pig' => q\define test (in) returns b {
- $b = foreach $in generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
- }
- register :FUNCPATH:/testudf.jar;
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = test(a);
- store x into ':OUTPATH:';\,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- register :FUNCPATH:/testudf.jar;
- b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
- store b into ':OUTPATH:';#,
- },
- {
- # A streaming "define CMD" used together with a macro "define" in the same script.
- 'num' => 3,
- 'pig' => q#define CMD `perl -ne 'print $_;'`;
- define test(in) returns B {
- $B = stream $in through CMD as (name, age, gpa);
- }
- A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = test(A);
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- define CMD `perl -ne 'print $_;'`;
- B = stream A through CMD as (name, age, gpa);
- store B into ':OUTPATH:';#,
- 'floatpostprocess' => 1,
- 'delimiter' => ' '
- },
- {
- # JIRA: PIG-2681 - macro that both stores its result ($b into '$out') and returns it.
- 'num' => 4, # NOTE(review): no verify_pig_script / rc / expected_* key on this test - confirm what the driver checks
- 'pig' => q\
- define test (in,out) returns b {
- a = load '$in' as (name, age, gpa);
- $b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
- store $b into '$out';
- }
- register :FUNCPATH:/testudf.jar;
- x = test(':INPATH:/singlefile/studenttab10k',':OUTPATH:');
- \,
- }
- ]
- },
- {
- 'name' => 'Macro_Import', # macro brought in with "import" instead of being defined inline
- 'tests' => [
- {
- 'num' => 1, # simple_macro is not defined in this script; presumably it comes from the imported macro1.pig - confirm
- 'pig' => q#import ':SCRIPTHOMEPATH:/macro1.pig';
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'verify_pig_script' => q#a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- b = filter a by gpa >= 3.0 and age <= 40;
- c = foreach b generate age, name;
- store c into ':OUTPATH:';#,
- }
- ]
- },
- {
- 'name' => 'Macro_Error', # invalid macro definitions or invocations, each paired with an 'expected_err_regex'
- 'tests' => [
- {
- # The same parameter name (min_gpa) appears twice in the macro signature.
- 'num' => 1,
- 'ignore' => 'https://issues.apache.org/jira/browse/PIG-2247',
- 'pig' => q#define simple_macro(in_relation, min_gpa, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- 
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Multiple arguments min_gpa found"
- },
- {
- # The macro body references $max_age, which is not declared in the signature.
- 'num' => 2,
- 'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $max_age;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Macro inline failed for macro 'simple_macro'. Reason: Undefined parameter : max_age"
- },
- {
- # The name c is used both as an input parameter and as the return alias.
- 'num' => 3,
- 'pig' => q#define simple_macro(in_relation, min_gpa, c) returns c {
- b = filter $in_relation by gpa >= $min_gpa and age <= $c;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Multiple values found for c"
- },
- {
- # The keyword "foreach" is used as the macro name; the script below invokes simple_macro, which it never defines.
- 'num' => 4,
- 'pig' => q#define foreach(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "mismatched input 'foreach' expecting IDENTIFIER"
- },
- {
- # The macro is named COUNT, the same name as a UDF.
- 'num' => 5,
- 'ignore' => 'https://issues.apache.org/jira/browse/PIG-2248',
- 'pig' => q#define COUNT(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = COUNT(a, '3.0');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "macro name hides UDF COUNT"
- },
- {
- # simple_macro is defined twice in the same script.
- 'num' => 6,
- 'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- define simple_macro(in, min_age) returns d {
- b = filter $in by age >= $min_age;
- $d = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Duplicated macro name 'simple_macro'"
- },
- {
- # The script invokes nosuch_macro, which is not defined anywhere in it.
- 'num' => 7,
- 'pig' => q#
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = nosuch_macro('a', '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Cannot expand macro 'nosuch_macro'. Reason: Macro must be defined before expansion."
- },
- {
- # The signature declares two return aliases (c, d) but the body only assigns $c.
- 'num' => 8,
- 'pig' => q#define simple(in_relation, min_gpa) returns c,d {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x, y = simple(a, '3.0');
- store x into ':OUTPATH:.1';
- store y into ':OUTPATH:.2';#,
- 'expected_err_regex' => "Invalid macro definition: . Reason: Macro 'simple' missing return alias: d"
- },
- {
- # Syntax error ("fiter") on the second line of the script; the error text is expected to mention "line 2".
- 'num' => 9,
- 'pig' => q#define simple(in_relation, min_gpa) returns c {
- b = fiter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple(a, '3.0');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "line 2"
- },
- {
- # The macro declares two parameters but is invoked with three arguments.
- 'num' => 10,
- 'pig' => q#define simple_macro(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Failed to expand macro 'simple_macro'. Reason: Expected number of parameters: 2 actual number of inputs: 3"
- },
- {
- # The macro returns two aliases (c, d) but the invocation assigns only one (x).
- 'num' => 11,
- 'pig' => q#define simple(in_relation, min_gpa) returns c,d {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- $d = foreach b generate name, age;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple(a, '3.0');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 2 actual number of return values: 1"
- },
- {
- # The macro returns one alias (c) but the invocation assigns two (x, y).
- 'num' => 12,
- 'pig' => q#define simple(in_relation, min_gpa) returns c {
- b = filter $in_relation by gpa >= $min_gpa;
- $c = foreach b generate age, name;
- }
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x, y = simple(a, '3.0');
- store x into ':OUTPATH:.1';
- store y into ':OUTPATH:.2';#,
- 'expected_err_regex' => "Failed to expand macro 'simple'. Reason: Expected number of return aliases: 1 actual number of return values: 2"
- }
- ]
- },
- {
- 'name' => 'Macro_Import_Error', # failing "import" statements, each paired with an 'expected_err_regex'
- 'tests' => [
- {
- # Import of a file that does not exist ('nosuchfile').
- 'num' => 1,
- 'ignore' => 1, # the error message differs between Hadoop versions, so this test is marked ignored
- 'pig' => q#import 'nosuchfile';
- 
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro('a', '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Failed to import file 'nosuchfile'. Reason: Can't find the Specified file nosuchfile"
- },
- {
- # Import of macro_bad1.pig, a script that presumably contains an invalid macro definition (file not visible here).
- 'num' => 2,
- 'pig' => q#import ':SCRIPTHOMEPATH:/macro_bad1.pig';
- a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
- x = simple_macro(a, '3.0', '40');
- store x into ':OUTPATH:';#,
- 'expected_err_regex' => "Invalid macro definition"
- }
- ]
- }
- ],
- },
- ;
- # Import failures (non-existent file, imported script with an error) are covered by the Macro_Import_Error group.