PageRenderTime 177ms CodeModel.GetById 6ms app.highlight 154ms RepoModel.GetById 2ms app.codeStats 1ms

/test/e2e/pig/tests/nightly.conf

https://github.com/ftian/pig
Perl | 4983 lines | 4534 code | 111 blank | 338 comment | 102 complexity | baec46f614bd4cfd568cf24e6026f5d7 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1#!/usr/bin/env perl
   2
   3############################################################################           
   4#  Licensed to the Apache Software Foundation (ASF) under one or more                  
   5#  contributor license agreements.  See the NOTICE file distributed with               
   6#  this work for additional information regarding copyright ownership.                 
   7#  The ASF licenses this file to You under the Apache License, Version 2.0             
   8#  (the "License"); you may not use this file except in compliance with                
   9#  the License.  You may obtain a copy of the License at                               
  10#                                                                                      
  11#      http://www.apache.org/licenses/LICENSE-2.0                                      
  12#                                                                                      
  13#  Unless required by applicable law or agreed to in writing, software                 
  14#  distributed under the License is distributed on an "AS IS" BASIS,                   
  15#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
  16#  See the License for the specific language governing permissions and                 
  17#  limitations under the License.                                                      
  18                                                                                       
  19###############################################################################
  20# Nightly tests for pig.
  21#
  22#
  23
  24#PigSetup::setup();
  25
  26#my $me = `whoami`;
  27#chomp $me;
  28
  29$cfg = {
  30	'driver' => 'Pig',
  31	'nummachines' => 5,
  32	'verify_with_pig' => 1,
  33	'verify_pig_version' => 'old',
  34
  35	'groups' => [
  36		{
  37		'name' => 'Checkin',
  38		'tests' => [
  39			{
  40			'num' => 1,
  41			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  42store a into ':OUTPATH:';\,
  43			},
  44			{
  45			'num' => 2,
  46			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  47b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  48c = filter a by age < 50;
  49d = filter b by age < 50;
  50e = cogroup c by (name, age), d by (name, age) ;
  51f = foreach e generate flatten(c), flatten(d);
  52g = group f by registration;
  53h = foreach g generate group, SUM(f.d::contributions);
  54i = order h by $1;
  55store i into ':OUTPATH:';\,
  56                        'floatpostprocess' => 1,
  57                        'delimiter' => '	',
  58			'sortArgs' => ['-t', '	', '-k', '2,2'],
  59			}
  60		]
  61		},
  62		{
  63		'name' => 'LoaderDefaultDir',
  64		'tests' => [
  65			{
  66			'num' => 1,
  67			'pig' => q\a = load ':INPATH:/dir/studenttab10k' as (name, age, gpa);
  68store a into ':OUTPATH:';\,
  69			},
  70		]
  71		},
  72		{
  73		'name' => 'LoaderPigStorageArg',
  74		'tests' => [
  75			{
  76			'num' => 1,
  77			'pig' => q\a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
  78store a into ':OUTPATH:';\,
  79			},
  80			{
  81			# load with control character
  82			'num' => 2,
  83			'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  84store a into ':OUTPATH:';#,
  85			},
  86			{
  87	        	# load and store with control character
  88			'num' => 3,
  89			'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  90store a into ':OUTPATH:.intermediate' using PigStorage('\\u0001');
  91b = load ':OUTPATH:.intermediate' using PigStorage('\\u0001') as (name, age, gpa); 
  92store b into ':OUTPATH:'; #,
  93            'notmq' => 1,
  94			},
  95		]
  96		},
  97		{
  98		# Results are doctored; if you change this query, you need to copy the
  99		# expected results into test/nightly/benchmarks.
 100		'name' => 'LoaderBinStorage',
 101		'tests' => [
 102			{
 103			'num' => 1,
 104			'pig' => q\register :FUNCPATH:/testudf.jar;
 105a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 106b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Swap(name, age), TOKENIZE((chararray)name), org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age); 
 107store b into ':OUTPATH:.intermediate' using BinStorage();
 108c = load ':OUTPATH:.intermediate' using BinStorage();
 109store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
 110            'notmq' => 1,
 111			},
 112		]
 113		},
 114		{
 115		# Results are doctored; if you change this query, you need to copy the
 116		# expected results into test/nightly/benchmarks.
 117		'name' => 'LoaderTextLoader',
 118		'tests' => [
 119			{
 120			'num' => 1,
 121			'pig' => q\register :FUNCPATH:/testudf.jar;
 122a = load ':INPATH:/singlefile/textdoc' using TextLoader();
 123b = foreach a generate TOKENIZE((chararray)$0);
 124store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
 125			},
 126		]
 127		},
 128		{
 129		'name' => 'FilterBoolean',
 130		'tests' => [
 131			{
 132			'num' => 1,
 133			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 134b = filter a by name == 'fred allen' and age > 50;
 135store b into ':OUTPATH:' using PigStorage;\,
 136			},
 137			{
 138			'num' => 2,
 139			'pig' => q\a = load ':INPATH:/dir/studenttab10k' using PigStorage() as (name, age, gpa);
 140b = filter a by name != 'fred allen' or age < 10;
 141store b into ':OUTPATH:' using PigStorage;\,
 142			},
 143			{
 144			'num' => 3,
 145			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 146b = filter a by not (age == 50);
 147store b into ':OUTPATH:' using PigStorage;\,
 148			},
 149			{
 150			'num' => 4,
 151			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 152b = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
 153store b into ':OUTPATH:' using PigStorage;\,
 154			},
 155			{
 156			'num' => 5,
 157			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 158b = filter a by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';
 159store b into ':OUTPATH:' using PigStorage;\,
 160			},
 161            # test filter <= and >= for chararray, int and double
 162			{
 163			'num' => 6,
 164			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
 165b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and  name >= 'bob' and name <= 'fred';
 166store b into ':OUTPATH:' using PigStorage;\,
 167			},
 168            # test filter <= and >= for bytearray, long and float
 169			{
 170			'num' => 7,
 171			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
 172b = filter a by age >= 40 and age <=50 and gpa >= 2.0f and gpa <= 3.0f and  name >= 'bob' and name <= 'fred';
 173store b into ':OUTPATH:' using PigStorage;\,
 174			},
 175            # test filter < and > for chararray, int and double
 176			{
 177			'num' => 8,
 178			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
 179b = filter a by age > 40 and age <50 and gpa > 2.0 and gpa < 3.0 and  name > 'bob' and name < 'fred';
 180store b into ':OUTPATH:' using PigStorage;\,
 181			},
 182            # test filter < and > for bytearray, long and float
 183			{
 184			'num' => 9,
 185			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
 186b = filter a by age > 40 and age <50 and gpa > 2.0f and gpa < 3.0f and  name > 'bob' and name < 'fred';
 187store b into ':OUTPATH:' using PigStorage;\,
 188			},
 189            # test filter <= and >= for explicit cast for chararray, int and double
 190			{
 191			'num' => 10,
 192			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 193b = filter a by (int)age >= 40 and (int)age <=50 and (double)gpa >= 2.0 and (double)gpa <= 3.0 and  (chararray)name >= 'bob' and (chararray)name <= 'fred';
 194store b into ':OUTPATH:' using PigStorage;\,
 195			},
 196            # test filter <= and >= for explicit cast for bytearray, long and float
 197			{
 198			'num' => 11,
 199			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 200b = filter a by (long)age >= 40 and (long)age <=50 and (float)gpa >= 2.0f and (float)gpa <= 3.0f and  name >= 'bob' and name <= 'fred';
 201store b into ':OUTPATH:' using PigStorage;\,
 202			},
 203            # test filter < and > for explicit cast for chararray, int and double
 204			{
 205			'num' => 12,
 206			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 207b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and  (chararray)name > 'bob' and (chararray)name < 'fred';
 208store b into ':OUTPATH:' using PigStorage;\,
 209			},
 210            # test filter < and > for explicit cast for bytearray, long and float
 211			{
 212			'num' => 13,
 213			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 214b = filter a by (long)age > 40 and (long)age <50 and (float)gpa > 2.0f and (float)gpa < 3.0f and  name > 'bob' and name < 'fred';
 215store b into ':OUTPATH:' using PigStorage;\,
 216			},
 217            # test AND with nulls
 218			{
 219			'num' => 14,
 220			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
 221b = filter a by name == 'fred allen' and age > 50;
 222store b into ':OUTPATH:' using PigStorage;\,
 223			},
 224            # test OR with nulls
 225			{
 226			'num' => 15,
 227			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
 228b = filter a by name != 'fred allen' or age < 10;
 229store b into ':OUTPATH:' using PigStorage;\,
 230			},
 231            # test with nulls filter <= and >= for chararray, int and double
 232			{
 233			'num' => 16,
 234			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
 235b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and  name >= 'bob' and name <= 'fred';
 236store b into ':OUTPATH:' using PigStorage;\,
 237			},
 238            # test with nulls filter < and > for explicit cast for chararray, int and double
 239			{
 240			'num' => 17,
 241			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
 242b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and  (chararray)name > 'bob' and (chararray)name < 'fred';
 243store b into ':OUTPATH:' using PigStorage;\,
 244			},
 245                        {
 246                        'num' => 18,
 247                        'ignore' => 1, # PIG-2593 this case is not supported as instate need to be declared as boolean
 248                        'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 249b = filter a by instate;
 250store b into ':OUTPATH:' using PigStorage;\,
 251                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 252b = filter a by instate == 'true';
 253store b into ':OUTPATH:' using PigStorage;\,
 254                        },
 255                        {
 256                        'num' => 19,
 257                        'ignore' => 1, # PIG-2593 this case is not supported as instate need to be declared as boolean
 258			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 259b = filter a by not instate;
 260store b into ':OUTPATH:' using PigStorage;\,
 261                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 262b = filter a by instate == 'false';
 263store b into ':OUTPATH:' using PigStorage;\,
 264                        },
 265                        {
 266                        'num' => 20,
 267			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 268b = filter a by instate is null;
 269store b into ':OUTPATH:' using PigStorage;\,
 270                        },
 271                        {
 272                        'num' => 21,
 273			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 274b = filter a by instate == true;
 275store b into ':OUTPATH:' using PigStorage;\,
 276                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 277b = filter a by instate == 'true';
 278store b into ':OUTPATH:' using PigStorage;\,
 279                        },
 280                        {
 281                        'num' => 22,
 282			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 283b = filter a by instate == false;
 284store b into ':OUTPATH:' using PigStorage;\,
 285                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
 286b = filter a by instate == 'false';
 287store b into ':OUTPATH:' using PigStorage;\,
 288                        },
 289                        {
 290                        'num' => 23,
 291			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 292b = filter a by instate;
 293store b into ':OUTPATH:' using PigStorage;\,
 294                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 295b = filter a by instate == 'true';
 296store b into ':OUTPATH:' using PigStorage;\,
 297                        },
 298                        {
 299                        'num' => 24,
 300			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 301b = filter a by not instate;
 302store b into ':OUTPATH:' using PigStorage;\,
 303                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 304b = filter a by instate == 'false';
 305store b into ':OUTPATH:' using PigStorage;\,
 306                        },
 307                        {
 308                        'num' => 25,
 309			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 310b = filter a by instate is null;
 311store b into ':OUTPATH:' using PigStorage;\,
 312                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 313b = filter a by instate is null;
 314store b into ':OUTPATH:' using PigStorage;\,
 315                        },
 316                        {
 317                        'num' => 26,
 318			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 319b = filter a by instate == true;
 320store b into ':OUTPATH:' using PigStorage;\,
 321                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 322b = filter a by instate == 'true';
 323store b into ':OUTPATH:' using PigStorage;\,
 324                        },
 325                        {
 326                        'num' => 27,
 327			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 328b = filter a by instate == false;
 329store b into ':OUTPATH:' using PigStorage;\,
 330                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 331b = filter a by instate == 'false';
 332store b into ':OUTPATH:' using PigStorage;\,
 333                        },
 334		    ],
 335		},
 336		{
 337		'name' => 'FilterEq',
 338		'tests' => [
 339			{
 340			'num' => 1,
 341			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 342b = filter a by name == 'alice johnson' and age == 64 and gpa == 3.99;
 343store b into ':OUTPATH:' using PigStorage;\,
 344			},
 345			{
 346			'num' => 2,
 347			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 348b = filter a by name > 'fred allen' and age > 40 and gpa > 2.50;
 349store b into ':OUTPATH:' using PigStorage;\,
 350			},
 351			{
 352			'num' => 3,
 353			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 354b = filter a by name >= 'fred allen' and age >= 40 and gpa >= 2.50;
 355store b into ':OUTPATH:' using PigStorage;\,
 356			},
 357			{
 358			'num' => 4,
 359			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 360b = filter a by name lt 'fred allen' and age < 40 and gpa < 2.50;
 361store b into ':OUTPATH:' using PigStorage;\,
 362			},
 363			{
 364			'num' => 5,
 365			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 366b = filter a by name lte 'fred allen' and age <= 40 and gpa <= 2.50;
 367store b into ':OUTPATH:' using PigStorage;\,
 368			},
 369			{
 370			'num' => 6,
 371			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
 372b = filter a by $0 neq 'fred allen' and $1 != '40' and $2 != '2.50';
 373store b into ':OUTPATH:' using PigStorage;\,
 374			},
 375            # test for filter == for chararray, int and double
 376			{
 377			'num' => 7,
 378			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
 379b = filter a by  name == 'fred allen' and age == 61 and gpa == 1.42;
 380store b into ':OUTPATH:' using PigStorage;\,
 381			},
 382            # test for filter == for bytearray, long and float
 383			{
 384			'num' => 8,
 385			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
 386b = filter a by  name == 'fred allen' and age == 61 and gpa == 1.42f;
 387store b into ':OUTPATH:' using PigStorage;\,
 388			},
 389            # test for filter != for chararray, int and double
 390			{
 391			'num' => 9,
 392			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
 393b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50;
 394store b into ':OUTPATH:' using PigStorage;\,
 395			},
 396            # test for filter != for bytearray, long and float
 397			{
 398			'num' => 10,
 399			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
 400b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50f;
 401store b into ':OUTPATH:' using PigStorage;\,
 402			},
 403            # test for filter == for explicit casts to chararray, int and double
 404			{
 405			'num' => 11,
 406			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 407b = filter a by  (chararray)name == 'fred allen' and (int)age == 61 and (double)gpa == 1.42;
 408store b into ':OUTPATH:' using PigStorage;\,
 409			},
 410            # test for filter == for explicit casts to bytearray, long and float
 411			{
 412			'num' => 12,
 413			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 414b = filter a by  name == 'fred allen' and (long)age == 61 and (float)gpa == 1.42f;
 415store b into ':OUTPATH:' using PigStorage;\,
 416			},
 417            # test for filter != for explicit casts to chararray, int and double
 418			{
 419			'num' => 13,
 420			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
 421b = filter a by (chararray)$0 != 'fred allen' and (int)$1 != 40 and (double)$2 != 2.50;
 422store b into ':OUTPATH:' using PigStorage;\,
 423			},
 424            # test for filter != for explicit casts to bytearray, long and float
 425			{
 426			'num' => 14,
 427			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
 428b = filter a by $0 != 'fred allen' and (long)$1 != 40 and (float)$2 != 2.50f;
 429store b into ':OUTPATH:' using PigStorage;\,
 430			},
 431		]
 432		},
 433		{
 434		'name' => 'FilterMatches',
 435		'tests' => [
 436			{
 437			'num' => 1,
 438			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 439b = filter a by name matches '^fred.*';
 440store b into ':OUTPATH:' using PigStorage;\,
 441			},
 442			{
 443			'num' => 2,
 444			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
 445b = filter a by not $0 matches '^fred.*';
 446store b into ':OUTPATH:' using PigStorage;\,
 447			},
 448			{
 449            # test for filter on matches for chararray (declared and explicit cast)
 450			'num' => 3,
 451			'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
 452b = filter a by name matches '^fred.*' and (chararray)registration matches '^dem.*';
 453store b into ':OUTPATH:' using PigStorage;\,
 454			},
 455			{
 456			'num' => 4,
 457			'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
 458b = filter a by name matches 'f.ed' and (chararray)registration matches 'd.m';
 459store b into ':OUTPATH:' using PigStorage;\,
 460			},
 461			{
 462			'num' => 5,
 463			'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
 464b = filter a by name matches 'f[^f]ed.*';
 465store b into ':OUTPATH:' using PigStorage;\,
 466			},
 467			{
 468			'num' => 6,
 469			'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '.*\\\\wan.*';\nstore b into ':OUTPATH:' using PigStorage;",
 470			},
 471			{
 472			'num' => 7,
 473			'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '^e.*\\\\sc.*';\nstore b into ':OUTPATH:' using PigStorage;",
 474			},
 475			{
 476			'num' => 8,
 477			'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches 'ethan white';\nstore b into ':OUTPATH:' using PigStorage;",
 478			},
 479			{
 480			'num' => 9,
 481			'pig' => "a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);\nb = filter a by gpa matches '\\\\d\\\\.45';\nstore b into ':OUTPATH:' using PigStorage;",
 482			},
 483		]
 484		},
 485		{
 486		'name' => 'FilterUdf',
 487		'tests' => [
 488			{
 489			'num' => 1,
 490			'pig' => q\
 491a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 492b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 493c = cogroup a by (name, age), b by (name, age);
 494d = filter c by not IsEmpty(a);
 495e = filter d by not IsEmpty(b);
 496f = foreach e generate flatten(a), flatten(b);
 497store f into ':OUTPATH:';\,
 498			},
 499			{
 500			    'num' => 2,
 501			    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 502				           b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 503				           c = filter a by age < 50;
 504				           d = filter b by age < 50;
 505				           e = cogroup c by (name, age), d by (name, age);
 506				           f = filter e by COUNT(c)> 0 AND COUNT(d)>0;
 507				           store f into ':OUTPATH:';\,
 508			    'rc' => 0
 509			},
 510		]
 511		},
 512		# TODO: Groups that don't flatten via Agg functions
 513		{
 514		'name' => 'GroupAggFunc',
 515		'tests' => [
 516			{
 517			'num' => 1,
 518			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 519b = group a by name;
 520c = foreach b generate group, COUNT(a.age);
 521store c into ':OUTPATH:';\,
 522			},
 523			{
 524			'num' => 2,
 525			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
 526b = group a by $0;
 527c = foreach b generate group, COUNT(a.$1);
 528store c into ':OUTPATH:';\,
 529			},
 530			{
 531			'num' => 3,
 532			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 533b = group a by (name, age);
 534c = foreach b generate group.name, group.age, COUNT(a.gpa);
 535store c into ':OUTPATH:';\,
 536			},
 537			{
 538			'num' => 5,
 539			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 540b = group a all;
 541c = foreach b generate COUNT(a.$0);
 542store c into ':OUTPATH:';\,
 543			},
 544			{
 545			'num' => 6,
 546			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 547b = group a by name;
 548c = foreach b generate group, SUM(a.age);
 549store c into ':OUTPATH:';\,
 550			},
 551			{
 552			'num' => 7,
 553			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 554b = group a by name;
 555c = foreach b generate group, SUM(a.gpa);
 556store c into ':OUTPATH:';\,
 557                        'floatpostprocess' => 1,
 558                        'delimiter' => '	',
 559			},
 560			{
 561			'num' => 8,
 562			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 563b = group a by name;
 564c = foreach b generate group, AVG(a.age);
 565store c into ':OUTPATH:';\,
 566			},
 567			{
 568			'num' => 9,
 569                        'ignore23' => 'I cannot get it right due to float precision, temporarily disable',
 570			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 571b = group a by name;
 572c = foreach b generate group, AVG(a.gpa);
 573store c into ':OUTPATH:';\,
 574                        'floatpostprocess' => 1,
 575                        'delimiter' => '	',
 576			},
 577			{
 578			'num' => 10,
 579			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 580b = group a by name;
 581c = foreach b generate group, MIN(a.gpa);
 582store c into ':OUTPATH:';\,
 583			},
 584			{
 585			'num' => 11,
 586			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 587b = group a by name;
 588c = foreach b generate group, MAX(a.gpa);
 589store c into ':OUTPATH:';\,
 590			},
 591            {
 592			'num' => 12,
 593			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 594b = group a by (name, age);
 595c = foreach b generate flatten(group), SUM(a.gpa);
 596store c into ':OUTPATH:';\,
 597                        'floatpostprocess' => 1,
 598                        'delimiter' => '	',
 599			},
 600            {
 601			'num' => 13,
 602			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 603b = group a by (name);
 604c = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 605d = cogroup b by group, c by name;
 606e = foreach d generate flatten(group), SUM(c.gpa), COUNT(c.name);
 607store e into ':OUTPATH:';\,
 608                        'floatpostprocess' => 1,
 609                        'delimiter' => '	',
 610			},
 611            {
 612			'num' => 14,
 613			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 614b = group a by (name);
 615e = foreach b generate COUNT(a.name);
 616store e into ':OUTPATH:';\,
 617			'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 618b = group a by (name);
 619e = foreach b generate COUNT(a.name);
 620store e into ':OUTPATH:';\,
 621			}
 622			],
 623		},
 624		{
 625        'name' => 'MapPartialAgg',
 626        'tests' => [
 627                    {
 628            'num' => 1,
 629            'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 630b = group a by name;
 631c = foreach b generate group, COUNT(a.age);
 632store c into ':OUTPATH:';\,
 633             'java_params' => ['-Dpig.exec.mapPartAgg=true']
 634            },
 635            {
 636            #multiquery with group in one sub query
 637            'num' => 2,
 638            'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); 
 639                        b = filter a by age < 22; store b into ':OUTPATH:.1'; 
 640                        c = group b by age; 
 641                        d = foreach c generate group, SUM(b.gpa);
 642                        store d into ':OUTPATH:.2'; #,
 643            'java_params' => ['-Dpig.exec.mapPartAgg=true']
 644            
 645            },
 646            {
 647             #multi query with two group on diff columns
 648            'num' => 3,
 649            'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); 
 650                        g1 = group a by name;
 651                        f1 = foreach g1 generate group as name, MAX(a.gpa);
 652                        store f1 into ':OUTPATH:.1'; 
 653                        g2 = group a by age;
 654                        f2 = foreach g2 generate group as age, AVG(a.gpa);
 655                        store f2 into ':OUTPATH:.2'; #,
 656            'java_params' => ['-Dpig.exec.mapPartAgg=true']
 657            
 658            },
 659            {
 660             #multi query with three groups on diff columns, group key being an expression
 661            'num' => 4,
 662            'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); 
 663                        g1 = group a by name;
 664                        f1 = foreach g1 generate group as name, MAX(a.gpa);
 665                        store f1 into ':OUTPATH:.1'; 
 666                        g2 = group a by age%10;
 667                        f2 = foreach g2 generate group as age_mod10, AVG(a.gpa);
 668                        store f2 into ':OUTPATH:.2'; 
 669                        g3 = group a by age;
 670                        f3 = foreach g3 generate group%10, AVG(a.gpa);
 671                        store f3 into ':OUTPATH:.3';                         
 672                        g4 = group a by gpa;
 673                        f4 = foreach g4 generate group as gpa, COUNT(a);
 674                        store f4 into ':OUTPATH:.4';                                                 
 675                        
 676                        #,
 677            'java_params' => ['-Dpig.exec.mapPartAgg=true']
 678            
 679            },
 680            {
 681            #aggregation gets more than one tuple for every tuple from load func 
 682            
 683            'num' => 5,
 684            'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); 
 685                        b = foreach a generate name, age, gpa, flatten(TOBAG(age,age)) as x;
 686                        c = group b by age; 
 687                        d = foreach c generate group, AVG(b.gpa);
 688                        store d into ':OUTPATH:'; #,
 689            'java_params' => ['-Dpig.exec.mapPartAgg=true']
 690            
 691            },            
 692        
 693            ],
 694        },
 695		{
		# Test group 'EvalFunc': each test applies an eval function inside a
		# foreach ... generate and stores the result.  Tests 3-5 first register
		# testudf.jar from :FUNCPATH: and call functions by their fully
		# qualified org.apache.pig.test.udf.* names.
		# NOTE(review): :INPATH:, :OUTPATH: and :FUNCPATH: look like placeholders
		# substituted by the test harness -- confirm against the harness code.
 696		'name' => 'EvalFunc',
 697		'tests' => [
 698			{
			# ARITY(name, age, gpa) on the rows kept by the "name lt 'b'" filter.
 699			'num' => 1,
 700			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 701b = filter a by name lt 'b';
 702c = foreach b generate ARITY(name, age, gpa);
 703store c into ':OUTPATH:';\,
 704			},
 705			{
			# TOKENIZE(name) followed by a flatten of the generated column ($0).
 706			'num' => 2,
 707			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
 708b = filter a by name lt 'b';
 709c = foreach b generate TOKENIZE(name);
 710d = foreach c generate flatten($0);
 711store d into ':OUTPATH:';\,
 712			},
 713			{
			# Swap UDF called with (name, age); output is written with the
			# StringStore store function from the same jar.
 714			'num' => 3,
 715			'pig' => q\register :FUNCPATH:/testudf.jar;
 716a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 717b = filter a by name lt 'b';
 718c = foreach b generate org.apache.pig.test.udf.evalfunc.Swap(name, age);
 719store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
 720			},
 721			{
			# CreateMap UDF called with name cast to chararray and age; output
			# is written with the StringStore store function.
 722			'num' => 4,
 723			'pig' => q\register :FUNCPATH:/testudf.jar;
 724a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 725b = filter a by name lt 'b';
 726c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
 727store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
 728			},
 729                        {
                        # TestBoolean UDF applied to the boolean column instate.
                        # The verify script loads instate as chararray and emits ''
                        # for null, 'false' for 'true' and 'true' otherwise.
                        # NOTE(review): this implies TestBoolean negates its input and
                        # that verify_pig_script produces the expected output --
                        # confirm against the UDF and the harness.
 730                        'num' => 5,
 731			'pig' => q\register :FUNCPATH:/testudf.jar;
 732a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 733b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
 734store b into ':OUTPATH:';\,
 735                        'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 736b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
 737store b into ':OUTPATH:';\,
 738                        }
 739
 740		]
 741		},
 742		# TODO DIFF
 743		# TODO User defined grouping function
 744		{
		# Test group 'CoGroupFlatten': every test cogroups student and voter
		# data and then flattens both bags of each group in a single
		# foreach ... generate.  The tests differ in how the key is named
		# (alias, position, tuple) and in which inputs are marked "inner".
 745		'name' => 'CoGroupFlatten',
 746		'tests' => [
 747			{
			# Both inputs filtered to age < 20; key is the name alias.
 748			'num' => 1,
 749			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 750b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 751c = filter a by age < 20;
 752d = filter b by age < 20;
 753e = cogroup c by name, d by name;
 754f = foreach e generate flatten (c), flatten(d);
 755store f into ':OUTPATH:';\,
 756			},
 757			{
			# Same script as test 1 but columns are referenced by position
			# ($1 in the filters, $0 as the cogroup key).
 758			'num' => 2,
 759			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 760b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 761c = filter a by $1 < 20;
 762d = filter b by $1 < 20;
 763e = cogroup c by $0, d by $0;
 764f = foreach e generate flatten (c), flatten(d);
 765store f into ':OUTPATH:';\,
 766			},
 767			{
			# Two-column key (name, age) on both filtered inputs.
 768			'num' => 3,
 769			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 770b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 771c = filter a by age < 20;
 772d = filter b by age < 20;
 773e = cogroup c by (name, age), d by (name, age);
 774f = foreach e generate flatten (c), flatten(d);
 775store f into ':OUTPATH:';\,
 776			},
 777			{
			# "inner" on the first input only (unfiltered a); second input is
			# the voters filtered to age < 20.
 778			'num' => 4,
 779			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 780b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 781d = filter b by age < 20;
 782e = cogroup a by (name, age) inner, d by (name, age);
 783f = foreach e generate flatten (a), flatten(d);
 784store f into ':OUTPATH:';\,
 785			},
 786			{
			# "inner" on the second input only (unfiltered b); first input is
			# the students filtered to age < 20.
 787			'num' => 5,
 788			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 789b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 790c = filter a by age < 20;
 791e = cogroup c by (name, age), b by (name, age) inner;
 792f = foreach e generate flatten (c), flatten(b);
 793store f into ':OUTPATH:';\,
 794			},
 795			{
			# "inner" on both unfiltered inputs.
 796			'num' => 6,
 797			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 798b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 799e = cogroup a by (name, age) inner, b by (name, age) inner;
 800f = foreach e generate flatten (a), flatten(b);
 801store f into ':OUTPATH:';\,
 802			},
 803            {
 804            # Test cogrouping data loaded by two different loaders.  No data set that can join with studenttab is available in a
 805            # non-PigStorage format, so the script first stores studenttab with BinStorage and loads that intermediate copy back.
            # NOTE(review): 'notmq' presumably tells the harness not to run this script in multi-query mode -- confirm.
 806    		'num' => 7,
 807			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 808store a into ':OUTPATH:.intermediate' using BinStorage();
 809b = load ':OUTPATH:.intermediate' using BinStorage() as (name, age, gpa);
 810c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 811e = cogroup b by (name, age) inner, c by (name, age) inner;
 812f = foreach e generate flatten (b), flatten(c);
 813store f into ':OUTPATH:';\,
 814            'notmq' => 1,
 815            },
 816            
 817		]
 818		},
 819		{
		# Test group 'CoGroup': a single test that cogroups students and voters
		# by name and, for each group, emits the flattened key together with
		# COUNT(a) + COUNT(b).
 820		'name' => 'CoGroup',
 821		'tests' => [
 822			{
 823			'num' => 1,
 824			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 825b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 826c = cogroup a by name, b by name;
 827d = foreach c generate flatten(group), COUNT(a) + COUNT(b);
 828store d into ':OUTPATH:';\,
 829			},
 830		]
 831		},
 832		{
		# Test group 'Join': inner joins keyed by alias, position and tuple,
		# self joins, fragment-replicate ('repl') joins, outer joins, and a
		# join on a boolean column.
		# NOTE(review): where a test carries 'verify_pig_script', that script
		# presumably generates the expected output for comparison -- confirm
		# against the harness.
 833		'name' => 'Join',
 834		'tests' => [
 835			{
			# join on the name alias; both inputs filtered to age < 20
 836			'num' => 1,
 837			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 838b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 839c = filter a by age < 20;
 840d = filter b by age < 20;
 841e = join c by name, d by name;
 842store e into ':OUTPATH:';\,
 843			},
 844			{
			# same as test 1 but the join key is given by position ($0)
 845			'num' => 2,
 846			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 847b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 848c = filter a by age < 20;
 849d = filter b by age < 20;
 850e = join c by $0, d by $0;
 851store e into ':OUTPATH:';\,
 852			},
 853			{
			# join on the two-column key (name, age)
 854			'num' => 3,
 855			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 856b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 857c = filter a by age < 20;
 858d = filter b by age < 20;
 859e = join c by (name, age), d by (name, age);
 860store e into ':OUTPATH:';\,
 861			},
 862            # self join with implicit split
 863            # JIRA PIG-429
 864			{
 865			'num' => 4,
 866			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
 867b = filter a by $1 > 25;
 868c = join a by $0, b by $0;
 869store c into ':OUTPATH:';\,
 870			},
 871            # join with one input having a schema and the other without
 872            # JIRA PIG-428
 873			{
 874			'num' => 5,
 875			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray,age:int, gpa:double);
 876another = load ':INPATH:/singlefile/studenttab10k';
 877c = foreach another generate $0, $1+ 10, $2 + 10.0;
 878d = join a by $0, c by $0;
 879store d into ':OUTPATH:';\,
 880			},
 881            # self join using fragment replicate join
 882            # no types
            # (the verify script is the same join without "using 'repl'")
 883			{
 884			'num' => 6,
 885			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 886b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 887c = join a by name, b by name using 'repl';
 888store c into ':OUTPATH:';\,
 889            'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 890b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 891c = join a by name, b by name ;
 892store c into ':OUTPATH:';\,
 893			},
 894            # self join using fragment replicate join
 895            # with types and no cast for join key
 896			{
 897			'num' => 7,
 898			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 899b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 900c = join a by name, b by name using 'repl';
 901store c into ':OUTPATH:';\,
 902            'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 903b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 904c = join a by name, b by name ;
 905store c into ':OUTPATH:';\,
 906
 907			},
 908            # self join using fragment replicate join
 909            # with types and cast for join key
            # (gpa is declared double in a but left untyped in b)
 910			{
 911			'num' => 8,
 912			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 913b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
 914c = join a by gpa, b by gpa using 'repl';
 915store c into ':OUTPATH:';\,
 916            'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 917b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
 918c = join a by gpa, b by gpa ;
 919store c into ':OUTPATH:';\,
 920
 921			},
 922            # left outer join
 923			{
 924			'num' => 9,
 925			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
 926b = load ':INPATH:/singlefile/voternulltab10k' as  (name:chararray, age:long, registration:chararray, contributions:double); 
 927c = join a by name left outer, b by name;
 928store c into ':OUTPATH:';\,
 929			},
 930            # right outer join
 931			{
 932			'num' => 10,
 933			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
 934b = load ':INPATH:/singlefile/voternulltab10k' as  (name:chararray, age:long, registration:chararray, contributions:double); 
 935c = join a by name right outer, b by name;
 936store c into ':OUTPATH:';\,
 937			},
 938            # full outer join
 939			{
 940			'num' => 11,
 941			'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
 942b = load ':INPATH:/singlefile/voternulltab10k' as  (name:chararray, age:long, registration:chararray, contributions:double); 
 943c = join a by name full outer, b by name;
 944store c into ':OUTPATH:';\,
 945			},
 946            # See PIG-1209: the join package now uses internalcachedBag, so every tuple on the reduce side in this test will be spilled to disk.
 947			{
 948			'num' => 12,
 949	 		'java_params' => ['-Dpig.cachedbag.memusage=0'],
 950			 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
 951			 b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
 952			 c = filter a by age < 20;
 953			 d = filter b by age < 20;
 954			 e = join c by name, d by name;
 955			 store e into ':OUTPATH:';\,
 956			},
 957                        {
			# self join on the boolean column instate with "parallel 5"; the
			# verify script is the same join with instate loaded as chararray
 958			'num' => 13,
 959			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 960b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
 961c = filter a by age < 20;
 962d = filter b by age < 20;
 963e = join c by instate, d by instate parallel 5;
 964store e into ':OUTPATH:';\,
 965			'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 966b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
 967c = filter a by age < 20;
 968d = filter b by age < 20;
 969e = join c by instate, d by instate parallel 5;
 970store e into ':OUTPATH:';\,
 971			}
 972		]
 973		},
 974		{
		# Test group 'Foreach': projections with and without a schema, nested
		# foreach blocks, flatten of UDF results, and arithmetic on aggregates.
 975		'name' => 'Foreach',
 976		'tests' => [
 977			{
			# generate * with a declared (untyped) schema
 978			'num' => 1,
 979			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 980b = foreach a generate *;
 981store b into ':OUTPATH:';\,
 982			},
 983			{
			# generate * with no schema
 984			'num' => 2,
 985			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
 986b = foreach a generate *;
 987store b into ':OUTPATH:';\,
 988			},
 989			{
			# project two columns by alias
 990			'num' => 3,
 991			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
 992b = foreach a generate name, age;
 993store b into ':OUTPATH:';\,
 994			},
 995			{
			# project two columns by position
 996			'num' => 4,
 997			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
 998b = foreach a generate $0, $2;
 999store b into ':OUTPATH:';\,
1000			},
1001			{
1002                # test filter, projection, sort, duplicate elimination inside a nested foreach
1003                'num' => 5,
1004                'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1005b = filter a by age < 20;
1006c = group b by age;
1007d = foreach c { 
1008    cf = filter b by gpa < 3.0; 
1009    cp = cf.gpa; 
1010	cd = distinct cp;
1011    co = order cd by $0; 
1012    generate group, flatten(co);
1013    }
1014store d into ':OUTPATH:';\,
1015            },
1016			{
1017			# test flatten for map and scalar
1018			'num' => 6,
1019			'pig' => q\register :FUNCPATH:/testudf.jar;
1020a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1021b = foreach a generate flatten(name) as n, flatten(org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, gpa)) as m;
1022store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
1023			},
1024			{
1025			# test flatten for UDF that returns bag with multiple tuples with multiple columns
1026			'num' => 7,
1027			'pig' => q\register :FUNCPATH:/testudf.jar;
1028a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1029b = foreach a generate name, flatten(org.apache.pig.test.udf.evalfunc.CreateTupleBag(age, gpa)) as foo;
1030store b into ':OUTPATH:';\,
1031			},
1032			{
			# arithmetic on two aggregates: MAX(a.age) + MIN(a.age) per name
1033			'num' => 8,
1034			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age: int, gpa);
1035c = group a by name;
1036d = foreach c generate flatten(group), MAX(a.age) + MIN(a.age);
1037store d into ':OUTPATH:';\,
1038			},
1039			{
1040                # same nested pipeline as test 5, with a range filter joined by "and"
1041                'num' => 9,
1042                'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1043b = filter a by age < 20;
1044c = group b by age;
1045d = foreach c { 
1046    cf = filter b by gpa >= 3.0 and gpa <= 3.5; 
1047    cp = cf.gpa; 
1048	cd = distinct cp;
1049    co = order cd by $0; 
1050    generate group, flatten(co);
1051    }
1052store d into ':OUTPATH:';\,
1053            },
1054			{
1055                # same nested pipeline as test 5, with a filter mixing "or", "and" and a string comparison
1056                'num' => 10,
1057                'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1058b = filter a by age < 20;
1059c = group b by age;
1060d = foreach c { 
1061    cf = filter b by (gpa == 4.0 or gpa != 2.0) and name > 'a'; 
1062    cp = cf.gpa; 
1063	cd = distinct cp;
1064    co = order cd by $0; 
1065    generate group, flatten(co);
1066    }
1067store d into ':OUTPATH:';\,
1068            },
1069			{
1070                # test expression aliases inside a nested foreach, where exp2 is built from exp1
1071                'num' => 11,
1072                'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1073b = filter a by age < 20;
1074c = foreach b { 
1075    exp1 = age + gpa;
1076    exp2 = exp1 + age;
1077    generate exp1, exp2;
1078    }
1079store c into ':OUTPATH:';\,
1080            },
1081			{
1082                # test a udf with no args
1083                'num' => 12,
1084                'pig' => q\register :FUNCPATH:/testudf.jar;
1085a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1086b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
1087store b into ':OUTPATH:';\,
1088            },
1089            		{
			# generate * over a schema containing a boolean column; the verify
			# script is identical except that instate is loaded as chararray
1090			'num' => 13,
1091		'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
1092b = foreach a generate *;
1093store b into ':OUTPATH:';\,
1094		'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
1095b = foreach a generate *;
1096store b into ':OUTPATH:';\,
1097			}
1098		]
1099		},
1100		{
		# Test group 'Order': top-level "order by" on aliases, positions, '*'
		# and multiple keys, plus "order" inside nested foreach blocks.
		# NOTE(review): 'sortArgs' looks like an argument list for an external
		# sort (-t <tab> field separator, -k key range, -r reverse) used when
		# verifying ordered output -- confirm against the harness.
1101		'name' => 'Order',
1102		'tests' => [
1103			{
			# single projected column ordered by alias
1104			'num' => 1,
1105			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1106b = foreach a generate name;
1107c = order b by name;
1108store c into ':OUTPATH:';\,
1109			'sortArgs' => ['-t', '	', '-k', '1,1'],
1110			},
1111			{
			# single projected column ordered by position, no schema
1112			'num' => 2,
1113			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1114b = foreach a generate $1;
1115c = order b by $0;
1116store c into ':OUTPATH:';\,
1117			'sortArgs' => ['-t', '	', '-k', '1,1'],
1118			},
1119			{
			# single projected gpa column ordered by alias
1120			'num' => 3,
1121			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1122b = foreach a generate gpa;
1123c = order b by gpa;
1124store c into ':OUTPATH:';\,
1125			'sortArgs' => ['-t', '	', '-k', '1,1'],
1126			},
1127			{
			# order by * (all columns)
1128			'num' => 4,
1129			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1130b = order a by *;
1131store b into ':OUTPATH:';\,
1132			'sortArgs' => ['-t', '	'],
1133			},
1134			{
			# two-column sort key given by alias
1135			'num' => 5,
1136			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1137b = foreach a generate name, age;
1138c = order b by name, age;
1139store c into ':OUTPATH:';\,
1140			'sortArgs' => ['-t', '	', '-k', '1,2'],
1141			},
1142            {
			# full rows ordered by the first column
1143			'num' => 6,
1144			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1145c = order a by $0;
1146store c into ':OUTPATH:';\,
1147			'sortArgs' => ['-t', '	', '-k', '1,1'],
1148			},
1149            {
			# full rows ordered by the second column
1150			'num' => 7,
1151			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1152c = order a by $1;
1153store c into ':OUTPATH:';\,
1154			'sortArgs' => ['-t', '	', '-k', '2,2'],
1155			},
1156            {
			# full rows ordered by the first then the second column
1157			'num' => 8,
1158			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1159c = order a by $0, $1;
1160store c into ':OUTPATH:';\,
1161			'sortArgs' => ['-t', '	', '-k', '1,2'],
1162			},
1163            {
			# full rows ordered by the second then the first column
1164			'num' => 9,
1165			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1166c = order a by $1, $0;
1167store c into ':OUTPATH:';\,
1168			'sortArgs' => ['-t', '	', '-k', '2,2', '-k', '1,1'],
1169			},
1170            {
			# Tests 10-12 order with the OrdDesc UDF comparator and carry an
			# 'ignore' entry whose value states the reason.
			# NOTE(review): presumably the harness skips tests that have
			# 'ignore' set -- confirm.
1171			'num' => 10,
1172			'ignore' => 'order by UDF is not supported',
1173			'pig' => q\register :FUNCPATH:/testudf.jar;
1174a = load ':INPATH:/singlefile/studenttab10k';
1175c = order a by * using org.apache.pig.test.udf.orderby.OrdDesc;
1176store c into ':OUTPATH:';\,
1177			'sortArgs' => ['-t', '	', '-r'],
1178			},
1179            {
1180			'num' => 11,
1181			'ignore' => 'order by UDF is not supported',
1182			'pig' => q\register :FUNCPATH:/testudf.jar;
1183a = load ':INPATH:/singlefile/studenttab10k';
1184c = order a by $0 using org.apache.pig.test.udf.orderby.OrdDesc;
1185store c into ':OUTPATH:';\,
1186			'sortArgs' => ['-t', '	', '-r', '-k', '1,1'],
1187			},
1188            {
1189			'num' => 12,
1190			'ignore' => 'order by UDF is not supported',
1191			'pig' => q\register :FUNCPATH:/testudf.jar;
1192a = load ':INPATH:/singlefile/studenttab10k';
1193c = order a by $0, $1 using org.apache.pig.test.udf.orderby.OrdDesc;
1194store c into ':OUTPATH:';\,
1195			'sortArgs' => ['-t', '	', '-r', '-k', '1,2'],
1196			},
1197# ALERT: none of the tests below that order inside a nested foreach (13-17)
1198# actually verify the inner ordering.  We need to develop a sorting tool to do that.
1199            {
			# nested order of the grouped bag ($1) by its second column
1200			'num' => 13,
1201			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1202b = group a by $0;
1203c = foreach b {c1 = order $1 by $1; generate flatten(c1); };
1204store c into ':OUTPATH:';\,
1205			},
1206            {
			# nested order of the grouped bag by *
1207			'num' => 14,
1208			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1209b = group a by $0;
1210c = foreach b {c1 = order $1 by *; generate flatten(c1); };
1211store c into ':OUTPATH:';\,
1212			},
1213            {
			# nested order by * using the OrdDesc UDF comparator
1214			'num' => 15,
1215			'pig' => q\register :FUNCPATH:/testudf.jar;
1216a = load ':INPATH:/singlefile/studenttab10k';
1217b = group a by $0;
1218c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };
1219store c into ':OUTPATH:';\,
1220			},
1221            {
			# nested order by $1 using the OrdDesc UDF comparator
1222			'num' => 16,
1223			'pig' => q\register :FUNCPATH:/testudf.jar;
1224a = load ':INPATH:/singlefile/studenttab10k';
1225b = group a by $0;
1226c = foreach b {c1 = order $1 by $1 using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1);};
1227store c into ':OUTPATH:';\,
1228			},
1229            {
			# nested order combined with an aggregate (MAX($1.$1)) in the same generate
1230			'num' => 17,
1231			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1232b = group a by $0;
1233c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };
1234store c into ':OUTPATH:';\,
1235			},
1236            {
1237            # test to make sure the weighted range partitioning
1238            # works correctly when a sort key value repeats across
1239            # reduce partitions
1240			'num' => 18,
1241			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1242b = order a by $1 parallel 100;
1243store b into ':OUTPATH:';\,
1244			'sortArgs' => ['-t', '	', '-k', '2,2'],
1245			},
1246			{
			# order by a boolean column; the verify script loads instate as
			# chararray and orders by it the same way
1247			'num' => 19,
1248			'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
1249b = foreach a generate instate;
1250c = order b by instate;
1251store c into ':OUTPATH:';\,
1252			'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
1253b = foreach a generate instate;
1254c = order b by instate;
1255store c into ':OUTPATH:';\,
1256			'sortArgs' => ['-t', '	', '-k', '1,1'],
1257			},
1258		]
1259		},
1260		{
1261		'name' => 'Distinct',
1262		'tests' => [
1263			{
1264			'num' => 1,
1265			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
1266b = foreach a generate name;
1267c = distinct b;
1268store c into ':OUTPATH:';\,
1269			},
1270			{
1271			'num' => 2,
1272			'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
1273b = foreach a generate $1;
1274c = distinct b;
1275store c 

Large files are truncated, but you can click here to view the full file