PageRenderTime 67ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 1ms

/test/e2e/pig/tests/nightly.conf

https://github.com/ftian/pig
Perl | 4983 lines | 4534 code | 111 blank | 338 comment | 102 complexity | baec46f614bd4cfd568cf24e6026f5d7 MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env perl
  2. ############################################################################
  3. # Licensed to the Apache Software Foundation (ASF) under one or more
  4. # contributor license agreements. See the NOTICE file distributed with
  5. # this work for additional information regarding copyright ownership.
  6. # The ASF licenses this file to You under the Apache License, Version 2.0
  7. # (the "License"); you may not use this file except in compliance with
  8. # the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. ###############################################################################
  18. # Nightly tests for pig.
  19. #
  20. #
  21. #PigSetup::setup();
  22. #my $me = `whoami`;
  23. #chomp $me;
  24. $cfg = {
  25. 'driver' => 'Pig',
  26. 'nummachines' => 5,
  27. 'verify_with_pig' => 1,
  28. 'verify_pig_version' => 'old',
  29. 'groups' => [
  30. {
  31. 'name' => 'Checkin',
  32. 'tests' => [
  33. {
  34. 'num' => 1,
  35. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  36. store a into ':OUTPATH:';\,
  37. },
  38. {
  39. 'num' => 2,
  40. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  41. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  42. c = filter a by age < 50;
  43. d = filter b by age < 50;
  44. e = cogroup c by (name, age), d by (name, age) ;
  45. f = foreach e generate flatten(c), flatten(d);
  46. g = group f by registration;
  47. h = foreach g generate group, SUM(f.d::contributions);
  48. i = order h by $1;
  49. store i into ':OUTPATH:';\,
  50. 'floatpostprocess' => 1,
  51. 'delimiter' => ' ',
  52. 'sortArgs' => ['-t', ' ', '-k', '2,2'],
  53. }
  54. ]
  55. },
  56. {
  57. 'name' => 'LoaderDefaultDir',
  58. 'tests' => [
  59. {
  60. 'num' => 1,
  61. 'pig' => q\a = load ':INPATH:/dir/studenttab10k' as (name, age, gpa);
  62. store a into ':OUTPATH:';\,
  63. },
  64. ]
  65. },
  66. {
  67. 'name' => 'LoaderPigStorageArg',
  68. 'tests' => [
  69. {
  70. 'num' => 1,
  71. 'pig' => q\a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
  72. store a into ':OUTPATH:';\,
  73. },
  74. {
  75. # load with control character
  76. 'num' => 2,
  77. 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  78. store a into ':OUTPATH:';#,
  79. },
  80. {
  81. # load and store with control character
  82. 'num' => 3,
  83. 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  84. store a into ':OUTPATH:.intermediate' using PigStorage('\\u0001');
  85. b = load ':OUTPATH:.intermediate' using PigStorage('\\u0001') as (name, age, gpa);
  86. store b into ':OUTPATH:'; #,
  87. 'notmq' => 1,
  88. },
  89. ]
  90. },
  91. {
  92. # Results doctored; if you change this query, you need to copy the
  93. # expected results into test/nightly/benchmarks.
  94. 'name' => 'LoaderBinStorage',
  95. 'tests' => [
  96. {
  97. 'num' => 1,
  98. 'pig' => q\register :FUNCPATH:/testudf.jar;
  99. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  100. b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Swap(name, age), TOKENIZE((chararray)name), org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
  101. store b into ':OUTPATH:.intermediate' using BinStorage();
  102. c = load ':OUTPATH:.intermediate' using BinStorage();
  103. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  104. 'notmq' => 1,
  105. },
  106. ]
  107. },
  108. {
  109. # Results doctored; if you change this query, you need to copy the
  110. # expected results into test/nightly/benchmarks.
  111. 'name' => 'LoaderTextLoader',
  112. 'tests' => [
  113. {
  114. 'num' => 1,
  115. 'pig' => q\register :FUNCPATH:/testudf.jar;
  116. a = load ':INPATH:/singlefile/textdoc' using TextLoader();
  117. b = foreach a generate TOKENIZE((chararray)$0);
  118. store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  119. },
  120. ]
  121. },
  122. {
  123. 'name' => 'FilterBoolean',
  124. 'tests' => [
  125. {
  126. 'num' => 1,
  127. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  128. b = filter a by name == 'fred allen' and age > 50;
  129. store b into ':OUTPATH:' using PigStorage;\,
  130. },
  131. {
  132. 'num' => 2,
  133. 'pig' => q\a = load ':INPATH:/dir/studenttab10k' using PigStorage() as (name, age, gpa);
  134. b = filter a by name != 'fred allen' or age < 10;
  135. store b into ':OUTPATH:' using PigStorage;\,
  136. },
  137. {
  138. 'num' => 3,
  139. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  140. b = filter a by not (age == 50);
  141. store b into ':OUTPATH:' using PigStorage;\,
  142. },
  143. {
  144. 'num' => 4,
  145. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  146. b = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
  147. store b into ':OUTPATH:' using PigStorage;\,
  148. },
  149. {
  150. 'num' => 5,
  151. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  152. b = filter a by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';
  153. store b into ':OUTPATH:' using PigStorage;\,
  154. },
  155. # test filter <= and >= for chararray, int and double
  156. {
  157. 'num' => 6,
  158. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  159. b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
  160. store b into ':OUTPATH:' using PigStorage;\,
  161. },
  162. # test filter <= and >= for bytearray, long and float
  163. {
  164. 'num' => 7,
  165. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  166. b = filter a by age >= 40 and age <=50 and gpa >= 2.0f and gpa <= 3.0f and name >= 'bob' and name <= 'fred';
  167. store b into ':OUTPATH:' using PigStorage;\,
  168. },
  169. # test filter < and > for chararray, int and double
  170. {
  171. 'num' => 8,
  172. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  173. b = filter a by age > 40 and age <50 and gpa > 2.0 and gpa < 3.0 and name > 'bob' and name < 'fred';
  174. store b into ':OUTPATH:' using PigStorage;\,
  175. },
  176. # test filter < and > for bytearray, long and float
  177. {
  178. 'num' => 9,
  179. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  180. b = filter a by age > 40 and age <50 and gpa > 2.0f and gpa < 3.0f and name > 'bob' and name < 'fred';
  181. store b into ':OUTPATH:' using PigStorage;\,
  182. },
  183. # test filter <= and >= for explicit cast for chararray, int and double
  184. {
  185. 'num' => 10,
  186. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  187. b = filter a by (int)age >= 40 and (int)age <=50 and (double)gpa >= 2.0 and (double)gpa <= 3.0 and (chararray)name >= 'bob' and (chararray)name <= 'fred';
  188. store b into ':OUTPATH:' using PigStorage;\,
  189. },
  190. # test filter <= and >= for explicit cast for bytearray, long and float
  191. {
  192. 'num' => 11,
  193. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  194. b = filter a by (long)age >= 40 and (long)age <=50 and (float)gpa >= 2.0f and (float)gpa <= 3.0f and name >= 'bob' and name <= 'fred';
  195. store b into ':OUTPATH:' using PigStorage;\,
  196. },
  197. # test filter < and > for explicit cast for chararray, int and double
  198. {
  199. 'num' => 12,
  200. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  201. b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
  202. store b into ':OUTPATH:' using PigStorage;\,
  203. },
  204. # test filter < and > for explicit cast for bytearray, long and float
  205. {
  206. 'num' => 13,
  207. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  208. b = filter a by (long)age > 40 and (long)age <50 and (float)gpa > 2.0f and (float)gpa < 3.0f and name > 'bob' and name < 'fred';
  209. store b into ':OUTPATH:' using PigStorage;\,
  210. },
  211. # test AND with nulls
  212. {
  213. 'num' => 14,
  214. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  215. b = filter a by name == 'fred allen' and age > 50;
  216. store b into ':OUTPATH:' using PigStorage;\,
  217. },
  218. # test OR with nulls
  219. {
  220. 'num' => 15,
  221. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  222. b = filter a by name != 'fred allen' or age < 10;
  223. store b into ':OUTPATH:' using PigStorage;\,
  224. },
  225. # test with nulls filter <= and >= for chararray, int and double
  226. {
  227. 'num' => 16,
  228. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  229. b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
  230. store b into ':OUTPATH:' using PigStorage;\,
  231. },
  232. # test with nulls filter < and > for explicit cast for chararray, int and double
  233. {
  234. 'num' => 17,
  235. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  236. b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
  237. store b into ':OUTPATH:' using PigStorage;\,
  238. },
  239. {
  240. 'num' => 18,
  241. 'ignore' => 1, # PIG-2593 this case is not supported as instate need to be declared as boolean
  242. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  243. b = filter a by instate;
  244. store b into ':OUTPATH:' using PigStorage;\,
  245. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  246. b = filter a by instate == 'true';
  247. store b into ':OUTPATH:' using PigStorage;\,
  248. },
  249. {
  250. 'num' => 19,
  251. 'ignore' => 1, # PIG-2593 this case is not supported as instate need to be declared as boolean
  252. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  253. b = filter a by not instate;
  254. store b into ':OUTPATH:' using PigStorage;\,
  255. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  256. b = filter a by instate == 'false';
  257. store b into ':OUTPATH:' using PigStorage;\,
  258. },
  259. {
  260. 'num' => 20,
  261. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  262. b = filter a by instate is null;
  263. store b into ':OUTPATH:' using PigStorage;\,
  264. },
  265. {
  266. 'num' => 21,
  267. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  268. b = filter a by instate == true;
  269. store b into ':OUTPATH:' using PigStorage;\,
  270. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  271. b = filter a by instate == 'true';
  272. store b into ':OUTPATH:' using PigStorage;\,
  273. },
  274. {
  275. 'num' => 22,
  276. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  277. b = filter a by instate == false;
  278. store b into ':OUTPATH:' using PigStorage;\,
  279. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  280. b = filter a by instate == 'false';
  281. store b into ':OUTPATH:' using PigStorage;\,
  282. },
  283. {
  284. 'num' => 23,
  285. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  286. b = filter a by instate;
  287. store b into ':OUTPATH:' using PigStorage;\,
  288. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  289. b = filter a by instate == 'true';
  290. store b into ':OUTPATH:' using PigStorage;\,
  291. },
  292. {
  293. 'num' => 24,
  294. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  295. b = filter a by not instate;
  296. store b into ':OUTPATH:' using PigStorage;\,
  297. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  298. b = filter a by instate == 'false';
  299. store b into ':OUTPATH:' using PigStorage;\,
  300. },
  301. {
  302. 'num' => 25,
  303. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  304. b = filter a by instate is null;
  305. store b into ':OUTPATH:' using PigStorage;\,
  306. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  307. b = filter a by instate is null;
  308. store b into ':OUTPATH:' using PigStorage;\,
  309. },
  310. {
  311. 'num' => 26,
  312. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  313. b = filter a by instate == true;
  314. store b into ':OUTPATH:' using PigStorage;\,
  315. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  316. b = filter a by instate == 'true';
  317. store b into ':OUTPATH:' using PigStorage;\,
  318. },
  319. {
  320. 'num' => 27,
  321. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  322. b = filter a by instate == false;
  323. store b into ':OUTPATH:' using PigStorage;\,
  324. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  325. b = filter a by instate == 'false';
  326. store b into ':OUTPATH:' using PigStorage;\,
  327. },
  328. ],
  329. },
  330. {
  331. 'name' => 'FilterEq',
  332. 'tests' => [
  333. {
  334. 'num' => 1,
  335. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  336. b = filter a by name == 'alice johnson' and age == 64 and gpa == 3.99;
  337. store b into ':OUTPATH:' using PigStorage;\,
  338. },
  339. {
  340. 'num' => 2,
  341. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  342. b = filter a by name > 'fred allen' and age > 40 and gpa > 2.50;
  343. store b into ':OUTPATH:' using PigStorage;\,
  344. },
  345. {
  346. 'num' => 3,
  347. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  348. b = filter a by name >= 'fred allen' and age >= 40 and gpa >= 2.50;
  349. store b into ':OUTPATH:' using PigStorage;\,
  350. },
  351. {
  352. 'num' => 4,
  353. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  354. b = filter a by name lt 'fred allen' and age < 40 and gpa < 2.50;
  355. store b into ':OUTPATH:' using PigStorage;\,
  356. },
  357. {
  358. 'num' => 5,
  359. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  360. b = filter a by name lte 'fred allen' and age <= 40 and gpa <= 2.50;
  361. store b into ':OUTPATH:' using PigStorage;\,
  362. },
  363. {
  364. 'num' => 6,
  365. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
  366. b = filter a by $0 neq 'fred allen' and $1 != '40' and $2 != '2.50';
  367. store b into ':OUTPATH:' using PigStorage;\,
  368. },
  369. # test for filter == for chararray, int and double
  370. {
  371. 'num' => 7,
  372. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  373. b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42;
  374. store b into ':OUTPATH:' using PigStorage;\,
  375. },
  376. # test for filter == for bytearray, long and float
  377. {
  378. 'num' => 8,
  379. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  380. b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42f;
  381. store b into ':OUTPATH:' using PigStorage;\,
  382. },
  383. # test for filter != for chararray, int and double
  384. {
  385. 'num' => 9,
  386. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  387. b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50;
  388. store b into ':OUTPATH:' using PigStorage;\,
  389. },
  390. # test for filter != for bytearray, long and float
  391. {
  392. 'num' => 10,
  393. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  394. b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50f;
  395. store b into ':OUTPATH:' using PigStorage;\,
  396. },
  397. # test for filter == for explicit casts to chararray, int and double
  398. {
  399. 'num' => 11,
  400. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  401. b = filter a by (chararray)name == 'fred allen' and (int)age == 61 and (double)gpa == 1.42;
  402. store b into ':OUTPATH:' using PigStorage;\,
  403. },
  404. # test for filter == for explicit casts to bytearray, long and float
  405. {
  406. 'num' => 12,
  407. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  408. b = filter a by name == 'fred allen' and (long)age == 61 and (float)gpa == 1.42f;
  409. store b into ':OUTPATH:' using PigStorage;\,
  410. },
  411. # test for filter != for explicit casts to chararray, int and double
  412. {
  413. 'num' => 13,
  414. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
  415. b = filter a by (chararray)$0 != 'fred allen' and (int)$1 != 40 and (double)$2 != 2.50;
  416. store b into ':OUTPATH:' using PigStorage;\,
  417. },
  418. # test for filter != for explicit casts to bytearray, long and float
  419. {
  420. 'num' => 14,
  421. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
  422. b = filter a by $0 != 'fred allen' and (long)$1 != 40 and (float)$2 != 2.50f;
  423. store b into ':OUTPATH:' using PigStorage;\,
  424. },
  425. ]
  426. },
  427. {
  428. 'name' => 'FilterMatches',
  429. 'tests' => [
  430. {
  431. 'num' => 1,
  432. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  433. b = filter a by name matches '^fred.*';
  434. store b into ':OUTPATH:' using PigStorage;\,
  435. },
  436. {
  437. 'num' => 2,
  438. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
  439. b = filter a by not $0 matches '^fred.*';
  440. store b into ':OUTPATH:' using PigStorage;\,
  441. },
  442. {
  443. # test for filter on matches for chararray (declared and explicit cast)
  444. 'num' => 3,
  445. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  446. b = filter a by name matches '^fred.*' and (chararray)registration matches '^dem.*';
  447. store b into ':OUTPATH:' using PigStorage;\,
  448. },
  449. {
  450. 'num' => 4,
  451. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  452. b = filter a by name matches 'f.ed' and (chararray)registration matches 'd.m';
  453. store b into ':OUTPATH:' using PigStorage;\,
  454. },
  455. {
  456. 'num' => 5,
  457. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  458. b = filter a by name matches 'f[^f]ed.*';
  459. store b into ':OUTPATH:' using PigStorage;\,
  460. },
  461. {
  462. 'num' => 6,
  463. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '.*\\\\wan.*';\nstore b into ':OUTPATH:' using PigStorage;",
  464. },
  465. {
  466. 'num' => 7,
  467. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '^e.*\\\\sc.*';\nstore b into ':OUTPATH:' using PigStorage;",
  468. },
  469. {
  470. 'num' => 8,
  471. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches 'ethan white';\nstore b into ':OUTPATH:' using PigStorage;",
  472. },
  473. {
  474. 'num' => 9,
  475. 'pig' => "a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);\nb = filter a by gpa matches '\\\\d\\\\.45';\nstore b into ':OUTPATH:' using PigStorage;",
  476. },
  477. ]
  478. },
  479. {
  480. 'name' => 'FilterUdf',
  481. 'tests' => [
  482. {
  483. 'num' => 1,
  484. 'pig' => q\
  485. a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  486. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  487. c = cogroup a by (name, age), b by (name, age);
  488. d = filter c by not IsEmpty(a);
  489. e = filter d by not IsEmpty(b);
  490. f = foreach e generate flatten(a), flatten(b);
  491. store f into ':OUTPATH:';\,
  492. },
  493. {
  494. 'num' => 2,
  495. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  496. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  497. c = filter a by age < 50;
  498. d = filter b by age < 50;
  499. e = cogroup c by (name, age), d by (name, age);
  500. f = filter e by COUNT(c)> 0 AND COUNT(d)>0;
  501. store f into ':OUTPATH:';\,
  502. 'rc' => 0
  503. },
  504. ]
  505. },
  506. # TODO: Groups that don't flatten via Agg functions
  507. {
  508. 'name' => 'GroupAggFunc',
  509. 'tests' => [
  510. {
  511. 'num' => 1,
  512. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  513. b = group a by name;
  514. c = foreach b generate group, COUNT(a.age);
  515. store c into ':OUTPATH:';\,
  516. },
  517. {
  518. 'num' => 2,
  519. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  520. b = group a by $0;
  521. c = foreach b generate group, COUNT(a.$1);
  522. store c into ':OUTPATH:';\,
  523. },
  524. {
  525. 'num' => 3,
  526. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  527. b = group a by (name, age);
  528. c = foreach b generate group.name, group.age, COUNT(a.gpa);
  529. store c into ':OUTPATH:';\,
  530. },
  531. {
  532. 'num' => 5,
  533. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  534. b = group a all;
  535. c = foreach b generate COUNT(a.$0);
  536. store c into ':OUTPATH:';\,
  537. },
  538. {
  539. 'num' => 6,
  540. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  541. b = group a by name;
  542. c = foreach b generate group, SUM(a.age);
  543. store c into ':OUTPATH:';\,
  544. },
  545. {
  546. 'num' => 7,
  547. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  548. b = group a by name;
  549. c = foreach b generate group, SUM(a.gpa);
  550. store c into ':OUTPATH:';\,
  551. 'floatpostprocess' => 1,
  552. 'delimiter' => ' ',
  553. },
  554. {
  555. 'num' => 8,
  556. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  557. b = group a by name;
  558. c = foreach b generate group, AVG(a.age);
  559. store c into ':OUTPATH:';\,
  560. },
  561. {
  562. 'num' => 9,
  563. 'ignore23' => 'I cannot get it right due to float precision, temporarily disable',
  564. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  565. b = group a by name;
  566. c = foreach b generate group, AVG(a.gpa);
  567. store c into ':OUTPATH:';\,
  568. 'floatpostprocess' => 1,
  569. 'delimiter' => ' ',
  570. },
  571. {
  572. 'num' => 10,
  573. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  574. b = group a by name;
  575. c = foreach b generate group, MIN(a.gpa);
  576. store c into ':OUTPATH:';\,
  577. },
  578. {
  579. 'num' => 11,
  580. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  581. b = group a by name;
  582. c = foreach b generate group, MAX(a.gpa);
  583. store c into ':OUTPATH:';\,
  584. },
  585. {
  586. 'num' => 12,
  587. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  588. b = group a by (name, age);
  589. c = foreach b generate flatten(group), SUM(a.gpa);
  590. store c into ':OUTPATH:';\,
  591. 'floatpostprocess' => 1,
  592. 'delimiter' => ' ',
  593. },
  594. {
  595. 'num' => 13,
  596. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  597. b = group a by (name);
  598. c = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  599. d = cogroup b by group, c by name;
  600. e = foreach d generate flatten(group), SUM(c.gpa), COUNT(c.name);
  601. store e into ':OUTPATH:';\,
  602. 'floatpostprocess' => 1,
  603. 'delimiter' => ' ',
  604. },
  605. {
  606. 'num' => 14,
  607. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  608. b = group a by (name);
  609. e = foreach b generate COUNT(a.name);
  610. store e into ':OUTPATH:';\,
  611. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  612. b = group a by (name);
  613. e = foreach b generate COUNT(a.name);
  614. store e into ':OUTPATH:';\,
  615. }
  616. ],
  617. },
  618. {
  619. 'name' => 'MapPartialAgg',
  620. 'tests' => [
  621. {
  622. 'num' => 1,
  623. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  624. b = group a by name;
  625. c = foreach b generate group, COUNT(a.age);
  626. store c into ':OUTPATH:';\,
  627. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  628. },
  629. {
  630. #multiquery with group in one sub query
  631. 'num' => 2,
  632. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  633. b = filter a by age < 22; store b into ':OUTPATH:.1';
  634. c = group b by age;
  635. d = foreach c generate group, SUM(b.gpa);
  636. store d into ':OUTPATH:.2'; #,
  637. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  638. },
  639. {
  640. #multi query with two group on diff columns
  641. 'num' => 3,
  642. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  643. g1 = group a by name;
  644. f1 = foreach g1 generate group as name, MAX(a.gpa);
  645. store f1 into ':OUTPATH:.1';
  646. g2 = group a by age;
  647. f2 = foreach g2 generate group as age, AVG(a.gpa);
  648. store f2 into ':OUTPATH:.2'; #,
  649. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  650. },
  651. {
  652. #multi query with three groups on diff columns, group key being an expression
  653. 'num' => 4,
  654. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  655. g1 = group a by name;
  656. f1 = foreach g1 generate group as name, MAX(a.gpa);
  657. store f1 into ':OUTPATH:.1';
  658. g2 = group a by age%10;
  659. f2 = foreach g2 generate group as age_mod10, AVG(a.gpa);
  660. store f2 into ':OUTPATH:.2';
  661. g3 = group a by age;
  662. f3 = foreach g3 generate group%10, AVG(a.gpa);
  663. store f3 into ':OUTPATH:.3';
  664. g4 = group a by gpa;
  665. f4 = foreach g4 generate group as gpa, COUNT(a);
  666. store f4 into ':OUTPATH:.4';
  667. #,
  668. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  669. },
  670. {
  671. #aggregation gets more than one tuple for every tuple from load func
  672. 'num' => 5,
  673. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  674. b = foreach a generate name, age, gpa, flatten(TOBAG(age,age)) as x;
  675. c = group b by age;
  676. d = foreach c generate group, AVG(b.gpa);
  677. store d into ':OUTPATH:'; #,
  678. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  679. },
  680. ],
  681. },
  682. {
  683. 'name' => 'EvalFunc',
  684. 'tests' => [
  685. {
  686. 'num' => 1,
  687. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  688. b = filter a by name lt 'b';
  689. c = foreach b generate ARITY(name, age, gpa);
  690. store c into ':OUTPATH:';\,
  691. },
  692. {
  693. 'num' => 2,
  694. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
  695. b = filter a by name lt 'b';
  696. c = foreach b generate TOKENIZE(name);
  697. d = foreach c generate flatten($0);
  698. store d into ':OUTPATH:';\,
  699. },
  700. {
  701. 'num' => 3,
  702. 'pig' => q\register :FUNCPATH:/testudf.jar;
  703. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  704. b = filter a by name lt 'b';
  705. c = foreach b generate org.apache.pig.test.udf.evalfunc.Swap(name, age);
  706. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  707. },
  708. {
  709. 'num' => 4,
  710. 'pig' => q\register :FUNCPATH:/testudf.jar;
  711. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  712. b = filter a by name lt 'b';
  713. c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
  714. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  715. },
  716. {
  717. 'num' => 5,
  718. 'pig' => q\register :FUNCPATH:/testudf.jar;
  719. a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  720. b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
  721. store b into ':OUTPATH:';\,
  722. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  723. b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
  724. store b into ':OUTPATH:';\,
  725. }
  726. ]
  727. },
  728. # TODO DIFF
  729. # TODO User defined grouping function
  730. {
  731. 'name' => 'CoGroupFlatten',
  732. 'tests' => [
  733. {
  734. 'num' => 1,
  735. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  736. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  737. c = filter a by age < 20;
  738. d = filter b by age < 20;
  739. e = cogroup c by name, d by name;
  740. f = foreach e generate flatten (c), flatten(d);
  741. store f into ':OUTPATH:';\,
  742. },
  743. {
  744. 'num' => 2,
  745. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  746. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  747. c = filter a by $1 < 20;
  748. d = filter b by $1 < 20;
  749. e = cogroup c by $0, d by $0;
  750. f = foreach e generate flatten (c), flatten(d);
  751. store f into ':OUTPATH:';\,
  752. },
  753. {
  754. 'num' => 3,
  755. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  756. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  757. c = filter a by age < 20;
  758. d = filter b by age < 20;
  759. e = cogroup c by (name, age), d by (name, age);
  760. f = foreach e generate flatten (c), flatten(d);
  761. store f into ':OUTPATH:';\,
  762. },
  763. {
  764. 'num' => 4,
  765. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  766. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  767. d = filter b by age < 20;
  768. e = cogroup a by (name, age) inner, d by (name, age);
  769. f = foreach e generate flatten (a), flatten(d);
  770. store f into ':OUTPATH:';\,
  771. },
  772. {
  773. 'num' => 5,
  774. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  775. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  776. c = filter a by age < 20;
  777. e = cogroup c by (name, age), b by (name, age) inner;
  778. f = foreach e generate flatten (c), flatten(b);
  779. store f into ':OUTPATH:';\,
  780. },
  781. {
  782. 'num' => 6,
  783. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  784. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  785. e = cogroup a by (name, age) inner, b by (name, age) inner;
  786. f = foreach e generate flatten (a), flatten(b);
  787. store f into ':OUTPATH:';\,
  788. },
  789. {
  790. # Test cogrouping data loaded from two separate loaders. We don't have any data that can join with studenttab that isn't also loaded with PigStorage, so the
  791. # first step is an intermediate load and store using BinStorage.
  792. 'num' => 7,
  793. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  794. store a into ':OUTPATH:.intermediate' using BinStorage();
  795. b = load ':OUTPATH:.intermediate' using BinStorage() as (name, age, gpa);
  796. c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  797. e = cogroup b by (name, age) inner, c by (name, age) inner;
  798. f = foreach e generate flatten (b), flatten(c);
  799. store f into ':OUTPATH:';\,
  800. 'notmq' => 1,
  801. },
  802. ]
  803. },
  804. {
  805. 'name' => 'CoGroup',
  806. 'tests' => [
  807. {
  808. 'num' => 1,
  809. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  810. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  811. c = cogroup a by name, b by name;
  812. d = foreach c generate flatten(group), COUNT(a) + COUNT(b);
  813. store d into ':OUTPATH:';\,
  814. },
  815. ]
  816. },
  817. {
  818. 'name' => 'Join',
  819. 'tests' => [
  820. {
  821. 'num' => 1,
  822. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  823. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  824. c = filter a by age < 20;
  825. d = filter b by age < 20;
  826. e = join c by name, d by name;
  827. store e into ':OUTPATH:';\,
  828. },
  829. {
  830. 'num' => 2,
  831. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  832. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  833. c = filter a by age < 20;
  834. d = filter b by age < 20;
  835. e = join c by $0, d by $0;
  836. store e into ':OUTPATH:';\,
  837. },
  838. {
  839. 'num' => 3,
  840. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  841. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  842. c = filter a by age < 20;
  843. d = filter b by age < 20;
  844. e = join c by (name, age), d by (name, age);
  845. store e into ':OUTPATH:';\,
  846. },
  847. # self join with implicit split
  848. # JIRA PIG-429
  849. {
  850. 'num' => 4,
  851. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  852. b = filter a by $1 > 25;
  853. c = join a by $0, b by $0;
  854. store c into ':OUTPATH:';\,
  855. },
  856. # join with one input having schema and another without
  857. # JIRA PIG-428
  858. {
  859. 'num' => 5,
  860. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray,age:int, gpa:double);
  861. another = load ':INPATH:/singlefile/studenttab10k';
  862. c = foreach another generate $0, $1+ 10, $2 + 10.0;
  863. d = join a by $0, c by $0;
  864. store d into ':OUTPATH:';\,
  865. },
  866. # self join using fragment replicate join
  867. # no types
  868. {
  869. 'num' => 6,
  870. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  871. b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  872. c = join a by name, b by name using 'repl';
  873. store c into ':OUTPATH:';\,
  874. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  875. b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  876. c = join a by name, b by name ;
  877. store c into ':OUTPATH:';\,
  878. },
  879. # self join using fragment replicate join
  880. # with types and no cast for join key
  881. {
  882. 'num' => 7,
  883. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  884. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  885. c = join a by name, b by name using 'repl';
  886. store c into ':OUTPATH:';\,
  887. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  888. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  889. c = join a by name, b by name ;
  890. store c into ':OUTPATH:';\,
  891. },
  892. # self join using fragment replicate join
  893. # with types and cast for join key
  894. {
  895. 'num' => 8,
  896. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  897. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
  898. c = join a by gpa, b by gpa using 'repl';
  899. store c into ':OUTPATH:';\,
  900. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  901. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
  902. c = join a by gpa, b by gpa ;
  903. store c into ':OUTPATH:';\,
  904. },
  905. # left outer join
  906. {
  907. 'num' => 9,
  908. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  909. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  910. c = join a by name left outer, b by name;
  911. store c into ':OUTPATH:';\,
  912. },
  913. # right outer join
  914. {
  915. 'num' => 10,
  916. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  917. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  918. c = join a by name right outer, b by name;
  919. store c into ':OUTPATH:';\,
  920. },
  921. # full outer join
  922. {
  923. 'num' => 11,
  924. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  925. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  926. c = join a by name full outer, b by name;
  927. store c into ':OUTPATH:';\,
  928. },
  929. # See PIG-1209: the join package now uses InternalCachedBag, so with memusage set to 0 every tuple on the reduce side in this test will be spilled to disk.
  930. {
  931. 'num' => 12,
  932. 'java_params' => ['-Dpig.cachedbag.memusage=0'],
  933. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  934. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  935. c = filter a by age < 20;
  936. d = filter b by age < 20;
  937. e = join c by name, d by name;
  938. store e into ':OUTPATH:';\,
  939. },
  940. {
  941. 'num' => 13,
  942. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  943. b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  944. c = filter a by age < 20;
  945. d = filter b by age < 20;
  946. e = join c by instate, d by instate parallel 5;
  947. store e into ':OUTPATH:';\,
  948. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  949. b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  950. c = filter a by age < 20;
  951. d = filter b by age < 20;
  952. e = join c by instate, d by instate parallel 5;
  953. store e into ':OUTPATH:';\,
  954. }
  955. ]
  956. },
  957. {
  958. 'name' => 'Foreach',
  959. 'tests' => [
  960. {
  961. 'num' => 1,
  962. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  963. b = foreach a generate *;
  964. store b into ':OUTPATH:';\,
  965. },
  966. {
  967. 'num' => 2,
  968. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  969. b = foreach a generate *;
  970. store b into ':OUTPATH:';\,
  971. },
  972. {
  973. 'num' => 3,
  974. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  975. b = foreach a generate name, age;
  976. store b into ':OUTPATH:';\,
  977. },
  978. {
  979. 'num' => 4,
  980. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  981. b = foreach a generate $0, $2;
  982. store b into ':OUTPATH:';\,
  983. },
  984. {
  985. # test filter, projection, sort, duplicate elimination
  986. 'num' => 5,
  987. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  988. b = filter a by age < 20;
  989. c = group b by age;
  990. d = foreach c {
  991. cf = filter b by gpa < 3.0;
  992. cp = cf.gpa;
  993. cd = distinct cp;
  994. co = order cd by $0;
  995. generate group, flatten(co);
  996. }
  997. store d into ':OUTPATH:';\,
  998. },
  999. {
  1000. # test flatten for map and scalar
  1001. 'num' => 6,
  1002. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1003. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1004. b = foreach a generate flatten(name) as n, flatten(org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, gpa)) as m;
  1005. store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  1006. },
  1007. {
  1008. # test flatten for UDF that returns bag with multiple tuples with multiple columns
  1009. 'num' => 7,
  1010. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1011. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1012. b = foreach a generate name, flatten(org.apache.pig.test.udf.evalfunc.CreateTupleBag(age, gpa)) as foo;
  1013. store b into ':OUTPATH:';\,
  1014. },
  1015. {
  1016. 'num' => 8,
  1017. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age: int, gpa);
  1018. c = group a by name;
  1019. d = foreach c generate flatten(group), MAX(a.age) + MIN(a.age);
  1020. store d into ':OUTPATH:';\,
  1021. },
  1022. {
  1023. # test filter (range condition combined with 'and'), projection, sort, duplicate elimination
  1024. 'num' => 9,
  1025. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1026. b = filter a by age < 20;
  1027. c = group b by age;
  1028. d = foreach c {
  1029. cf = filter b by gpa >= 3.0 and gpa <= 3.5;
  1030. cp = cf.gpa;
  1031. cd = distinct cp;
  1032. co = order cd by $0;
  1033. generate group, flatten(co);
  1034. }
  1035. store d into ':OUTPATH:';\,
  1036. },
  1037. {
  1038. # test filter (compound 'or'/'and' condition), projection, sort, duplicate elimination
  1039. 'num' => 10,
  1040. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1041. b = filter a by age < 20;
  1042. c = group b by age;
  1043. d = foreach c {
  1044. cf = filter b by (gpa == 4.0 or gpa != 2.0) and name > 'a';
  1045. cp = cf.gpa;
  1046. cd = distinct cp;
  1047. co = order cd by $0;
  1048. generate group, flatten(co);
  1049. }
  1050. store d into ':OUTPATH:';\,
  1051. },
  1052. {
  1053. # test nested foreach with expression aliases, where one alias (exp2) is built from another (exp1)
  1054. 'num' => 11,
  1055. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1056. b = filter a by age < 20;
  1057. c = foreach b {
  1058. exp1 = age + gpa;
  1059. exp2 = exp1 + age;
  1060. generate exp1, exp2;
  1061. }
  1062. store c into ':OUTPATH:';\,
  1063. },
  1064. {
  1065. # test a udf with no args
  1066. 'num' => 12,
  1067. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1068. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1069. b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
  1070. store b into ':OUTPATH:';\,
  1071. },
  1072. {
  1073. 'num' => 13,
  1074. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  1075. b = foreach a generate *;
  1076. store b into ':OUTPATH:';\,
  1077. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  1078. b = foreach a generate *;
  1079. store b into ':OUTPATH:';\,
  1080. }
  1081. ]
  1082. },
  1083. {
  1084. 'name' => 'Order',
  1085. 'tests' => [
  1086. {
  1087. 'num' => 1,
  1088. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1089. b = foreach a generate name;
  1090. c = order b by name;
  1091. store c into ':OUTPATH:';\,
  1092. 'sortArgs' => ['-t', ' ', '-k', '1,1'],
  1093. },
  1094. {
  1095. 'num' => 2,
  1096. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1097. b = foreach a generate $1;
  1098. c = order b by $0;
  1099. store c into ':OUTPATH:';\,
  1100. 'sortArgs' => ['-t', ' ', '-k', '1,1'],
  1101. },
  1102. {
  1103. 'num' => 3,
  1104. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1105. b = foreach a generate gpa;
  1106. c = order b by gpa;
  1107. store c into ':OUTPATH:';\,
  1108. 'sortArgs' => ['-t', ' ', '-k', '1,1'],
  1109. },
  1110. {
  1111. 'num' => 4,
  1112. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1113. b = order a by *;
  1114. store b into ':OUTPATH:';\,
  1115. 'sortArgs' => ['-t', ' '],
  1116. },
  1117. {
  1118. 'num' => 5,
  1119. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1120. b = foreach a generate name, age;
  1121. c = order b by name, age;
  1122. store c into ':OUTPATH:';\,
  1123. 'sortArgs' => ['-t', ' ', '-k', '1,2'],
  1124. },
  1125. {
  1126. 'num' => 6,
  1127. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1128. c = order a by $0;
  1129. store c into ':OUTPATH:';\,
  1130. 'sortArgs' => ['-t', ' ', '-k', '1,1'],
  1131. },
  1132. {
  1133. 'num' => 7,
  1134. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1135. c = order a by $1;
  1136. store c into ':OUTPATH:';\,
  1137. 'sortArgs' => ['-t', ' ', '-k', '2,2'],
  1138. },
  1139. {
  1140. 'num' => 8,
  1141. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1142. c = order a by $0, $1;
  1143. store c into ':OUTPATH:';\,
  1144. 'sortArgs' => ['-t', ' ', '-k', '1,2'],
  1145. },
  1146. {
  1147. 'num' => 9,
  1148. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1149. c = order a by $1, $0;
  1150. store c into ':OUTPATH:';\,
  1151. 'sortArgs' => ['-t', ' ', '-k', '2,2', '-k', '1,1'],
  1152. },
  1153. {
  1154. 'num' => 10,
  1155. 'ignore' => 'order by UDF is not supported',
  1156. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1157. a = load ':INPATH:/singlefile/studenttab10k';
  1158. c = order a by * using org.apache.pig.test.udf.orderby.OrdDesc;
  1159. store c into ':OUTPATH:';\,
  1160. 'sortArgs' => ['-t', ' ', '-r'],
  1161. },
  1162. {
  1163. 'num' => 11,
  1164. 'ignore' => 'order by UDF is not supported',
  1165. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1166. a = load ':INPATH:/singlefile/studenttab10k';
  1167. c = order a by $0 using org.apache.pig.test.udf.orderby.OrdDesc;
  1168. store c into ':OUTPATH:';\,
  1169. 'sortArgs' => ['-t', ' ', '-r', '-k', '1,1'],
  1170. },
  1171. {
  1172. 'num' => 12,
  1173. 'ignore' => 'order by UDF is not supported',
  1174. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1175. a = load ':INPATH:/singlefile/studenttab10k';
  1176. c = order a by $0, $1 using org.apache.pig.test.udf.orderby.OrdDesc;
  1177. store c into ':OUTPATH:';\,
  1178. 'sortArgs' => ['-t', ' ', '-r', '-k', '1,2'],
  1179. },
  1180. # ALERT All these tests with inner order bys aren't testing the inner
  1181. # ordering. We need to develop a sorting tool to do that.
  1182. {
  1183. 'num' => 13,
  1184. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1185. b = group a by $0;
  1186. c = foreach b {c1 = order $1 by $1; generate flatten(c1); };
  1187. store c into ':OUTPATH:';\,
  1188. },
  1189. {
  1190. 'num' => 14,
  1191. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1192. b = group a by $0;
  1193. c = foreach b {c1 = order $1 by *; generate flatten(c1); };
  1194. store c into ':OUTPATH:';\,
  1195. },
  1196. {
  1197. 'num' => 15,
  1198. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1199. a = load ':INPATH:/singlefile/studenttab10k';
  1200. b = group a by $0;
  1201. c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };
  1202. store c into ':OUTPATH:';\,
  1203. },
  1204. {
  1205. 'num' => 16,
  1206. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1207. a = load ':INPATH:/singlefile/studenttab10k';
  1208. b = group a by $0;
  1209. c = foreach b {c1 = order $1 by $1 using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1);};
  1210. store c into ':OUTPATH:';\,
  1211. },
  1212. {
  1213. 'num' => 17,
  1214. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1215. b = group a by $0;
  1216. c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };
  1217. store c into ':OUTPATH:';\,
  1218. },
  1219. {
  1220. # test to make sure the weighted range partitioning
  1221. # works correctly when a sort key value repeats across
  1222. # reduce partitions
  1223. 'num' => 18,
  1224. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1225. b = order a by $1 parallel 100;
  1226. store b into ':OUTPATH:';\,
  1227. 'sortArgs' => ['-t', ' ', '-k', '2,2'],
  1228. },
  1229. {
  1230. 'num' => 19,
  1231. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  1232. b = foreach a generate instate;
  1233. c = order b by instate;
  1234. store c into ':OUTPATH:';\,
  1235. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
  1236. b = foreach a generate instate;
  1237. c = order b by instate;
  1238. store c into ':OUTPATH:';\,
  1239. 'sortArgs' => ['-t', ' ', '-k', '1,1'],
  1240. },
  1241. ]
  1242. },
  1243. {
  1244. 'name' => 'Distinct',
  1245. 'tests' => [
  1246. {
  1247. 'num' => 1,
  1248. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1249. b = foreach a generate name;
  1250. c = distinct b;
  1251. store c into ':OUTPATH:';\,
  1252. },
  1253. {
  1254. 'num' => 2,
  1255. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1256. b = foreach a generate $1;
  1257. c = distinct b;
  1258. store c

Large files files are truncated, but you can click here to view the full file