PageRenderTime 63ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/test/e2e/pig/tests/nightly.conf

https://github.com/zjffdu/pig
Perl | 4623 lines | 4186 code | 112 blank | 325 comment | 102 complexity | 735ef9c41158ef2319bd7bea7c6a5f6f MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env perl
  2. ############################################################################
  3. # Licensed to the Apache Software Foundation (ASF) under one or more
  4. # contributor license agreements. See the NOTICE file distributed with
  5. # this work for additional information regarding copyright ownership.
  6. # The ASF licenses this file to You under the Apache License, Version 2.0
  7. # (the "License"); you may not use this file except in compliance with
  8. # the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. ###############################################################################
  18. # Nightly tests for pig.
  19. #
  20. #
  21. #PigSetup::setup();
  22. #my $me = `whoami`;
  23. #chomp $me;
  24. $cfg = {
  25. 'driver' => 'Pig',
  26. 'nummachines' => 5,
  27. 'verify_with_pig' => 1,
  28. 'verify_pig_version' => 'old',
  29. 'groups' => [
  30. {
  31. 'name' => 'Checkin',
  32. 'tests' => [
  33. {
  34. 'num' => 1,
  35. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  36. store a into ':OUTPATH:';\,
  37. },
  38. {
  39. 'num' => 2,
  40. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  41. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  42. c = filter a by age < 50;
  43. d = filter b by age < 50;
  44. e = cogroup c by (name, age), d by (name, age) ;
  45. f = foreach e generate flatten(c), flatten(d);
  46. g = group f by registration;
  47. h = foreach g generate group, SUM(f.d::contributions);
  48. i = order h by $1;
  49. store i into ':OUTPATH:';\,
  50. 'floatpostprocess' => 1,
  51. 'delimiter' => ' ',
  52. 'sortArgs' => ['-t', ' ', '+1', '-2'],
  53. }
  54. ]
  55. },
  56. {
  57. 'name' => 'LoaderDefaultDir',
  58. 'tests' => [
  59. {
  60. 'num' => 1,
  61. 'pig' => q\a = load ':INPATH:/dir/studenttab10k' as (name, age, gpa);
  62. store a into ':OUTPATH:';\,
  63. },
  64. ]
  65. },
  66. {
  67. 'name' => 'LoaderPigStorageArg',
  68. 'tests' => [
  69. {
  70. 'num' => 1,
  71. 'pig' => q\a = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa);
  72. store a into ':OUTPATH:';\,
  73. },
  74. {
  75. # load with control character
  76. 'num' => 2,
  77. 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  78. store a into ':OUTPATH:';#,
  79. },
  80. {
  81. # load and store with control character
  82. 'num' => 3,
  83. 'pig' => q#a = load ':INPATH:/singlefile/studentctrla10k' using PigStorage('\\u0001') as (name, age, gpa);
  84. store a into ':OUTPATH:.intermediate' using PigStorage('\\u0001');
  85. b = load ':OUTPATH:.intermediate' using PigStorage('\\u0001') as (name, age, gpa);
  86. store b into ':OUTPATH:'; #,
  87. 'notmq' => 1,
  88. },
  89. ]
  90. },
  91. {
  92. # Results doctored, if you change this query you need to copy the
  93. # expected results into test/nightly/benchmarks
  94. 'name' => 'LoaderBinStorage',
  95. 'tests' => [
  96. {
  97. 'num' => 1,
  98. 'pig' => q\register :FUNCPATH:/testudf.jar;
  99. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  100. b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Swap(name, age), TOKENIZE((chararray)name), org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
  101. store b into ':OUTPATH:.intermediate' using BinStorage();
  102. c = load ':OUTPATH:.intermediate' using BinStorage();
  103. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  104. 'notmq' => 1,
  105. },
  106. ]
  107. },
  108. {
  109. # Results doctored, if you change this query you need to copy the
  110. # expected results into test/nightly/benchmarks
  111. 'name' => 'LoaderTextLoader',
  112. 'tests' => [
  113. {
  114. 'num' => 1,
  115. 'pig' => q\register :FUNCPATH:/testudf.jar;
  116. a = load ':INPATH:/singlefile/textdoc' using TextLoader();
  117. b = foreach a generate TOKENIZE((chararray)$0);
  118. store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  119. },
  120. ]
  121. },
  122. {
  123. 'name' => 'FilterBoolean',
  124. 'tests' => [
  125. {
  126. 'num' => 1,
  127. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  128. b = filter a by name == 'fred allen' and age > 50;
  129. store b into ':OUTPATH:' using PigStorage;\,
  130. },
  131. {
  132. 'num' => 2,
  133. 'pig' => q\a = load ':INPATH:/dir/studenttab10k' using PigStorage() as (name, age, gpa);
  134. b = filter a by name != 'fred allen' or age < 10;
  135. store b into ':OUTPATH:' using PigStorage;\,
  136. },
  137. {
  138. 'num' => 3,
  139. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  140. b = filter a by not (age == 50);
  141. store b into ':OUTPATH:' using PigStorage;\,
  142. },
  143. {
  144. 'num' => 4,
  145. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  146. b = filter a by (age >= 50 or name > 'fred') and (gpa <= 3.0 or name >= 'bob');
  147. store b into ':OUTPATH:' using PigStorage;\,
  148. },
  149. {
  150. 'num' => 5,
  151. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  152. b = filter a by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';
  153. store b into ':OUTPATH:' using PigStorage;\,
  154. },
  155. # test filter <= and >= for chararray, int and double
  156. {
  157. 'num' => 6,
  158. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  159. b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
  160. store b into ':OUTPATH:' using PigStorage;\,
  161. },
  162. # test filter <= and >= for bytearray, long and float
  163. {
  164. 'num' => 7,
  165. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  166. b = filter a by age >= 40 and age <=50 and gpa >= 2.0f and gpa <= 3.0f and name >= 'bob' and name <= 'fred';
  167. store b into ':OUTPATH:' using PigStorage;\,
  168. },
  169. # test filter < and > for chararray, int and double
  170. {
  171. 'num' => 8,
  172. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  173. b = filter a by age > 40 and age <50 and gpa > 2.0 and gpa < 3.0 and name > 'bob' and name < 'fred';
  174. store b into ':OUTPATH:' using PigStorage;\,
  175. },
  176. # test filter < and > for bytearray, long and float
  177. {
  178. 'num' => 9,
  179. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  180. b = filter a by age > 40 and age <50 and gpa > 2.0f and gpa < 3.0f and name > 'bob' and name < 'fred';
  181. store b into ':OUTPATH:' using PigStorage;\,
  182. },
  183. # test filter <= and >= for explicit cast for chararray, int and double
  184. {
  185. 'num' => 10,
  186. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  187. b = filter a by (int)age >= 40 and (int)age <=50 and (double)gpa >= 2.0 and (double)gpa <= 3.0 and (chararray)name >= 'bob' and (chararray)name <= 'fred';
  188. store b into ':OUTPATH:' using PigStorage;\,
  189. },
  190. # test filter <= and >= for explicit cast for bytearray, long and float
  191. {
  192. 'num' => 11,
  193. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  194. b = filter a by (long)age >= 40 and (long)age <=50 and (float)gpa >= 2.0f and (float)gpa <= 3.0f and name >= 'bob' and name <= 'fred';
  195. store b into ':OUTPATH:' using PigStorage;\,
  196. },
  197. # test filter < and > for explicit cast for chararray, int and double
  198. {
  199. 'num' => 12,
  200. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  201. b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
  202. store b into ':OUTPATH:' using PigStorage;\,
  203. },
  204. # test filter < and > for explicit cast for bytearray, long and float
  205. {
  206. 'num' => 13,
  207. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  208. b = filter a by (long)age > 40 and (long)age <50 and (float)gpa > 2.0f and (float)gpa < 3.0f and name > 'bob' and name < 'fred';
  209. store b into ':OUTPATH:' using PigStorage;\,
  210. },
  211. # test AND with nulls
  212. {
  213. 'num' => 14,
  214. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  215. b = filter a by name == 'fred allen' and age > 50;
  216. store b into ':OUTPATH:' using PigStorage;\,
  217. },
  218. # test OR with nulls
  219. {
  220. 'num' => 15,
  221. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  222. b = filter a by name != 'fred allen' or age < 10;
  223. store b into ':OUTPATH:' using PigStorage;\,
  224. },
  225. # test with nulls filter <= and >= for chararray, int and double
  226. {
  227. 'num' => 16,
  228. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  229. b = filter a by age >= 40 and age <=50 and gpa >= 2.0 and gpa <= 3.0 and name >= 'bob' and name <= 'fred';
  230. store b into ':OUTPATH:' using PigStorage;\,
  231. },
  232. # test with nulls filter < and > for explicit cast for chararray, int and double
  233. {
  234. 'num' => 17,
  235. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);
  236. b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
  237. store b into ':OUTPATH:' using PigStorage;\,
  238. },
  239. {
  240. 'num' => 18,
  241. 'ignore' => 1, # PIG-2593
  242. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  243. b = filter a by instate;
  244. store b into ':OUTPATH:' using PigStorage;\,
  245. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  246. b = filter a by instate == 'true';
  247. store b into ':OUTPATH:' using PigStorage;\,
  248. },
  249. {
  250. 'num' => 19,
  251. 'ignore' => 1, # PIG-2593
  252. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  253. b = filter a by not instate;
  254. store b into ':OUTPATH:' using PigStorage;\,
  255. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  256. b = filter a by instate == 'false';
  257. store b into ':OUTPATH:' using PigStorage;\,
  258. },
  259. {
  260. 'num' => 20,
  261. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  262. b = filter a by instate is null;
  263. store b into ':OUTPATH:' using PigStorage;\,
  264. },
  265. {
  266. 'num' => 21,
  267. 'ignore' => 1, # TODO Need to file a JIRA-2
  268. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  269. b = filter a by instate == true;
  270. store b into ':OUTPATH:' using PigStorage;\,
  271. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  272. b = filter a by instate == 'true';
  273. store b into ':OUTPATH:' using PigStorage;\,
  274. },
  275. {
  276. 'num' => 22,
  277. 'ignore' => 1, # TODO Need to file a JIRA-2
  278. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  279. b = filter a by instate == false;
  280. store b into ':OUTPATH:' using PigStorage;\,
  281. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
  282. b = filter a by instate == 'false';
  283. store b into ':OUTPATH:' using PigStorage;\,
  284. },
  285. {
  286. 'num' => 23,
  287. 'ignore' => 1, # TODO Need to file a JIRA-1
  288. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  289. b = filter a by instate;
  290. store b into ':OUTPATH:' using PigStorage;\,
  291. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  292. b = filter a by instate == 'true';
  293. store b into ':OUTPATH:' using PigStorage;\,
  294. },
  295. {
  296. 'num' => 24,
  297. 'ignore' => 1, # TODO Need to file a JIRA-1
  298. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  299. b = filter a by not instate;
  300. store b into ':OUTPATH:' using PigStorage;\,
  301. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  302. b = filter a by instate == 'false';
  303. store b into ':OUTPATH:' using PigStorage;\,
  304. },
  305. {
  306. 'num' => 25,
  307. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  308. b = filter a by instate is null;
  309. store b into ':OUTPATH:' using PigStorage;\,
  310. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  311. b = filter a by instate is null;
  312. store b into ':OUTPATH:' using PigStorage;\,
  313. },
  314. {
  315. 'num' => 26,
  316. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  317. b = filter a by instate == true;
  318. store b into ':OUTPATH:' using PigStorage;\,
  319. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  320. b = filter a by instate == 'true';
  321. store b into ':OUTPATH:' using PigStorage;\,
  322. },
  323. {
  324. 'num' => 27,
  325. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  326. b = filter a by instate == false;
  327. store b into ':OUTPATH:' using PigStorage;\,
  328. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  329. b = filter a by instate == 'false';
  330. store b into ':OUTPATH:' using PigStorage;\,
  331. },
  332. ],
  333. },
  334. {
  335. 'name' => 'FilterEq',
  336. 'tests' => [
  337. {
  338. 'num' => 1,
  339. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  340. b = filter a by name == 'alice johnson' and age == 64 and gpa == 3.99;
  341. store b into ':OUTPATH:' using PigStorage;\,
  342. },
  343. {
  344. 'num' => 2,
  345. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  346. b = filter a by name > 'fred allen' and age > 40 and gpa > 2.50;
  347. store b into ':OUTPATH:' using PigStorage;\,
  348. },
  349. {
  350. 'num' => 3,
  351. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  352. b = filter a by name >= 'fred allen' and age >= 40 and gpa >= 2.50;
  353. store b into ':OUTPATH:' using PigStorage;\,
  354. },
  355. {
  356. 'num' => 4,
  357. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  358. b = filter a by name lt 'fred allen' and age < 40 and gpa < 2.50;
  359. store b into ':OUTPATH:' using PigStorage;\,
  360. },
  361. {
  362. 'num' => 5,
  363. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  364. b = filter a by name lte 'fred allen' and age <= 40 and gpa <= 2.50;
  365. store b into ':OUTPATH:' using PigStorage;\,
  366. },
  367. {
  368. 'num' => 6,
  369. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
  370. b = filter a by $0 neq 'fred allen' and $1 != '40' and $2 != '2.50';
  371. store b into ':OUTPATH:' using PigStorage;\,
  372. },
  373. # test for filter == for chararray, int and double
  374. {
  375. 'num' => 7,
  376. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  377. b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42;
  378. store b into ':OUTPATH:' using PigStorage;\,
  379. },
  380. # test for filter == for bytearray, long and float
  381. {
  382. 'num' => 8,
  383. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  384. b = filter a by name == 'fred allen' and age == 61 and gpa == 1.42f;
  385. store b into ':OUTPATH:' using PigStorage;\,
  386. },
  387. # test for filter != for chararray, int and double
  388. {
  389. 'num' => 9,
  390. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
  391. b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50;
  392. store b into ':OUTPATH:' using PigStorage;\,
  393. },
  394. # test for filter != for bytearray, long and float
  395. {
  396. 'num' => 10,
  397. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:float);
  398. b = filter a by $0 != 'fred allen' and $1 != 40 and $2 != 2.50f;
  399. store b into ':OUTPATH:' using PigStorage;\,
  400. },
  401. # test for filter == for explicit casts to chararray, int and double
  402. {
  403. 'num' => 11,
  404. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  405. b = filter a by (chararray)name == 'fred allen' and (int)age == 61 and (double)gpa == 1.42;
  406. store b into ':OUTPATH:' using PigStorage;\,
  407. },
  408. # test for filter == for explicit casts to bytearray, long and float
  409. {
  410. 'num' => 12,
  411. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  412. b = filter a by name == 'fred allen' and (long)age == 61 and (float)gpa == 1.42f;
  413. store b into ':OUTPATH:' using PigStorage;\,
  414. },
  415. # test for filter != for explicit casts to chararray, int and double
  416. {
  417. 'num' => 13,
  418. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
  419. b = filter a by (chararray)$0 != 'fred allen' and (int)$1 != 40 and (double)$2 != 2.50;
  420. store b into ':OUTPATH:' using PigStorage;\,
  421. },
  422. # test for filter != for explicit casts to bytearray, long and float
  423. {
  424. 'num' => 14,
  425. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() ;
  426. b = filter a by $0 != 'fred allen' and (long)$1 != 40 and (float)$2 != 2.50f;
  427. store b into ':OUTPATH:' using PigStorage;\,
  428. },
  429. ]
  430. },
  431. {
  432. 'name' => 'FilterMatches',
  433. 'tests' => [
  434. {
  435. 'num' => 1,
  436. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  437. b = filter a by name matches '^fred.*';
  438. store b into ':OUTPATH:' using PigStorage;\,
  439. },
  440. {
  441. 'num' => 2,
  442. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage();
  443. b = filter a by not $0 matches '^fred.*';
  444. store b into ':OUTPATH:' using PigStorage;\,
  445. },
  446. {
  447. # test for filter on matches for chararray (declared and explicit cast)
  448. 'num' => 3,
  449. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  450. b = filter a by name matches '^fred.*' and (chararray)registration matches '^dem.*';
  451. store b into ':OUTPATH:' using PigStorage;\,
  452. },
  453. {
  454. 'num' => 4,
  455. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  456. b = filter a by name matches 'f.ed' and (chararray)registration matches 'd.m';
  457. store b into ':OUTPATH:' using PigStorage;\,
  458. },
  459. {
  460. 'num' => 5,
  461. 'pig' => q\a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
  462. b = filter a by name matches 'f[^f]ed.*';
  463. store b into ':OUTPATH:' using PigStorage;\,
  464. },
  465. {
  466. 'num' => 6,
  467. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '.*\\\\wan.*';\nstore b into ':OUTPATH:' using PigStorage;",
  468. },
  469. {
  470. 'num' => 7,
  471. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches '^e.*\\\\sc.*';\nstore b into ':OUTPATH:' using PigStorage;",
  472. },
  473. {
  474. 'num' => 8,
  475. 'pig' => "a = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);\nb = filter a by name matches 'ethan white';\nstore b into ':OUTPATH:' using PigStorage;",
  476. },
  477. {
  478. 'num' => 9,
  479. 'pig' => "a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age, gpa);\nb = filter a by gpa matches '\\\\d\\\\.45';\nstore b into ':OUTPATH:' using PigStorage;",
  480. },
  481. ]
  482. },
  483. {
  484. 'name' => 'FilterUdf',
  485. 'tests' => [
  486. {
  487. 'num' => 1,
  488. 'pig' => q\
  489. a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  490. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  491. c = cogroup a by (name, age), b by (name, age);
  492. d = filter c by not IsEmpty(a);
  493. e = filter d by not IsEmpty(b);
  494. f = foreach e generate flatten(a), flatten(b);
  495. store f into ':OUTPATH:';\,
  496. },
  497. {
  498. 'num' => 2,
  499. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  500. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  501. c = filter a by age < 50;
  502. d = filter b by age < 50;
  503. e = cogroup c by (name, age), d by (name, age);
  504. f = filter e by COUNT(c)> 0 AND COUNT(d)>0;
  505. store f into ':OUTPATH:';\,
  506. 'rc' => 0
  507. },
  508. ]
  509. },
  510. # TODO Groups that don't flatten via Agg functions
  511. {
  512. 'name' => 'GroupAggFunc',
  513. 'tests' => [
  514. {
  515. 'num' => 1,
  516. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  517. b = group a by name;
  518. c = foreach b generate group, COUNT(a.age);
  519. store c into ':OUTPATH:';\,
  520. },
  521. {
  522. 'num' => 2,
  523. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  524. b = group a by $0;
  525. c = foreach b generate group, COUNT(a.$1);
  526. store c into ':OUTPATH:';\,
  527. },
  528. {
  529. 'num' => 3,
  530. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  531. b = group a by (name, age);
  532. c = foreach b generate group.name, group.age, COUNT(a.gpa);
  533. store c into ':OUTPATH:';\,
  534. },
  535. {
  536. 'num' => 5,
  537. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  538. b = group a all;
  539. c = foreach b generate COUNT(a.$0);
  540. store c into ':OUTPATH:';\,
  541. },
  542. {
  543. 'num' => 6,
  544. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  545. b = group a by name;
  546. c = foreach b generate group, SUM(a.age);
  547. store c into ':OUTPATH:';\,
  548. },
  549. {
  550. 'num' => 7,
  551. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  552. b = group a by name;
  553. c = foreach b generate group, SUM(a.gpa);
  554. store c into ':OUTPATH:';\,
  555. 'floatpostprocess' => 1,
  556. 'delimiter' => ' ',
  557. },
  558. {
  559. 'num' => 8,
  560. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  561. b = group a by name;
  562. c = foreach b generate group, AVG(a.age);
  563. store c into ':OUTPATH:';\,
  564. },
  565. {
  566. 'num' => 9,
  567. 'ignore23' => 'I cannot get it right due to float precision, temporarily disable',
  568. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  569. b = group a by name;
  570. c = foreach b generate group, AVG(a.gpa);
  571. store c into ':OUTPATH:';\,
  572. 'floatpostprocess' => 1,
  573. 'delimiter' => ' ',
  574. },
  575. {
  576. 'num' => 10,
  577. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  578. b = group a by name;
  579. c = foreach b generate group, MIN(a.gpa);
  580. store c into ':OUTPATH:';\,
  581. },
  582. {
  583. 'num' => 11,
  584. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  585. b = group a by name;
  586. c = foreach b generate group, MAX(a.gpa);
  587. store c into ':OUTPATH:';\,
  588. },
  589. {
  590. 'num' => 12,
  591. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  592. b = group a by (name, age);
  593. c = foreach b generate flatten(group), SUM(a.gpa);
  594. store c into ':OUTPATH:';\,
  595. 'floatpostprocess' => 1,
  596. 'delimiter' => ' ',
  597. },
  598. {
  599. 'num' => 13,
  600. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  601. b = group a by (name);
  602. c = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  603. d = cogroup b by group, c by name;
  604. e = foreach d generate flatten(group), SUM(c.gpa), COUNT(c.name);
  605. store e into ':OUTPATH:';\,
  606. 'floatpostprocess' => 1,
  607. 'delimiter' => ' ',
  608. },
  609. {
  610. 'num' => 14,
  611. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  612. b = group a by (name);
  613. e = foreach b generate COUNT(a.name);
  614. store e into ':OUTPATH:';\,
  615. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  616. b = group a by (name);
  617. e = foreach b generate COUNT(a.name);
  618. store e into ':OUTPATH:';\,
  619. }
  620. ],
  621. },
  622. {
  623. 'name' => 'MapPartialAgg',
  624. 'tests' => [
  625. {
  626. 'num' => 1,
  627. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  628. b = group a by name;
  629. c = foreach b generate group, COUNT(a.age);
  630. store c into ':OUTPATH:';\,
  631. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  632. },
  633. {
  634. #multiquery with group in one sub query
  635. 'num' => 2,
  636. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  637. b = filter a by age < 22; store b into ':OUTPATH:.1';
  638. c = group b by age;
  639. d = foreach c generate group, SUM(b.gpa);
  640. store d into ':OUTPATH:.2'; #,
  641. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  642. },
  643. {
  644. #multi query with two group on diff columns
  645. 'num' => 3,
  646. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  647. g1 = group a by name;
  648. f1 = foreach g1 generate group as name, MAX(a.gpa);
  649. store f1 into ':OUTPATH:.1';
  650. g2 = group a by age;
  651. f2 = foreach g2 generate group as age, AVG(a.gpa);
  652. store f2 into ':OUTPATH:.2'; #,
  653. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  654. },
  655. {
  656. #multi query with three groups on diff columns, group key being an expression
  657. 'num' => 4,
  658. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  659. g1 = group a by name;
  660. f1 = foreach g1 generate group as name, MAX(a.gpa);
  661. store f1 into ':OUTPATH:.1';
  662. g2 = group a by age%10;
  663. f2 = foreach g2 generate group as age_mod10, AVG(a.gpa);
  664. store f2 into ':OUTPATH:.2';
  665. g3 = group a by age;
  666. f3 = foreach g3 generate group%10, AVG(a.gpa);
  667. store f3 into ':OUTPATH:.3';
  668. g4 = group a by gpa;
  669. f4 = foreach g4 generate group as gpa, COUNT(a);
  670. store f4 into ':OUTPATH:.4';
  671. #,
  672. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  673. },
  674. {
  675. #aggregation gets more than one tuple for every tuple from load func
  676. 'num' => 5,
  677. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  678. b = foreach a generate name, age, gpa, flatten(TOBAG(age,age)) as x;
  679. c = group b by age;
  680. d = foreach c generate group, AVG(b.gpa);
  681. store d into ':OUTPATH:'; #,
  682. 'java_params' => ['-Dpig.exec.mapPartAgg=true']
  683. },
  684. ],
  685. },
  686. {
  687. 'name' => 'EvalFunc',
  688. 'tests' => [
  689. {
  690. 'num' => 1,
  691. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  692. b = filter a by name lt 'b';
  693. c = foreach b generate ARITY(name, age, gpa);
  694. store c into ':OUTPATH:';\,
  695. },
  696. {
  697. 'num' => 2,
  698. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
  699. b = filter a by name lt 'b';
  700. c = foreach b generate TOKENIZE(name);
  701. d = foreach c generate flatten($0);
  702. store d into ':OUTPATH:';\,
  703. },
  704. {
  705. 'num' => 3,
  706. 'pig' => q\register :FUNCPATH:/testudf.jar;
  707. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  708. b = filter a by name lt 'b';
  709. c = foreach b generate org.apache.pig.test.udf.evalfunc.Swap(name, age);
  710. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  711. },
  712. {
  713. 'num' => 4,
  714. 'pig' => q\register :FUNCPATH:/testudf.jar;
  715. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  716. b = filter a by name lt 'b';
  717. c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
  718. store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  719. },
  720. {
  721. 'num' => 5,
  722. 'pig' => q\register :FUNCPATH:/testudf.jar;
  723. a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  724. b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
  725. store b into ':OUTPATH:';\,
  726. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  727. b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
  728. store b into ':OUTPATH:';\,
  729. }
  730. ]
  731. },
  732. # TODO DIFF
  733. # TODO User defined grouping function
  734. {
  735. 'name' => 'CoGroupFlatten',
  736. 'tests' => [
  737. {
  738. 'num' => 1,
  739. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  740. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  741. c = filter a by age < 20;
  742. d = filter b by age < 20;
  743. e = cogroup c by name, d by name;
  744. f = foreach e generate flatten (c), flatten(d);
  745. store f into ':OUTPATH:';\,
  746. },
  747. {
  748. 'num' => 2,
  749. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  750. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  751. c = filter a by $1 < 20;
  752. d = filter b by $1 < 20;
  753. e = cogroup c by $0, d by $0;
  754. f = foreach e generate flatten (c), flatten(d);
  755. store f into ':OUTPATH:';\,
  756. },
  757. {
  758. 'num' => 3,
  759. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  760. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  761. c = filter a by age < 20;
  762. d = filter b by age < 20;
  763. e = cogroup c by (name, age), d by (name, age);
  764. f = foreach e generate flatten (c), flatten(d);
  765. store f into ':OUTPATH:';\,
  766. },
  767. {
  768. 'num' => 4,
  769. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  770. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  771. d = filter b by age < 20;
  772. e = cogroup a by (name, age) inner, d by (name, age);
  773. f = foreach e generate flatten (a), flatten(d);
  774. store f into ':OUTPATH:';\,
  775. },
  776. {
  777. 'num' => 5,
  778. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  779. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  780. c = filter a by age < 20;
  781. e = cogroup c by (name, age), b by (name, age) inner;
  782. f = foreach e generate flatten (c), flatten(b);
  783. store f into ':OUTPATH:';\,
  784. },
  785. {
  786. 'num' => 6,
  787. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  788. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  789. e = cogroup a by (name, age) inner, b by (name, age) inner;
  790. f = foreach e generate flatten (a), flatten(b);
  791. store f into ':OUTPATH:';\,
  792. },
  793. {
  794. # Test cogrouping data loaded from two separate loaders. We don't have any data that can join with studenttab that isn't also loaded with PigStorage, so the
  795. # first step is an intermediate load and store using BinStorage.
  796. 'num' => 7,
  797. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  798. store a into ':OUTPATH:.intermediate' using BinStorage();
  799. b = load ':OUTPATH:.intermediate' using BinStorage() as (name, age, gpa);
  800. c = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  801. e = cogroup b by (name, age) inner, c by (name, age) inner;
  802. f = foreach e generate flatten (b), flatten(c);
  803. store f into ':OUTPATH:';\,
  804. 'notmq' => 1,
  805. },
  806. ]
  807. },
  808. {
  809. 'name' => 'CoGroup',
  810. 'tests' => [
  811. {
  812. 'num' => 1,
  813. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  814. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  815. c = cogroup a by name, b by name;
  816. d = foreach c generate flatten(group), COUNT(a) + COUNT(b);
  817. store d into ':OUTPATH:';\,
  818. },
  819. ]
  820. },
  821. {
  822. 'name' => 'Join',
  823. 'tests' => [
  824. {
  825. 'num' => 1,
  826. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  827. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  828. c = filter a by age < 20;
  829. d = filter b by age < 20;
  830. e = join c by name, d by name;
  831. store e into ':OUTPATH:';\,
  832. },
  833. {
  834. 'num' => 2,
  835. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  836. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  837. c = filter a by age < 20;
  838. d = filter b by age < 20;
  839. e = join c by $0, d by $0;
  840. store e into ':OUTPATH:';\,
  841. },
  842. {
  843. 'num' => 3,
  844. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  845. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  846. c = filter a by age < 20;
  847. d = filter b by age < 20;
  848. e = join c by (name, age), d by (name, age);
  849. store e into ':OUTPATH:';\,
  850. },
  851. # self join with implicit split
  852. # JIRA PIG-429
  853. {
  854. 'num' => 4,
  855. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  856. b = filter a by $1 > 25;
  857. c = join a by $0, b by $0;
  858. store c into ':OUTPATH:';\,
  859. },
  860. # join with one input having schema and another without
  861. # JIRA PIG-428
  862. {
  863. 'num' => 5,
  864. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray,age:int, gpa:double);
  865. another = load ':INPATH:/singlefile/studenttab10k';
  866. c = foreach another generate $0, $1+ 10, $2 + 10.0;
  867. d = join a by $0, c by $0;
  868. store d into ':OUTPATH:';\,
  869. },
  870. # self join using fragment replicate join
  871. # no types
  872. {
  873. 'num' => 6,
  874. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  875. b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  876. c = join a by name, b by name using 'repl';
  877. store c into ':OUTPATH:';\,
  878. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  879. b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  880. c = join a by name, b by name ;
  881. store c into ':OUTPATH:';\,
  882. },
  883. # self join using fragment replicate join
  884. # with types and no cast for join key
  885. {
  886. 'num' => 7,
  887. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  888. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  889. c = join a by name, b by name using 'repl';
  890. store c into ':OUTPATH:';\,
  891. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  892. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  893. c = join a by name, b by name ;
  894. store c into ':OUTPATH:';\,
  895. },
  896. # self join using fragment replicate join
  897. # with types and cast for join key
  898. {
  899. 'num' => 8,
  900. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  901. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
  902. c = join a by gpa, b by gpa using 'repl';
  903. store c into ':OUTPATH:';\,
  904. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  905. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
  906. c = join a by gpa, b by gpa ;
  907. store c into ':OUTPATH:';\,
  908. },
  909. # left outer join
  910. {
  911. 'num' => 9,
  912. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  913. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  914. c = join a by name left outer, b by name;
  915. store c into ':OUTPATH:';\,
  916. },
  917. # right outer join
  918. {
  919. 'num' => 10,
  920. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  921. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  922. c = join a by name right outer, b by name;
  923. store c into ':OUTPATH:';\,
  924. },
  925. # full outer join
  926. {
  927. 'num' => 11,
  928. 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
  929. b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:long, registration:chararray, contributions:double);
  930. c = join a by name full outer, b by name;
  931. store c into ':OUTPATH:';\,
  932. },
  933. # See PIG-1209: the join package now uses InternalCachedBag, so every tuple on the reduce side in this test will be spilled to disk.
  934. {
  935. 'num' => 12,
  936. 'java_params' => ['-Dpig.cachedbag.memusage=0'],
  937. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
  938. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  939. c = filter a by age < 20;
  940. d = filter b by age < 20;
  941. e = join c by name, d by name;
  942. store e into ':OUTPATH:';\,
  943. },
  944. {
  945. 'num' => 13,
  946. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  947. b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  948. c = filter a by age < 20;
  949. d = filter b by age < 20;
  950. e = join c by instate, d by instate parallel 5;
  951. store e into ':OUTPATH:';\,
  952. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  953. b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  954. c = filter a by age < 20;
  955. d = filter b by age < 20;
  956. e = join c by instate, d by instate parallel 5;
  957. store e into ':OUTPATH:';\,
  958. }
  959. ]
  960. },
  961. {
  962. 'name' => 'Foreach',
  963. 'tests' => [
  964. {
  965. 'num' => 1,
  966. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  967. b = foreach a generate *;
  968. store b into ':OUTPATH:';\,
  969. },
  970. {
  971. 'num' => 2,
  972. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  973. b = foreach a generate *;
  974. store b into ':OUTPATH:';\,
  975. },
  976. {
  977. 'num' => 3,
  978. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  979. b = foreach a generate name, age;
  980. store b into ':OUTPATH:';\,
  981. },
  982. {
  983. 'num' => 4,
  984. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  985. b = foreach a generate $0, $2;
  986. store b into ':OUTPATH:';\,
  987. },
  988. {
  989. # test filter, projection, sort, duplicate elimination
  990. 'num' => 5,
  991. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  992. b = filter a by age < 20;
  993. c = group b by age;
  994. d = foreach c {
  995. cf = filter b by gpa < 3.0;
  996. cp = cf.gpa;
  997. cd = distinct cp;
  998. co = order cd by $0;
  999. generate group, flatten(co);
  1000. }
  1001. store d into ':OUTPATH:';\,
  1002. },
  1003. {
  1004. # test flatten for map and scalar
  1005. 'num' => 6,
  1006. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1007. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1008. b = foreach a generate flatten(name) as n, flatten(org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, gpa)) as m;
  1009. store b into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
  1010. },
  1011. {
  1012. # test flatten for UDF that returns bag with multiple tuples with multiple columns
  1013. 'num' => 7,
  1014. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1015. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1016. b = foreach a generate name, flatten(org.apache.pig.test.udf.evalfunc.CreateTupleBag(age, gpa)) as foo;
  1017. store b into ':OUTPATH:';\,
  1018. },
  1019. {
  1020. 'num' => 8,
  1021. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age: int, gpa);
  1022. c = group a by name;
  1023. d = foreach c generate flatten(group), MAX(a.age) + MIN(a.age);
  1024. store d into ':OUTPATH:';\,
  1025. },
  1026. {
  1027. # test filter, projection, sort, duplicate elimination
  1028. 'num' => 9,
  1029. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1030. b = filter a by age < 20;
  1031. c = group b by age;
  1032. d = foreach c {
  1033. cf = filter b by gpa >= 3.0 and gpa <= 3.5;
  1034. cp = cf.gpa;
  1035. cd = distinct cp;
  1036. co = order cd by $0;
  1037. generate group, flatten(co);
  1038. }
  1039. store d into ':OUTPATH:';\,
  1040. },
  1041. {
  1042. # test filter, projection, sort, duplicate elimination
  1043. 'num' => 10,
  1044. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1045. b = filter a by age < 20;
  1046. c = group b by age;
  1047. d = foreach c {
  1048. cf = filter b by (gpa == 4.0 or gpa != 2.0) and name > 'a';
  1049. cp = cf.gpa;
  1050. cd = distinct cp;
  1051. co = order cd by $0;
  1052. generate group, flatten(co);
  1053. }
  1054. store d into ':OUTPATH:';\,
  1055. },
  1056. {
  1057. # test filter followed by a nested foreach with chained expression aliases (exp2 reuses exp1)
  1058. 'num' => 11,
  1059. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1060. b = filter a by age < 20;
  1061. c = foreach b {
  1062. exp1 = age + gpa;
  1063. exp2 = exp1 + age;
  1064. generate exp1, exp2;
  1065. }
  1066. store c into ':OUTPATH:';\,
  1067. },
  1068. {
  1069. # test a udf with no args
  1070. 'num' => 12,
  1071. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1072. a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1073. b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
  1074. store b into ':OUTPATH:';\,
  1075. },
  1076. {
  1077. 'num' => 13,
  1078. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  1079. b = foreach a generate *;
  1080. store b into ':OUTPATH:';\,
  1081. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
  1082. b = foreach a generate *;
  1083. store b into ':OUTPATH:';\,
  1084. }
  1085. ]
  1086. },
  1087. {
  1088. 'name' => 'Order',
  1089. 'tests' => [
  1090. {
  1091. 'num' => 1,
  1092. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1093. b = foreach a generate name;
  1094. c = order b by name;
  1095. store c into ':OUTPATH:';\,
  1096. 'sortArgs' => ['-t', ' ', '+0', '-1'],
  1097. },
  1098. {
  1099. 'num' => 2,
  1100. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1101. b = foreach a generate $1;
  1102. c = order b by $0;
  1103. store c into ':OUTPATH:';\,
  1104. 'sortArgs' => ['-t', ' ', '+0', '-1'],
  1105. },
  1106. {
  1107. 'num' => 3,
  1108. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1109. b = foreach a generate gpa;
  1110. c = order b by gpa;
  1111. store c into ':OUTPATH:';\,
  1112. 'sortArgs' => ['-t', ' ', '+0', '-1'],
  1113. },
  1114. {
  1115. 'num' => 4,
  1116. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1117. b = order a by *;
  1118. store b into ':OUTPATH:';\,
  1119. 'sortArgs' => ['-t', ' '],
  1120. },
  1121. {
  1122. 'num' => 5,
  1123. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1124. b = foreach a generate name, age;
  1125. c = order b by name, age;
  1126. store c into ':OUTPATH:';\,
  1127. 'sortArgs' => ['-t', ' ', '+0', '-2'],
  1128. },
  1129. {
  1130. 'num' => 6,
  1131. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1132. c = order a by $0;
  1133. store c into ':OUTPATH:';\,
  1134. 'sortArgs' => ['-t', ' ', '+0', '-1'],
  1135. },
  1136. {
  1137. 'num' => 7,
  1138. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1139. c = order a by $1;
  1140. store c into ':OUTPATH:';\,
  1141. 'sortArgs' => ['-t', ' ', '+1', '-2'],
  1142. },
  1143. {
  1144. 'num' => 8,
  1145. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1146. c = order a by $0, $1;
  1147. store c into ':OUTPATH:';\,
  1148. 'sortArgs' => ['-t', ' ', '+0', '-2'],
  1149. },
  1150. {
  1151. 'num' => 9,
  1152. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1153. c = order a by $1, $0;
  1154. store c into ':OUTPATH:';\,
  1155. 'sortArgs' => ['-t', ' ', '+1', '-2', '+0', '-1'],
  1156. },
  1157. {
  1158. 'num' => 10,
  1159. 'ignore' => 'order by UDF is not supported',
  1160. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1161. a = load ':INPATH:/singlefile/studenttab10k';
  1162. c = order a by * using org.apache.pig.test.udf.orderby.OrdDesc;
  1163. store c into ':OUTPATH:';\,
  1164. 'sortArgs' => ['-t', ' ', '-r'],
  1165. },
  1166. {
  1167. 'num' => 11,
  1168. 'ignore' => 'order by UDF is not supported',
  1169. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1170. a = load ':INPATH:/singlefile/studenttab10k';
  1171. c = order a by $0 using org.apache.pig.test.udf.orderby.OrdDesc;
  1172. store c into ':OUTPATH:';\,
  1173. 'sortArgs' => ['-t', ' ', '-r', '+0', '-1'],
  1174. },
  1175. {
  1176. 'num' => 12,
  1177. 'ignore' => 'order by UDF is not supported',
  1178. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1179. a = load ':INPATH:/singlefile/studenttab10k';
  1180. c = order a by $0, $1 using org.apache.pig.test.udf.orderby.OrdDesc;
  1181. store c into ':OUTPATH:';\,
  1182. 'sortArgs' => ['-t', ' ', '-r', '+0', '-2'],
  1183. },
  1184. # ALERT All these tests with inner order bys aren't testing the inner
  1185. # ordering. We need to develop a sorting tool to do that.
  1186. {
  1187. 'num' => 13,
  1188. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1189. b = group a by $0;
  1190. c = foreach b {c1 = order $1 by $1; generate flatten(c1); };
  1191. store c into ':OUTPATH:';\,
  1192. },
  1193. {
  1194. 'num' => 14,
  1195. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1196. b = group a by $0;
  1197. c = foreach b {c1 = order $1 by *; generate flatten(c1); };
  1198. store c into ':OUTPATH:';\,
  1199. },
  1200. {
  1201. 'num' => 15,
  1202. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1203. a = load ':INPATH:/singlefile/studenttab10k';
  1204. b = group a by $0;
  1205. c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };
  1206. store c into ':OUTPATH:';\,
  1207. },
  1208. {
  1209. 'num' => 16,
  1210. 'pig' => q\register :FUNCPATH:/testudf.jar;
  1211. a = load ':INPATH:/singlefile/studenttab10k';
  1212. b = group a by $0;
  1213. c = foreach b {c1 = order $1 by $1 using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1);};
  1214. store c into ':OUTPATH:';\,
  1215. },
  1216. {
  1217. 'num' => 17,
  1218. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1219. b = group a by $0;
  1220. c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };
  1221. store c into ':OUTPATH:';\,
  1222. },
  1223. {
  1224. # test to make sure the weighted range partitioning
  1225. # works correctly when a sort key value repeats across
  1226. # reduce partitions
  1227. 'num' => 18,
  1228. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
  1229. b = order a by $1 parallel 100;
  1230. store b into ':OUTPATH:';\,
  1231. 'sortArgs' => ['-t', ' ', '+1', '-2'],
  1232. },
  1233. {
  1234. 'num' => 19,
  1235. 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
  1236. b = foreach a generate instate;
  1237. c = order b by instate;
  1238. store c into ':OUTPATH:';\,
  1239. 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
  1240. b = foreach a generate instate;
  1241. c = order b by instate;
  1242. store c into ':OUTPATH:';\,
  1243. 'sortArgs' => ['-t', ' ', '+0', '-1'],
  1244. },
  1245. ]
  1246. },
  1247. {
  1248. 'name' => 'Distinct',
  1249. 'tests' => [
  1250. {
  1251. 'num' => 1,
  1252. 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  1253. b = foreach a generate name;
  1254. c = distinct b;
  1255. store c into ':OUTPATH:';\,
  1256. },
  1257. {
  1258. 'nu…

Large files are truncated, but you can click here to view the full file