PageRenderTime 35ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/test/e2e/pig/tests/multiquery.conf

https://github.com/dorefiend/pig
Perl | 518 lines | 442 code | 6 blank | 70 comment | 28 complexity | 5e66a029c6fab2eb3f83bcd348aaf5e4 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
  1. #!/usr/bin/env perl
  2. ############################################################################
  3. # Licensed to the Apache Software Foundation (ASF) under one or more
  4. # contributor license agreements. See the NOTICE file distributed with
  5. # this work for additional information regarding copyright ownership.
  6. # The ASF licenses this file to You under the Apache License, Version 2.0
  7. # (the "License"); you may not use this file except in compliance with
  8. # the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. ####################################################################
  18. # SUB: Multiquery
  19. # Please include a brief description here.
  20. # - _TEST_ The first example; one that is defined in the bug with one split
  21. # - in the map phase
  22. # - _TEST_ Multiple side files, all in map phase.
  23. # - _TEST_ Two loads and two stores in map phase.
  24. # - _TEST_ One split added in reduce phase and map-only splitee.
  25. # - _TEST_ One split added in reduce phase and one map-reduce splitee
  26. # - _TEST_ One split in reduce phase and two Map-Reduce splitees.
  27. # - _TEST_ Two loads and two stores in reduce phase
  28. # - _TEST_ Explicit split with two side files.
  29. # - _TEST_ Explicit split with order by and two side files.
  30. # - _TEST_ Implicit split with multiple side files.
  31. # - _TEST_ Streaming with multiple stores.
  32. # - _TEST_ Script with intermediate stores.
  33. # - _TEST_ Implicit split with order by and multiple side files.
  34. # - _TEST_ Self join using fragment replicate join with multiple side files.
  35. # - _TEST_ PigMix Test Case L12.
  36. # - _TEST_ One split in map phase and two Map-Reduce splitees with mixed combiners.
  37. # - _TEST_ One split in map phase and two Map-Reduce splitees without combiners.
  38. # - _TEST_ Splittees with different map key types and nested splits.
  39. # - _TEST_ Splittees with different map key type.
  40. # - _TEST_ Streaming in demux.
  41. # - _TEST_ Streaming in nested demux.
  42. # - _TEST_ PigMix Test Case L12 version 2
  43. $cfg = {
  44. 'driver' => 'Pig',
  45. 'nummachines' => 5,
  46. 'groups' => [
  47. {
  48. 'name' => 'MultiQuery',
  49. 'floatpostprocess' => 1,
  50. 'delimiter' => ' ',
  51. 'tests' => [
  52. {
  53. # The first exmaple; one that is defined in the bug with one split
  54. # in the map phase
  55. 'num' => 1,
  56. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  57. b = filter a by age < 22; store b into ':OUTPATH:.1';
  58. c = group b by age;
  59. d = foreach c generate group, SUM(b.gpa);
  60. store d into ':OUTPATH:.2'; #,
  61. 'sql' => "select name, age, gpa from studenttab10k where age < 22;
  62. select age, sum(gpa) from studenttab10k where age < 22 group by age;",
  63. },
  64. {
  65. # Multiple side files, all in map phase.
  66. 'num' => 2,
  67. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  68. b = filter a by age < 22;
  69. store b into ':OUTPATH:.1';
  70. c = filter b by gpa > 3.0;
  71. store c into ':OUTPATH:.2';
  72. d = filter c by name < 'm';
  73. store d into ':OUTPATH:.3'; #,
  74. 'sql' => "select name, age, gpa from studenttab10k where age < 22;
  75. select name, age, gpa from studenttab10k where age < 22 and gpa > 3.0;
  76. select name, age, gpa from studenttab10k where age < 22 and gpa > 3.0 and name < 'm';",
  77. },
  78. {
  79. # Two loads and two stores in map phase.
  80. 'num' => 3,
  81. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  82. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  83. c = filter a by age < 20;
  84. d = filter b by age < 20;
  85. store c into ':OUTPATH:.1';
  86. store d into ':OUTPATH:.2';
  87. e = cogroup c by name, d by name;
  88. f = foreach e generate flatten(c), flatten(d);
  89. store f into ':OUTPATH:.3'; #,
  90. 'sql' => "select name, age, gpa from studenttab10k where age < 20;
  91. select name, age, registration, contributions from votertab10k where age < 20;
  92. select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
  93. from studenttab10k as a full outer join votertab10k as b using(name)
  94. where a.age < 20 and b.age < 20;",
  95. },
  96. {
  97. # One split added in reduce phase and map-only splitee.
  98. 'num' => 4,
  99. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  100. b = filter a by gpa < 3.0;
  101. c = group b by age;
  102. d = foreach c generate group, AVG(b.gpa);
  103. store d into ':OUTPATH:.1';
  104. e = filter d by $1 > 1.5;
  105. store e into ':OUTPATH:.2'; #,
  106. 'sql' => "select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age;
  107. select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age having avg(gpa) > 1.5;",
  108. },
  109. {
  110. # One split added in reduce phase and one map-reduce splitee
  111. 'num' => 5,
  112. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  113. b = filter a by gpa < 3.0;
  114. c = group b by age;
  115. d = foreach c generate group, AVG(b.gpa);
  116. store d into ':OUTPATH:.1';
  117. e = filter d by $1 > 1.5;
  118. f = group e by $1;
  119. g = foreach f generate group, SUM(e.$0);
  120. store g into ':OUTPATH:.2'; #,
  121. 'sql' => "select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age;
  122. select t.b, sum(t.a) from (select age as a, avg(gpa) as b from studenttab10k
  123. where gpa < 3.0 group by age having avg(gpa) > 1.5) as t group by t.b;",
  124. },
  125. {
  126. # One split in reduce phase and two Map-Reduce splitees.
  127. 'num' => 6,
  128. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  129. b = filter a by gpa < 3.0;
  130. c = group b by age;
  131. d = foreach c generate group, AVG(b.gpa);
  132. e = filter d by $1 > 1.5;
  133. e1= group e by $1;
  134. e2 = foreach e1 generate group, SUM(e.$0);
  135. store e2 into ':OUTPATH:.1';
  136. f = filter d by $1 <= 1.5;
  137. f1 = group f by $1;
  138. f2 = foreach f1 generate group, COUNT(f.$0);
  139. store f2 into ':OUTPATH:.2'; #,
  140. 'sql' => "select t.c1, sum(t.c0) from (select age as c0, avg(gpa) as c1 from studenttab10k
  141. where gpa < 3.0 group by age having avg(gpa) > 1.5) as t group by t.c1;
  142. select t.c1, count(t.c0) from (select age as c0, avg(gpa) as c1 from studenttab10k
  143. where gpa < 3.0 group by age having avg(gpa) <= 1.5) as t group by t.c1;",
  144. },
  145. {
  146. # Two loads and two stores in reduce phase
  147. 'num' => 7,
  148. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  149. b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  150. c = filter a by age < 20;
  151. d = filter b by age < 20;
  152. e = cogroup c by name, d by name;
  153. f = foreach e generate flatten(c), flatten(d);
  154. g = group f by d::age;
  155. h = foreach g generate group, SUM(f.gpa);
  156. store h into ':OUTPATH:.1';
  157. e = filter f by c::gpa < 3.0;
  158. store e into ':OUTPATH:.2'; #,
  159. 'sql' => "select c5, sum(c3) from (select a.name as c1, a.age as c2, a.gpa as c3, b.name as c4,
  160. b.age as c5, b.registration as c6, b.contributions as c7
  161. from studenttab10k as a full outer join votertab10k as b using(name)
  162. where a.age < 20 and b.age < 20) as t group by t.c5;
  163. select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
  164. from studenttab10k as a full outer join votertab10k as b using(name)
  165. where a.age < 20 and b.age < 20 and a.gpa < 3.0;",
  166. },
  167. {
  168. # Explicit split with two side files.
  169. 'num'=> 8,
  170. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  171. split a into a1 if name > 'm', a2 if name <= 'm';
  172. store a1 into ':OUTPATH:.1';
  173. store a2 into ':OUTPATH:.2';
  174. b = cogroup a1 by age, a2 by age;
  175. c = foreach b generate flatten(a1), flatten(a2);
  176. store c into ':OUTPATH:.3'; #,
  177. 'sql' => "select name, age, gpa from studenttab10k where name > 'm';
  178. select name, age, gpa from studenttab10k where name <= 'm';
  179. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  180. from (select * from studenttab10k where name > 'm') as A
  181. join (select * from studenttab10k where name <= 'm') as B using (age);",
  182. },
  183. {
  184. # Explicit split with order by and two side files.
  185. 'num'=> 9,
  186. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  187. split a into a1 if age > 50, a2 if name < 'm';
  188. b2 = distinct a2;
  189. b1 = order a1 by name;
  190. store b2 into ':OUTPATH:.2';
  191. store b1 into ':OUTPATH:.1';
  192. c = cogroup b2 by name, b1 by name;
  193. d = foreach c generate flatten(group), COUNT($1), COUNT($2);
  194. store d into ':OUTPATH:.3'; #,
  195. 'sql' => "select name, age, gpa from studenttab10k where age > 50 order by name;
  196. select distinct name, age, gpa from studenttab10k where name < 'm';
  197. select name, count(A.name), count(B.name)
  198. from (select distinct name from studenttab10k where name < 'm') as A
  199. join (select name from studenttab10k where age > 50) as B using (name) group by name;",
  200. 'verify_with_pig' => 1,
  201. 'verify_pig_version' => 'old',
  202. },
  203. {
  204. # Implicit split with multiple side files.
  205. 'num'=> 10,
  206. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  207. b = filter a by age > 50;
  208. c = filter a by gpa > 3.0;
  209. store c into ':OUTPATH:.1';
  210. d = cogroup b by name, c by name;
  211. e = foreach d generate flatten(b), flatten(c);
  212. store e into ':OUTPATH:.2';
  213. f = filter e by b::age < 75;
  214. store f into ':OUTPATH:.3'; #,
  215. 'sql' => "select name, age, gpa from studenttab10k where gpa > 3.0;
  216. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  217. from (select * from studenttab10k where age > 50) as A
  218. join (select * from studenttab10k where gpa > 3.0) as B using (name);
  219. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  220. from (select * from studenttab10k where age > 50) as A
  221. join (select * from studenttab10k where gpa > 3.0) as B using (name) where A.age < 75;",
  222. },
  223. {
  224. # Streaming with multiple stores
  225. 'num' => 11,
  226. 'pig' => q# define CMD1 `perl -ne 'print $_;'`;
  227. define CMD2 `perl -ne 'print $_;'`;
  228. A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  229. B = stream A through CMD1 as (name, age, gpa);
  230. store B into ':OUTPATH:.1';
  231. C = stream B through CMD2 as (name, age, gpa);
  232. D = JOIN B by name, C by name;
  233. store D into ':OUTPATH:.2'; #,
  234. 'sql' => "select name, age, gpa from studenttab10k;
  235. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  236. from studenttab10k as A join studenttab10k as B using(name);",
  237. },
  238. {
  239. # With intermediate store
  240. 'num' => 12,
  241. 'pig' => q# A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
  242. store A into ':OUTPATH:.1';
  243. B = load ':OUTPATH:.1';
  244. store B into ':OUTPATH:.2'; #,
  245. 'sql' => "select name, age, gpa from studenttab10k;
  246. select name, age, gpa from studenttab10k;",
  247. },
  248. {
  249. # Implicit split with order by and multiple side files.
  250. 'num'=>13,
  251. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  252. b = filter a by age > 50;
  253. c = filter a by gpa > 3.0;
  254. store c into ':OUTPATH:.1';
  255. d = cogroup b by name, c by name;
  256. e = foreach d generate flatten(b), flatten(c);
  257. f = order e by b::name;
  258. store e into ':OUTPATH:.2';
  259. f = filter e by b::age < 75;
  260. store f into ':OUTPATH:.3'; #,
  261. 'sql' => "select name, age, gpa from studenttab10k where gpa > 3.0;
  262. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  263. from (select * from studenttab10k where age > 50) as A
  264. join (select * from studenttab10k where gpa > 3.0) as B using (name)
  265. order by A.name;
  266. select A.name, A.age, A.gpa, B.name, B.age, B.gpa
  267. from (select * from studenttab10k where age > 50) as A
  268. join (select * from studenttab10k where gpa > 3.0) as B using (name)
  269. where A.age < 75 order by A.name;",
  270. },
  271. # Self join using fragment replicate join with multiple side files
  272. {
  273. 'num' => 14,
  274. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  275. b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
  276. c = filter a by age > 50;
  277. store c into ':OUTPATH:.1';
  278. d = filter b by gpa > 3.0;
  279. store d into ':OUTPATH:.2';
  280. e = join c by gpa, d by gpa using 'repl';
  281. store e into ':OUTPATH:.3'; #,
  282. 'sql' => "select name, age, gpa from studenttab10k where age > 50;
  283. select name, age, gpa from studenttab10k where gpa > 3.0;
  284. select a.name, a.age, a.gpa, b.name, b.age, b.gpa
  285. from studenttab10k as a join studenttab10k as b using(gpa)
  286. where a.age > 50 and b.gpa > 3.0;",
  287. },
  288. # PigMix Test Case L12
  289. {
  290. 'num' => 15,
  291. 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  292. b = foreach a generate name, age, contributions;
  293. split b into c1 if age > 50, c2 if age <= 50;
  294. split c1 into d1 if name < 'm', d2 if name >= 'm';
  295. e = group c2 by name;
  296. e1 = foreach e generate group, SUM(c2.contributions);
  297. store e1 into ':OUTPATH:.1';
  298. f = group d1 by name;
  299. f1 = foreach f generate group, MAX(d1.contributions);
  300. store f1 into ':OUTPATH:.2';
  301. g = group d2 by name;
  302. g1 = foreach g generate group, COUNT(d2);
  303. store g1 into ':OUTPATH:.3'; #,
  304. 'sql' => "select name, sum(contributions) from votertab10k where age <= 50 group by name;
  305. select name, max(contributions) from votertab10k where (age > 50 and name < 'm') group by name;
  306. select name, count(*) from votertab10k where (age > 50 and name >= 'm') group by name;",
  307. },
  308. # One split in map phase and two Map-Reduce splitees with mixed combiner.
  309. {
  310. 'num' => 16,
  311. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  312. b = filter a by gpa < 3.0;
  313. c = filter a by gpa >= 3.0;
  314. d = group b by age;
  315. e = foreach d generate group, AVG(b.gpa);
  316. store e into ':OUTPATH:.1';
  317. f = group c by age;
  318. g = foreach f generate group, MAX(c.gpa) - MIN(c.gpa);
  319. store g into ':OUTPATH:.2'; #,
  320. 'sql' => "select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age;
  321. select age, max(gpa) - min(gpa) from studenttab10k where gpa >= 3.0 group by age;",
  322. },
  323. # One split in map phase and two Map-Reduce splitees without combiner.
  324. {
  325. 'num' => 17,
  326. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float);
  327. b = filter a by gpa < 3.0;
  328. c = filter a by gpa >= 3.0;
  329. d = group b by age;
  330. e = foreach d generate group, MAX(b.gpa) + MIN(b.gpa);
  331. store e into ':OUTPATH:.1';
  332. f = group c by age;
  333. g = foreach f generate group, MAX(c.gpa) - MIN(c.gpa);
  334. store g into ':OUTPATH:.2'; #,
  335. 'sql' => "select age, max(gpa) + min(gpa) from studenttab10k where gpa < 3.0 group by age;
  336. select age, max(gpa) - min(gpa) from studenttab10k where gpa >= 3.0 group by age;",
  337. },
  338. # Splittees with different map key types and nested splits
  339. {
  340. 'num' => 18,
  341. 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name: chararray, age:int, registration, contributions:double);
  342. b = foreach a generate name, age, contributions;
  343. split b into c1 if age > 10, c2 if age <= 60;
  344. split c1 into d1 if name < 'y', d2 if name >= 'c';
  345. e = group c2 by name parallel 2;
  346. e1 = foreach e generate group, SUM(c2.contributions);
  347. store e1 into ':OUTPATH:.1';
  348. f = group d1 by name parallel 3;
  349. f1 = foreach f generate group, MAX(d1.contributions);
  350. store f1 into ':OUTPATH:.2';
  351. g = group d2 by age parallel 4;
  352. g1 = foreach g generate group, COUNT(d2);
  353. store g1 into ':OUTPATH:.3'; #,
  354. 'sql' => "select name, sum(contributions) from votertab10k where age <= 60 group by name;
  355. select name, max(contributions) from votertab10k where (age > 10 and name < 'y') group by name;
  356. select age, count(*) from votertab10k where (age > 10 and name >= 'c') group by age;",
  357. },
  358. # Splittees with different map key types
  359. {
  360. 'num' => 19,
  361. 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name: chararray, age:int, registration, contributions:double);
  362. b = foreach a generate name, age, contributions;
  363. split b into c1 if age > 50, c2 if age <= 50;
  364. e = group c2 by name;
  365. e1 = foreach e generate group, SUM(c2.contributions);
  366. store e1 into ':OUTPATH:.1';
  367. f = group c1 by age;
  368. f1 = foreach f generate group, MAX(c1.contributions);
  369. store f1 into ':OUTPATH:.2'; #,
  370. 'sql' => "select name, sum(contributions) from votertab10k where age <= 50 group by name;
  371. select age, max(contributions) from votertab10k where age > 50 group by age;",
  372. },
  373. # Streaming in demux
  374. {
  375. 'num' => 20,
  376. 'execonly' => 'mapred',
  377. 'pig' => q#
  378. define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl');
  379. A = load ':INPATH:/singlefile/studenttab10k';
  380. split A into A1 if $0 < 'm', A2 if $0 >= 'm';
  381. B = group A1 by $0;
  382. C = foreach B generate flatten(A1);
  383. D = stream C through CMD;
  384. store D into ':OUTPATH:.1';
  385. E = group A2 by $0;
  386. F = foreach E generate group, COUNT(A2);
  387. store F into ':OUTPATH:.2';#,
  388. 'sql' => "select name, count(*) from studenttab10k where name < 'm' group by name;
  389. select name, count(*) from studenttab10k where name >= 'm' group by name;",
  390. },
  391. # Streaming in nested demux
  392. {
  393. 'num' => 21,
  394. 'execonly' => 'mapred',
  395. 'pig' => q#
  396. define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl');
  397. A = load ':INPATH:/singlefile/studenttab10k';
  398. split A into A1 if $0 < 'm', A2 if $0 >= 'm';
  399. split A1 into A3 if $1 < 30, A4 if $1 >= 30;
  400. B = group A3 by $0;
  401. C = foreach B generate flatten(A3);
  402. D = stream C through CMD;
  403. store D into ':OUTPATH:.1';
  404. E = group A2 by $0;
  405. F = foreach E generate group, COUNT(A2);
  406. store F into ':OUTPATH:.2';
  407. G = group A4 by $0;
  408. H = foreach G generate group, COUNT(A4);
  409. store H into ':OUTPATH:.3';#,
  410. 'sql' => "select name, count(*) from studenttab10k where name < 'm' and age < 30 group by name;
  411. select name, count(*) from studenttab10k where name >= 'm' group by name;
  412. select name, count(*) from studenttab10k where name < 'm' and age >= 30 group by name;",
  413. },
  414. # PigMix Test Case L12 version 2
  415. {
  416. 'num' => 22,
  417. 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  418. b = foreach a generate name, age, contributions;
  419. split b into c1 if age > 50, c2 if age <= 50;
  420. split c1 into d1 if name < 'm', d2 if name >= 'm';
  421. e = group c2 by (name, age);
  422. e1 = foreach e generate flatten(group), SUM(c2.contributions);
  423. store e1 into ':OUTPATH:.1';
  424. f = group d1 by (name, age);
  425. f1 = foreach f generate flatten(group), MAX(d1.contributions);
  426. store f1 into ':OUTPATH:.2';
  427. g = group d2 by (name, age);
  428. g1 = foreach g generate flatten(group), COUNT(d2);
  429. store g1 into ':OUTPATH:.3'; #,
  430. 'sql' => "select name, age, sum(contributions) from votertab10k where age <= 50 group by name, age;
  431. select name, age, max(contributions) from votertab10k where (age > 50 and name < 'm') group by name, age;
  432. select name, age, count(*) from votertab10k where (age > 50 and name >= 'm') group by name, age;",
  433. },
  434. # PigMix Test Case L12 version 3 (modified to have different map key types in inner split)
  435. {
  436. 'num' => 23,
  437. 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
  438. b = foreach a generate name, age, contributions;
  439. split b into c1 if age > 50, c2 if age <= 50;
  440. split c1 into d1 if name < 'm', d2 if name >= 'm';
  441. f = group d1 by name;
  442. f1 = foreach f generate flatten(group), MAX(d1.contributions);
  443. store f1 into ':OUTPATH:.1';
  444. g = group d2 by (name, age);
  445. g1 = foreach g generate flatten(group), COUNT(d2);
  446. store g1 into ':OUTPATH:.2';
  447. e = group c2 by (name, age);
  448. e1 = foreach e generate flatten(group), SUM(c2.contributions);
  449. store e1 into ':OUTPATH:.3'; #,
  450. 'sql' => "select name, max(contributions) from votertab10k where (age > 50 and name < 'm') group by name;
  451. select name, age, count(*) from votertab10k where (age > 50 and name >= 'm') group by name, age;
  452. select name, age, sum(contributions) from votertab10k where age <= 50 group by name, age;",
  453. },
  454. # Pig-976: Multi-query optimization throws ClassCastException
  455. {
  456. 'num' => 24,
  457. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  458. b = group a by name;
  459. c = group a by age;
  460. d = foreach b generate MAX(a.age);
  461. e = foreach c generate group, SUM(a.gpa);
  462. store d into ':OUTPATH:.1';
  463. store e into ':OUTPATH:.2'; #,
  464. 'sql' => "select max(age) from studenttab10k group by name;
  465. select age, sum(gpa) from studenttab10k group by age;",
  466. },
  467. # Pig-976: Multi-query optimization throws ClassCastException
  468. {
  469. 'num' => 25,
  470. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  471. b = group a all;
  472. c = group a by age;
  473. d = foreach b generate COUNT(a), MAX(a.age);
  474. e = foreach c generate group, SUM(a.gpa);
  475. store d into ':OUTPATH:.1';
  476. store e into ':OUTPATH:.2'; #,
  477. 'sql' => "select count(*), max(age) from studenttab10k;
  478. select age, sum(gpa) from studenttab10k group by age;",
  479. },
  480. # Pig-983: multi-query optimization on multiple group bys following a join or cogroup
  481. {
  482. 'num' => 26,
  483. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  484. b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double);
  485. c = join a by name, b by name;
  486. d = group c by a::age;
  487. e = group c by b::age;
  488. d1 = foreach d generate group, COUNT(c), MAX(c.a::gpa);
  489. e1 = foreach e generate group, SUM(c.b::contributions);
  490. store d1 into ':OUTPATH:.1';
  491. store e1 into ':OUTPATH:.2'; #,
  492. 'sql' => "select a.age, count(*), max(a.gpa) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by a.age;
  493. select b.age, sum(b.contributions) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by b.age;",
  494. },
  495. # Pig-976: Multi-query optimization throws ClassCastException
  496. {
  497. 'num' => 27,
  498. 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
  499. b = group a by name;
  500. c = group a by age;
  501. d = foreach b generate MAX(a.age), group;
  502. e = foreach c generate group, SUM(a.gpa);
  503. store d into ':OUTPATH:.1';
  504. store e into ':OUTPATH:.2'; #,
  505. 'sql' => "select max(age), name from studenttab10k group by name;
  506. select age, sum(gpa) from studenttab10k group by age;",
  507. },
  508. ] # end of tests
  509. },
  510. ] # end of groups
  511. }
  512. ;