PageRenderTime 59ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/test/org/apache/pig/test/TestMultiQueryCompiler.java

https://github.com/dorefiend/pig
Java | 1565 lines | 1169 code | 371 blank | 25 comment | 9 complexity | 436e954ea0fe5d6eee67325edc13d5ab MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.pig.test;
  19. import java.io.ByteArrayOutputStream;
  20. import java.io.File;
  21. import java.io.FileWriter;
  22. import java.io.IOException;
  23. import java.io.PrintWriter;
  24. import java.io.StringReader;
  25. import java.util.ArrayList;
  26. import java.util.Collections;
  27. import java.util.Iterator;
  28. import junit.framework.Assert;
  29. import org.apache.pig.ExecType;
  30. import org.apache.pig.PigException;
  31. import org.apache.pig.PigServer;
  32. import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher;
  33. import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
  34. import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
  35. import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
  36. import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
  37. import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
  38. import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
  39. import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
  40. import org.apache.pig.impl.PigContext;
  41. import org.apache.pig.impl.io.FileLocalizer;
  42. import org.apache.pig.impl.plan.Operator;
  43. import org.apache.pig.impl.plan.OperatorPlan;
  44. import org.apache.pig.impl.util.LogUtils;
  45. import org.apache.pig.newplan.logical.relational.LogicalPlan;
  46. import org.apache.pig.test.utils.SimpleCustomPartitioner;
  47. import org.apache.pig.tools.grunt.GruntParser;
  48. import org.apache.pig.tools.pigscript.parser.ParseException;
  49. import org.junit.After;
  50. import org.junit.AfterClass;
  51. import org.junit.Before;
  52. import org.junit.BeforeClass;
  53. import org.junit.Test;
  54. import org.junit.runner.RunWith;
  55. import org.junit.runners.JUnit4;
  56. @RunWith(JUnit4.class)
  57. public class TestMultiQueryCompiler {
  58. private static MiniCluster cluster;
  59. private PigServer myPig;
  60. @BeforeClass
  61. public static void setUpBeforeClass() throws IOException {
  62. cluster = MiniCluster.buildCluster();
  63. Util.copyFromLocalToCluster(cluster,
  64. "test/org/apache/pig/test/data/passwd", "passwd");
  65. Util.copyFromLocalToCluster(cluster,
  66. "test/org/apache/pig/test/data/passwd2", "passwd2");
  67. }
  68. @AfterClass
  69. public static void tearDownAfterClass() throws IOException {
  70. Util.deleteFile(cluster, "passwd");
  71. Util.deleteFile(cluster, "passwd2");
  72. cluster.shutDown();
  73. }
  74. @Before
  75. public void setUp() throws Exception {
  76. cluster.setProperty("opt.multiquery", ""+true);
  77. myPig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
  78. deleteOutputFiles();
  79. }
  80. @After
  81. public void tearDown() throws Exception {
  82. myPig = null;
  83. }
  84. @Test
  85. public void testMultiQueryJiraPig1438() {
  86. // test case: merge multiple distinct jobs -- one group by job, one distinct job
  87. String INPUT_FILE = "abc";
  88. try {
  89. myPig.setBatchOn();
  90. myPig.registerQuery("A = load '" + INPUT_FILE + "' as (col1:int, col2:int, col3:int);");
  91. myPig.registerQuery("B1 = foreach A generate col1, col2;");
  92. myPig.registerQuery("B2 = foreach A generate col2, col3;");
  93. myPig.registerQuery("C1 = distinct B1;");
  94. myPig.registerQuery("C2 = group B2 by (col2, col3);");
  95. myPig.registerQuery("D1 = foreach C1 generate col1, col2;");
  96. myPig.registerQuery("D2 = foreach C2 generate B2.col2, B2.col3;");
  97. myPig.registerQuery("store D1 into '/tmp/output1';");
  98. myPig.registerQuery("store D2 into '/tmp/output2';");
  99. LogicalPlan lp = checkLogicalPlan(1, 2, 9);
  100. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 15);
  101. checkMRPlan(pp, 1, 1, 2);
  102. } catch (Exception e) {
  103. e.printStackTrace();
  104. Assert.fail();
  105. }
  106. }
  107. @Test
  108. public void testMultiQueryJiraPig1060() {
  109. // test case:
  110. String INPUT_FILE = "pig-1060.txt";
  111. try {
  112. PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE));
  113. w.println("apple\t2");
  114. w.println("apple\t12");
  115. w.println("orange\t3");
  116. w.println("orange\t23");
  117. w.println("strawberry\t10");
  118. w.println("strawberry\t34");
  119. w.close();
  120. Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE);
  121. myPig.setBatchOn();
  122. myPig.registerQuery("data = load '" + INPUT_FILE +
  123. "' as (name:chararray, gid:int);");
  124. myPig.registerQuery("f1 = filter data by gid < 5;");
  125. myPig.registerQuery("g1 = group f1 by name;");
  126. myPig.registerQuery("p1 = foreach g1 generate group, COUNT(f1.gid);");
  127. myPig.registerQuery("store p1 into '/tmp/output1';");
  128. myPig.registerQuery("f2 = filter data by gid > 5;");
  129. myPig.registerQuery("g2 = group f2 by name;");
  130. myPig.registerQuery("p2 = foreach g2 generate group, COUNT(f2.gid);");
  131. myPig.registerQuery("store p2 into '/tmp/output2';");
  132. myPig.registerQuery("f3 = filter f2 by gid > 10;");
  133. myPig.registerQuery("g3 = group f3 by name;");
  134. myPig.registerQuery("p3 = foreach g3 generate group, COUNT(f3.gid);");
  135. myPig.registerQuery("store p3 into '/tmp/output3';");
  136. myPig.registerQuery("f4 = filter f3 by gid < 20;");
  137. myPig.registerQuery("g4 = group f4 by name;");
  138. myPig.registerQuery("p4 = foreach g4 generate group, COUNT(f4.gid);");
  139. myPig.registerQuery("store p4 into '/tmp/output4';");
  140. LogicalPlan lp = checkLogicalPlan(1, 4, 17);
  141. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 4, 35);
  142. checkMRPlan(pp, 1, 1, 1);
  143. } catch (Exception e) {
  144. e.printStackTrace();
  145. Assert.fail();
  146. } finally {
  147. new File(INPUT_FILE).delete();
  148. try {
  149. Util.deleteFile(cluster, INPUT_FILE);
  150. } catch (IOException e) {
  151. e.printStackTrace();
  152. Assert.fail();
  153. }
  154. }
  155. }
  156. @Test
  157. public void testMultiQueryJiraPig920() {
  158. // test case: a simple diamond query
  159. try {
  160. myPig.setBatchOn();
  161. myPig.registerQuery("a = load 'passwd' " +
  162. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  163. myPig.registerQuery("b = filter a by uid < 5;");
  164. myPig.registerQuery("c = filter a by gid >= 5;");
  165. myPig.registerQuery("d = cogroup c by $0, b by $0;");
  166. myPig.registerQuery("e = foreach d generate group, COUNT(c), COUNT(b);");
  167. myPig.registerQuery("store e into '/tmp/output1';");
  168. LogicalPlan lp = checkLogicalPlan(1, 1, 6);
  169. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 13);
  170. checkMRPlan(pp, 1, 1, 1);
  171. } catch (Exception e) {
  172. e.printStackTrace();
  173. Assert.fail();
  174. }
  175. }
  176. @Test
  177. public void testMultiQueryJiraPig920_1() {
  178. // test case: a query with two diamonds
  179. try {
  180. myPig.setBatchOn();
  181. myPig.registerQuery("a = load 'passwd' " +
  182. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  183. myPig.registerQuery("b = filter a by uid < 5;");
  184. myPig.registerQuery("c = filter a by gid >= 5;");
  185. myPig.registerQuery("d = filter a by uid >= 5;");
  186. myPig.registerQuery("e = filter a by gid < 5;");
  187. myPig.registerQuery("f = cogroup c by $0, b by $0;");
  188. myPig.registerQuery("f1 = foreach f generate group, COUNT(c), COUNT(b);");
  189. myPig.registerQuery("store f1 into '/tmp/output1';");
  190. myPig.registerQuery("g = cogroup d by $0, e by $0;");
  191. myPig.registerQuery("g1 = foreach g generate group, COUNT(d), COUNT(e);");
  192. myPig.registerQuery("store g1 into '/tmp/output2';");
  193. LogicalPlan lp = checkLogicalPlan(1, 2, 11);
  194. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 23);
  195. checkMRPlan(pp, 2, 2, 2);
  196. } catch (Exception e) {
  197. e.printStackTrace();
  198. Assert.fail();
  199. }
  200. }
  201. @Test
  202. public void testMultiQueryWithDemoCase() {
  203. System.out.println("===== multi-query with demo case 2 =====");
  204. try {
  205. myPig.setBatchOn();
  206. myPig.registerQuery("a = load 'passwd' " +
  207. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  208. myPig.registerQuery("b = foreach a generate uname, uid, gid;");
  209. myPig.registerQuery("c = filter b by uid < 5;");
  210. myPig.registerQuery("d = filter c by gid >= 5;");
  211. myPig.registerQuery("store d into '/tmp/output1';");
  212. myPig.registerQuery("e = filter b by uid >= 5;");
  213. myPig.registerQuery("store e into '/tmp/output2';");
  214. myPig.registerQuery("f = filter c by gid < 5;");
  215. myPig.registerQuery("g = group f by uname;");
  216. myPig.registerQuery("h = foreach g generate group, COUNT(f.uid);");
  217. myPig.registerQuery("store h into '/tmp/output3';");
  218. LogicalPlan lp = checkLogicalPlan(1, 3, 11);
  219. // NOTE: old way seemingly generated a useless foreach operator. Now we have one less operator. Reason unknow.
  220. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 19);
  221. checkMRPlan(pp, 1, 1, 1);
  222. } catch (Exception e) {
  223. e.printStackTrace();
  224. Assert.fail();
  225. }
  226. }
  227. @Test
  228. public void testMultiQueryWithSingleMapReduceSplittee() {
  229. System.out.println("===== multi-query with single map reduce splittee =====");
  230. try {
  231. myPig.setBatchOn();
  232. myPig.registerQuery("a = load 'passwd' " +
  233. "using PigStorage(':') as (uname, passwd, uid, gid);");
  234. myPig.registerQuery("b = foreach a generate uname, uid, gid;");
  235. myPig.registerQuery("split b into c1 if uid > 5, c2 if uid <= 5 ;");
  236. myPig.registerQuery("f = group c2 by uname;");
  237. myPig.registerQuery("f1 = foreach f generate group, SUM(c2.gid);");
  238. myPig.registerQuery("store f1 into '/tmp/output1';");
  239. LogicalPlan lp = checkLogicalPlan(1, 1, 7);
  240. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 9);
  241. checkMRPlan(pp, 1, 1, 1);
  242. } catch (Exception e) {
  243. e.printStackTrace();
  244. Assert.fail();
  245. }
  246. }
  247. @Test
  248. public void testMultiQueryPhase3BaseCase() {
  249. System.out.println("===== multi-query phase 3 base case =====");
  250. try {
  251. myPig.setBatchOn();
  252. myPig.registerQuery("a = load 'passwd' " +
  253. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  254. myPig.registerQuery("b = filter a by uid < 5;");
  255. myPig.registerQuery("c = filter a by uid >= 5 and uid < 10;");
  256. myPig.registerQuery("d = filter a by uid >= 10;");
  257. myPig.registerQuery("b1 = group b by gid;");
  258. myPig.registerQuery("b2 = foreach b1 generate group, COUNT(b.uid);");
  259. myPig.registerQuery("b3 = filter b2 by $1 > 5;");
  260. myPig.registerQuery("store b3 into '/tmp/output1';");
  261. myPig.registerQuery("c1 = group c by gid;");
  262. myPig.registerQuery("c2 = foreach c1 generate group, SUM(c.uid);");
  263. myPig.registerQuery("store c2 into '/tmp/output2';");
  264. myPig.registerQuery("d1 = group d by gid;");
  265. myPig.registerQuery("d2 = foreach d1 generate group, AVG(d.uid);");
  266. myPig.registerQuery("store d2 into '/tmp/output3';");
  267. LogicalPlan lp = checkLogicalPlan(1, 3, 14);
  268. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 25);
  269. checkMRPlan(pp, 1, 1, 1);
  270. } catch (Exception e) {
  271. e.printStackTrace();
  272. Assert.fail();
  273. }
  274. }
  275. @Test
  276. public void testMultiQueryJiraPig983() {
  277. System.out.println("===== multi-query Jira Pig-983 =====");
  278. try {
  279. myPig.setBatchOn();
  280. myPig.registerQuery("a = load 'passwd' " +
  281. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  282. myPig.registerQuery("b = filter a by uid < 5;");
  283. myPig.registerQuery("c = filter a by uid >= 5;");
  284. myPig.registerQuery("d = join b by uname, c by uname;");
  285. myPig.registerQuery("e = group d by b::gid;");
  286. myPig.registerQuery("e1 = foreach e generate group, COUNT(d.b::uid);");
  287. myPig.registerQuery("store e1 into '/tmp/output1';");
  288. myPig.registerQuery("f = group d by c::gid;");
  289. myPig.registerQuery("f1 = foreach f generate group, SUM(d.c::uid);");
  290. myPig.registerQuery("store f1 into '/tmp/output2';");
  291. LogicalPlan lp = checkLogicalPlan(1, 2, 10);
  292. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 25);
  293. checkMRPlan(pp, 1, 1, 2);
  294. } catch (Exception e) {
  295. e.printStackTrace();
  296. Assert.fail();
  297. }
  298. }
  299. @Test
  300. public void testMultiQueryPhase3WithoutCombiner() {
  301. System.out.println("===== multi-query phase 3 without combiner =====");
  302. try {
  303. myPig.setBatchOn();
  304. myPig.registerQuery("a = load 'passwd' " +
  305. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  306. myPig.registerQuery("b = filter a by uid < 5;");
  307. myPig.registerQuery("c = filter a by uid >= 5 and uid < 10;");
  308. myPig.registerQuery("d = filter a by uid >= 10;");
  309. myPig.registerQuery("b1 = group b by gid;");
  310. myPig.registerQuery("b2 = foreach b1 generate group, COUNT(b.uid) + SUM(b.uid);");
  311. myPig.registerQuery("b3 = filter b2 by $1 > 5;");
  312. myPig.registerQuery("store b3 into '/tmp/output1';");
  313. myPig.registerQuery("c1 = group c by gid;");
  314. myPig.registerQuery("c2 = foreach c1 generate group, SUM(c.uid) - COUNT(c.uid);");
  315. myPig.registerQuery("store c2 into '/tmp/output2';");
  316. myPig.registerQuery("d1 = group d by gid;");
  317. myPig.registerQuery("d2 = foreach d1 generate group, MAX(d.uid) - MIN(d.uid);");
  318. myPig.registerQuery("store d2 into '/tmp/output3';");
  319. LogicalPlan lp = checkLogicalPlan(1, 3, 14);
  320. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 25);
  321. checkMRPlan(pp, 1, 1, 1);
  322. } catch (Exception e) {
  323. e.printStackTrace();
  324. Assert.fail();
  325. }
  326. }
  327. @Test
  328. public void testMultiQueryPhase3WithMixedCombiner() {
  329. System.out.println("===== multi-query phase 3 with mixed combiner =====");
  330. try {
  331. myPig.setBatchOn();
  332. myPig.registerQuery("a = load 'passwd' " +
  333. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  334. myPig.registerQuery("b = filter a by uid < 5;");
  335. myPig.registerQuery("c = filter a by uid >= 5 and uid < 10;");
  336. myPig.registerQuery("d = filter a by uid >= 10;");
  337. myPig.registerQuery("b1 = group b by gid;");
  338. myPig.registerQuery("b2 = foreach b1 generate group, COUNT(b.uid);");
  339. myPig.registerQuery("b3 = filter b2 by $1 > 5;");
  340. myPig.registerQuery("store b3 into '/tmp/output1';");
  341. myPig.registerQuery("c1 = group c by gid;");
  342. myPig.registerQuery("c2 = foreach c1 generate group, SUM(c.uid);");
  343. myPig.registerQuery("store c2 into '/tmp/output2';");
  344. myPig.registerQuery("d1 = group d by gid;");
  345. myPig.registerQuery("d2 = foreach d1 generate group, d.uname, MAX(d.uid) - MIN(d.uid);");
  346. myPig.registerQuery("store d2 into '/tmp/output3';");
  347. LogicalPlan lp = checkLogicalPlan(1, 3, 14);
  348. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 25);
  349. checkMRPlan(pp, 1, 1, 2);
  350. } catch (Exception e) {
  351. e.printStackTrace();
  352. Assert.fail();
  353. }
  354. }
  355. @Test
  356. public void testMultiQueryPhase3WithDifferentMapDataTypes() {
  357. System.out.println("===== multi-query phase 3 with different map datatypes =====");
  358. try {
  359. myPig.setBatchOn();
  360. myPig.registerQuery("a = load 'passwd' " +
  361. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  362. myPig.registerQuery("b = filter a by uid < 5;");
  363. myPig.registerQuery("c = filter a by uid >= 5 and uid < 10;");
  364. myPig.registerQuery("d = filter a by uid >= 10;");
  365. myPig.registerQuery("b1 = group b by gid parallel 2;");
  366. myPig.registerQuery("b2 = foreach b1 generate group, COUNT(b.uid);");
  367. myPig.registerQuery("b3 = filter b2 by $1 > 5;");
  368. myPig.registerQuery("store b3 into '/tmp/output1';");
  369. myPig.registerQuery("c1 = group c by $1 parallel 3;");
  370. myPig.registerQuery("c2 = foreach c1 generate group, SUM(c.uid);");
  371. myPig.registerQuery("store c2 into '/tmp/output2';");
  372. myPig.registerQuery("d1 = group d by $1 parallel 4;");
  373. myPig.registerQuery("d2 = foreach d1 generate group, COUNT(d.uid);");
  374. myPig.registerQuery("store d2 into '/tmp/output3';");
  375. LogicalPlan lp = checkLogicalPlan(1, 3, 14);
  376. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 25);
  377. checkMRPlan(pp, 1, 1, 1);
  378. } catch (Exception e) {
  379. e.printStackTrace();
  380. Assert.fail();
  381. }
  382. }
  383. @Test
  384. public void testMultiQueryPhase3StreamingInReducer() {
  385. System.out.println("===== multi-query phase 3 with streaming in reducer =====");
  386. try {
  387. myPig.setBatchOn();
  388. myPig.registerQuery("A = load 'passwd';");
  389. myPig.registerQuery("Split A into A1 if $2 > 5, A2 if $2 >= 5;");
  390. myPig.registerQuery("Split A1 into A3 if $0 > 'm', A4 if $0 >= 'm';");
  391. myPig.registerQuery("B = group A3 by $2;");
  392. myPig.registerQuery("C = foreach B generate flatten(A3);");
  393. myPig.registerQuery("D = stream B through `cat`;");
  394. myPig.registerQuery("store D into '/tmp/output1';");
  395. myPig.registerQuery("E = group A4 by $2;");
  396. myPig.registerQuery("F = foreach E generate group, COUNT(A4);");
  397. myPig.registerQuery("store F into '/tmp/output2';");
  398. myPig.registerQuery("G = group A1 by $2;");
  399. myPig.registerQuery("H = foreach G generate group, COUNT(A1);");
  400. myPig.registerQuery("store H into '/tmp/output3';");
  401. LogicalPlan lp = checkLogicalPlan(1, 3, 15);
  402. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 24);
  403. checkMRPlan(pp, 1, 1, 2);
  404. } catch (Exception e) {
  405. e.printStackTrace();
  406. Assert.fail();
  407. }
  408. }
  409. @Test
  410. public void testMultiQueryWithPigMixL12() {
  411. System.out.println("===== multi-query with PigMix L12 =====");
  412. try {
  413. myPig.setBatchOn();
  414. myPig.registerQuery("a = load 'passwd' " +
  415. "using PigStorage(':') as (uname, passwd, uid, gid);");
  416. myPig.registerQuery("b = foreach a generate uname, passwd, uid, gid;");
  417. myPig.registerQuery("split b into c1 if uid > 5, c2 if uid <= 5 ;");
  418. myPig.registerQuery("split c1 into d1 if gid < 5, d2 if gid >= 5;");
  419. myPig.registerQuery("e = group d1 by uname;");
  420. myPig.registerQuery("e1 = foreach e generate group, MAX(d1.uid);");
  421. myPig.registerQuery("store e1 into '/tmp/output1';");
  422. myPig.registerQuery("f = group c2 by uname;");
  423. myPig.registerQuery("f1 = foreach f generate group, SUM(c2.gid);");
  424. myPig.registerQuery("store f1 into '/tmp/output2';");
  425. myPig.registerQuery("g = group d2 by uname;");
  426. myPig.registerQuery("g1 = foreach g generate group, COUNT(d2);");
  427. myPig.registerQuery("store g1 into '/tmp/output3';");
  428. LogicalPlan lp = checkLogicalPlan(1, 3, 17);
  429. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 23);
  430. checkMRPlan(pp, 1, 1, 1);
  431. } catch (Exception e) {
  432. e.printStackTrace();
  433. Assert.fail();
  434. }
  435. }
  436. @Test
  437. public void testMultiQueryWithCoGroup() {
  438. System.out.println("===== multi-query with CoGroup =====");
  439. try {
  440. myPig.setBatchOn();
  441. myPig.registerQuery("a = load 'passwd' " +
  442. "using PigStorage(':') as (uname, passwd, uid, gid);");
  443. myPig.registerQuery("store a into '/tmp/output1' using BinStorage();");
  444. myPig.registerQuery("b = load '/tmp/output1' using BinStorage() as (uname, passwd, uid, gid);");
  445. myPig.registerQuery("c = load 'passwd2' " +
  446. "using PigStorage(':') as (uname, passwd, uid, gid);");
  447. myPig.registerQuery("d = cogroup b by (uname, uid) inner, c by (uname, uid) inner;");
  448. myPig.registerQuery("e = foreach d generate flatten(b), flatten(c);");
  449. myPig.registerQuery("store e into '/tmp/output2';");
  450. LogicalPlan lp = checkLogicalPlan(2, 1, 7);
  451. PhysicalPlan pp = checkPhysicalPlan(lp, 2, 1, 13);
  452. checkMRPlan(pp, 1, 1, 2);
  453. } catch (Exception e) {
  454. e.printStackTrace();
  455. Assert.fail();
  456. }
  457. }
  458. @Test
  459. public void testMultiQueryWithFJ() {
  460. System.out.println("===== multi-query with FJ =====");
  461. try {
  462. myPig.setBatchOn();
  463. myPig.registerQuery("a = load 'passwd' " +
  464. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  465. myPig.registerQuery("b = load 'passwd' " +
  466. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  467. myPig.registerQuery("c = filter a by uid > 5;");
  468. myPig.registerQuery("store c into '/tmp/output1';");
  469. myPig.registerQuery("d = filter b by gid > 10;");
  470. myPig.registerQuery("store d into '/tmp/output2';");
  471. myPig.registerQuery("e = join c by gid, d by gid using 'repl';");
  472. myPig.registerQuery("store e into '/tmp/output3';");
  473. LogicalPlan lp = checkLogicalPlan(2, 3, 8);
  474. PhysicalPlan pp = checkPhysicalPlan(lp, 2, 3, 16);
  475. checkMRPlan(pp, 2, 1, 3);
  476. } catch (Exception e) {
  477. e.printStackTrace();
  478. Assert.fail();
  479. }
  480. }
  481. @Test
  482. public void testMultiQueryWithExplicitSplitAndSideFiles() {
  483. System.out.println("===== multi-query with explicit split and side files =====");
  484. try {
  485. myPig.setBatchOn();
  486. myPig.registerQuery("a = load 'passwd' " +
  487. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  488. myPig.registerQuery("split a into b if uid > 500, c if uid <= 500;");
  489. myPig.registerQuery("store b into '/tmp/output1';");
  490. myPig.registerQuery("store c into '/tmp/output2';");
  491. myPig.registerQuery("e = cogroup b by gid, c by gid;");
  492. myPig.registerQuery("d = foreach e generate flatten(c), flatten(b);");
  493. myPig.registerQuery("store d into '/tmp/output3';");
  494. LogicalPlan lp = checkLogicalPlan(1, 3, 9);
  495. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 19);
  496. checkMRPlan(pp, 1, 1, 2);
  497. } catch (Exception e) {
  498. e.printStackTrace();
  499. Assert.fail();
  500. }
  501. }
  502. @Test
  503. public void testMultiQueryWithExplicitSplitAndOrderByAndSideFiles() {
  504. System.out.println("===== multi-query with explicit split, orderby and side files =====");
  505. try {
  506. myPig.setBatchOn();
  507. myPig.registerQuery("a = load 'passwd' " +
  508. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  509. myPig.registerQuery("split a into a1 if uid > 500, a2 if gid > 500;");
  510. myPig.registerQuery("b1 = distinct a1;");
  511. myPig.registerQuery("b2 = order a2 by uname;");
  512. myPig.registerQuery("store b1 into '/tmp/output1';");
  513. myPig.registerQuery("store b2 into '/tmp/output2';");
  514. myPig.registerQuery("c = cogroup b1 by uname, b2 by uname;");
  515. myPig.registerQuery("d = foreach c generate flatten(group), flatten($1), flatten($2);");
  516. myPig.registerQuery("store d into '/tmp/output3';");
  517. LogicalPlan lp = checkLogicalPlan(1, 3, 11);
  518. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 21);
  519. checkMRPlan(pp, 1, 1, 4);
  520. } catch (Exception e) {
  521. e.printStackTrace();
  522. Assert.fail();
  523. }
  524. }
  525. @Test
  526. public void testMultiQueryWithIntermediateStores() {
  527. System.out.println("===== multi-query with intermediate stores =====");
  528. try {
  529. myPig.setBatchOn();
  530. myPig.registerQuery("a = load 'passwd' " +
  531. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  532. myPig.registerQuery("store a into '/tmp/output1';");
  533. myPig.registerQuery("b = load '/tmp/output1' using PigStorage(':'); ");
  534. myPig.registerQuery("store b into '/tmp/output2';");
  535. LogicalPlan lp = checkLogicalPlan(1, 1, 4);
  536. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 5);
  537. checkMRPlan(pp, 1, 1, 2);
  538. } catch (Exception e) {
  539. e.printStackTrace();
  540. Assert.fail();
  541. }
  542. }
  543. @Test
  544. public void testMultiQueryWithImplicitSplitAndSideFiles() {
  545. System.out.println("===== multi-query with implicit split and side files =====");
  546. try {
  547. myPig.setBatchOn();
  548. myPig.registerQuery("a = load 'passwd' " +
  549. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  550. myPig.registerQuery("b = filter a by uid > 500;");
  551. myPig.registerQuery("c = filter a by gid > 500;");
  552. myPig.registerQuery("store c into '/tmp/output1';");
  553. myPig.registerQuery("d = cogroup b by uname, c by uname;");
  554. myPig.registerQuery("e = foreach d generate flatten(c), flatten(b);");
  555. myPig.registerQuery("store e into '/tmp/output2';");
  556. myPig.registerQuery("f = filter e by b::uid < 1000;");
  557. myPig.registerQuery("store f into '/tmp/output3';");
  558. LogicalPlan lp = checkLogicalPlan(1, 3, 9);
  559. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 22);
  560. checkMRPlan(pp, 1, 1, 2);
  561. } catch (Exception e) {
  562. e.printStackTrace();
  563. Assert.fail();
  564. }
  565. }
  566. @Test
  567. public void testMultiQueryWithTwoLoadsAndTwoStores() {
  568. System.out.println("===== multi-query with two loads and two stores =====");
  569. try {
  570. myPig.setBatchOn();
  571. myPig.registerQuery("a = load 'passwd' " +
  572. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  573. myPig.registerQuery("b = load 'passwd2' " +
  574. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  575. myPig.registerQuery("c = filter a by uid > 5;");
  576. myPig.registerQuery("d = filter b by uid > 10;");
  577. myPig.registerQuery("e = cogroup c by uid, d by uid;");
  578. myPig.registerQuery("f = foreach e generate flatten(c), flatten(d);");
  579. myPig.registerQuery("g = group f by d::gid;");
  580. myPig.registerQuery("h = filter f by c::gid > 5;");
  581. myPig.registerQuery("store g into '/tmp/output1';");
  582. myPig.registerQuery("store h into '/tmp/output2';");
  583. LogicalPlan lp = checkLogicalPlan(2, 2, 10);
  584. PhysicalPlan pp = checkPhysicalPlan(lp, 2, 2, 20);
  585. checkMRPlan(pp, 1, 1, 2);
  586. } catch (Exception e) {
  587. e.printStackTrace();
  588. Assert.fail();
  589. }
  590. }
  591. @Test
  592. public void testMultiQueryWithSplitInReduce() {
  593. System.out.println("===== multi-query with split in reduce =====");
  594. try {
  595. myPig.setBatchOn();
  596. myPig.registerQuery("a = load 'passwd' " +
  597. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  598. myPig.registerQuery("b = filter a by uid > 5;");
  599. myPig.registerQuery("c = group b by gid;");
  600. myPig.registerQuery("d = foreach c generate group, COUNT(b.uid);");
  601. myPig.registerQuery("store d into '/tmp/output1';");
  602. myPig.registerQuery("e = filter d by $1 > 5;");
  603. myPig.registerQuery("store e into '/tmp/output2';");
  604. LogicalPlan lp = checkLogicalPlan(1, 2, 7);
  605. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 13);
  606. checkMRPlan(pp, 1, 1, 1);
  607. } catch (Exception e) {
  608. e.printStackTrace();
  609. Assert.fail();
  610. }
  611. }
  612. @Test
  613. public void testMultiQueryWithSplitInReduceAndReduceSplitee() {
  614. System.out.println("===== multi-query with split in reduce and reduce splitee =====");
  615. try {
  616. myPig.setBatchOn();
  617. myPig.registerQuery("a = load 'passwd' " +
  618. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  619. myPig.registerQuery("b = filter a by uid > 5;");
  620. myPig.registerQuery("c = group b by gid;");
  621. myPig.registerQuery("d = foreach c generate group, COUNT(b.uid);");
  622. myPig.registerQuery("store d into '/tmp/output1';");
  623. myPig.registerQuery("e = filter d by $1 > 5;");
  624. myPig.registerQuery("f = group e by $1;");
  625. myPig.registerQuery("g = foreach f generate group, SUM(e.$0);");
  626. myPig.registerQuery("store g into '/tmp/output2';");
  627. LogicalPlan lp = checkLogicalPlan(1, 2, 9);
  628. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 17);
  629. checkMRPlan(pp, 1, 1, 2);
  630. } catch (Exception e) {
  631. e.printStackTrace();
  632. Assert.fail();
  633. }
  634. }
  635. @Test
  636. public void testMultiQueryWithSplitInReduceAndReduceSplitees() {
  637. System.out.println("===== multi-query with split in reduce and reduce splitees =====");
  638. try {
  639. myPig.setBatchOn();
  640. myPig.registerQuery("a = load 'passwd' " +
  641. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  642. myPig.registerQuery("b = filter a by uid > 5;");
  643. myPig.registerQuery("c = group b by gid;");
  644. myPig.registerQuery("d = foreach c generate group, COUNT(b.uid);");
  645. myPig.registerQuery("e = filter d by $1 > 5;");
  646. myPig.registerQuery("e1 = group e by $1;");
  647. myPig.registerQuery("e2 = foreach e1 generate group, SUM(e.$0);");
  648. myPig.registerQuery("store e2 into '/tmp/output1';");
  649. myPig.registerQuery("f = filter d by $1 < 5;");
  650. myPig.registerQuery("f1 = group f by $1;");
  651. myPig.registerQuery("f2 = foreach f1 generate group, COUNT(f.$0);");
  652. myPig.registerQuery("store f2 into '/tmp/output2';");
  653. LogicalPlan lp = checkLogicalPlan(1, 2, 12);
  654. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 22);
  655. checkMRPlan(pp, 1, 1, 2);
  656. } catch (Exception e) {
  657. e.printStackTrace();
  658. Assert.fail();
  659. }
  660. }
  661. @Test
  662. public void testMultiQueryWithSplitInReduceAndReduceSpliteesAndMore() {
  663. System.out.println("===== multi-query with split in reduce and reduce splitees and more =====");
  664. try {
  665. myPig.setBatchOn();
  666. myPig.registerQuery("a = load 'passwd' " +
  667. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  668. myPig.registerQuery("b = filter a by uid > 500;");
  669. myPig.registerQuery("c = group b by gid;");
  670. myPig.registerQuery("d = foreach c generate group, COUNT(b.uid);");
  671. myPig.registerQuery("e = filter d by $1 > 5;");
  672. myPig.registerQuery("e1 = group e by $1;");
  673. myPig.registerQuery("e2 = foreach e1 generate group, SUM(e.$0);");
  674. myPig.registerQuery("e3 = filter e2 by $1 > 10;");
  675. myPig.registerQuery("e4 = group e3 by $1;");
  676. myPig.registerQuery("e5 = foreach e4 generate group, SUM(e3.$0);");
  677. myPig.registerQuery("store e5 into '/tmp/output1';");
  678. myPig.registerQuery("f = filter d by $1 < 5;");
  679. myPig.registerQuery("f1 = group f by $1;");
  680. myPig.registerQuery("f2 = foreach f1 generate group, COUNT(f.$0);");
  681. myPig.registerQuery("f3 = filter f2 by $1 < 100;");
  682. myPig.registerQuery("f4 = group f3 by $1;");
  683. myPig.registerQuery("f5 = foreach f4 generate group, COUNT(f3.$0);");
  684. myPig.registerQuery("store f5 into '/tmp/output2';");
  685. LogicalPlan lp = checkLogicalPlan(1, 2, 18);
  686. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 32);
  687. checkMRPlan(pp, 1, 2, 4);
  688. } catch (Exception e) {
  689. e.printStackTrace();
  690. Assert.fail();
  691. }
  692. }
  693. @Test
  694. public void testMultiQueryWithSplitInMapAndReduceSplitees() {
  695. System.out.println("===== multi-query with split in map and reduce splitees =====");
  696. try {
  697. myPig.setBatchOn();
  698. myPig.registerQuery("a = load 'passwd' " +
  699. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
  700. myPig.registerQuery("b = filter a by uid < 5;");
  701. myPig.registerQuery("c = filter a by uid >= 5 and uid < 10;");
  702. myPig.registerQuery("d = filter a by uid >= 10;");
  703. myPig.registerQuery("b1 = group b by gid;");
  704. myPig.registerQuery("b2 = foreach b1 generate group, COUNT(b.uid);");
  705. myPig.registerQuery("b3 = filter b2 by $1 > 5;");
  706. myPig.registerQuery("store b3 into '/tmp/output1';");
  707. myPig.registerQuery("c1 = group c by $1;");
  708. myPig.registerQuery("c2 = foreach c1 generate group, SUM(c.uid);");
  709. myPig.registerQuery("store c2 into '/tmp/output2';");
  710. myPig.registerQuery("d1 = group d by $1;");
  711. myPig.registerQuery("d2 = foreach d1 generate group, COUNT(d.uid);");
  712. myPig.registerQuery("store d2 into '/tmp/output3';");
  713. LogicalPlan lp = checkLogicalPlan(1, 3, 14);
  714. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 25);
  715. checkMRPlan(pp, 1, 1, 1);
  716. } catch (Exception e) {
  717. e.printStackTrace();
  718. Assert.fail();
  719. }
  720. }
  721. @Test
  722. public void testMultiQueryWithTwoStores() {
  723. System.out.println("===== multi-query with 2 stores =====");
  724. try {
  725. myPig.setBatchOn();
  726. myPig.registerQuery("a = load 'passwd' " +
  727. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  728. myPig.registerQuery("b = filter a by uid > 5;");
  729. myPig.registerQuery("store b into '/tmp/output1';");
  730. myPig.registerQuery("c = group b by gid;");
  731. myPig.registerQuery("store c into '/tmp/output2';");
  732. LogicalPlan lp = checkLogicalPlan(1, 2, 5);
  733. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 11);
  734. checkMRPlan(pp, 1, 1, 1);
  735. } catch (Exception e) {
  736. e.printStackTrace();
  737. Assert.fail();
  738. }
  739. }
  740. @Test
  741. public void testMultiQueryWithThreeStores() {
  742. System.out.println("===== multi-query with 3 stores =====");
  743. try {
  744. myPig.setBatchOn();
  745. myPig.registerQuery("a = load 'passwd' " +
  746. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  747. myPig.registerQuery("b = filter a by uid > 5;");
  748. myPig.registerQuery("store b into '/tmp/output1';");
  749. myPig.registerQuery("c = filter b by uid > 10;");
  750. myPig.registerQuery("store c into '/tmp/output2';");
  751. myPig.registerQuery("d = filter c by uid > 15;");
  752. myPig.registerQuery("store d into '/tmp/output3';");
  753. LogicalPlan lp = checkLogicalPlan(1, 3, 7);
  754. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 14);
  755. checkMRPlan(pp, 1, 1, 1);
  756. } catch (Exception e) {
  757. e.printStackTrace();
  758. Assert.fail();
  759. }
  760. }
  761. @Test
  762. public void testMultiQueryWithTwoLoads() {
  763. System.out.println("===== multi-query with two loads =====");
  764. try {
  765. myPig.setBatchOn();
  766. myPig.registerQuery("a = load 'passwd' " +
  767. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  768. myPig.registerQuery("b = load 'passwd2' " +
  769. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  770. myPig.registerQuery("c = filter a by uid > 5;");
  771. myPig.registerQuery("d = filter b by uid > 10;");
  772. myPig.registerQuery("store c into '/tmp/output1';");
  773. myPig.registerQuery("store d into '/tmp/output2';");
  774. myPig.registerQuery("e = cogroup c by uid, d by uid;");
  775. myPig.registerQuery("store e into '/tmp/output3';");
  776. LogicalPlan lp = checkLogicalPlan(2, 3, 8);
  777. PhysicalPlan pp = checkPhysicalPlan(lp, 2, 3, 19);
  778. checkMRPlan(pp, 2, 1, 3);
  779. } catch (Exception e) {
  780. e.printStackTrace();
  781. Assert.fail();
  782. }
  783. }
  784. @Test
  785. public void testStoreOrder() {
  786. System.out.println("===== multi-query store order =====");
  787. try {
  788. myPig.setBatchOn();
  789. myPig.registerQuery("a = load 'passwd';");
  790. myPig.registerQuery("store a into '/tmp/output1' using BinStorage();");
  791. myPig.registerQuery("a = load '/tmp/output1';");
  792. myPig.registerQuery("store a into '/tmp/output2';");
  793. myPig.registerQuery("a = load '/tmp/output1';");
  794. myPig.registerQuery("store a into '/tmp/output3';");
  795. myPig.registerQuery("a = load '/tmp/output2' using BinStorage();");
  796. myPig.registerQuery("store a into '/tmp/output4';");
  797. myPig.registerQuery("a = load '/tmp/output2';");
  798. myPig.registerQuery("b = load '/tmp/output1';");
  799. myPig.registerQuery("c = cogroup a by $0, b by $0;");
  800. myPig.registerQuery("store c into '/tmp/output5';");
  801. LogicalPlan lp = checkLogicalPlan(1, 3, 12);
  802. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 15);
  803. MROperPlan mp = checkMRPlan(pp, 1, 3, 5);
  804. myPig.executeBatch();
  805. myPig.discardBatch();
  806. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output1"));
  807. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output2"));
  808. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output3"));
  809. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output4"));
  810. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output5"));
  811. } catch (Exception e) {
  812. e.printStackTrace();
  813. Assert.fail();
  814. }
  815. }
  816. @Test
  817. public void testUnnecessaryStoreRemoval() {
  818. System.out.println("===== multi-query unnecessary stores =====");
  819. try {
  820. myPig.setBatchOn();
  821. myPig.registerQuery("a = load 'passwd' " +
  822. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  823. myPig.registerQuery("b = group a by uname;");
  824. myPig.registerQuery("store b into '/tmp/output1';");
  825. myPig.registerQuery("store b into '/tmp/output2';");
  826. myPig.registerQuery("c = load '/tmp/output1';");
  827. myPig.registerQuery("d = group c by $0;");
  828. myPig.registerQuery("e = store d into '/tmp/output3';");
  829. myPig.registerQuery("f = load '/tmp/output2';");
  830. myPig.registerQuery("g = group f by $0;");
  831. myPig.registerQuery("store g into '/tmp/output4';");
  832. LogicalPlan lp = checkLogicalPlan(1, 2, 10);
  833. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 20);
  834. MROperPlan mp = checkMRPlan(pp, 1, 2, 3);
  835. MapReduceOper mo1 = mp.getRoots().get(0);
  836. MapReduceOper mo2 = mp.getLeaves().get(0);
  837. MapReduceOper mo3 = mp.getLeaves().get(1);
  838. checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
  839. checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
  840. PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);
  841. Assert.assertTrue(leaf instanceof POSplit);
  842. POSplit split = (POSplit)leaf;
  843. int i = 0;
  844. for (PhysicalPlan p: split.getPlans()) {
  845. checkPhysicalPlan(p, 1, 1, 1);
  846. ++i;
  847. }
  848. Assert.assertEquals(i,2);
  849. checkPhysicalPlan(mo2.mapPlan, 1, 1, 2);
  850. checkPhysicalPlan(mo2.reducePlan, 1, 1, 2);
  851. leaf = mo2.reducePlan.getLeaves().get(0);
  852. Assert.assertTrue(leaf instanceof POStore);
  853. checkPhysicalPlan(mo3.mapPlan, 1, 1, 2);
  854. checkPhysicalPlan(mo3.reducePlan, 1, 1, 2);
  855. leaf = mo3.reducePlan.getLeaves().get(0);
  856. Assert.assertTrue(leaf instanceof POStore);
  857. myPig.executeBatch();
  858. myPig.discardBatch();
  859. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output1"));
  860. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output2"));
  861. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output3"));
  862. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output4"));
  863. } catch (Exception e) {
  864. e.printStackTrace();
  865. Assert.fail();
  866. }
  867. }
  868. @Test
  869. public void testUnnecessaryStoreRemovalCollapseSplit() {
  870. System.out.println("===== multi-query unnecessary stores collapse split =====");
  871. try {
  872. myPig.setBatchOn();
  873. myPig.registerQuery("a = load 'passwd' " +
  874. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  875. myPig.registerQuery("b = group a by uname;");
  876. myPig.registerQuery("store b into '/tmp/output1';");
  877. myPig.registerQuery("c = load '/tmp/output1';");
  878. myPig.registerQuery("d = group c by $0;");
  879. myPig.registerQuery("e = store d into '/tmp/output2';");
  880. LogicalPlan lp = checkLogicalPlan(1, 1, 6);
  881. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 11);
  882. MROperPlan mp = checkMRPlan(pp, 1, 1, 2);
  883. MapReduceOper mo1 = mp.getRoots().get(0);
  884. MapReduceOper mo2 = mp.getLeaves().get(0);
  885. checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
  886. checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
  887. PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);
  888. Assert.assertTrue(leaf instanceof POStore);
  889. checkPhysicalPlan(mo2.mapPlan, 1, 1, 2);
  890. checkPhysicalPlan(mo2.reducePlan, 1, 1, 2);
  891. leaf = mo2.reducePlan.getLeaves().get(0);
  892. Assert.assertTrue(leaf instanceof POStore);
  893. myPig.executeBatch();
  894. myPig.discardBatch();
  895. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output1"));
  896. Assert.assertTrue(myPig.getPigContext().getDfs().isContainer("/tmp/output2"));
  897. } catch (Exception e) {
  898. e.printStackTrace();
  899. Assert.fail();
  900. }
  901. }
  902. @Test
  903. public void testEmptyFilterRemoval() {
  904. System.out.println("===== multi-query empty filters =====");
  905. try {
  906. myPig.setBatchOn();
  907. myPig.registerQuery("a = load 'passwd' " +
  908. "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);");
  909. myPig.registerQuery("b = filter a by uid>0;");
  910. myPig.registerQuery("c = filter a by uid>5;");
  911. myPig.registerQuery("d = filter c by uid<10;");
  912. myPig.registerQuery("store b into '/tmp/output1';");
  913. myPig.registerQuery("store b into '/tmp/output2';");
  914. myPig.registerQuery("store b into '/tmp/output3';");
  915. LogicalPlan lp = checkLogicalPlan(1, 3, 5);
  916. PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 10);
  917. MROperPlan mp = checkMRPlan(pp, 1, 1, 1);
  918. MapReduceOper mo = mp.getRoots().get(0);
  919. checkPhysicalPlan(mo.mapPlan, 1, 1, 4);
  920. PhysicalOperator leaf = mo.mapPlan.getLeaves().get(0);
  921. Assert.assertTrue(leaf instanceof POSplit);
  922. POSplit split = (POSplit)leaf;
  923. int i = 0;
  924. for (PhysicalPlan p: split.getPlans()) {
  925. checkPhysicalPlan(p, 1, 1, 1);
  926. ++i;
  927. }
  928. Assert.assertEquals(i,3);
  929. myPig.executeBatch();
  930. myPig.discardBatch();
  931. } catch (Exception e) {
  932. e.printStackTrace();
  933. Assert.fail();
  934. }
  935. }
  936. @Test
  937. public void testMultiQueryWithDescribe() {
  938. System.out.println("===== multi-query with describe =====");
  939. try {
  940. String script = "a = load 'passwd' "
  941. + "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);"
  942. + "b = filter a by uid > 5;"
  943. + "describe b;"
  944. + "store b into '/tmp/output1';\n";
  945. GruntParser parser = new GruntParser(new StringReader(script));
  946. parser.setInteractive(false);
  947. parser.setParams(myPig);
  948. parser.parseStopOnError();
  949. } catch (Exception e) {
  950. e.printStackTrace();
  951. Assert.fail();
  952. }
  953. }
  954. @Test
  955. public void testMultiQueryWithIllustrate() {
  956. System.out.println("===== multi-query with illustrate =====");
  957. try {
  958. String script = "a = load 'passwd' "
  959. + "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int,gid:int);"
  960. + "b = filter a by uid > 5;"
  961. + "illustrate b;"
  962. + "store b into '/tmp/output1';\n";
  963. GruntParser parser = new GruntParser(new StringReader(script));
  964. parser.setInteractive(false);
  965. parser.setParams(myPig);
  966. parser.pa

Large files are truncated, but you can click here to view the full file