PageRenderTime 48ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/test/org/apache/pig/test/TestNewPlanColumnPrune.java

https://github.com/dorefiend/pig
Java | 444 lines | 335 code | 75 blank | 34 comment | 3 complexity | 36bb665abc3eab28fd20ba45984c3a7f MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.pig.test;
  19. import static org.junit.Assert.assertEquals;
  20. import static org.junit.Assert.assertNull;
  21. import static org.junit.Assert.assertTrue;
  22. import static org.junit.Assert.fail;
  23. import java.util.ArrayList;
  24. import java.util.HashSet;
  25. import java.util.List;
  26. import java.util.Map;
  27. import java.util.Properties;
  28. import java.util.Set;
  29. import org.apache.pig.ExecType;
  30. import org.apache.pig.PigServer;
  31. import org.apache.pig.impl.PigContext;
  32. import org.apache.pig.newplan.Operator;
  33. import org.apache.pig.newplan.OperatorPlan;
  34. import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
  35. import org.apache.pig.newplan.logical.relational.LOLoad;
  36. import org.apache.pig.newplan.logical.relational.LogicalPlan;
  37. import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
  38. import org.apache.pig.newplan.logical.rules.AddForEach;
  39. import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
  40. import org.apache.pig.newplan.logical.rules.MapKeysPruneHelper;
  41. import org.apache.pig.newplan.optimizer.PlanOptimizer;
  42. import org.apache.pig.newplan.optimizer.Rule;
  43. import org.junit.Test;
  44. public class TestNewPlanColumnPrune {
  45. LogicalPlan plan = null;
  46. PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
  47. private LogicalPlan buildPlan(String query) throws Exception{
  48. PigServer pigServer = new PigServer( pc );
  49. return Util.buildLp(pigServer, query);
  50. }
  51. @Test
  52. public void testNoPrune() throws Exception {
  53. // no foreach
  54. String query = "a = load 'd.txt' as (id, v1, v2);" +
  55. "b = filter a by v1==NULL;" +
  56. "store b into 'empty';";
  57. LogicalPlan newLogicalPlan = buildPlan(query);
  58. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  59. optimizer.optimize();
  60. query = "a = load 'd.txt' as (id, v1, v2);" +
  61. "b = filter a by v1==NULL;" +
  62. "store b into 'empty';";
  63. LogicalPlan expected = buildPlan(query);
  64. assertTrue(expected.isEqual(newLogicalPlan));
  65. // no schema
  66. query = "a = load 'd.txt';" +
  67. "b = foreach a generate $0, $1;" +
  68. "store b into 'empty';";
  69. newLogicalPlan = buildPlan(query);
  70. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  71. optimizer.optimize();
  72. query = "a = load 'd.txt';"+
  73. "b = foreach a generate $0, $1;"+
  74. "store b into 'empty';";
  75. expected = buildPlan(query);
  76. assertTrue(expected.isEqual(newLogicalPlan));
  77. }
  78. @Test
  79. public void testPrune() throws Exception {
  80. // only foreach
  81. String query = "a = load 'd.txt' as (id, v1, v2);" +
  82. "b = foreach a generate id;"+
  83. "store b into 'empty';";
  84. LogicalPlan newLogicalPlan = buildPlan(query);
  85. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  86. optimizer.optimize();
  87. query = "a = load 'd.txt' as (id);" +
  88. "b = foreach a generate id;"+
  89. "store b into 'empty';";
  90. LogicalPlan expected = buildPlan(query);
  91. assertTrue(expected.isEqual(newLogicalPlan));
  92. // with filter
  93. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);"+
  94. "b = filter a by v1 != NULL AND (v2+v3)<100;"+
  95. "c = foreach b generate id;"+
  96. "store c into 'empty';";
  97. newLogicalPlan = buildPlan(query);
  98. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  99. optimizer.optimize();
  100. query = "a = load 'd.txt' as (id, v1, v3, v2);" +
  101. "b = filter a by v1 != NULL AND (v2+v3)<100;" +
  102. "c = foreach b generate id;" +
  103. "store c into 'empty';";
  104. expected = buildPlan(query);
  105. assertTrue(expected.isEqual(newLogicalPlan));
  106. // with 2 foreach
  107. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  108. "b = foreach a generate v2, v5, v4;" +
  109. "c = foreach b generate v5, v4;" +
  110. "store c into 'empty';";
  111. newLogicalPlan = buildPlan(query);
  112. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  113. optimizer.optimize();
  114. query = "a = load 'd.txt' as (v5, v4);" +
  115. "b = foreach a generate v5, v4;" +
  116. "c = foreach b generate v5, v4;" +
  117. "store c into 'empty';";
  118. expected = buildPlan(query);
  119. assertTrue(expected.isEqual(newLogicalPlan));
  120. // with 2 foreach
  121. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  122. "b = foreach a generate id, v1, v5, v3, v4;" +
  123. "c = foreach b generate v5, v4;" +
  124. "store c into 'empty';";
  125. newLogicalPlan = buildPlan(query);
  126. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  127. optimizer.optimize();
  128. query = "a = load 'd.txt' as (v5, v4);" +
  129. "b = foreach a generate v5, v4;" +
  130. "c = foreach b generate v5, v4;" +
  131. "store c into 'empty';";
  132. expected = buildPlan(query);
  133. assertTrue(expected.isEqual(newLogicalPlan));
  134. // with 2 foreach and filter in between
  135. query = "a =load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  136. "b = foreach a generate v2, v5, v4;" +
  137. "c = filter b by v2 != NULL;" +
  138. "d = foreach c generate v5, v4;" +
  139. "store d into 'empty';";
  140. newLogicalPlan = buildPlan(query);
  141. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  142. optimizer.optimize();
  143. query = "a =load 'd.txt' as (v5, v4, v2);" +
  144. "b = foreach a generate v2, v5, v4;" +
  145. "c = filter b by v2 != NULL;" +
  146. "d = foreach c generate v5, v4;" +
  147. "store d into 'empty';";
  148. expected = buildPlan(query);
  149. assertTrue(expected.isEqual(newLogicalPlan));
  150. // with 2 foreach after join
  151. query = "a =load 'd.txt' as (id, v1, v2, v3);" +
  152. "b = load 'c.txt' as (id, v4, v5, v6);" +
  153. "c = join a by id, b by id;" +
  154. "d = foreach c generate a::id, v5, v3, v4;" +
  155. "store d into 'empty';";
  156. newLogicalPlan = buildPlan(query);
  157. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  158. optimizer.optimize();
  159. query = "a =load 'd.txt' as (id, v3);" +
  160. "b = load 'c.txt' as (id, v4, v5);" +
  161. "c = join a by id, b by id;" +
  162. "d = foreach c generate a::id, v5, v3, v4;" +
  163. "store d into 'empty';";
  164. expected = buildPlan(query);
  165. assertTrue(expected.isEqual(newLogicalPlan));
  166. // with BinStorage, insert foreach after load
  167. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  168. "c = filter a by v2 != NULL;" +
  169. "d = foreach c generate v5, v4;" +
  170. "store d into 'empty';";
  171. newLogicalPlan = buildPlan(query);
  172. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  173. optimizer.optimize();
  174. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  175. "b = foreach a generate v5, v4, v2;" +
  176. "c = filter b by v2 != NULL;" +
  177. "d = foreach c generate v5, v4;" +
  178. "store d into 'empty';";
  179. expected = buildPlan(query);
  180. assertTrue(expected.isEqual(newLogicalPlan));
  181. // with BinStorage, not to insert foreach after load if there is already one
  182. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  183. "b = foreach a generate v5, v4, v2;" +
  184. "c = filter b by v2 != NULL;" +
  185. "d = foreach c generate v5;" +
  186. "store d into 'empty';";
  187. newLogicalPlan = buildPlan(query);
  188. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  189. optimizer.optimize();
  190. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  191. "b = foreach a generate v5, v2;" +
  192. "c = filter b by v2 != NULL;" +
  193. "d = foreach c generate v5;" +
  194. "store d into 'empty';";
  195. expected = buildPlan(query);
  196. assertTrue(expected.isEqual(newLogicalPlan));
  197. // with BinStorage, not to insert foreach after load if there is already one
  198. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  199. "b = foreach a generate v5, v4, v2, 10;" +
  200. "c = filter b by v2 != NULL;" +
  201. "d = foreach c generate v5;" +
  202. "store d into 'empty';";
  203. newLogicalPlan = buildPlan(query);
  204. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  205. optimizer.optimize();
  206. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  207. "b = foreach a generate v5, v2, 10;" +
  208. "c = filter b by v2 != NULL;" +
  209. "d = foreach c generate v5;" +
  210. "store d into 'empty';";
  211. expected = buildPlan(query);
  212. assertTrue(expected.isEqual(newLogicalPlan));
  213. }
  214. @Test
  215. @SuppressWarnings("unchecked")
  216. public void testPruneWithMapKey() throws Exception {
  217. // only foreach
  218. String query = "a =load 'd.txt' as (id, v1, m:map[]);" +
  219. "b = foreach a generate id, m#'path';" +
  220. "store b into 'empty';";
  221. LogicalPlan newLogicalPlan = buildPlan(query);
  222. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  223. optimizer.optimize();
  224. query = "a =load 'd.txt' as (id, m:map[]);" +
  225. "b = foreach a generate id, m#'path';" +
  226. "store b into 'empty';";
  227. LogicalPlan expected = buildPlan(query);
  228. assertTrue(expected.isEqual(newLogicalPlan));
  229. LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
  230. Map<Integer,Set<String>> annotation =
  231. (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  232. assertEquals(1, annotation.size());
  233. Set<String> s = new HashSet<String>();
  234. s.add("path");
  235. assertEquals(annotation.get(2), s);
  236. // foreach with join
  237. query = "a =load 'd.txt' as (id, v1, m:map[]);" +
  238. "b = load 'd.txt' as (id, v1, m:map[]);" +
  239. "c = join a by id, b by id;" +
  240. "d = filter c by a::m#'path' != NULL;" +
  241. "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
  242. "store e into 'empty';";
  243. newLogicalPlan = buildPlan(query);
  244. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  245. optimizer.optimize();
  246. query = "a =load 'd.txt' as (id, m:map[]);" +
  247. "b = load 'd.txt' as (id, m:map[]);" +
  248. "c = join a by id, b by id;" +
  249. "d = filter c by a::m#'path' != NULL;" +
  250. "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
  251. "store e into 'empty';";
  252. expected = buildPlan(query);
  253. assertTrue(expected.isEqual(newLogicalPlan));
  254. List<Operator> ll = newLogicalPlan.getSources();
  255. assertEquals(2, ll.size());
  256. LOLoad loada = null;
  257. LOLoad loadb = null;
  258. for(Operator opp: ll) {
  259. if (((LogicalRelationalOperator)opp).getAlias().equals("a")) {
  260. loada = (LOLoad)opp;
  261. continue;
  262. }
  263. if (((LogicalRelationalOperator)opp).getAlias().equals("b")) {
  264. loadb = (LOLoad)opp;
  265. continue;
  266. }
  267. }
  268. annotation =
  269. (Map<Integer, Set<String>>) loada.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  270. assertNull(annotation);
  271. annotation =
  272. (Map<Integer, Set<String>>) loadb.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  273. assertEquals(1, annotation.size());
  274. s = new HashSet<String>();
  275. s.add("path");
  276. assertEquals(annotation.get(2), s);
  277. }
  278. @Test
  279. public void testPruneWithBag() throws Exception {
  280. // filter above foreach
  281. String query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
  282. "b = filter a by id>10;" +
  283. "c = foreach b generate id, FLATTEN(v);" +
  284. "d = foreach c generate id, v::s2;" +
  285. "store d into 'empty';";
  286. LogicalPlan newLogicalPlan = buildPlan(query);
  287. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  288. optimizer.optimize();
  289. query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
  290. "b = filter a by id>10;" +
  291. "c = foreach b generate id, FLATTEN(v);" +
  292. "d = foreach c generate id, v::s2;" +
  293. "store d into 'empty';";
  294. LogicalPlan expected = buildPlan(query);
  295. assertTrue(expected.isEqual(newLogicalPlan));
  296. }
  297. @Test
  298. public void testAddForeach() throws Exception {
  299. // filter above foreach
  300. String query = "a =load 'd.txt' as (id, v1, v2);" +
  301. "b = filter a by v1>10;" +
  302. "c = foreach b generate id;" +
  303. "store c into 'empty';";
  304. LogicalPlan newLogicalPlan = buildPlan(query);
  305. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  306. optimizer.optimize();
  307. query = "a =load 'd.txt' as (id, v1);" +
  308. "b = filter a by v1>10;" +
  309. "c = foreach b generate id;" +
  310. "store c into 'empty';";
  311. LogicalPlan expected = buildPlan(query);
  312. assertTrue(expected.isEqual(newLogicalPlan));
  313. // join with foreach
  314. query = "a =load 'd.txt' as (id, v1, v2);" +
  315. "b = load 'd.txt' as (id, v1, v2);" +
  316. "c = join a by id, b by id;" +
  317. "d = filter c by a::v1>b::v1;" +
  318. "e = foreach d generate a::id;" +
  319. "store e into 'empty';";
  320. newLogicalPlan = buildPlan(query);
  321. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  322. optimizer.optimize();
  323. query = "a =load 'd.txt' as (id, v1);" +
  324. "b = load 'd.txt' as (id, v1);" +
  325. "c = join a by id, b by id;" +
  326. "d = foreach c generate a::id, a::v1, b::v1;" +
  327. "e = filter d by a::v1>b::v1;" +
  328. "f = foreach e generate a::id;" +
  329. "store f into 'empty';";
  330. expected = buildPlan(query);
  331. assertTrue(expected.isEqual(newLogicalPlan));
  332. }
  333. @Test
  334. public void testPruneSubTreeForEach() throws Exception {
  335. String query = "a =load 'd.txt' as (id, v1);" +
  336. "b = group a by id;" +
  337. "c = foreach b { d = a.v1; " +
  338. " e = distinct d; " +
  339. " generate group, e; };" +
  340. "f = foreach c generate group ;" +
  341. "store f into 'empty';";
  342. LogicalPlan newLogicalPlan = buildPlan(query);
  343. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  344. try {
  345. optimizer.optimize();
  346. } catch (Exception e) {
  347. //PIG-2968 throws ConcurrentModificationException
  348. e.printStackTrace();
  349. fail("Unexpected Exception: " + e);
  350. }
  351. }
  352. public class MyPlanOptimizer extends LogicalPlanOptimizer {
  353. protected MyPlanOptimizer(OperatorPlan p, int iterations) {
  354. super(p, iterations, null);
  355. }
  356. protected List<Set<Rule>> buildRuleSets() {
  357. List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
  358. Rule r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
  359. Set<Rule> s = new HashSet<Rule>();
  360. s.add(r);
  361. ls.add(s);
  362. r = new AddForEach("AddForEach");
  363. s = new HashSet<Rule>();
  364. s.add(r);
  365. ls.add(s);
  366. return ls;
  367. }
  368. }
  369. }