PageRenderTime 53ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/test/org/apache/pig/test/TestNewPlanColumnPrune.java

https://github.com/zjffdu/pig
Java | 416 lines | 308 code | 75 blank | 33 comment | 3 complexity | 5ca0a1fdd257b9003b875492a5fbe801 MD5 | raw file
Possible License(s): Apache-2.0, CPL-1.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.pig.test;
  19. import java.util.ArrayList;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Map;
  23. import java.util.Properties;
  24. import java.util.Set;
  25. import junit.framework.TestCase;
  26. import org.apache.pig.ExecType;
  27. import org.apache.pig.PigServer;
  28. import org.apache.pig.impl.PigContext;
  29. import org.apache.pig.newplan.Operator;
  30. import org.apache.pig.newplan.OperatorPlan;
  31. import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
  32. import org.apache.pig.newplan.logical.relational.LOLoad;
  33. import org.apache.pig.newplan.logical.relational.LogicalPlan;
  34. import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
  35. import org.apache.pig.newplan.logical.rules.AddForEach;
  36. import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
  37. import org.apache.pig.newplan.logical.rules.MapKeysPruneHelper;
  38. import org.apache.pig.newplan.optimizer.PlanOptimizer;
  39. import org.apache.pig.newplan.optimizer.Rule;
  40. public class TestNewPlanColumnPrune extends TestCase {
  41. LogicalPlan plan = null;
  42. PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
  43. private LogicalPlan buildPlan(String query) throws Exception{
  44. PigServer pigServer = new PigServer( pc );
  45. return Util.buildLp(pigServer, query);
  46. }
  47. public void testNoPrune() throws Exception {
  48. // no foreach
  49. String query = "a = load 'd.txt' as (id, v1, v2);" +
  50. "b = filter a by v1==NULL;" +
  51. "store b into 'empty';";
  52. LogicalPlan newLogicalPlan = buildPlan(query);
  53. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  54. optimizer.optimize();
  55. query = "a = load 'd.txt' as (id, v1, v2);" +
  56. "b = filter a by v1==NULL;" +
  57. "store b into 'empty';";
  58. LogicalPlan expected = buildPlan(query);
  59. assertTrue(expected.isEqual(newLogicalPlan));
  60. // no schema
  61. query = "a = load 'd.txt';" +
  62. "b = foreach a generate $0, $1;" +
  63. "store b into 'empty';";
  64. newLogicalPlan = buildPlan(query);
  65. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  66. optimizer.optimize();
  67. query = "a = load 'd.txt';"+
  68. "b = foreach a generate $0, $1;"+
  69. "store b into 'empty';";
  70. expected = buildPlan(query);
  71. assertTrue(expected.isEqual(newLogicalPlan));
  72. }
  73. public void testPrune() throws Exception {
  74. // only foreach
  75. String query = "a = load 'd.txt' as (id, v1, v2);" +
  76. "b = foreach a generate id;"+
  77. "store b into 'empty';";
  78. LogicalPlan newLogicalPlan = buildPlan(query);
  79. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  80. optimizer.optimize();
  81. query = "a = load 'd.txt' as (id);" +
  82. "b = foreach a generate id;"+
  83. "store b into 'empty';";
  84. LogicalPlan expected = buildPlan(query);
  85. assertTrue(expected.isEqual(newLogicalPlan));
  86. // with filter
  87. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);"+
  88. "b = filter a by v1 != NULL AND (v2+v3)<100;"+
  89. "c = foreach b generate id;"+
  90. "store c into 'empty';";
  91. newLogicalPlan = buildPlan(query);
  92. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  93. optimizer.optimize();
  94. query = "a = load 'd.txt' as (id, v1, v3, v2);" +
  95. "b = filter a by v1 != NULL AND (v2+v3)<100;" +
  96. "c = foreach b generate id;" +
  97. "store c into 'empty';";
  98. expected = buildPlan(query);
  99. assertTrue(expected.isEqual(newLogicalPlan));
  100. // with 2 foreach
  101. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  102. "b = foreach a generate v2, v5, v4;" +
  103. "c = foreach b generate v5, v4;" +
  104. "store c into 'empty';";
  105. newLogicalPlan = buildPlan(query);
  106. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  107. optimizer.optimize();
  108. query = "a = load 'd.txt' as (v5, v4);" +
  109. "b = foreach a generate v5, v4;" +
  110. "c = foreach b generate v5, v4;" +
  111. "store c into 'empty';";
  112. expected = buildPlan(query);
  113. assertTrue(expected.isEqual(newLogicalPlan));
  114. // with 2 foreach
  115. query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  116. "b = foreach a generate id, v1, v5, v3, v4;" +
  117. "c = foreach b generate v5, v4;" +
  118. "store c into 'empty';";
  119. newLogicalPlan = buildPlan(query);
  120. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  121. optimizer.optimize();
  122. query = "a = load 'd.txt' as (v5, v4);" +
  123. "b = foreach a generate v5, v4;" +
  124. "c = foreach b generate v5, v4;" +
  125. "store c into 'empty';";
  126. expected = buildPlan(query);
  127. assertTrue(expected.isEqual(newLogicalPlan));
  128. // with 2 foreach and filter in between
  129. query = "a =load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
  130. "b = foreach a generate v2, v5, v4;" +
  131. "c = filter b by v2 != NULL;" +
  132. "d = foreach c generate v5, v4;" +
  133. "store d into 'empty';";
  134. newLogicalPlan = buildPlan(query);
  135. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  136. optimizer.optimize();
  137. query = "a =load 'd.txt' as (v5, v4, v2);" +
  138. "b = foreach a generate v2, v5, v4;" +
  139. "c = filter b by v2 != NULL;" +
  140. "d = foreach c generate v5, v4;" +
  141. "store d into 'empty';";
  142. expected = buildPlan(query);
  143. assertTrue(expected.isEqual(newLogicalPlan));
  144. // with 2 foreach after join
  145. query = "a =load 'd.txt' as (id, v1, v2, v3);" +
  146. "b = load 'c.txt' as (id, v4, v5, v6);" +
  147. "c = join a by id, b by id;" +
  148. "d = foreach c generate a::id, v5, v3, v4;" +
  149. "store d into 'empty';";
  150. newLogicalPlan = buildPlan(query);
  151. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  152. optimizer.optimize();
  153. query = "a =load 'd.txt' as (id, v3);" +
  154. "b = load 'c.txt' as (id, v4, v5);" +
  155. "c = join a by id, b by id;" +
  156. "d = foreach c generate a::id, v5, v3, v4;" +
  157. "store d into 'empty';";
  158. expected = buildPlan(query);
  159. assertTrue(expected.isEqual(newLogicalPlan));
  160. // with BinStorage, insert foreach after load
  161. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  162. "c = filter a by v2 != NULL;" +
  163. "d = foreach c generate v5, v4;" +
  164. "store d into 'empty';";
  165. newLogicalPlan = buildPlan(query);
  166. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  167. optimizer.optimize();
  168. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  169. "b = foreach a generate v5, v4, v2;" +
  170. "c = filter b by v2 != NULL;" +
  171. "d = foreach c generate v5, v4;" +
  172. "store d into 'empty';";
  173. expected = buildPlan(query);
  174. assertTrue(expected.isEqual(newLogicalPlan));
  175. // with BinStorage, not to insert foreach after load if there is already one
  176. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  177. "b = foreach a generate v5, v4, v2;" +
  178. "c = filter b by v2 != NULL;" +
  179. "d = foreach c generate v5;" +
  180. "store d into 'empty';";
  181. newLogicalPlan = buildPlan(query);
  182. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  183. optimizer.optimize();
  184. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  185. "b = foreach a generate v5, v2;" +
  186. "c = filter b by v2 != NULL;" +
  187. "d = foreach c generate v5;" +
  188. "store d into 'empty';";
  189. expected = buildPlan(query);
  190. assertTrue(expected.isEqual(newLogicalPlan));
  191. // with BinStorage, not to insert foreach after load if there is already one
  192. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  193. "b = foreach a generate v5, v4, v2, 10;" +
  194. "c = filter b by v2 != NULL;" +
  195. "d = foreach c generate v5;" +
  196. "store d into 'empty';";
  197. newLogicalPlan = buildPlan(query);
  198. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  199. optimizer.optimize();
  200. query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
  201. "b = foreach a generate v5, v2, 10;" +
  202. "c = filter b by v2 != NULL;" +
  203. "d = foreach c generate v5;" +
  204. "store d into 'empty';";
  205. expected = buildPlan(query);
  206. assertTrue(expected.isEqual(newLogicalPlan));
  207. }
  208. @SuppressWarnings("unchecked")
  209. public void testPruneWithMapKey() throws Exception {
  210. // only foreach
  211. String query = "a =load 'd.txt' as (id, v1, m:map[]);" +
  212. "b = foreach a generate id, m#'path';" +
  213. "store b into 'empty';";
  214. LogicalPlan newLogicalPlan = buildPlan(query);
  215. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  216. optimizer.optimize();
  217. query = "a =load 'd.txt' as (id, m:map[]);" +
  218. "b = foreach a generate id, m#'path';" +
  219. "store b into 'empty';";
  220. LogicalPlan expected = buildPlan(query);
  221. assertTrue(expected.isEqual(newLogicalPlan));
  222. LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
  223. Map<Integer,Set<String>> annotation =
  224. (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  225. assertEquals(annotation.size(), 1);
  226. Set<String> s = new HashSet<String>();
  227. s.add("path");
  228. assertEquals(annotation.get(2), s);
  229. // foreach with join
  230. query = "a =load 'd.txt' as (id, v1, m:map[]);" +
  231. "b = load 'd.txt' as (id, v1, m:map[]);" +
  232. "c = join a by id, b by id;" +
  233. "d = filter c by a::m#'path' != NULL;" +
  234. "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
  235. "store e into 'empty';";
  236. newLogicalPlan = buildPlan(query);
  237. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  238. optimizer.optimize();
  239. query = "a =load 'd.txt' as (id, m:map[]);" +
  240. "b = load 'd.txt' as (id, m:map[]);" +
  241. "c = join a by id, b by id;" +
  242. "d = filter c by a::m#'path' != NULL;" +
  243. "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
  244. "store e into 'empty';";
  245. expected = buildPlan(query);
  246. assertTrue(expected.isEqual(newLogicalPlan));
  247. List<Operator> ll = newLogicalPlan.getSources();
  248. assertEquals(ll.size(), 2);
  249. LOLoad loada = null;
  250. LOLoad loadb = null;
  251. for(Operator opp: ll) {
  252. if (((LogicalRelationalOperator)opp).getAlias().equals("a")) {
  253. loada = (LOLoad)opp;
  254. continue;
  255. }
  256. if (((LogicalRelationalOperator)opp).getAlias().equals("b")) {
  257. loadb = (LOLoad)opp;
  258. continue;
  259. }
  260. }
  261. annotation =
  262. (Map<Integer, Set<String>>) loada.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  263. assertNull(annotation);
  264. annotation =
  265. (Map<Integer, Set<String>>) loadb.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
  266. assertEquals(annotation.size(), 1);
  267. s = new HashSet<String>();
  268. s.add("path");
  269. assertEquals(annotation.get(2), s);
  270. }
  271. public void testPruneWithBag() throws Exception {
  272. // filter above foreach
  273. String query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
  274. "b = filter a by id>10;" +
  275. "c = foreach b generate id, FLATTEN(v);" +
  276. "d = foreach c generate id, v::s2;" +
  277. "store d into 'empty';";
  278. LogicalPlan newLogicalPlan = buildPlan(query);
  279. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  280. optimizer.optimize();
  281. query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
  282. "b = filter a by id>10;" +
  283. "c = foreach b generate id, FLATTEN(v);" +
  284. "d = foreach c generate id, v::s2;" +
  285. "store d into 'empty';";
  286. LogicalPlan expected = buildPlan(query);
  287. assertTrue(expected.isEqual(newLogicalPlan));
  288. }
  289. public void testAddForeach() throws Exception {
  290. // filter above foreach
  291. String query = "a =load 'd.txt' as (id, v1, v2);" +
  292. "b = filter a by v1>10;" +
  293. "c = foreach b generate id;" +
  294. "store c into 'empty';";
  295. LogicalPlan newLogicalPlan = buildPlan(query);
  296. PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  297. optimizer.optimize();
  298. query = "a =load 'd.txt' as (id, v1);" +
  299. "b = filter a by v1>10;" +
  300. "c = foreach b generate id;" +
  301. "store c into 'empty';";
  302. LogicalPlan expected = buildPlan(query);
  303. assertTrue(expected.isEqual(newLogicalPlan));
  304. // join with foreach
  305. query = "a =load 'd.txt' as (id, v1, v2);" +
  306. "b = load 'd.txt' as (id, v1, v2);" +
  307. "c = join a by id, b by id;" +
  308. "d = filter c by a::v1>b::v1;" +
  309. "e = foreach d generate a::id;" +
  310. "store e into 'empty';";
  311. newLogicalPlan = buildPlan(query);
  312. optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
  313. optimizer.optimize();
  314. query = "a =load 'd.txt' as (id, v1);" +
  315. "b = load 'd.txt' as (id, v1);" +
  316. "c = join a by id, b by id;" +
  317. "d = foreach c generate a::id, a::v1, b::v1;" +
  318. "e = filter d by a::v1>b::v1;" +
  319. "f = foreach e generate a::id;" +
  320. "store f into 'empty';";
  321. expected = buildPlan(query);
  322. assertTrue(expected.isEqual(newLogicalPlan));
  323. }
  324. public class MyPlanOptimizer extends LogicalPlanOptimizer {
  325. protected MyPlanOptimizer(OperatorPlan p, int iterations) {
  326. super(p, iterations, null);
  327. }
  328. protected List<Set<Rule>> buildRuleSets() {
  329. List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
  330. Rule r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
  331. Set<Rule> s = new HashSet<Rule>();
  332. s.add(r);
  333. ls.add(s);
  334. r = new AddForEach("AddForEach");
  335. s = new HashSet<Rule>();
  336. s.add(r);
  337. ls.add(s);
  338. return ls;
  339. }
  340. }
  341. }