PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.2.0-rc0/src/test/org/apache/hcatalog/pig/TestHCatEximLoader.java

#
Java | 351 lines | 285 code | 43 blank | 23 comment | 12 complexity | bb6a3ee73e5e53d58ee29b15a3e524ea MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hcatalog.pig;
  19. import java.io.IOException;
  20. import java.util.Iterator;
  21. import java.util.Map;
  22. import java.util.Properties;
  23. import java.util.TreeMap;
  24. import junit.framework.TestCase;
  25. import org.apache.hcatalog.MiniCluster;
  26. import org.apache.pig.ExecType;
  27. import org.apache.pig.PigServer;
  28. import org.apache.pig.backend.executionengine.ExecException;
  29. import org.apache.pig.data.Tuple;
  30. import org.apache.pig.impl.util.UDFContext;
  31. /**
  32. *
  33. * TestHCatEximLoader. Assumes Exim storer is working well
  34. *
  35. */
  36. public class TestHCatEximLoader extends TestCase {
  37. private static final String NONPART_TABLE = "junit_unparted";
  38. private static final String PARTITIONED_TABLE = "junit_parted";
  39. private static MiniCluster cluster = MiniCluster.buildCluster();
  40. private static final String dataLocation = "/tmp/data";
  41. private static String fqdataLocation;
  42. private static final String exportLocation = "/tmp/export";
  43. private static String fqexportLocation;
  44. private static Properties props;
  45. private void cleanup() throws IOException {
  46. MiniCluster.deleteFile(cluster, dataLocation);
  47. MiniCluster.deleteFile(cluster, exportLocation);
  48. }
  49. @Override
  50. protected void setUp() throws Exception {
  51. props = new Properties();
  52. props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name"));
  53. System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName()
  54. + ", fs.default.name : " + props.getProperty("fs.default.name"));
  55. fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation;
  56. fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation;
  57. System.out.println("FQ Data Location :" + fqdataLocation);
  58. System.out.println("FQ Export Location :" + fqexportLocation);
  59. cleanup();
  60. }
  61. @Override
  62. protected void tearDown() throws Exception {
  63. cleanup();
  64. }
  65. private void populateDataFile() throws IOException {
  66. MiniCluster.deleteFile(cluster, dataLocation);
  67. String[] input = new String[] {
  68. "237,Krishna,01/01/1990,M,IN,TN",
  69. "238,Kalpana,01/01/2000,F,IN,KA",
  70. "239,Satya,01/01/2001,M,US,TN",
  71. "240,Kavya,01/01/2002,F,US,KA"
  72. };
  73. MiniCluster.createInputFile(cluster, dataLocation, input);
  74. }
  75. private static class EmpDetail {
  76. String name;
  77. String dob;
  78. String mf;
  79. String country;
  80. String state;
  81. }
  82. private void assertEmpDetail(Tuple t, Map<Integer, EmpDetail> eds) throws ExecException {
  83. assertNotNull(t);
  84. assertEquals(6, t.size());
  85. assertTrue(t.get(0).getClass() == Integer.class);
  86. assertTrue(t.get(1).getClass() == String.class);
  87. assertTrue(t.get(2).getClass() == String.class);
  88. assertTrue(t.get(3).getClass() == String.class);
  89. assertTrue(t.get(4).getClass() == String.class);
  90. assertTrue(t.get(5).getClass() == String.class);
  91. EmpDetail ed = eds.remove(t.get(0));
  92. assertNotNull(ed);
  93. assertEquals(ed.name, t.get(1));
  94. assertEquals(ed.dob, t.get(2));
  95. assertEquals(ed.mf, t.get(3));
  96. assertEquals(ed.country, t.get(4));
  97. assertEquals(ed.state, t.get(5));
  98. }
  99. private void addEmpDetail(Map<Integer, EmpDetail> empDetails, int id, String name,
  100. String dob, String mf, String country, String state) {
  101. EmpDetail ed = new EmpDetail();
  102. ed.name = name;
  103. ed.dob = dob;
  104. ed.mf = mf;
  105. ed.country = country;
  106. ed.state = state;
  107. empDetails.put(id, ed);
  108. }
  109. private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf)
  110. throws ExecException {
  111. assertNotNull(t);
  112. assertEquals(4, t.size());
  113. assertTrue(t.get(0).getClass() == Integer.class);
  114. assertTrue(t.get(1).getClass() == String.class);
  115. assertTrue(t.get(2).getClass() == String.class);
  116. assertTrue(t.get(3).getClass() == String.class);
  117. assertEquals(id, t.get(0));
  118. assertEquals(name, t.get(1));
  119. assertEquals(dob, t.get(2));
  120. assertEquals(mf, t.get(3));
  121. }
  122. private void assertEmpDetail(Tuple t, String mf, String name)
  123. throws ExecException {
  124. assertNotNull(t);
  125. assertEquals(2, t.size());
  126. assertTrue(t.get(0).getClass() == String.class);
  127. assertTrue(t.get(1).getClass() == String.class);
  128. assertEquals(mf, t.get(0));
  129. assertEquals(name, t.get(1));
  130. }
  131. public void testLoadNonPartTable() throws Exception {
  132. populateDataFile();
  133. {
  134. PigServer server = new PigServer(ExecType.LOCAL, props);
  135. UDFContext.getUDFContext().setClientSystemProps();
  136. server.setBatchOn();
  137. server
  138. .registerQuery("A = load '"
  139. + fqdataLocation
  140. + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
  141. server.registerQuery("store A into '" + NONPART_TABLE
  142. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
  143. server.executeBatch();
  144. }
  145. {
  146. PigServer server = new PigServer(ExecType.LOCAL, props);
  147. UDFContext.getUDFContext().setClientSystemProps();
  148. server
  149. .registerQuery("A = load '"
  150. + fqexportLocation
  151. + "' using org.apache.hcatalog.pig.HCatEximLoader();");
  152. Iterator<Tuple> XIter = server.openIterator("A");
  153. assertTrue(XIter.hasNext());
  154. Tuple t = XIter.next();
  155. assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M");
  156. assertTrue(XIter.hasNext());
  157. t = XIter.next();
  158. assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F");
  159. assertTrue(XIter.hasNext());
  160. t = XIter.next();
  161. assertEmpDetail(t, 239, "Satya", "01/01/2001", "M");
  162. assertTrue(XIter.hasNext());
  163. t = XIter.next();
  164. assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F");
  165. assertFalse(XIter.hasNext());
  166. }
  167. }
  168. public void testLoadNonPartProjection() throws Exception {
  169. populateDataFile();
  170. {
  171. PigServer server = new PigServer(ExecType.LOCAL, props);
  172. UDFContext.getUDFContext().setClientSystemProps();
  173. server.setBatchOn();
  174. server
  175. .registerQuery("A = load '"
  176. + fqdataLocation
  177. + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
  178. server.registerQuery("store A into '" + NONPART_TABLE
  179. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
  180. server.executeBatch();
  181. }
  182. {
  183. PigServer server = new PigServer(ExecType.LOCAL, props);
  184. UDFContext.getUDFContext().setClientSystemProps();
  185. server
  186. .registerQuery("A = load '"
  187. + fqexportLocation
  188. + "' using org.apache.hcatalog.pig.HCatEximLoader();");
  189. server.registerQuery("B = foreach A generate emp_sex, emp_name;");
  190. Iterator<Tuple> XIter = server.openIterator("B");
  191. assertTrue(XIter.hasNext());
  192. Tuple t = XIter.next();
  193. assertEmpDetail(t, "M", "Krishna");
  194. assertTrue(XIter.hasNext());
  195. t = XIter.next();
  196. assertEmpDetail(t, "F", "Kalpana");
  197. assertTrue(XIter.hasNext());
  198. t = XIter.next();
  199. assertEmpDetail(t, "M", "Satya");
  200. assertTrue(XIter.hasNext());
  201. t = XIter.next();
  202. assertEmpDetail(t, "F", "Kavya");
  203. assertFalse(XIter.hasNext());
  204. }
  205. }
  206. public void testLoadMultiPartTable() throws Exception {
  207. {
  208. populateDataFile();
  209. PigServer server = new PigServer(ExecType.LOCAL, props);
  210. UDFContext.getUDFContext().setClientSystemProps();
  211. server.setBatchOn();
  212. server
  213. .registerQuery("A = load '"
  214. + fqdataLocation +
  215. "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
  216. );
  217. server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
  218. server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
  219. server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
  220. server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
  221. server.registerQuery("store INTN into '" + PARTITIONED_TABLE
  222. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  223. "', 'emp_country=in,emp_state=tn');");
  224. server.registerQuery("store INKA into '" + PARTITIONED_TABLE
  225. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  226. "', 'emp_country=in,emp_state=ka');");
  227. server.registerQuery("store USTN into '" + PARTITIONED_TABLE
  228. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  229. "', 'emp_country=us,emp_state=tn');");
  230. server.registerQuery("store USKA into '" + PARTITIONED_TABLE
  231. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  232. "', 'emp_country=us,emp_state=ka');");
  233. server.executeBatch();
  234. }
  235. {
  236. PigServer server = new PigServer(ExecType.LOCAL, props);
  237. UDFContext.getUDFContext().setClientSystemProps();
  238. server
  239. .registerQuery("A = load '"
  240. + fqexportLocation
  241. + "' using org.apache.hcatalog.pig.HCatEximLoader() "
  242. //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);");
  243. + ";");
  244. Iterator<Tuple> XIter = server.openIterator("A");
  245. Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
  246. addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn");
  247. addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
  248. addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn");
  249. addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
  250. while(XIter.hasNext()) {
  251. Tuple t = XIter.next();
  252. assertNotSame(0, empDetails.size());
  253. assertEmpDetail(t, empDetails);
  254. }
  255. assertEquals(0, empDetails.size());
  256. }
  257. }
  258. public void testLoadMultiPartFilter() throws Exception {
  259. {
  260. populateDataFile();
  261. PigServer server = new PigServer(ExecType.LOCAL, props);
  262. UDFContext.getUDFContext().setClientSystemProps();
  263. server.setBatchOn();
  264. server
  265. .registerQuery("A = load '"
  266. + fqdataLocation +
  267. "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
  268. );
  269. server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
  270. server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
  271. server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
  272. server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
  273. server.registerQuery("store INTN into '" + PARTITIONED_TABLE
  274. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  275. "', 'emp_country=in,emp_state=tn');");
  276. server.registerQuery("store INKA into '" + PARTITIONED_TABLE
  277. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  278. "', 'emp_country=in,emp_state=ka');");
  279. server.registerQuery("store USTN into '" + PARTITIONED_TABLE
  280. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  281. "', 'emp_country=us,emp_state=tn');");
  282. server.registerQuery("store USKA into '" + PARTITIONED_TABLE
  283. + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
  284. "', 'emp_country=us,emp_state=ka');");
  285. server.executeBatch();
  286. }
  287. {
  288. PigServer server = new PigServer(ExecType.LOCAL, props);
  289. UDFContext.getUDFContext().setClientSystemProps();
  290. server
  291. .registerQuery("A = load '"
  292. + fqexportLocation
  293. + "' using org.apache.hcatalog.pig.HCatEximLoader() "
  294. + ";");
  295. server.registerQuery("B = filter A by emp_state == 'ka';");
  296. Iterator<Tuple> XIter = server.openIterator("B");
  297. Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
  298. addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
  299. addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
  300. while(XIter.hasNext()) {
  301. Tuple t = XIter.next();
  302. assertNotSame(0, empDetails.size());
  303. assertEmpDetail(t, empDetails);
  304. }
  305. assertEquals(0, empDetails.size());
  306. }
  307. }
  308. }