PageRenderTime 69ms CodeModel.GetById 1ms app.highlight 63ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.2.0-rc0/src/test/org/apache/hcatalog/pig/TestHCatEximLoader.java

#
Java | 351 lines | 285 code | 43 blank | 23 comment | 12 complexity | bb6a3ee73e5e53d58ee29b15a3e524ea MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18package org.apache.hcatalog.pig;
 19
 20import java.io.IOException;
 21import java.util.Iterator;
 22import java.util.Map;
 23import java.util.Properties;
 24import java.util.TreeMap;
 25
 26import junit.framework.TestCase;
 27
 28import org.apache.hcatalog.MiniCluster;
 29import org.apache.pig.ExecType;
 30import org.apache.pig.PigServer;
 31import org.apache.pig.backend.executionengine.ExecException;
 32import org.apache.pig.data.Tuple;
 33import org.apache.pig.impl.util.UDFContext;
 34
 35/**
 36 *
  37 * Tests for HCatEximLoader. Assumes HCatEximStorer works correctly, since each test uses it to produce the export that is then loaded.
 38 *
 39 */
 40public class TestHCatEximLoader extends TestCase {
 41
 42  private static final String NONPART_TABLE = "junit_unparted";
 43  private static final String PARTITIONED_TABLE = "junit_parted";
 44  private static MiniCluster cluster = MiniCluster.buildCluster();
 45
 46  private static final String dataLocation = "/tmp/data";
 47  private static String fqdataLocation;
 48  private static final String exportLocation = "/tmp/export";
 49  private static String fqexportLocation;
 50
 51  private static Properties props;
 52
 53  private void cleanup() throws IOException {
 54    MiniCluster.deleteFile(cluster, dataLocation);
 55    MiniCluster.deleteFile(cluster, exportLocation);
 56  }
 57
 58  @Override
 59  protected void setUp() throws Exception {
 60    props = new Properties();
 61    props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name"));
 62    System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName()
 63        + ", fs.default.name : " + props.getProperty("fs.default.name"));
 64    fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation;
 65    fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation;
 66    System.out.println("FQ Data Location :" + fqdataLocation);
 67    System.out.println("FQ Export Location :" + fqexportLocation);
 68    cleanup();
 69  }
 70
 71  @Override
 72  protected void tearDown() throws Exception {
 73    cleanup();
 74  }
 75
 76  private void populateDataFile() throws IOException {
 77    MiniCluster.deleteFile(cluster, dataLocation);
 78    String[] input = new String[] {
 79        "237,Krishna,01/01/1990,M,IN,TN",
 80        "238,Kalpana,01/01/2000,F,IN,KA",
 81        "239,Satya,01/01/2001,M,US,TN",
 82        "240,Kavya,01/01/2002,F,US,KA"
 83    };
 84    MiniCluster.createInputFile(cluster, dataLocation, input);
 85  }
 86
 87  private static class EmpDetail {
 88    String name;
 89    String dob;
 90    String mf;
 91    String country;
 92    String state;
 93  }
 94
 95  private void assertEmpDetail(Tuple t, Map<Integer, EmpDetail> eds) throws ExecException {
 96    assertNotNull(t);
 97    assertEquals(6, t.size());
 98
 99    assertTrue(t.get(0).getClass() == Integer.class);
100    assertTrue(t.get(1).getClass() == String.class);
101    assertTrue(t.get(2).getClass() == String.class);
102    assertTrue(t.get(3).getClass() == String.class);
103    assertTrue(t.get(4).getClass() == String.class);
104    assertTrue(t.get(5).getClass() == String.class);
105
106    EmpDetail ed = eds.remove(t.get(0));
107    assertNotNull(ed);
108
109    assertEquals(ed.name, t.get(1));
110    assertEquals(ed.dob, t.get(2));
111    assertEquals(ed.mf, t.get(3));
112    assertEquals(ed.country, t.get(4));
113    assertEquals(ed.state, t.get(5));
114  }
115
116  private void addEmpDetail(Map<Integer, EmpDetail> empDetails, int id, String name,
117      String dob, String mf, String country, String state) {
118    EmpDetail ed = new EmpDetail();
119    ed.name = name;
120    ed.dob = dob;
121    ed.mf = mf;
122    ed.country = country;
123    ed.state = state;
124    empDetails.put(id, ed);
125  }
126
127
128
129  private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf)
130      throws ExecException {
131    assertNotNull(t);
132    assertEquals(4, t.size());
133    assertTrue(t.get(0).getClass() == Integer.class);
134    assertTrue(t.get(1).getClass() == String.class);
135    assertTrue(t.get(2).getClass() == String.class);
136    assertTrue(t.get(3).getClass() == String.class);
137
138    assertEquals(id, t.get(0));
139    assertEquals(name, t.get(1));
140    assertEquals(dob, t.get(2));
141    assertEquals(mf, t.get(3));
142  }
143
144  private void assertEmpDetail(Tuple t, String mf, String name)
145      throws ExecException {
146    assertNotNull(t);
147    assertEquals(2, t.size());
148    assertTrue(t.get(0).getClass() == String.class);
149    assertTrue(t.get(1).getClass() == String.class);
150
151    assertEquals(mf, t.get(0));
152    assertEquals(name, t.get(1));
153  }
154
155
156
157  public void testLoadNonPartTable() throws Exception {
158    populateDataFile();
159    {
160      PigServer server = new PigServer(ExecType.LOCAL, props);
161      UDFContext.getUDFContext().setClientSystemProps();
162      server.setBatchOn();
163      server
164          .registerQuery("A = load '"
165              + fqdataLocation
166              + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
167      server.registerQuery("store A into '" + NONPART_TABLE
168          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
169      server.executeBatch();
170    }
171    {
172      PigServer server = new PigServer(ExecType.LOCAL, props);
173      UDFContext.getUDFContext().setClientSystemProps();
174
175      server
176          .registerQuery("A = load '"
177              + fqexportLocation
178              + "' using org.apache.hcatalog.pig.HCatEximLoader();");
179      Iterator<Tuple> XIter = server.openIterator("A");
180      assertTrue(XIter.hasNext());
181      Tuple t = XIter.next();
182      assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M");
183      assertTrue(XIter.hasNext());
184      t = XIter.next();
185      assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F");
186      assertTrue(XIter.hasNext());
187      t = XIter.next();
188      assertEmpDetail(t, 239, "Satya", "01/01/2001", "M");
189      assertTrue(XIter.hasNext());
190      t = XIter.next();
191      assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F");
192      assertFalse(XIter.hasNext());
193    }
194  }
195
196  public void testLoadNonPartProjection() throws Exception {
197    populateDataFile();
198    {
199      PigServer server = new PigServer(ExecType.LOCAL, props);
200      UDFContext.getUDFContext().setClientSystemProps();
201      server.setBatchOn();
202      server
203          .registerQuery("A = load '"
204              + fqdataLocation
205              + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
206      server.registerQuery("store A into '" + NONPART_TABLE
207          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
208      server.executeBatch();
209    }
210    {
211      PigServer server = new PigServer(ExecType.LOCAL, props);
212      UDFContext.getUDFContext().setClientSystemProps();
213
214      server
215          .registerQuery("A = load '"
216              + fqexportLocation
217              + "' using org.apache.hcatalog.pig.HCatEximLoader();");
218      server.registerQuery("B = foreach A generate emp_sex, emp_name;");
219
220      Iterator<Tuple> XIter = server.openIterator("B");
221      assertTrue(XIter.hasNext());
222      Tuple t = XIter.next();
223      assertEmpDetail(t, "M", "Krishna");
224      assertTrue(XIter.hasNext());
225      t = XIter.next();
226      assertEmpDetail(t, "F", "Kalpana");
227      assertTrue(XIter.hasNext());
228      t = XIter.next();
229      assertEmpDetail(t, "M", "Satya");
230      assertTrue(XIter.hasNext());
231      t = XIter.next();
232      assertEmpDetail(t, "F", "Kavya");
233      assertFalse(XIter.hasNext());
234    }
235  }
236
237
238  public void testLoadMultiPartTable() throws Exception {
239    {
240      populateDataFile();
241      PigServer server = new PigServer(ExecType.LOCAL, props);
242      UDFContext.getUDFContext().setClientSystemProps();
243      server.setBatchOn();
244      server
245          .registerQuery("A = load '"
246              + fqdataLocation +
247              "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
248          );
249      server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
250      server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
251      server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
252      server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
253      server.registerQuery("store INTN into '" + PARTITIONED_TABLE
254          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
255          "', 'emp_country=in,emp_state=tn');");
256      server.registerQuery("store INKA into '" + PARTITIONED_TABLE
257          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
258          "', 'emp_country=in,emp_state=ka');");
259      server.registerQuery("store USTN into '" + PARTITIONED_TABLE
260          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
261          "', 'emp_country=us,emp_state=tn');");
262      server.registerQuery("store USKA into '" + PARTITIONED_TABLE
263          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
264          "', 'emp_country=us,emp_state=ka');");
265      server.executeBatch();
266    }
267    {
268      PigServer server = new PigServer(ExecType.LOCAL, props);
269      UDFContext.getUDFContext().setClientSystemProps();
270
271      server
272          .registerQuery("A = load '"
273              + fqexportLocation
274              + "' using org.apache.hcatalog.pig.HCatEximLoader() "
275              //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);");
276              + ";");
277
278      Iterator<Tuple> XIter = server.openIterator("A");
279
280      Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
281      addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn");
282      addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
283      addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn");
284      addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
285
286      while(XIter.hasNext()) {
287        Tuple t = XIter.next();
288        assertNotSame(0, empDetails.size());
289        assertEmpDetail(t, empDetails);
290      }
291      assertEquals(0, empDetails.size());
292    }
293  }
294
295  public void testLoadMultiPartFilter() throws Exception {
296    {
297      populateDataFile();
298      PigServer server = new PigServer(ExecType.LOCAL, props);
299      UDFContext.getUDFContext().setClientSystemProps();
300      server.setBatchOn();
301      server
302          .registerQuery("A = load '"
303              + fqdataLocation +
304              "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
305          );
306      server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
307      server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
308      server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
309      server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
310      server.registerQuery("store INTN into '" + PARTITIONED_TABLE
311          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
312          "', 'emp_country=in,emp_state=tn');");
313      server.registerQuery("store INKA into '" + PARTITIONED_TABLE
314          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
315          "', 'emp_country=in,emp_state=ka');");
316      server.registerQuery("store USTN into '" + PARTITIONED_TABLE
317          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
318          "', 'emp_country=us,emp_state=tn');");
319      server.registerQuery("store USKA into '" + PARTITIONED_TABLE
320          + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation +
321          "', 'emp_country=us,emp_state=ka');");
322      server.executeBatch();
323    }
324    {
325      PigServer server = new PigServer(ExecType.LOCAL, props);
326      UDFContext.getUDFContext().setClientSystemProps();
327
328      server
329          .registerQuery("A = load '"
330              + fqexportLocation
331              + "' using org.apache.hcatalog.pig.HCatEximLoader() "
332              + ";");
333      server.registerQuery("B = filter A by emp_state == 'ka';");
334
335      Iterator<Tuple> XIter = server.openIterator("B");
336
337      Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
338      addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
339      addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
340
341      while(XIter.hasNext()) {
342        Tuple t = XIter.next();
343        assertNotSame(0, empDetails.size());
344        assertEmpDetail(t, empDetails);
345      }
346      assertEquals(0, empDetails.size());
347    }
348  }
349
350
351}