/tags/release-0.2.0-rc0/src/test/org/apache/hcatalog/mapreduce/TestHCatEximOutputFormat.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hcatalog.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;

/**
 * TestHCatEximOutputFormat. Covers the basic output-format paths; more
 * thorough coverage is exercised through TestHCatEximInputFormat.
 */
public class TestHCatEximOutputFormat extends TestCase {

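  /**
   * Map-only mapper used by these tests: splits each comma-separated input
   * line and copies the first four fields (emp_id, emp_name, emp_dob,
   * emp_sex) into an HCatRecord using the table schema published by
   * HCatEximOutputFormat in the task context.
   */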
  public static class TestMap extends
      Mapper<LongWritable, Text, LongWritable, HCatRecord> {

    private HCatSchema recordSchema;

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);
      recordSchema = HCatEximOutputFormat.getTableSchema(context);
      System.out.println("TestMap/setup called");
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] cols = value.toString().split(",");
      HCatRecord record = new DefaultHCatRecord(recordSchema.size());
      System.out.println("TestMap/map called. Cols[0]:" + cols[0]);
      System.out.println("TestMap/map called. Cols[1]:" + cols[1]);
      System.out.println("TestMap/map called. Cols[2]:" + cols[2]);
      System.out.println("TestMap/map called. Cols[3]:" + cols[3]);
      record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0]));
      record.setString("emp_name", recordSchema, cols[1]);
      record.setString("emp_dob", recordSchema, cols[2]);
      record.setString("emp_sex", recordSchema, cols[3]);
      context.write(key, record);
    }
  }

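  // Test fixtures; initialized in setUp().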
  private static final String dbName = "hcatEximOutputFormatTestDB";
  private static final String tblName = "hcatEximOutputFormatTestTable";
  Configuration conf;
  Job job;
  List<HCatFieldSchema> columns;
  HCatSchema schema;
  FileSystem fs;
  Path outputLocation;
  Path dataLocation;

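  /**
   * Exports to an unpartitioned table: runs the map-only job, lets the
   * committer write the _metadata file, and verifies that the recorded table
   * uses the RCFile storage drivers and has no partition keys or partitions.
   */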
  public void testNonPart() throws Exception {
    try {
      HCatEximOutputFormat.setOutput(
          job,
          dbName,
          tblName,
          outputLocation.toString(),
          null,
          null,
          schema);

      job.waitForCompletion(true);
      HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job, null);
      committer.cleanupJob(job);

      Path metadataPath = new Path(outputLocation, "_metadata");
      Map.Entry<Table, List<Partition>> rv = EximUtil.readMetaData(fs, metadataPath);
      Table table = rv.getKey();
      List<Partition> partitions = rv.getValue();

      assertEquals(dbName, table.getDbName());
      assertEquals(tblName, table.getTableName());
      assertTrue(EximUtil.schemaCompare(table.getSd().getCols(),
          HCatUtil.getFieldSchemaList(columns)));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          table.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          table.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",
          table.getSd().getInputFormat());
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
          table.getSd().getOutputFormat());
      assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
          table.getSd().getSerdeInfo().getSerializationLib());
      assertEquals(0, table.getPartitionKeys().size());

      assertEquals(0, partitions.size());
    } catch (Exception e) {
      System.out.println("Test failed with " + e.getMessage());
      e.printStackTrace();
      throw e;
    }
  }

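  /**
   * Exports to a table partitioned on emp_country and emp_state with the
   * partition values IN/TN, then verifies that _metadata records both
   * partition keys and exactly one partition carrying the RCFile driver
   * parameters.
   */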
  public void testPart() throws Exception {
    try {
      List<HCatFieldSchema> partKeys = new ArrayList<HCatFieldSchema>();
      partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country",
          Constants.STRING_TYPE_NAME, "")));
      partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state",
          Constants.STRING_TYPE_NAME, "")));
      HCatSchema partitionSchema = new HCatSchema(partKeys);

      List<String> partitionVals = new ArrayList<String>();
      partitionVals.add("IN");
      partitionVals.add("TN");

      HCatEximOutputFormat.setOutput(
          job,
          dbName,
          tblName,
          outputLocation.toString(),
          partitionSchema,
          partitionVals,
          schema);

      job.waitForCompletion(true);
      HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job, null);
      committer.cleanupJob(job);
      Path metadataPath = new Path(outputLocation, "_metadata");
      Map.Entry<Table, List<Partition>> rv = EximUtil.readMetaData(fs, metadataPath);
      Table table = rv.getKey();
      List<Partition> partitions = rv.getValue();

      assertEquals(dbName, table.getDbName());
      assertEquals(tblName, table.getTableName());
      assertTrue(EximUtil.schemaCompare(table.getSd().getCols(),
          HCatUtil.getFieldSchemaList(columns)));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          table.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          table.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",
          table.getSd().getInputFormat());
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
          table.getSd().getOutputFormat());
      assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
          table.getSd().getSerdeInfo().getSerializationLib());
      assertEquals(2, table.getPartitionKeys().size());
      List<FieldSchema> partSchema = table.getPartitionKeys();
      assertEquals("emp_country", partSchema.get(0).getName());
      assertEquals("emp_state", partSchema.get(1).getName());

      assertEquals(1, partitions.size());
      Partition partition = partitions.get(0);
      assertEquals("IN", partition.getValues().get(0));
      assertEquals("TN", partition.getValues().get(1));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
    } catch (Exception e) {
      System.out.println("Test failed with " + e.getMessage());
      e.printStackTrace();
      throw e;
    }
  }

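  /**
   * Builds the shared fixtures: the employee column schema, a local input
   * file of comma-separated sample rows, and a map-only job configured to
   * read that file with TextInputFormat and write through
   * HCatEximOutputFormat using TestMap.
   */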
  @Override
  protected void setUp() throws Exception {
    System.out.println("Setup started");
    super.setUp();
    conf = new Configuration();
    job = new Job(conf, "test eximoutputformat");
    columns = new ArrayList<HCatFieldSchema>();
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id",
        Constants.INT_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name",
        Constants.STRING_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob",
        Constants.STRING_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex",
        Constants.STRING_TYPE_NAME, "")));
    schema = new HCatSchema(columns);

    fs = new LocalFileSystem();
    fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());
    outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports");
    if (fs.exists(outputLocation)) {
      fs.delete(outputLocation, true);
    }
    dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data");
    if (fs.exists(dataLocation)) {
      fs.delete(dataLocation, true);
    }
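    // Sample rows: emp_id,emp_name,emp_dob,emp_sex plus two trailing
    // country/state-style values; TestMap reads only the first four fields.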
    FSDataOutputStream ds = fs.create(dataLocation, true);
    ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n");
    ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n");
    ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n");
    ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n");
    ds.close();

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatEximOutputFormat.class);
    TextInputFormat.setInputPaths(job, dataLocation);
    job.setJarByClass(this.getClass());
    job.setMapperClass(TestMap.class);
    job.setNumReduceTasks(0);
    System.out.println("Setup done");
  }

  @Override
  protected void tearDown() throws Exception {
    System.out.println("Teardown started");
    super.tearDown();
    fs.delete(dataLocation, true);
    fs.delete(outputLocation, true);
    System.out.println("Teardown done");
  }
}