/tags/release-0.2.0-rc0/src/test/org/apache/hcatalog/mapreduce/TestHCatEximOutputFormat.java


/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hcatalog.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;

/**
 * TestHCatEximOutputFormat. Basic tests for the export output format;
 * more thorough testing is done via TestHCatEximInputFormat.
 */
public class TestHCatEximOutputFormat extends TestCase {
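
  /**
   * Mapper used by the test jobs: parses each comma-separated input line into
   * an HCatRecord, filling the emp_id, emp_name, emp_dob and emp_sex columns of
   * the table schema obtained from the job context.
   */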
  public static class TestMap extends
      Mapper<LongWritable, Text, LongWritable, HCatRecord> {

    private HCatSchema recordSchema;

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);
      recordSchema = HCatEximOutputFormat.getTableSchema(context);
      System.out.println("TestMap/setup called");
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] cols = value.toString().split(",");
      HCatRecord record = new DefaultHCatRecord(recordSchema.size());
      System.out.println("TestMap/map called. Cols[0]:" + cols[0]);
      System.out.println("TestMap/map called. Cols[1]:" + cols[1]);
      System.out.println("TestMap/map called. Cols[2]:" + cols[2]);
      System.out.println("TestMap/map called. Cols[3]:" + cols[3]);
      record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0]));
      record.setString("emp_name", recordSchema, cols[1]);
      record.setString("emp_dob", recordSchema, cols[2]);
      record.setString("emp_sex", recordSchema, cols[3]);
      context.write(key, record);
    }
  }

  private static final String dbName = "hcatEximOutputFormatTestDB";
  private static final String tblName = "hcatEximOutputFormatTestTable";

  Configuration conf;
  Job job;
  List<HCatFieldSchema> columns;
  HCatSchema schema;
  FileSystem fs;
  Path outputLocation;
  Path dataLocation;
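
  /**
   * Exports the test table without partition keys and verifies the _metadata
   * file written by the committer: database and table names, column schema,
   * RCFile input/output driver and SerDe settings, and that no partition keys
   * or partitions are recorded.
   */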
  public void testNonPart() throws Exception {
    try {
      HCatEximOutputFormat.setOutput(
          job,
          dbName,
          tblName,
          outputLocation.toString(),
          null,
          null,
          schema);

      job.waitForCompletion(true);
      HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job, null);
      committer.cleanupJob(job);

      Path metadataPath = new Path(outputLocation, "_metadata");
      Map.Entry<Table, List<Partition>> rv = EximUtil.readMetaData(fs, metadataPath);
      Table table = rv.getKey();
      List<Partition> partitions = rv.getValue();

      assertEquals(dbName, table.getDbName());
      assertEquals(tblName, table.getTableName());
      assertTrue(EximUtil.schemaCompare(table.getSd().getCols(),
          HCatUtil.getFieldSchemaList(columns)));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          table.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          table.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",
          table.getSd().getInputFormat());
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
          table.getSd().getOutputFormat());
      assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
          table.getSd().getSerdeInfo().getSerializationLib());
      assertEquals(0, table.getPartitionKeys().size());
      assertEquals(0, partitions.size());
    } catch (Exception e) {
      System.out.println("Test failed with " + e.getMessage());
      e.printStackTrace();
      throw e;
    }
  }
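
  /**
   * Exports the test table with emp_country/emp_state as partition keys and the
   * partition values (IN, TN), then verifies the _metadata file: the same table
   * schema and storage settings as in testNonPart, plus the two partition keys
   * and the single exported partition with its values and driver parameters.
   */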
  public void testPart() throws Exception {
    try {
      List<HCatFieldSchema> partKeys = new ArrayList<HCatFieldSchema>();
      partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country",
          Constants.STRING_TYPE_NAME, "")));
      partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state",
          Constants.STRING_TYPE_NAME, "")));
      HCatSchema partitionSchema = new HCatSchema(partKeys);

      List<String> partitionVals = new ArrayList<String>();
      partitionVals.add("IN");
      partitionVals.add("TN");

      HCatEximOutputFormat.setOutput(
          job,
          dbName,
          tblName,
          outputLocation.toString(),
          partitionSchema,
          partitionVals,
          schema);

      job.waitForCompletion(true);
      HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job, null);
      committer.cleanupJob(job);

      Path metadataPath = new Path(outputLocation, "_metadata");
      Map.Entry<Table, List<Partition>> rv = EximUtil.readMetaData(fs, metadataPath);
      Table table = rv.getKey();
      List<Partition> partitions = rv.getValue();

      assertEquals(dbName, table.getDbName());
      assertEquals(tblName, table.getTableName());
      assertTrue(EximUtil.schemaCompare(table.getSd().getCols(),
          HCatUtil.getFieldSchemaList(columns)));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          table.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          table.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",
          table.getSd().getInputFormat());
      assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
          table.getSd().getOutputFormat());
      assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
          table.getSd().getSerdeInfo().getSerializationLib());
      assertEquals(2, table.getPartitionKeys().size());
      List<FieldSchema> partSchema = table.getPartitionKeys();
      assertEquals("emp_country", partSchema.get(0).getName());
      assertEquals("emp_state", partSchema.get(1).getName());

      assertEquals(1, partitions.size());
      Partition partition = partitions.get(0);
      assertEquals("IN", partition.getValues().get(0));
      assertEquals("TN", partition.getValues().get(1));
      assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver",
          partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS));
      assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver",
          partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS));
    } catch (Exception e) {
      System.out.println("Test failed with " + e.getMessage());
      e.printStackTrace();
      throw e;
    }
  }
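
  /**
   * Builds the four-column employee schema, writes a small comma-separated
   * data file on the local file system, and configures a map-only job that
   * reads it with TextInputFormat and writes through HCatEximOutputFormat.
   */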
  @Override
  protected void setUp() throws Exception {
    System.out.println("Setup started");
    super.setUp();
    conf = new Configuration();
    job = new Job(conf, "test eximoutputformat");
    columns = new ArrayList<HCatFieldSchema>();
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id",
        Constants.INT_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name",
        Constants.STRING_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob",
        Constants.STRING_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex",
        Constants.STRING_TYPE_NAME, "")));
    schema = new HCatSchema(columns);

    fs = new LocalFileSystem();
    fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration());
    outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports");
    if (fs.exists(outputLocation)) {
      fs.delete(outputLocation, true);
    }
    dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data");
    if (fs.exists(dataLocation)) {
      fs.delete(dataLocation, true);
    }
    FSDataOutputStream ds = fs.create(dataLocation, true);
    ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n");
    ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n");
    ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n");
    ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n");
    ds.close();

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatEximOutputFormat.class);
    TextInputFormat.setInputPaths(job, dataLocation);
    job.setJarByClass(this.getClass());
    job.setMapperClass(TestMap.class);
    job.setNumReduceTasks(0);
    System.out.println("Setup done");
  }
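
  /** Removes the temporary data and export directories created in setUp. */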
  @Override
  protected void tearDown() throws Exception {
    System.out.println("Teardown started");
    super.tearDown();
    fs.delete(dataLocation, true);
    fs.delete(outputLocation, true);
    System.out.println("Teardown done");
  }
}