PageRenderTime 58ms CodeModel.GetById 1ms app.highlight 52ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/release-0.0.0-rc0/src/test/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java

#
Java | 314 lines | 206 code | 78 blank | 30 comment | 13 complexity | eb3c3c5a2d4f5f34a1ef0b48cb1473a1 MD5 | raw file
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 18
 19package org.apache.hcatalog.mapreduce;
 20
 21import java.io.IOException;
 22import java.util.ArrayList;
 23import java.util.HashMap;
 24import java.util.List;
 25import java.util.Map;
 26
 27import org.apache.hadoop.hive.metastore.api.FieldSchema;
 28import org.apache.hadoop.hive.serde.Constants;
 29import org.apache.hcatalog.common.ErrorType;
 30import org.apache.hcatalog.common.HCatException;
 31import org.apache.hcatalog.data.DefaultHCatRecord;
 32import org.apache.hcatalog.data.HCatRecord;
 33import org.apache.hcatalog.data.schema.HCatFieldSchema;
 34import org.apache.hcatalog.data.schema.HCatSchema;
 35import org.apache.hcatalog.data.schema.HCatSchemaUtils;
 36
 37public class TestHCatPartitioned extends HCatMapReduceTest {
 38
 39  private List<HCatRecord> writeRecords;
 40  private List<HCatFieldSchema> partitionColumns;
 41
 42  @Override
 43  protected void initialize() throws Exception {
 44
 45    tableName = "testHowlPartitionedTable";
 46    writeRecords = new ArrayList<HCatRecord>();
 47
 48    for(int i = 0;i < 20;i++) {
 49      List<Object> objList = new ArrayList<Object>();
 50
 51      objList.add(i);
 52      objList.add("strvalue" + i);
 53      writeRecords.add(new DefaultHCatRecord(objList));
 54    }
 55
 56    partitionColumns = new ArrayList<HCatFieldSchema>();
 57    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
 58    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
 59  }
 60
 61
 62  @Override
 63  protected List<FieldSchema> getPartitionKeys() {
 64    List<FieldSchema> fields = new ArrayList<FieldSchema>();
 65    fields.add(new FieldSchema("PaRT1", Constants.STRING_TYPE_NAME, ""));
 66    return fields;
 67  }
 68
 69  @Override
 70  protected List<FieldSchema> getTableColumns() {
 71    List<FieldSchema> fields = new ArrayList<FieldSchema>();
 72    fields.add(new FieldSchema("c1", Constants.INT_TYPE_NAME, ""));
 73    fields.add(new FieldSchema("c2", Constants.STRING_TYPE_NAME, ""));
 74    return fields;
 75  }
 76
 77
 78  public void testHowlPartitionedTable() throws Exception {
 79
 80    Map<String, String> partitionMap = new HashMap<String, String>();
 81    partitionMap.put("part1", "p1value1");
 82
 83    runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
 84
 85    partitionMap.clear();
 86    partitionMap.put("PART1", "p1value2");
 87
 88    runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
 89
 90    //Test for duplicate publish
 91    IOException exc = null;
 92    try {
 93      runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
 94    } catch(IOException e) {
 95      exc = e;
 96    }
 97
 98    assertTrue(exc != null);
 99    assertTrue(exc instanceof HCatException);
100    assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType());
101
102    //Test for publish with invalid partition key name
103    exc = null;
104    partitionMap.clear();
105    partitionMap.put("px", "p1value2");
106
107    try {
108      runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
109    } catch(IOException e) {
110      exc = e;
111    }
112
113    assertTrue(exc != null);
114    assertTrue(exc instanceof HCatException);
115    assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
116
117
118    //Test for null partition value map
119    exc = null;
120    try {
121      runMRCreate(null, partitionColumns, writeRecords, 20);
122    } catch(IOException e) {
123      exc = e;
124    }
125
126    assertTrue(exc != null);
127    assertTrue(exc instanceof HCatException);
128    assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());
129
130    //Read should get 10 + 20 rows
131    runMRRead(30);
132
133    //Read with partition filter
134    runMRRead(10, "part1 = \"p1value1\"");
135    runMRRead(20, "part1 = \"p1value2\"");
136    runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
137
138    tableSchemaTest();
139    columnOrderChangeTest();
140    hiveReadTest();
141  }
142
143
144  //test that new columns gets added to table schema
145  private void tableSchemaTest() throws Exception {
146
147    HCatSchema tableSchema = getTableSchema();
148
149    assertEquals(3, tableSchema.getFields().size());
150
151    //Update partition schema to have 3 fields
152    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
153
154    writeRecords = new ArrayList<HCatRecord>();
155
156    for(int i = 0;i < 20;i++) {
157      List<Object> objList = new ArrayList<Object>();
158
159      objList.add(i);
160      objList.add("strvalue" + i);
161      objList.add("str2value" + i);
162
163      writeRecords.add(new DefaultHCatRecord(objList));
164    }
165
166    Map<String, String> partitionMap = new HashMap<String, String>();
167    partitionMap.put("part1", "p1value5");
168
169    runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
170
171    tableSchema = getTableSchema();
172
173    //assert that c3 has got added to table schema
174    assertEquals(4, tableSchema.getFields().size());
175    assertEquals("c1", tableSchema.getFields().get(0).getName());
176    assertEquals("c2", tableSchema.getFields().get(1).getName());
177    assertEquals("c3", tableSchema.getFields().get(2).getName());
178    assertEquals("part1", tableSchema.getFields().get(3).getName());
179
180    //Test that changing column data type fails
181    partitionMap.clear();
182    partitionMap.put("part1", "p1value6");
183
184    partitionColumns = new ArrayList<HCatFieldSchema>();
185    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
186    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.INT_TYPE_NAME, "")));
187
188    IOException exc = null;
189    try {
190      runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
191    } catch(IOException e) {
192      exc = e;
193    }
194
195    assertTrue(exc != null);
196    assertTrue(exc instanceof HCatException);
197    assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
198
199    //Test that partition key is not allowed in data
200    partitionColumns = new ArrayList<HCatFieldSchema>();
201    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
202    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
203    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
204    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", Constants.STRING_TYPE_NAME, "")));
205
206    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
207    for(int i = 0;i < 20;i++) {
208      List<Object> objList = new ArrayList<Object>();
209
210      objList.add(i);
211      objList.add("c2value" + i);
212      objList.add("c3value" + i);
213      objList.add("p1value6");
214
215      recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
216    }
217
218    exc = null;
219    try {
220      runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20);
221    } catch(IOException e) {
222      exc = e;
223    }
224
225    List<HCatRecord> records= runMRRead(20,"part1 = \"p1value6\"");
226    assertEquals(20, records.size());
227    Integer i =0;
228    for(HCatRecord rec : records){
229      assertEquals(4, rec.size());
230      assertTrue(rec.get(0).equals(i));
231      assertTrue(rec.get(1).equals("c2value"+i));
232      assertTrue(rec.get(2).equals("c3value"+i));
233      assertTrue(rec.get(3).equals("p1value6"));
234      i++;
235    }
236  }
237
238  //check behavior while change the order of columns
239  private void columnOrderChangeTest() throws Exception {
240
241    HCatSchema tableSchema = getTableSchema();
242
243    assertEquals(4, tableSchema.getFields().size());
244
245    partitionColumns = new ArrayList<HCatFieldSchema>();
246    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
247    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
248    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
249
250
251    writeRecords = new ArrayList<HCatRecord>();
252
253    for(int i = 0;i < 10;i++) {
254      List<Object> objList = new ArrayList<Object>();
255
256      objList.add(i);
257      objList.add("co strvalue" + i);
258      objList.add("co str2value" + i);
259
260      writeRecords.add(new DefaultHCatRecord(objList));
261    }
262
263    Map<String, String> partitionMap = new HashMap<String, String>();
264    partitionMap.put("part1", "p1value8");
265
266
267    Exception exc = null;
268    try {
269      runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
270    } catch(IOException e) {
271      exc = e;
272    }
273
274    assertTrue(exc != null);
275    assertTrue(exc instanceof HCatException);
276    assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
277
278
279    partitionColumns = new ArrayList<HCatFieldSchema>();
280    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
281    partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
282
283    writeRecords = new ArrayList<HCatRecord>();
284
285    for(int i = 0;i < 10;i++) {
286      List<Object> objList = new ArrayList<Object>();
287
288      objList.add(i);
289      objList.add("co strvalue" + i);
290
291      writeRecords.add(new DefaultHCatRecord(objList));
292    }
293
294    runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
295
296    //Read should get 10 + 20 + 10 + 10 + 20 rows
297    runMRRead(70);
298  }
299
300  //Test that data inserted through howloutputformat is readable from hive
301  private void hiveReadTest() throws Exception {
302
303    String query = "select * from " + tableName;
304    int retCode = driver.run(query).getResponseCode();
305
306    if( retCode != 0 ) {
307      throw new Exception("Error " + retCode + " running query " + query);
308    }
309
310    ArrayList<String> res = new ArrayList<String>();
311    driver.getResults(res);
312    assertEquals(70, res.size());
313  }
314}