PageRenderTime 52ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.0.0-rc0/src/test/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java

#
Java | 314 lines | 206 code | 78 blank | 30 comment | 13 complexity | eb3c3c5a2d4f5f34a1ef0b48cb1473a1 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hcatalog.mapreduce;
  19. import java.io.IOException;
  20. import java.util.ArrayList;
  21. import java.util.HashMap;
  22. import java.util.List;
  23. import java.util.Map;
  24. import org.apache.hadoop.hive.metastore.api.FieldSchema;
  25. import org.apache.hadoop.hive.serde.Constants;
  26. import org.apache.hcatalog.common.ErrorType;
  27. import org.apache.hcatalog.common.HCatException;
  28. import org.apache.hcatalog.data.DefaultHCatRecord;
  29. import org.apache.hcatalog.data.HCatRecord;
  30. import org.apache.hcatalog.data.schema.HCatFieldSchema;
  31. import org.apache.hcatalog.data.schema.HCatSchema;
  32. import org.apache.hcatalog.data.schema.HCatSchemaUtils;
  33. public class TestHCatPartitioned extends HCatMapReduceTest {
  34. private List<HCatRecord> writeRecords;
  35. private List<HCatFieldSchema> partitionColumns;
  36. @Override
  37. protected void initialize() throws Exception {
  38. tableName = "testHowlPartitionedTable";
  39. writeRecords = new ArrayList<HCatRecord>();
  40. for(int i = 0;i < 20;i++) {
  41. List<Object> objList = new ArrayList<Object>();
  42. objList.add(i);
  43. objList.add("strvalue" + i);
  44. writeRecords.add(new DefaultHCatRecord(objList));
  45. }
  46. partitionColumns = new ArrayList<HCatFieldSchema>();
  47. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
  48. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
  49. }
  50. @Override
  51. protected List<FieldSchema> getPartitionKeys() {
  52. List<FieldSchema> fields = new ArrayList<FieldSchema>();
  53. fields.add(new FieldSchema("PaRT1", Constants.STRING_TYPE_NAME, ""));
  54. return fields;
  55. }
  56. @Override
  57. protected List<FieldSchema> getTableColumns() {
  58. List<FieldSchema> fields = new ArrayList<FieldSchema>();
  59. fields.add(new FieldSchema("c1", Constants.INT_TYPE_NAME, ""));
  60. fields.add(new FieldSchema("c2", Constants.STRING_TYPE_NAME, ""));
  61. return fields;
  62. }
  63. public void testHowlPartitionedTable() throws Exception {
  64. Map<String, String> partitionMap = new HashMap<String, String>();
  65. partitionMap.put("part1", "p1value1");
  66. runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
  67. partitionMap.clear();
  68. partitionMap.put("PART1", "p1value2");
  69. runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
  70. //Test for duplicate publish
  71. IOException exc = null;
  72. try {
  73. runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
  74. } catch(IOException e) {
  75. exc = e;
  76. }
  77. assertTrue(exc != null);
  78. assertTrue(exc instanceof HCatException);
  79. assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType());
  80. //Test for publish with invalid partition key name
  81. exc = null;
  82. partitionMap.clear();
  83. partitionMap.put("px", "p1value2");
  84. try {
  85. runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
  86. } catch(IOException e) {
  87. exc = e;
  88. }
  89. assertTrue(exc != null);
  90. assertTrue(exc instanceof HCatException);
  91. assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
  92. //Test for null partition value map
  93. exc = null;
  94. try {
  95. runMRCreate(null, partitionColumns, writeRecords, 20);
  96. } catch(IOException e) {
  97. exc = e;
  98. }
  99. assertTrue(exc != null);
  100. assertTrue(exc instanceof HCatException);
  101. assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());
  102. //Read should get 10 + 20 rows
  103. runMRRead(30);
  104. //Read with partition filter
  105. runMRRead(10, "part1 = \"p1value1\"");
  106. runMRRead(20, "part1 = \"p1value2\"");
  107. runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
  108. tableSchemaTest();
  109. columnOrderChangeTest();
  110. hiveReadTest();
  111. }
  112. //test that new columns gets added to table schema
  113. private void tableSchemaTest() throws Exception {
  114. HCatSchema tableSchema = getTableSchema();
  115. assertEquals(3, tableSchema.getFields().size());
  116. //Update partition schema to have 3 fields
  117. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
  118. writeRecords = new ArrayList<HCatRecord>();
  119. for(int i = 0;i < 20;i++) {
  120. List<Object> objList = new ArrayList<Object>();
  121. objList.add(i);
  122. objList.add("strvalue" + i);
  123. objList.add("str2value" + i);
  124. writeRecords.add(new DefaultHCatRecord(objList));
  125. }
  126. Map<String, String> partitionMap = new HashMap<String, String>();
  127. partitionMap.put("part1", "p1value5");
  128. runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
  129. tableSchema = getTableSchema();
  130. //assert that c3 has got added to table schema
  131. assertEquals(4, tableSchema.getFields().size());
  132. assertEquals("c1", tableSchema.getFields().get(0).getName());
  133. assertEquals("c2", tableSchema.getFields().get(1).getName());
  134. assertEquals("c3", tableSchema.getFields().get(2).getName());
  135. assertEquals("part1", tableSchema.getFields().get(3).getName());
  136. //Test that changing column data type fails
  137. partitionMap.clear();
  138. partitionMap.put("part1", "p1value6");
  139. partitionColumns = new ArrayList<HCatFieldSchema>();
  140. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
  141. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.INT_TYPE_NAME, "")));
  142. IOException exc = null;
  143. try {
  144. runMRCreate(partitionMap, partitionColumns, writeRecords, 20);
  145. } catch(IOException e) {
  146. exc = e;
  147. }
  148. assertTrue(exc != null);
  149. assertTrue(exc instanceof HCatException);
  150. assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
  151. //Test that partition key is not allowed in data
  152. partitionColumns = new ArrayList<HCatFieldSchema>();
  153. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
  154. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
  155. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
  156. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", Constants.STRING_TYPE_NAME, "")));
  157. List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
  158. for(int i = 0;i < 20;i++) {
  159. List<Object> objList = new ArrayList<Object>();
  160. objList.add(i);
  161. objList.add("c2value" + i);
  162. objList.add("c3value" + i);
  163. objList.add("p1value6");
  164. recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
  165. }
  166. exc = null;
  167. try {
  168. runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20);
  169. } catch(IOException e) {
  170. exc = e;
  171. }
  172. List<HCatRecord> records= runMRRead(20,"part1 = \"p1value6\"");
  173. assertEquals(20, records.size());
  174. Integer i =0;
  175. for(HCatRecord rec : records){
  176. assertEquals(4, rec.size());
  177. assertTrue(rec.get(0).equals(i));
  178. assertTrue(rec.get(1).equals("c2value"+i));
  179. assertTrue(rec.get(2).equals("c3value"+i));
  180. assertTrue(rec.get(3).equals("p1value6"));
  181. i++;
  182. }
  183. }
  184. //check behavior while change the order of columns
  185. private void columnOrderChangeTest() throws Exception {
  186. HCatSchema tableSchema = getTableSchema();
  187. assertEquals(4, tableSchema.getFields().size());
  188. partitionColumns = new ArrayList<HCatFieldSchema>();
  189. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
  190. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", Constants.STRING_TYPE_NAME, "")));
  191. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
  192. writeRecords = new ArrayList<HCatRecord>();
  193. for(int i = 0;i < 10;i++) {
  194. List<Object> objList = new ArrayList<Object>();
  195. objList.add(i);
  196. objList.add("co strvalue" + i);
  197. objList.add("co str2value" + i);
  198. writeRecords.add(new DefaultHCatRecord(objList));
  199. }
  200. Map<String, String> partitionMap = new HashMap<String, String>();
  201. partitionMap.put("part1", "p1value8");
  202. Exception exc = null;
  203. try {
  204. runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
  205. } catch(IOException e) {
  206. exc = e;
  207. }
  208. assertTrue(exc != null);
  209. assertTrue(exc instanceof HCatException);
  210. assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  211. partitionColumns = new ArrayList<HCatFieldSchema>();
  212. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", Constants.INT_TYPE_NAME, "")));
  213. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", Constants.STRING_TYPE_NAME, "")));
  214. writeRecords = new ArrayList<HCatRecord>();
  215. for(int i = 0;i < 10;i++) {
  216. List<Object> objList = new ArrayList<Object>();
  217. objList.add(i);
  218. objList.add("co strvalue" + i);
  219. writeRecords.add(new DefaultHCatRecord(objList));
  220. }
  221. runMRCreate(partitionMap, partitionColumns, writeRecords, 10);
  222. //Read should get 10 + 20 + 10 + 10 + 20 rows
  223. runMRRead(70);
  224. }
  225. //Test that data inserted through howloutputformat is readable from hive
  226. private void hiveReadTest() throws Exception {
  227. String query = "select * from " + tableName;
  228. int retCode = driver.run(query).getResponseCode();
  229. if( retCode != 0 ) {
  230. throw new Exception("Error " + retCode + " running query " + query);
  231. }
  232. ArrayList<String> res = new ArrayList<String>();
  233. driver.getResults(res);
  234. assertEquals(70, res.size());
  235. }
  236. }