PageRenderTime 50ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java

http://github.com/apache/hive
Java | 354 lines | 234 code | 81 blank | 39 comment | 14 complexity | 2a9217c2b6ff828903be422c77214ce3 MD5 | raw file
Possible License(s): Apache-2.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.apache.hcatalog.mapreduce;
  20. import java.io.IOException;
  21. import java.util.ArrayList;
  22. import java.util.HashMap;
  23. import java.util.List;
  24. import java.util.Map;
  25. import org.apache.hadoop.hive.metastore.api.FieldSchema;
  26. import org.apache.hadoop.hive.serde.serdeConstants;
  27. import org.apache.hcatalog.common.ErrorType;
  28. import org.apache.hcatalog.common.HCatException;
  29. import org.apache.hcatalog.data.DefaultHCatRecord;
  30. import org.apache.hcatalog.data.HCatRecord;
  31. import org.apache.hcatalog.data.schema.HCatFieldSchema;
  32. import org.apache.hcatalog.data.schema.HCatSchema;
  33. import org.apache.hcatalog.data.schema.HCatSchemaUtils;
  34. import org.junit.BeforeClass;
  35. import org.junit.Test;
  36. import static junit.framework.Assert.assertEquals;
  37. import static org.junit.Assert.assertTrue;
  38. /**
  39. * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.TestHCatPartitioned} instead
  40. */
  41. public class TestHCatPartitioned extends HCatMapReduceTest {
  42. private static List<HCatRecord> writeRecords;
  43. private static List<HCatFieldSchema> partitionColumns;
  44. @BeforeClass
  45. public static void oneTimeSetUp() throws Exception {
  46. tableName = "testHCatPartitionedTable";
  47. writeRecords = new ArrayList<HCatRecord>();
  48. for (int i = 0; i < 20; i++) {
  49. List<Object> objList = new ArrayList<Object>();
  50. objList.add(i);
  51. objList.add("strvalue" + i);
  52. writeRecords.add(new DefaultHCatRecord(objList));
  53. }
  54. partitionColumns = new ArrayList<HCatFieldSchema>();
  55. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  56. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  57. }
  58. @Override
  59. protected List<FieldSchema> getPartitionKeys() {
  60. List<FieldSchema> fields = new ArrayList<FieldSchema>();
  61. //Defining partition names in unsorted order
  62. fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, ""));
  63. fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, ""));
  64. return fields;
  65. }
  66. @Override
  67. protected List<FieldSchema> getTableColumns() {
  68. List<FieldSchema> fields = new ArrayList<FieldSchema>();
  69. fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""));
  70. fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""));
  71. return fields;
  72. }
  73. @Test
  74. public void testHCatPartitionedTable() throws Exception {
  75. Map<String, String> partitionMap = new HashMap<String, String>();
  76. partitionMap.put("part1", "p1value1");
  77. partitionMap.put("part0", "p0value1");
  78. runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  79. partitionMap.clear();
  80. partitionMap.put("PART1", "p1value2");
  81. partitionMap.put("PART0", "p0value2");
  82. runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  83. //Test for duplicate publish
  84. IOException exc = null;
  85. try {
  86. runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  87. } catch (IOException e) {
  88. exc = e;
  89. }
  90. assertTrue(exc != null);
  91. assertTrue(exc instanceof HCatException);
  92. assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType());
  93. //Test for publish with invalid partition key name
  94. exc = null;
  95. partitionMap.clear();
  96. partitionMap.put("px1", "p1value2");
  97. partitionMap.put("px0", "p0value2");
  98. try {
  99. runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  100. } catch (IOException e) {
  101. exc = e;
  102. }
  103. assertTrue(exc != null);
  104. assertTrue(exc instanceof HCatException);
  105. assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType());
  106. //Test for publish with missing partition key values
  107. exc = null;
  108. partitionMap.clear();
  109. partitionMap.put("px", "p1value2");
  110. try {
  111. runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  112. } catch (IOException e) {
  113. exc = e;
  114. }
  115. assertTrue(exc != null);
  116. assertTrue(exc instanceof HCatException);
  117. assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType());
  118. //Test for null partition value map
  119. exc = null;
  120. try {
  121. runMRCreate(null, partitionColumns, writeRecords, 20, false);
  122. } catch (IOException e) {
  123. exc = e;
  124. }
  125. assertTrue(exc == null);
  126. // assertTrue(exc instanceof HCatException);
  127. // assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType());
  128. // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values
  129. //Read should get 10 + 20 rows
  130. runMRRead(30);
  131. //Read with partition filter
  132. runMRRead(10, "part1 = \"p1value1\"");
  133. runMRRead(20, "part1 = \"p1value2\"");
  134. runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
  135. runMRRead(10, "part0 = \"p0value1\"");
  136. runMRRead(20, "part0 = \"p0value2\"");
  137. runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\"");
  138. tableSchemaTest();
  139. columnOrderChangeTest();
  140. hiveReadTest();
  141. }
  142. //test that new columns gets added to table schema
  143. private void tableSchemaTest() throws Exception {
  144. HCatSchema tableSchema = getTableSchema();
  145. assertEquals(4, tableSchema.getFields().size());
  146. //Update partition schema to have 3 fields
  147. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  148. writeRecords = new ArrayList<HCatRecord>();
  149. for (int i = 0; i < 20; i++) {
  150. List<Object> objList = new ArrayList<Object>();
  151. objList.add(i);
  152. objList.add("strvalue" + i);
  153. objList.add("str2value" + i);
  154. writeRecords.add(new DefaultHCatRecord(objList));
  155. }
  156. Map<String, String> partitionMap = new HashMap<String, String>();
  157. partitionMap.put("part1", "p1value5");
  158. partitionMap.put("part0", "p0value5");
  159. runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  160. tableSchema = getTableSchema();
  161. //assert that c3 has got added to table schema
  162. assertEquals(5, tableSchema.getFields().size());
  163. assertEquals("c1", tableSchema.getFields().get(0).getName());
  164. assertEquals("c2", tableSchema.getFields().get(1).getName());
  165. assertEquals("c3", tableSchema.getFields().get(2).getName());
  166. assertEquals("part1", tableSchema.getFields().get(3).getName());
  167. assertEquals("part0", tableSchema.getFields().get(4).getName());
  168. //Test that changing column data type fails
  169. partitionMap.clear();
  170. partitionMap.put("part1", "p1value6");
  171. partitionMap.put("part0", "p0value6");
  172. partitionColumns = new ArrayList<HCatFieldSchema>();
  173. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  174. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));
  175. IOException exc = null;
  176. try {
  177. runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  178. } catch (IOException e) {
  179. exc = e;
  180. }
  181. assertTrue(exc != null);
  182. assertTrue(exc instanceof HCatException);
  183. assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
  184. //Test that partition key is not allowed in data
  185. partitionColumns = new ArrayList<HCatFieldSchema>();
  186. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  187. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  188. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  189. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));
  190. List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
  191. for (int i = 0; i < 20; i++) {
  192. List<Object> objList = new ArrayList<Object>();
  193. objList.add(i);
  194. objList.add("c2value" + i);
  195. objList.add("c3value" + i);
  196. objList.add("p1value6");
  197. recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
  198. }
  199. exc = null;
  200. try {
  201. runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
  202. } catch (IOException e) {
  203. exc = e;
  204. }
  205. List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
  206. assertEquals(20, records.size());
  207. records = runMRRead(20, "part0 = \"p0value6\"");
  208. assertEquals(20, records.size());
  209. Integer i = 0;
  210. for (HCatRecord rec : records) {
  211. assertEquals(5, rec.size());
  212. assertTrue(rec.get(0).equals(i));
  213. assertTrue(rec.get(1).equals("c2value" + i));
  214. assertTrue(rec.get(2).equals("c3value" + i));
  215. assertTrue(rec.get(3).equals("p1value6"));
  216. assertTrue(rec.get(4).equals("p0value6"));
  217. i++;
  218. }
  219. }
  220. //check behavior while change the order of columns
  221. private void columnOrderChangeTest() throws Exception {
  222. HCatSchema tableSchema = getTableSchema();
  223. assertEquals(5, tableSchema.getFields().size());
  224. partitionColumns = new ArrayList<HCatFieldSchema>();
  225. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  226. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  227. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  228. writeRecords = new ArrayList<HCatRecord>();
  229. for (int i = 0; i < 10; i++) {
  230. List<Object> objList = new ArrayList<Object>();
  231. objList.add(i);
  232. objList.add("co strvalue" + i);
  233. objList.add("co str2value" + i);
  234. writeRecords.add(new DefaultHCatRecord(objList));
  235. }
  236. Map<String, String> partitionMap = new HashMap<String, String>();
  237. partitionMap.put("part1", "p1value8");
  238. partitionMap.put("part0", "p0value8");
  239. Exception exc = null;
  240. try {
  241. runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  242. } catch (IOException e) {
  243. exc = e;
  244. }
  245. assertTrue(exc != null);
  246. assertTrue(exc instanceof HCatException);
  247. assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  248. partitionColumns = new ArrayList<HCatFieldSchema>();
  249. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  250. partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  251. writeRecords = new ArrayList<HCatRecord>();
  252. for (int i = 0; i < 10; i++) {
  253. List<Object> objList = new ArrayList<Object>();
  254. objList.add(i);
  255. objList.add("co strvalue" + i);
  256. writeRecords.add(new DefaultHCatRecord(objList));
  257. }
  258. runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  259. //Read should get 10 + 20 + 10 + 10 + 20 rows
  260. runMRRead(70);
  261. }
  262. //Test that data inserted through hcatoutputformat is readable from hive
  263. private void hiveReadTest() throws Exception {
  264. String query = "select * from " + tableName;
  265. int retCode = driver.run(query).getResponseCode();
  266. if (retCode != 0) {
  267. throw new Exception("Error " + retCode + " running query " + query);
  268. }
  269. ArrayList<String> res = new ArrayList<String>();
  270. driver.getResults(res);
  271. assertEquals(70, res.size());
  272. }
  273. }