PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/Deserializer.java

https://github.com/apache/hive
Java | 280 lines | 191 code | 46 blank | 43 comment | 16 complexity | a7de2732872b7b051f16a1de954147ff MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.apache.iceberg.mr.hive;
  20. import java.util.ArrayList;
  21. import java.util.HashMap;
  22. import java.util.List;
  23. import java.util.Map;
  24. import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
  25. import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
  26. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  27. import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
  28. import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  29. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  30. import org.apache.iceberg.Schema;
  31. import org.apache.iceberg.data.GenericRecord;
  32. import org.apache.iceberg.data.Record;
  33. import org.apache.iceberg.mr.hive.serde.objectinspector.WriteObjectInspector;
  34. import org.apache.iceberg.relocated.com.google.common.collect.Maps;
  35. import org.apache.iceberg.schema.SchemaWithPartnerVisitor;
  36. import org.apache.iceberg.types.Type.PrimitiveType;
  37. import org.apache.iceberg.types.Types.ListType;
  38. import org.apache.iceberg.types.Types.MapType;
  39. import org.apache.iceberg.types.Types.NestedField;
  40. import org.apache.iceberg.types.Types.StructType;
  41. class Deserializer {
  42. private FieldDeserializer fieldDeserializer;
  43. /**
  44. * Builder to create a Deserializer instance.
  45. * Requires an Iceberg Schema and the Hive ObjectInspector for converting the data.
  46. */
  47. static class Builder {
  48. private Schema schema;
  49. private StructObjectInspector writerInspector;
  50. private StructObjectInspector sourceInspector;
  51. Builder schema(Schema mainSchema) {
  52. this.schema = mainSchema;
  53. return this;
  54. }
  55. Builder writerInspector(StructObjectInspector inspector) {
  56. this.writerInspector = inspector;
  57. return this;
  58. }
  59. Builder sourceInspector(StructObjectInspector inspector) {
  60. this.sourceInspector = inspector;
  61. return this;
  62. }
  63. Deserializer build() {
  64. return new Deserializer(schema, new ObjectInspectorPair(writerInspector, sourceInspector));
  65. }
  66. }
  67. /**
  68. * Deserializes the Hive result object to an Iceberg record using the provided ObjectInspectors.
  69. * @param data The Hive data to deserialize
  70. * @return The resulting Iceberg Record
  71. */
  72. Record deserialize(Object data) {
  73. return (Record) fieldDeserializer.value(data);
  74. }
  75. private Deserializer(Schema schema, ObjectInspectorPair pair) {
  76. this.fieldDeserializer = DeserializerVisitor.visit(schema, pair);
  77. }
  78. private static class DeserializerVisitor extends SchemaWithPartnerVisitor<ObjectInspectorPair, FieldDeserializer> {
  79. public static FieldDeserializer visit(Schema schema, ObjectInspectorPair pair) {
  80. return visit(schema, new FixNameMappingObjectInspectorPair(schema, pair), new DeserializerVisitor(),
  81. new PartnerObjectInspectorByNameAccessors());
  82. }
  83. @Override
  84. public FieldDeserializer schema(Schema schema, ObjectInspectorPair pair, FieldDeserializer deserializer) {
  85. return deserializer;
  86. }
  87. @Override
  88. public FieldDeserializer field(NestedField field, ObjectInspectorPair pair, FieldDeserializer deserializer) {
  89. return deserializer;
  90. }
  91. @Override
  92. public FieldDeserializer primitive(PrimitiveType type, ObjectInspectorPair pair) {
  93. return o -> {
  94. if (o == null) {
  95. return null;
  96. }
  97. ObjectInspector writerFieldInspector = pair.writerInspector();
  98. ObjectInspector sourceFieldInspector = pair.sourceInspector();
  99. Object result = ((PrimitiveObjectInspector) sourceFieldInspector).getPrimitiveJavaObject(o);
  100. if (writerFieldInspector instanceof WriteObjectInspector) {
  101. // If we have a conversion method defined for the ObjectInspector then convert
  102. result = ((WriteObjectInspector) writerFieldInspector).convert(result);
  103. }
  104. return result;
  105. };
  106. }
  107. @Override
  108. public FieldDeserializer struct(StructType type, ObjectInspectorPair pair, List<FieldDeserializer> deserializers) {
  109. return o -> {
  110. if (o == null) {
  111. return null;
  112. }
  113. List<Object> data = ((StructObjectInspector) pair.sourceInspector()).getStructFieldsDataAsList(o);
  114. Record result = GenericRecord.create(type);
  115. for (int i = 0; i < deserializers.size(); i++) {
  116. Object fieldValue = data.get(i);
  117. if (fieldValue != null) {
  118. result.set(i, deserializers.get(i).value(fieldValue));
  119. } else {
  120. result.set(i, null);
  121. }
  122. }
  123. return result;
  124. };
  125. }
  126. @Override
  127. public FieldDeserializer list(ListType listTypeInfo, ObjectInspectorPair pair, FieldDeserializer deserializer) {
  128. return o -> {
  129. if (o == null) {
  130. return null;
  131. }
  132. List<Object> result = new ArrayList<>();
  133. ListObjectInspector listInspector = (ListObjectInspector) pair.sourceInspector();
  134. for (Object val : listInspector.getList(o)) {
  135. result.add(deserializer.value(val));
  136. }
  137. return result;
  138. };
  139. }
  140. @Override
  141. public FieldDeserializer map(MapType mapType, ObjectInspectorPair pair, FieldDeserializer keyDeserializer,
  142. FieldDeserializer valueDeserializer) {
  143. return o -> {
  144. if (o == null) {
  145. return null;
  146. }
  147. Map<Object, Object> result = new HashMap<>();
  148. MapObjectInspector mapObjectInspector = (MapObjectInspector) pair.sourceInspector();
  149. for (Map.Entry<?, ?> entry : mapObjectInspector.getMap(o).entrySet()) {
  150. result.put(keyDeserializer.value(entry.getKey()), valueDeserializer.value(entry.getValue()));
  151. }
  152. return result;
  153. };
  154. }
  155. }
  156. private static class PartnerObjectInspectorByNameAccessors
  157. implements SchemaWithPartnerVisitor.PartnerAccessors<ObjectInspectorPair> {
  158. @Override
  159. public ObjectInspectorPair fieldPartner(ObjectInspectorPair pair, int fieldId, String name) {
  160. String sourceName = pair.sourceName(name);
  161. return new ObjectInspectorPair(
  162. ((StructObjectInspector) pair.writerInspector()).getStructFieldRef(name).getFieldObjectInspector(),
  163. ((StructObjectInspector) pair.sourceInspector()).getStructFieldRef(sourceName).getFieldObjectInspector());
  164. }
  165. @Override
  166. public ObjectInspectorPair mapKeyPartner(ObjectInspectorPair pair) {
  167. return new ObjectInspectorPair(
  168. ((MapObjectInspector) pair.writerInspector()).getMapKeyObjectInspector(),
  169. ((MapObjectInspector) pair.sourceInspector()).getMapKeyObjectInspector());
  170. }
  171. @Override
  172. public ObjectInspectorPair mapValuePartner(ObjectInspectorPair pair) {
  173. return new ObjectInspectorPair(
  174. ((MapObjectInspector) pair.writerInspector()).getMapValueObjectInspector(),
  175. ((MapObjectInspector) pair.sourceInspector()).getMapValueObjectInspector());
  176. }
  177. @Override
  178. public ObjectInspectorPair listElementPartner(ObjectInspectorPair pair) {
  179. return new ObjectInspectorPair(
  180. ((ListObjectInspector) pair.writerInspector()).getListElementObjectInspector(),
  181. ((ListObjectInspector) pair.sourceInspector()).getListElementObjectInspector());
  182. }
  183. }
  184. private interface FieldDeserializer {
  185. Object value(Object object);
  186. }
  187. /**
  188. * Hive query results schema column names do not match the target Iceberg column names.
  189. * Instead we have to rely on the column order. To keep the other parts of the code generic we fix this with a
  190. * wrapper around the ObjectInspectorPair. This wrapper maps the Iceberg schema column names instead of the Hive
  191. * column names.
  192. */
  193. private static class FixNameMappingObjectInspectorPair extends ObjectInspectorPair {
  194. private final Map<String, String> sourceNameMap;
  195. FixNameMappingObjectInspectorPair(Schema schema, ObjectInspectorPair pair) {
  196. super(pair.writerInspector(), pair.sourceInspector());
  197. this.sourceNameMap = Maps.newHashMapWithExpectedSize(schema.columns().size());
  198. List<? extends StructField> fields = ((StructObjectInspector) sourceInspector()).getAllStructFieldRefs();
  199. for (int i = 0; i < schema.columns().size(); ++i) {
  200. sourceNameMap.put(schema.columns().get(i).name(), fields.get(i).getFieldName());
  201. }
  202. }
  203. @Override
  204. String sourceName(String originalName) {
  205. return sourceNameMap.get(originalName);
  206. }
  207. }
  208. /**
  209. * To get the data for Iceberg {@link Record}s we have to use both ObjectInspectors.
  210. * <p>
  211. * We use the Hive ObjectInspectors (sourceInspector) to get the Hive primitive types.
  212. * <p>
  213. * We use the Iceberg ObjectInspectors (writerInspector) only if conversion is needed for
  214. * generating the correct type for Iceberg Records. See: {@link WriteObjectInspector} interface on the provided
  215. * writerInspector.
  216. */
  217. private static class ObjectInspectorPair {
  218. private ObjectInspector writerInspector;
  219. private ObjectInspector sourceInspector;
  220. ObjectInspectorPair(ObjectInspector writerInspector, ObjectInspector sourceInspector) {
  221. this.writerInspector = writerInspector;
  222. this.sourceInspector = sourceInspector;
  223. }
  224. ObjectInspector writerInspector() {
  225. return writerInspector;
  226. }
  227. ObjectInspector sourceInspector() {
  228. return sourceInspector;
  229. }
  230. String sourceName(String originalName) {
  231. return originalName;
  232. }
  233. }
  234. }