PageRenderTime 48ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java

#
Java | 159 lines | 114 code | 19 blank | 26 comment | 18 complexity | 2d2d6ae0c4cfa71eadc2140a5da0154b MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.udf.generic;
  19. import java.util.ArrayList;
  20. import org.apache.commons.logging.Log;
  21. import org.apache.commons.logging.LogFactory;
  22. import org.apache.hadoop.hive.ql.exec.Description;
  23. import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  24. import org.apache.hadoop.hive.ql.metadata.HiveException;
  25. import org.apache.hadoop.hive.serde.Constants;
  26. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  27. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
  28. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  29. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
  30. import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
  31. import org.apache.hadoop.io.Text;
  32. import org.json.JSONException;
  33. import org.json.JSONObject;
  34. /**
  35. * GenericUDTFJSONTuple: this
  36. *
  37. */
  38. @Description(name = "json_tuple",
  39. value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. " +
  40. "All the input parameters and output column types are string.")
  41. public class GenericUDTFJSONTuple extends GenericUDTF {
  42. private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
  43. int numCols; // number of output columns
  44. String[] paths; // array of path expressions, each of which corresponds to a column
  45. Text[] retCols; // array of returned column values
  46. Text[] cols; // object pool of non-null Text, avoid creating objects all the time
  47. Object[] nullCols; // array of null column values
  48. ObjectInspector[] inputOIs; // input ObjectInspectors
  49. boolean pathParsed = false;
  50. boolean seenErrors = false;
  51. @Override
  52. public void close() throws HiveException {
  53. }
  54. @Override
  55. public StructObjectInspector initialize(ObjectInspector[] args)
  56. throws UDFArgumentException {
  57. inputOIs = args;
  58. numCols = args.length - 1;
  59. if (numCols < 1) {
  60. throw new UDFArgumentException("json_tuple() takes at least two arguments: " +
  61. "the json string and a path expression");
  62. }
  63. for (int i = 0; i < args.length; ++i) {
  64. if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE ||
  65. !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) {
  66. throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
  67. }
  68. }
  69. seenErrors = false;
  70. pathParsed = false;
  71. paths = new String[numCols];
  72. cols = new Text[numCols];
  73. retCols = new Text[numCols];
  74. nullCols = new Object[numCols];
  75. for (int i = 0; i < numCols; ++i) {
  76. cols[i] = new Text();
  77. retCols[i] = cols[i];
  78. nullCols[i] = null;
  79. }
  80. // construct output object inspector
  81. ArrayList<String> fieldNames = new ArrayList<String>(numCols);
  82. ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
  83. for (int i = 0; i < numCols; ++i) {
  84. // column name can be anything since it will be named by UDTF as clause
  85. fieldNames.add("c" + i);
  86. // all returned type will be Text
  87. fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  88. }
  89. return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  90. }
  91. @Override
  92. public void process(Object[] o) throws HiveException {
  93. if (o[0] == null) {
  94. forward(nullCols);
  95. return;
  96. }
  97. // get the path expression for the 1st row only
  98. if (!pathParsed) {
  99. for (int i = 0;i < numCols; ++i) {
  100. paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]);
  101. }
  102. pathParsed = true;
  103. }
  104. String jsonStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]);
  105. if (jsonStr == null) {
  106. forward(nullCols);
  107. return;
  108. }
  109. try {
  110. JSONObject jsonObj = new JSONObject(jsonStr);
  111. for (int i = 0; i < numCols; ++i) {
  112. if (jsonObj.isNull(paths[i])) {
  113. retCols[i] = null;
  114. } else {
  115. if (retCols[i] == null) {
  116. retCols[i] = cols[i]; // use the object pool rather than creating a new object
  117. }
  118. retCols[i].set(jsonObj.getString(paths[i]));
  119. }
  120. }
  121. forward(retCols);
  122. return;
  123. } catch (JSONException e) {
  124. // parsing error, invalid JSON string
  125. if (!seenErrors) {
  126. LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future.");
  127. seenErrors = true;
  128. }
  129. forward(nullCols);
  130. return;
  131. } catch (Throwable e) {
  132. LOG.error("JSON parsing/evaluation exception" + e);
  133. forward(nullCols);
  134. }
  135. }
  136. @Override
  137. public String toString() {
  138. return "json_tuple";
  139. }
  140. }