PageRenderTime 10ms CodeModel.GetById 1ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java

#
Java | 159 lines | 114 code | 19 blank | 26 comment | 18 complexity | 2d2d6ae0c4cfa71eadc2140a5da0154b MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.udf.generic;
 20
 21import java.util.ArrayList;
 22
 23import org.apache.commons.logging.Log;
 24import org.apache.commons.logging.LogFactory;
 25import org.apache.hadoop.hive.ql.exec.Description;
 26import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 27import org.apache.hadoop.hive.ql.metadata.HiveException;
 28import org.apache.hadoop.hive.serde.Constants;
 29import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 30import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 31import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 32import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 33import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 34import org.apache.hadoop.io.Text;
 35import org.json.JSONException;
 36import org.json.JSONObject;
 37/**
 38 * GenericUDTFJSONTuple: this
 39 *
 40 */
 41@Description(name = "json_tuple",
 42    value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. " +
 43    		"All the input parameters and output column types are string.")
 44
 45public class GenericUDTFJSONTuple extends GenericUDTF {
 46
 47  private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
 48
 49  int numCols;    // number of output columns
 50  String[] paths; // array of path expressions, each of which corresponds to a column
 51  Text[] retCols; // array of returned column values
 52  Text[] cols;    // object pool of non-null Text, avoid creating objects all the time
 53  Object[] nullCols; // array of null column values
 54  ObjectInspector[] inputOIs; // input ObjectInspectors
 55  boolean pathParsed = false;
 56  boolean seenErrors = false;
 57
 58  @Override
 59  public void close() throws HiveException {
 60  }
 61
 62  @Override
 63  public StructObjectInspector initialize(ObjectInspector[] args)
 64      throws UDFArgumentException {
 65
 66    inputOIs = args;
 67    numCols = args.length - 1;
 68
 69    if (numCols < 1) {
 70      throw new UDFArgumentException("json_tuple() takes at least two arguments: " +
 71      		"the json string and a path expression");
 72    }
 73
 74    for (int i = 0; i < args.length; ++i) {
 75      if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE ||
 76          !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) {
 77        throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
 78      }
 79    }
 80
 81    seenErrors = false;
 82    pathParsed = false;
 83    paths = new String[numCols];
 84    cols = new Text[numCols];
 85    retCols = new Text[numCols];
 86    nullCols = new Object[numCols];
 87
 88    for (int i = 0; i < numCols; ++i) {
 89      cols[i] = new Text();
 90      retCols[i] = cols[i];
 91      nullCols[i] = null;
 92    }
 93
 94    // construct output object inspector
 95    ArrayList<String> fieldNames = new ArrayList<String>(numCols);
 96    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
 97    for (int i = 0; i < numCols; ++i) {
 98      // column name can be anything since it will be named by UDTF as clause
 99      fieldNames.add("c" + i);
100      // all returned type will be Text
101      fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
102    }
103    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
104  }
105
106  @Override
107  public void process(Object[] o) throws HiveException {
108
109    if (o[0] == null) {
110      forward(nullCols);
111      return;
112    }
113    // get the path expression for the 1st row only
114    if (!pathParsed) {
115      for (int i = 0;i < numCols; ++i) {
116        paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]);
117      }
118      pathParsed = true;
119    }
120
121    String jsonStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]);
122    if (jsonStr == null) {
123      forward(nullCols);
124      return;
125    }
126    try {
127      JSONObject jsonObj = new JSONObject(jsonStr);
128
129      for (int i = 0; i < numCols; ++i) {
130        if (jsonObj.isNull(paths[i])) {
131          retCols[i] = null;
132        } else {
133          if (retCols[i] == null) {
134            retCols[i] = cols[i]; // use the object pool rather than creating a new object
135          }
136          retCols[i].set(jsonObj.getString(paths[i]));
137        }
138      }
139      forward(retCols);
140      return;
141    } catch (JSONException e) {
142      // parsing error, invalid JSON string
143      if (!seenErrors) {
144        LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future.");
145        seenErrors = true;
146      }
147      forward(nullCols);
148      return;
149    } catch (Throwable e) {
150      LOG.error("JSON parsing/evaluation exception" + e);
151      forward(nullCols);
152    }
153  }
154
155  @Override
156  public String toString() {
157    return "json_tuple";
158  }
159}