/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
# · Java · 252 lines · 179 code · 27 blank · 46 comment · 40 complexity · 96c4aaa4b6c7cfe89ca956e05d2cfe66 MD5 · raw file
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.hive.ql.udf;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.LinkedHashMap;
- import java.util.Map;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import org.apache.hadoop.hive.ql.exec.Description;
- import org.apache.hadoop.hive.ql.exec.UDF;
- import org.apache.hadoop.io.Text;
- import org.json.JSONArray;
- import org.json.JSONException;
- import org.json.JSONObject;
- /**
- * UDFJson.
- *
- */
- @Description(name = "get_json_object",
- value = "_FUNC_(json_txt, path) - Extract a json object from path ",
- extended = "Extract json object from a json string based on json path "
- + "specified, and return json string of the extracted json object. It "
- + "will return null if the input json string is invalid.\n"
- + "A limited version of JSONPath supported:\n"
- + " $ : Root object\n"
- + " . : Child operator\n"
- + " [] : Subscript operator for array\n"
- + " * : Wildcard for []\n"
- + "Syntax not supported that's worth noticing:\n"
- + " '' : Zero length string as key\n"
- + " .. : Recursive descent\n"
- + " @ : Current object/element\n"
- + " () : Script expression\n"
- + " ?() : Filter (script) expression.\n"
- + " [,] : Union operator\n"
- + " [start:end:step] : array slice operator\n")
- public class UDFJson extends UDF {
- private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");
- private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
- // An LRU cache using a linked hash map
- static class HashCache<K, V> extends LinkedHashMap<K, V> {
- private static final int CACHE_SIZE = 16;
- private static final int INIT_SIZE = 32;
- private static final float LOAD_FACTOR = 0.6f;
- HashCache() {
- super(INIT_SIZE, LOAD_FACTOR);
- }
- private static final long serialVersionUID = 1;
- @Override
- protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
- return size() > CACHE_SIZE;
- }
- }
- static Map<String, Object> extractObjectCache = new HashCache<String, Object>();
- static Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
- static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>();
- static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
- static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();
- Text result = new Text();
- public UDFJson() {
- }
- /**
- * Extract json object from a json string based on json path specified, and
- * return json string of the extracted json object. It will return null if the
- * input json string is invalid.
- *
- * A limited version of JSONPath supported: $ : Root object . : Child operator
- * [] : Subscript operator for array * : Wildcard for []
- *
- * Syntax not supported that's worth noticing: '' : Zero length string as key
- * .. : Recursive descent &#064; : Current object/element () : Script
- * expression ?() : Filter (script) expression. [,] : Union operator
- * [start:end:step] : array slice operator
- *
- * @param jsonString
- * the json string.
- * @param pathString
- * the json path expression.
- * @return json string or null when an error happens.
- */
- public Text evaluate(String jsonString, String pathString) {
- if (jsonString == null || jsonString == "" || pathString == null
- || pathString == "") {
- return null;
- }
- try {
- // Cache pathExpr
- String[] pathExpr = pathExprCache.get(pathString);
- if (pathExpr == null) {
- pathExpr = pathString.split("\\.", -1);
- pathExprCache.put(pathString, pathExpr);
- }
- if (!pathExpr[0].equalsIgnoreCase("$")) {
- return null;
- }
- // Cache extractObject
- Object extractObject = extractObjectCache.get(jsonString);
- if (extractObject == null) {
- extractObject = new JSONObject(jsonString);
- extractObjectCache.put(jsonString, extractObject);
- }
- for (int i = 1; i < pathExpr.length; i++) {
- extractObject = extract(extractObject, pathExpr[i]);
- }
- result.set(extractObject.toString());
- return result;
- } catch (Exception e) {
- return null;
- }
- }
- private Object extract(Object json, String path) throws JSONException {
- // Cache patternkey.matcher(path).matches()
- Matcher mKey = null;
- Boolean mKeyMatches = mKeyMatchesCache.get(path);
- if (mKeyMatches == null) {
- mKey = patternKey.matcher(path);
- mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
- mKeyMatchesCache.put(path, mKeyMatches);
- }
- if (!mKeyMatches.booleanValue()) {
- return null;
- }
- // Cache mkey.group(1)
- String mKeyGroup1 = mKeyGroup1Cache.get(path);
- if (mKeyGroup1 == null) {
- if (mKey == null) {
- mKey = patternKey.matcher(path);
- }
- mKeyGroup1 = mKey.group(1);
- mKeyGroup1Cache.put(path, mKeyGroup1);
- }
- json = extract_json_withkey(json, mKeyGroup1);
- // Cache indexList
- ArrayList<String> indexList = indexListCache.get(path);
- if (indexList == null) {
- Matcher mIndex = patternIndex.matcher(path);
- indexList = new ArrayList<String>();
- while (mIndex.find()) {
- indexList.add(mIndex.group(1));
- }
- indexListCache.put(path, indexList);
- }
- if (indexList.size() > 0) {
- json = extract_json_withindex(json, indexList);
- }
- return json;
- }
- ArrayList<Object> jsonList = new ArrayList<Object>();
- private Object extract_json_withindex(Object json, ArrayList<String> indexList)
- throws JSONException {
- jsonList.clear();
- jsonList.add(json);
- Iterator<String> itr = indexList.iterator();
- while (itr.hasNext()) {
- String index = itr.next();
- ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
- if (index.equalsIgnoreCase("*")) {
- for (int i = 0; i < (jsonList).size(); i++) {
- try {
- JSONArray array = (JSONArray) (jsonList).get(i);
- for (int j = 0; j < array.length(); j++) {
- tmp_jsonList.add(array.get(j));
- }
- } catch (Exception e) {
- continue;
- }
- }
- jsonList = tmp_jsonList;
- } else {
- for (int i = 0; i < (jsonList).size(); i++) {
- try {
- tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer
- .parseInt(index)));
- } catch (ClassCastException e) {
- continue;
- } catch (JSONException e) {
- return null;
- }
- jsonList = tmp_jsonList;
- }
- }
- }
- return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
- }
- private Object extract_json_withkey(Object json, String path)
- throws JSONException {
- if (json.getClass() == org.json.JSONArray.class) {
- JSONArray jsonArray = new JSONArray();
- for (int i = 0; i < ((JSONArray) json).length(); i++) {
- Object josn_elem = ((JSONArray) json).get(i);
- try {
- Object json_obj = ((JSONObject) josn_elem).get(path);
- if (json_obj.getClass() == org.json.JSONArray.class) {
- for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
- jsonArray.put(((JSONArray) json_obj).get(j));
- }
- } else {
- jsonArray.put(json_obj);
- }
- } catch (Exception e) {
- continue;
- }
- }
- return (jsonArray.length() == 0) ? null : jsonArray;
- } else {
- return ((JSONObject) json).get(path);
- }
- }
- }