PageRenderTime 48ms CodeModel.GetById 1ms app.highlight 41ms RepoModel.GetById 2ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java

#
Java | 252 lines | 179 code | 27 blank | 46 comment | 40 complexity | 96c4aaa4b6c7cfe89ca956e05d2cfe66 MD5 | raw file
  1/**
  2 * Licensed to the Apache Software Foundation (ASF) under one
  3 * or more contributor license agreements.  See the NOTICE file
  4 * distributed with this work for additional information
  5 * regarding copyright ownership.  The ASF licenses this file
  6 * to you under the Apache License, Version 2.0 (the
  7 * "License"); you may not use this file except in compliance
  8 * with the License.  You may obtain a copy of the License at
  9 *
 10 *     http://www.apache.org/licenses/LICENSE-2.0
 11 *
 12 * Unless required by applicable law or agreed to in writing, software
 13 * distributed under the License is distributed on an "AS IS" BASIS,
 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 * See the License for the specific language governing permissions and
 16 * limitations under the License.
 17 */
 18
 19package org.apache.hadoop.hive.ql.udf;
 20
 21import java.util.ArrayList;
 22import java.util.Iterator;
 23import java.util.LinkedHashMap;
 24import java.util.Map;
 25import java.util.regex.Matcher;
 26import java.util.regex.Pattern;
 27
 28import org.apache.hadoop.hive.ql.exec.Description;
 29import org.apache.hadoop.hive.ql.exec.UDF;
 30import org.apache.hadoop.io.Text;
 31import org.json.JSONArray;
 32import org.json.JSONException;
 33import org.json.JSONObject;
 34
 35/**
 36 * UDFJson.
 37 *
 38 */
 39@Description(name = "get_json_object",
 40    value = "_FUNC_(json_txt, path) - Extract a json object from path ",
 41    extended = "Extract json object from a json string based on json path "
 42    + "specified, and return json string of the extracted json object. It "
 43    + "will return null if the input json string is invalid.\n"
 44    + "A limited version of JSONPath supported:\n"
 45    + "  $   : Root object\n"
 46    + "  .   : Child operator\n"
 47    + "  []  : Subscript operator for array\n"
 48    + "  *   : Wildcard for []\n"
 49    + "Syntax not supported that's worth noticing:\n"
 50    + "  ''  : Zero length string as key\n"
 51    + "  ..  : Recursive descent\n"
 52    + "  @   : Current object/element\n"
 53    + "  ()  : Script expression\n"
 54    + "  ?() : Filter (script) expression.\n"
 55    + "  [,] : Union operator\n"
 56    + "  [start:end:step] : array slice operator\n")
 57public class UDFJson extends UDF {
 58  private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");
 59  private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
 60
 61  // An LRU cache using a linked hash map
 62  static class HashCache<K, V> extends LinkedHashMap<K, V> {
 63
 64    private static final int CACHE_SIZE = 16;
 65    private static final int INIT_SIZE = 32;
 66    private static final float LOAD_FACTOR = 0.6f;
 67
 68    HashCache() {
 69      super(INIT_SIZE, LOAD_FACTOR);
 70    }
 71
 72    private static final long serialVersionUID = 1;
 73
 74    @Override
 75    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
 76      return size() > CACHE_SIZE;
 77    }
 78
 79  }
 80
 81  static Map<String, Object> extractObjectCache = new HashCache<String, Object>();
 82  static Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
 83  static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>();
 84  static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
 85  static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();
 86
 87  Text result = new Text();
 88
 89  public UDFJson() {
 90  }
 91
 92  /**
 93   * Extract json object from a json string based on json path specified, and
 94   * return json string of the extracted json object. It will return null if the
 95   * input json string is invalid.
 96   * 
 97   * A limited version of JSONPath supported: $ : Root object . : Child operator
 98   * [] : Subscript operator for array * : Wildcard for []
 99   * 
100   * Syntax not supported that's worth noticing: '' : Zero length string as key
101   * .. : Recursive descent &amp;#064; : Current object/element () : Script
102   * expression ?() : Filter (script) expression. [,] : Union operator
103   * [start:end:step] : array slice operator
104   * 
105   * @param jsonString
106   *          the json string.
107   * @param pathString
108   *          the json path expression.
109   * @return json string or null when an error happens.
110   */
111  public Text evaluate(String jsonString, String pathString) {
112
113    if (jsonString == null || jsonString == "" || pathString == null
114        || pathString == "") {
115      return null;
116    }
117
118    try {
119      // Cache pathExpr
120      String[] pathExpr = pathExprCache.get(pathString);
121      if (pathExpr == null) {
122        pathExpr = pathString.split("\\.", -1);
123        pathExprCache.put(pathString, pathExpr);
124      }
125
126      if (!pathExpr[0].equalsIgnoreCase("$")) {
127        return null;
128      }
129      // Cache extractObject
130      Object extractObject = extractObjectCache.get(jsonString);
131      if (extractObject == null) {
132        extractObject = new JSONObject(jsonString);
133        extractObjectCache.put(jsonString, extractObject);
134      }
135      for (int i = 1; i < pathExpr.length; i++) {
136        extractObject = extract(extractObject, pathExpr[i]);
137      }
138      result.set(extractObject.toString());
139      return result;
140    } catch (Exception e) {
141      return null;
142    }
143  }
144
145  private Object extract(Object json, String path) throws JSONException {
146
147    // Cache patternkey.matcher(path).matches()
148    Matcher mKey = null;
149    Boolean mKeyMatches = mKeyMatchesCache.get(path);
150    if (mKeyMatches == null) {
151      mKey = patternKey.matcher(path);
152      mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
153      mKeyMatchesCache.put(path, mKeyMatches);
154    }
155    if (!mKeyMatches.booleanValue()) {
156      return null;
157    }
158
159    // Cache mkey.group(1)
160    String mKeyGroup1 = mKeyGroup1Cache.get(path);
161    if (mKeyGroup1 == null) {
162      if (mKey == null) {
163        mKey = patternKey.matcher(path);
164      }
165      mKeyGroup1 = mKey.group(1);
166      mKeyGroup1Cache.put(path, mKeyGroup1);
167    }
168    json = extract_json_withkey(json, mKeyGroup1);
169
170    // Cache indexList
171    ArrayList<String> indexList = indexListCache.get(path);
172    if (indexList == null) {
173      Matcher mIndex = patternIndex.matcher(path);
174      indexList = new ArrayList<String>();
175      while (mIndex.find()) {
176        indexList.add(mIndex.group(1));
177      }
178      indexListCache.put(path, indexList);
179    }
180
181    if (indexList.size() > 0) {
182      json = extract_json_withindex(json, indexList);
183    }
184
185    return json;
186  }
187
188  ArrayList<Object> jsonList = new ArrayList<Object>();
189
190  private Object extract_json_withindex(Object json, ArrayList<String> indexList)
191      throws JSONException {
192
193    jsonList.clear();
194    jsonList.add(json);
195    Iterator<String> itr = indexList.iterator();
196    while (itr.hasNext()) {
197      String index = itr.next();
198      ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
199      if (index.equalsIgnoreCase("*")) {
200        for (int i = 0; i < (jsonList).size(); i++) {
201          try {
202            JSONArray array = (JSONArray) (jsonList).get(i);
203            for (int j = 0; j < array.length(); j++) {
204              tmp_jsonList.add(array.get(j));
205            }
206          } catch (Exception e) {
207            continue;
208          }
209        }
210        jsonList = tmp_jsonList;
211      } else {
212        for (int i = 0; i < (jsonList).size(); i++) {
213          try {
214            tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer
215                .parseInt(index)));
216          } catch (ClassCastException e) {
217            continue;
218          } catch (JSONException e) {
219            return null;
220          }
221          jsonList = tmp_jsonList;
222        }
223      }
224    }
225    return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
226  }
227
228  private Object extract_json_withkey(Object json, String path)
229      throws JSONException {
230    if (json.getClass() == org.json.JSONArray.class) {
231      JSONArray jsonArray = new JSONArray();
232      for (int i = 0; i < ((JSONArray) json).length(); i++) {
233        Object josn_elem = ((JSONArray) json).get(i);
234        try {
235          Object json_obj = ((JSONObject) josn_elem).get(path);
236          if (json_obj.getClass() == org.json.JSONArray.class) {
237            for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
238              jsonArray.put(((JSONArray) json_obj).get(j));
239            }
240          } else {
241            jsonArray.put(json_obj);
242          }
243        } catch (Exception e) {
244          continue;
245        }
246      }
247      return (jsonArray.length() == 0) ? null : jsonArray;
248    } else {
249      return ((JSONObject) json).get(path);
250    }
251  }
252}