/tags/release-0.1-rc2/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java

# · Java · 252 lines · 179 code · 27 blank · 46 comment · 40 complexity · 96c4aaa4b6c7cfe89ca956e05d2cfe66 MD5 · raw file

  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.udf;
  19. import java.util.ArrayList;
  20. import java.util.Iterator;
  21. import java.util.LinkedHashMap;
  22. import java.util.Map;
  23. import java.util.regex.Matcher;
  24. import java.util.regex.Pattern;
  25. import org.apache.hadoop.hive.ql.exec.Description;
  26. import org.apache.hadoop.hive.ql.exec.UDF;
  27. import org.apache.hadoop.io.Text;
  28. import org.json.JSONArray;
  29. import org.json.JSONException;
  30. import org.json.JSONObject;
  31. /**
  32. * UDFJson.
  33. *
  34. */
  35. @Description(name = "get_json_object",
  36. value = "_FUNC_(json_txt, path) - Extract a json object from path ",
  37. extended = "Extract json object from a json string based on json path "
  38. + "specified, and return json string of the extracted json object. It "
  39. + "will return null if the input json string is invalid.\n"
  40. + "A limited version of JSONPath supported:\n"
  41. + " $ : Root object\n"
  42. + " . : Child operator\n"
  43. + " [] : Subscript operator for array\n"
  44. + " * : Wildcard for []\n"
  45. + "Syntax not supported that's worth noticing:\n"
  46. + " '' : Zero length string as key\n"
  47. + " .. : Recursive descent\n"
  48. + " @ : Current object/element\n"
  49. + " () : Script expression\n"
  50. + " ?() : Filter (script) expression.\n"
  51. + " [,] : Union operator\n"
  52. + " [start:end:step] : array slice operator\n")
  53. public class UDFJson extends UDF {
  54. private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");
  55. private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
  56. // An LRU cache using a linked hash map
  57. static class HashCache<K, V> extends LinkedHashMap<K, V> {
  58. private static final int CACHE_SIZE = 16;
  59. private static final int INIT_SIZE = 32;
  60. private static final float LOAD_FACTOR = 0.6f;
  61. HashCache() {
  62. super(INIT_SIZE, LOAD_FACTOR);
  63. }
  64. private static final long serialVersionUID = 1;
  65. @Override
  66. protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
  67. return size() > CACHE_SIZE;
  68. }
  69. }
  70. static Map<String, Object> extractObjectCache = new HashCache<String, Object>();
  71. static Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
  72. static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>();
  73. static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
  74. static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();
  75. Text result = new Text();
  76. public UDFJson() {
  77. }
  78. /**
  79. * Extract json object from a json string based on json path specified, and
  80. * return json string of the extracted json object. It will return null if the
  81. * input json string is invalid.
  82. *
  83. * A limited version of JSONPath supported: $ : Root object . : Child operator
  84. * [] : Subscript operator for array * : Wildcard for []
  85. *
  86. * Syntax not supported that's worth noticing: '' : Zero length string as key
  87. * .. : Recursive descent &amp;#064; : Current object/element () : Script
  88. * expression ?() : Filter (script) expression. [,] : Union operator
  89. * [start:end:step] : array slice operator
  90. *
  91. * @param jsonString
  92. * the json string.
  93. * @param pathString
  94. * the json path expression.
  95. * @return json string or null when an error happens.
  96. */
  97. public Text evaluate(String jsonString, String pathString) {
  98. if (jsonString == null || jsonString == "" || pathString == null
  99. || pathString == "") {
  100. return null;
  101. }
  102. try {
  103. // Cache pathExpr
  104. String[] pathExpr = pathExprCache.get(pathString);
  105. if (pathExpr == null) {
  106. pathExpr = pathString.split("\\.", -1);
  107. pathExprCache.put(pathString, pathExpr);
  108. }
  109. if (!pathExpr[0].equalsIgnoreCase("$")) {
  110. return null;
  111. }
  112. // Cache extractObject
  113. Object extractObject = extractObjectCache.get(jsonString);
  114. if (extractObject == null) {
  115. extractObject = new JSONObject(jsonString);
  116. extractObjectCache.put(jsonString, extractObject);
  117. }
  118. for (int i = 1; i < pathExpr.length; i++) {
  119. extractObject = extract(extractObject, pathExpr[i]);
  120. }
  121. result.set(extractObject.toString());
  122. return result;
  123. } catch (Exception e) {
  124. return null;
  125. }
  126. }
  127. private Object extract(Object json, String path) throws JSONException {
  128. // Cache patternkey.matcher(path).matches()
  129. Matcher mKey = null;
  130. Boolean mKeyMatches = mKeyMatchesCache.get(path);
  131. if (mKeyMatches == null) {
  132. mKey = patternKey.matcher(path);
  133. mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
  134. mKeyMatchesCache.put(path, mKeyMatches);
  135. }
  136. if (!mKeyMatches.booleanValue()) {
  137. return null;
  138. }
  139. // Cache mkey.group(1)
  140. String mKeyGroup1 = mKeyGroup1Cache.get(path);
  141. if (mKeyGroup1 == null) {
  142. if (mKey == null) {
  143. mKey = patternKey.matcher(path);
  144. }
  145. mKeyGroup1 = mKey.group(1);
  146. mKeyGroup1Cache.put(path, mKeyGroup1);
  147. }
  148. json = extract_json_withkey(json, mKeyGroup1);
  149. // Cache indexList
  150. ArrayList<String> indexList = indexListCache.get(path);
  151. if (indexList == null) {
  152. Matcher mIndex = patternIndex.matcher(path);
  153. indexList = new ArrayList<String>();
  154. while (mIndex.find()) {
  155. indexList.add(mIndex.group(1));
  156. }
  157. indexListCache.put(path, indexList);
  158. }
  159. if (indexList.size() > 0) {
  160. json = extract_json_withindex(json, indexList);
  161. }
  162. return json;
  163. }
  164. ArrayList<Object> jsonList = new ArrayList<Object>();
  165. private Object extract_json_withindex(Object json, ArrayList<String> indexList)
  166. throws JSONException {
  167. jsonList.clear();
  168. jsonList.add(json);
  169. Iterator<String> itr = indexList.iterator();
  170. while (itr.hasNext()) {
  171. String index = itr.next();
  172. ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
  173. if (index.equalsIgnoreCase("*")) {
  174. for (int i = 0; i < (jsonList).size(); i++) {
  175. try {
  176. JSONArray array = (JSONArray) (jsonList).get(i);
  177. for (int j = 0; j < array.length(); j++) {
  178. tmp_jsonList.add(array.get(j));
  179. }
  180. } catch (Exception e) {
  181. continue;
  182. }
  183. }
  184. jsonList = tmp_jsonList;
  185. } else {
  186. for (int i = 0; i < (jsonList).size(); i++) {
  187. try {
  188. tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer
  189. .parseInt(index)));
  190. } catch (ClassCastException e) {
  191. continue;
  192. } catch (JSONException e) {
  193. return null;
  194. }
  195. jsonList = tmp_jsonList;
  196. }
  197. }
  198. }
  199. return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
  200. }
  201. private Object extract_json_withkey(Object json, String path)
  202. throws JSONException {
  203. if (json.getClass() == org.json.JSONArray.class) {
  204. JSONArray jsonArray = new JSONArray();
  205. for (int i = 0; i < ((JSONArray) json).length(); i++) {
  206. Object josn_elem = ((JSONArray) json).get(i);
  207. try {
  208. Object json_obj = ((JSONObject) josn_elem).get(path);
  209. if (json_obj.getClass() == org.json.JSONArray.class) {
  210. for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
  211. jsonArray.put(((JSONArray) json_obj).get(j));
  212. }
  213. } else {
  214. jsonArray.put(json_obj);
  215. }
  216. } catch (Exception e) {
  217. continue;
  218. }
  219. }
  220. return (jsonArray.length() == 0) ? null : jsonArray;
  221. } else {
  222. return ((JSONObject) json).get(path);
  223. }
  224. }
  225. }