/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRegExpExtract.java

# · Java · 65 lines · 35 code · 8 blank · 22 comment · 8 complexity · 8709db4bab408a46e355206ecf3db7c9 MD5 · raw file

  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.udf;
  19. import java.util.regex.MatchResult;
  20. import java.util.regex.Matcher;
  21. import java.util.regex.Pattern;
  22. import org.apache.hadoop.hive.ql.exec.Description;
  23. import org.apache.hadoop.hive.ql.exec.UDF;
  24. /**
  25. * UDF to extract a specific group identified by a java regex. Note that if a
  26. * regexp has a backslash ('\'), then need to specify '\\' For example,
  27. * regexp_extract('100-200', '(\\d+)-(\\d+)', 1) will return '100'
  28. */
  29. @Description(name = "regexp_extract",
  30. value = "_FUNC_(str, regexp[, idx]) - extracts a group that matches regexp",
  31. extended = "Example:\n"
  32. + " > SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1) FROM src LIMIT 1;\n"
  33. + " '100'")
  34. public class UDFRegExpExtract extends UDF {
  35. private String lastRegex = null;
  36. private Pattern p = null;
  37. public UDFRegExpExtract() {
  38. }
  39. public String evaluate(String s, String regex, Integer extractIndex) {
  40. if (s == null || regex == null) {
  41. return null;
  42. }
  43. if (!regex.equals(lastRegex) || p == null) {
  44. lastRegex = regex;
  45. p = Pattern.compile(regex);
  46. }
  47. Matcher m = p.matcher(s);
  48. if (m.find()) {
  49. MatchResult mr = m.toMatchResult();
  50. return mr.group(extractIndex);
  51. }
  52. return "";
  53. }
  54. public String evaluate(String s, String regex) {
  55. return this.evaluate(s, regex, 1);
  56. }
  57. }