/tags/release-0.0.0-rc0/hive/external/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFindInSet.java
Java | 98 lines | 63 code | 14 blank | 21 comment | 24 complexity | 0f1a082cc993d4dfb18fcff91a21027c MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.hive.ql.udf;
- import org.apache.hadoop.hive.ql.exec.Description;
- import org.apache.hadoop.hive.ql.exec.UDF;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- /**
- * UDFFindInSet.
- *
- */
- @Description(name = "find_in_set", value = "_FUNC_(str,str_array) - Returns the first occurrence "
- + " of str in str_array where str_array is a comma-delimited string."
- + " Returns null if either argument is null."
- + " Returns 0 if the first argument has any commas.", extended = "Example:\n"
- + " > SELECT _FUNC_('ab','abc,b,ab,c,def') FROM src LIMIT 1;\n"
- + " 3\n"
- + " > SELECT * FROM src1 WHERE NOT _FUNC_(key,'311,128,345,956')=0;\n"
- + " 311 val_311\n" + " 128"
- )
- public class UDFFindInSet extends UDF {
- private final IntWritable result = new IntWritable();
- public IntWritable evaluate(Text s, Text txtarray) {
- if (s == null || txtarray == null) {
- return null;
- }
- byte[] search_bytes = s.getBytes();
- for (int i = 0; i < s.getLength(); i++) {
- if (search_bytes[i] == ',') {
- result.set(0);
- return result;
- }
- }
- byte[] data = txtarray.getBytes();
- int search_length = s.getLength();
- int cur_pos_in_array = 0;
- int cur_length = 0;
- boolean matching = true;
- for (int i = 0; i < txtarray.getLength(); i++) {
- if (data[i] == ',') {
- cur_pos_in_array++;
- if (matching && cur_length == search_length) {
- result.set(cur_pos_in_array);
- return result;
- } else {
- matching = true;
- cur_length = 0;
- }
- } else {
- if (cur_length + 1 <= search_length) {
- if (!matching || search_bytes[cur_length] != data[i]) {
- matching = false;
- }
- } else {
- matching = false;
- }
- cur_length++;
- }
- }
- if (matching && cur_length == search_length) {
- cur_pos_in_array++;
- result.set(cur_pos_in_array);
- return result;
- } else {
- result.set(0);
- return result;
- }
- }
- }