PageRenderTime 57ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java

https://github.com/pkalmegh/hive
Java | 350 lines | 256 code | 39 blank | 55 comment | 71 complexity | fb1eb12ead4db87368843f2d8f6f3d19 MD5 | raw file
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.exec.vector.udf;
  19. import java.sql.Date;
  20. import java.sql.Timestamp;
  21. import org.apache.hadoop.hive.common.type.HiveDecimal;
  22. import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  23. import org.apache.hadoop.hive.ql.exec.vector.*;
  24. import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
  25. import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
  26. import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
  27. import org.apache.hadoop.hive.ql.metadata.HiveException;
  28. import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  29. import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
  30. import org.apache.hadoop.hive.serde2.io.DateWritable;
  31. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  32. import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
  33. import org.apache.hadoop.io.Text;
  34. /**
  35. * A VectorUDFAdaptor is a vectorized expression for invoking a custom
  36. * UDF on zero or more input vectors or constants which are the function arguments.
  37. */
  38. public class VectorUDFAdaptor extends VectorExpression {
  39. private static final long serialVersionUID = 1L;
  40. private int outputColumn;
  41. private String resultType;
  42. private VectorUDFArgDesc[] argDescs;
  43. private ExprNodeGenericFuncDesc expr;
  44. private transient GenericUDF genericUDF;
  45. private transient GenericUDF.DeferredObject[] deferredChildren;
  46. private transient ObjectInspector outputOI;
  47. private transient ObjectInspector[] childrenOIs;
  48. private transient VectorExpressionWriter[] writers;
  49. public VectorUDFAdaptor() {
  50. super();
  51. }
  52. public VectorUDFAdaptor (
  53. ExprNodeGenericFuncDesc expr,
  54. int outputColumn,
  55. String resultType,
  56. VectorUDFArgDesc[] argDescs) throws HiveException {
  57. this();
  58. this.expr = expr;
  59. this.outputColumn = outputColumn;
  60. this.resultType = resultType;
  61. this.argDescs = argDescs;
  62. }
  63. // Initialize transient fields. To be called after deserialization of other fields.
  64. public void init() throws HiveException, UDFArgumentException {
  65. genericUDF = expr.getGenericUDF();
  66. deferredChildren = new GenericUDF.DeferredObject[expr.getChildren().size()];
  67. childrenOIs = new ObjectInspector[expr.getChildren().size()];
  68. writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildren());
  69. for (int i = 0; i < childrenOIs.length; i++) {
  70. childrenOIs[i] = writers[i].getObjectInspector();
  71. }
  72. outputOI = VectorExpressionWriterFactory.genVectorExpressionWritable(expr)
  73. .getObjectInspector();
  74. genericUDF.initialize(childrenOIs);
  75. // Initialize constant arguments
  76. for (int i = 0; i < argDescs.length; i++) {
  77. if (argDescs[i].isConstant()) {
  78. argDescs[i].prepareConstant();
  79. }
  80. }
  81. }
  82. @Override
  83. public void evaluate(VectorizedRowBatch batch) {
  84. if (genericUDF == null) {
  85. try {
  86. init();
  87. } catch (Exception e) {
  88. throw new RuntimeException(e);
  89. }
  90. }
  91. if (childExpressions != null) {
  92. super.evaluateChildren(batch);
  93. }
  94. int[] sel = batch.selected;
  95. int n = batch.size;
  96. ColumnVector outV = batch.cols[outputColumn];
  97. // If the output column is of type string, initialize the buffer to receive data.
  98. if (outV instanceof BytesColumnVector) {
  99. ((BytesColumnVector) outV).initBuffer();
  100. }
  101. if (n == 0) {
  102. //Nothing to do
  103. return;
  104. }
  105. batch.cols[outputColumn].noNulls = true;
  106. /* If all input columns are repeating, just evaluate function
  107. * for row 0 in the batch and set output repeating.
  108. */
  109. if (allInputColsRepeating(batch)) {
  110. setResult(0, batch);
  111. batch.cols[outputColumn].isRepeating = true;
  112. return;
  113. } else {
  114. batch.cols[outputColumn].isRepeating = false;
  115. }
  116. if (batch.selectedInUse) {
  117. for(int j = 0; j != n; j++) {
  118. int i = sel[j];
  119. setResult(i, batch);
  120. }
  121. } else {
  122. for (int i = 0; i != n; i++) {
  123. setResult(i, batch);
  124. }
  125. }
  126. }
  127. /* Return false if any input column is non-repeating, otherwise true.
  128. * This returns false if all the arguments are constant or there
  129. * are zero arguments.
  130. *
  131. * A possible future optimization is to set the output to isRepeating
  132. * for cases of all-constant arguments for deterministic functions.
  133. */
  134. private boolean allInputColsRepeating(VectorizedRowBatch batch) {
  135. int varArgCount = 0;
  136. for (int i = 0; i < argDescs.length; i++) {
  137. if (argDescs[i].isVariable() && !batch.cols[argDescs[i].getColumnNum()].isRepeating) {
  138. return false;
  139. }
  140. varArgCount += 1;
  141. }
  142. if (varArgCount > 0) {
  143. return true;
  144. } else {
  145. return false;
  146. }
  147. }
  148. /* Calculate the function result for row i of the batch and
  149. * set the output column vector entry i to the result.
  150. */
  151. private void setResult(int i, VectorizedRowBatch b) {
  152. // get arguments
  153. for (int j = 0; j < argDescs.length; j++) {
  154. deferredChildren[j] = argDescs[j].getDeferredJavaObject(i, b, j, writers);
  155. }
  156. // call function
  157. Object result;
  158. try {
  159. result = genericUDF.evaluate(deferredChildren);
  160. } catch (HiveException e) {
  161. /* For UDFs that expect primitive types (like int instead of Integer or IntWritable),
  162. * this will catch the the exception that happens if they are passed a NULL value.
  163. * Then the default NULL handling logic will apply, and the result will be NULL.
  164. */
  165. result = null;
  166. }
  167. // set output column vector entry
  168. if (result == null) {
  169. b.cols[outputColumn].noNulls = false;
  170. b.cols[outputColumn].isNull[i] = true;
  171. } else {
  172. b.cols[outputColumn].isNull[i] = false;
  173. setOutputCol(b.cols[outputColumn], i, result);
  174. }
  175. }
  176. private void setOutputCol(ColumnVector colVec, int i, Object value) {
  177. /* Depending on the output type, get the value, cast the result to the
  178. * correct type if needed, and assign the result into the output vector.
  179. */
  180. if (outputOI instanceof WritableStringObjectInspector) {
  181. BytesColumnVector bv = (BytesColumnVector) colVec;
  182. Text t;
  183. if (value instanceof String) {
  184. t = new Text((String) value);
  185. } else {
  186. t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
  187. }
  188. bv.setVal(i, t.getBytes(), 0, t.getLength());
  189. } else if (outputOI instanceof WritableIntObjectInspector) {
  190. LongColumnVector lv = (LongColumnVector) colVec;
  191. if (value instanceof Integer) {
  192. lv.vector[i] = (Integer) value;
  193. } else {
  194. lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
  195. }
  196. } else if (outputOI instanceof WritableLongObjectInspector) {
  197. LongColumnVector lv = (LongColumnVector) colVec;
  198. if (value instanceof Long) {
  199. lv.vector[i] = (Long) value;
  200. } else {
  201. lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
  202. }
  203. } else if (outputOI instanceof WritableDoubleObjectInspector) {
  204. DoubleColumnVector dv = (DoubleColumnVector) colVec;
  205. if (value instanceof Double) {
  206. dv.vector[i] = (Double) value;
  207. } else {
  208. dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
  209. }
  210. } else if (outputOI instanceof WritableFloatObjectInspector) {
  211. DoubleColumnVector dv = (DoubleColumnVector) colVec;
  212. if (value instanceof Float) {
  213. dv.vector[i] = (Float) value;
  214. } else {
  215. dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
  216. }
  217. } else if (outputOI instanceof WritableShortObjectInspector) {
  218. LongColumnVector lv = (LongColumnVector) colVec;
  219. if (value instanceof Short) {
  220. lv.vector[i] = (Short) value;
  221. } else {
  222. lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
  223. }
  224. } else if (outputOI instanceof WritableByteObjectInspector) {
  225. LongColumnVector lv = (LongColumnVector) colVec;
  226. if (value instanceof Byte) {
  227. lv.vector[i] = (Byte) value;
  228. } else {
  229. lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
  230. }
  231. } else if (outputOI instanceof WritableTimestampObjectInspector) {
  232. LongColumnVector lv = (LongColumnVector) colVec;
  233. Timestamp ts;
  234. if (value instanceof Timestamp) {
  235. ts = (Timestamp) value;
  236. } else {
  237. ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
  238. }
  239. /* Calculate the number of nanoseconds since the epoch as a long integer. By convention
  240. * that is how Timestamp values are operated on in a vector.
  241. */
  242. long l = ts.getTime() * 1000000 // Shift the milliseconds value over by 6 digits
  243. // to scale for nanosecond precision.
  244. // The milliseconds digits will by convention be all 0s.
  245. + ts.getNanos() % 1000000; // Add on the remaining nanos.
  246. // The % 1000000 operation removes the ms values
  247. // so that the milliseconds are not counted twice.
  248. lv.vector[i] = l;
  249. } else if (outputOI instanceof WritableDateObjectInspector) {
  250. LongColumnVector lv = (LongColumnVector) colVec;
  251. Date ts;
  252. if (value instanceof Date) {
  253. ts = (Date) value;
  254. } else {
  255. ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value);
  256. }
  257. long l = DateWritable.dateToDays(ts);
  258. lv.vector[i] = l;
  259. } else if (outputOI instanceof WritableBooleanObjectInspector) {
  260. LongColumnVector lv = (LongColumnVector) colVec;
  261. if (value instanceof Boolean) {
  262. lv.vector[i] = (Boolean) value ? 1 : 0;
  263. } else {
  264. lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
  265. }
  266. } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
  267. DecimalColumnVector dcv = (DecimalColumnVector) colVec;
  268. if (value instanceof HiveDecimal) {
  269. dcv.vector[i].update(((HiveDecimal) value).bigDecimalValue());
  270. } else {
  271. HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
  272. dcv.vector[i].update(hd.bigDecimalValue());
  273. }
  274. } else {
  275. throw new RuntimeException("Unhandled object type " + outputOI.getTypeName());
  276. }
  277. }
  278. @Override
  279. public int getOutputColumn() {
  280. return outputColumn;
  281. }
  282. public void setOutputColumn(int outputColumn) {
  283. this.outputColumn = outputColumn;
  284. }
  285. @Override
  286. public String getOutputType() {
  287. return resultType;
  288. }
  289. public String getResultType() {
  290. return resultType;
  291. }
  292. public void setResultType(String resultType) {
  293. this.resultType = resultType;
  294. }
  295. public VectorUDFArgDesc[] getArgDescs() {
  296. return argDescs;
  297. }
  298. public void setArgDescs(VectorUDFArgDesc[] argDescs) {
  299. this.argDescs = argDescs;
  300. }
  301. public ExprNodeGenericFuncDesc getExpr() {
  302. return expr;
  303. }
  304. public void setExpr(ExprNodeGenericFuncDesc expr) {
  305. this.expr = expr;
  306. }
  307. @Override
  308. public VectorExpressionDescriptor.Descriptor getDescriptor() {
  309. return (new VectorExpressionDescriptor.Builder()).build();
  310. }
  311. }