PageRenderTime 45ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/release-0.1-rc2/hive/external/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java

#
Java | 214 lines | 106 code | 15 blank | 93 comment | 26 complexity | 31914d86f7b38b8327c9864bd63c89bd MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.serde2.lazy;
  19. import java.io.IOException;
  20. import java.io.OutputStream;
  21. import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector;
  22. import org.apache.hadoop.io.IntWritable;
  23. /**
  24. * LazyObject for storing a value of Integer.
  25. *
  26. * <p>
  27. * Part of the code is adapted from Apache Harmony Project.
  28. *
  29. * As with the specification, this implementation relied on code laid out in <a
  30. * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
  31. * Delight, (Addison Wesley, 2002)</a> as well as <a
  32. * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
  33. * </p>
  34. *
  35. */
  36. public class LazyInteger extends
  37. LazyPrimitive<LazyIntObjectInspector, IntWritable> {
  38. public LazyInteger(LazyIntObjectInspector oi) {
  39. super(oi);
  40. data = new IntWritable();
  41. }
  42. public LazyInteger(LazyInteger copy) {
  43. super(copy);
  44. data = new IntWritable(copy.data.get());
  45. }
  46. @Override
  47. public void init(ByteArrayRef bytes, int start, int length) {
  48. try {
  49. data.set(parseInt(bytes.getData(), start, length, 10));
  50. isNull = false;
  51. } catch (NumberFormatException e) {
  52. isNull = true;
  53. }
  54. }
  55. /**
  56. * Parses the string argument as if it was an int value and returns the
  57. * result. Throws NumberFormatException if the string does not represent an
  58. * int quantity.
  59. *
  60. * @param bytes
  61. * @param start
  62. * @param length
  63. * a UTF-8 encoded string representation of an int quantity.
  64. * @return int the value represented by the argument
  65. * @exception NumberFormatException
  66. * if the argument could not be parsed as an int quantity.
  67. */
  68. public static int parseInt(byte[] bytes, int start, int length) {
  69. return parseInt(bytes, start, length, 10);
  70. }
  71. /**
  72. * Parses the string argument as if it was an int value and returns the
  73. * result. Throws NumberFormatException if the string does not represent an
  74. * int quantity. The second argument specifies the radix to use when parsing
  75. * the value.
  76. *
  77. * @param bytes
  78. * @param start
  79. * @param length
  80. * a UTF-8 encoded string representation of an int quantity.
  81. * @param radix
  82. * the base to use for conversion.
  83. * @return the value represented by the argument
  84. * @exception NumberFormatException
  85. * if the argument could not be parsed as an int quantity.
  86. */
  87. public static int parseInt(byte[] bytes, int start, int length, int radix) {
  88. if (bytes == null) {
  89. throw new NumberFormatException("String is null");
  90. }
  91. if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX) {
  92. throw new NumberFormatException("Invalid radix: " + radix);
  93. }
  94. if (length == 0) {
  95. throw new NumberFormatException("Empty string!");
  96. }
  97. int offset = start;
  98. boolean negative = bytes[start] == '-';
  99. if (negative || bytes[start] == '+') {
  100. offset++;
  101. if (length == 1) {
  102. throw new NumberFormatException(LazyUtils.convertToString(bytes, start,
  103. length));
  104. }
  105. }
  106. return parse(bytes, start, length, offset, radix, negative);
  107. }
  108. /**
  109. *
  110. * @param bytes
  111. * @param start
  112. * @param length
  113. * a UTF-8 encoded string representation of an int quantity.
  114. * @param radix
  115. * the base to use for conversion.
  116. * @param offset
  117. * the starting position after the sign (if exists)
  118. * @param radix
  119. * the base to use for conversion.
  120. * @param negative
  121. * whether the number is negative.
  122. * @return the value represented by the argument
  123. * @exception NumberFormatException
  124. * if the argument could not be parsed as an int quantity.
  125. */
  126. private static int parse(byte[] bytes, int start, int length, int offset,
  127. int radix, boolean negative) {
  128. int max = Integer.MIN_VALUE / radix;
  129. int result = 0, end = start + length;
  130. while (offset < end) {
  131. int digit = LazyUtils.digit(bytes[offset++], radix);
  132. if (digit == -1) {
  133. throw new NumberFormatException(LazyUtils.convertToString(bytes, start,
  134. length));
  135. }
  136. if (max > result) {
  137. throw new NumberFormatException(LazyUtils.convertToString(bytes, start,
  138. length));
  139. }
  140. int next = result * radix - digit;
  141. if (next > result) {
  142. throw new NumberFormatException(LazyUtils.convertToString(bytes, start,
  143. length));
  144. }
  145. result = next;
  146. }
  147. if (!negative) {
  148. result = -result;
  149. if (result < 0) {
  150. throw new NumberFormatException(LazyUtils.convertToString(bytes, start,
  151. length));
  152. }
  153. }
  154. return result;
  155. }
  156. /**
  157. * Writes out the text representation of an integer using base 10 to an
  158. * OutputStream in UTF-8 encoding.
  159. *
  160. * Note: division by a constant (like 10) is much faster than division by a
  161. * variable. That's one of the reasons that we don't make radix a parameter
  162. * here.
  163. *
  164. * @param out
  165. * the outputstream to write to
  166. * @param i
  167. * an int to write out
  168. * @throws IOException
  169. */
  170. public static void writeUTF8(OutputStream out, int i) throws IOException {
  171. if (i == 0) {
  172. out.write('0');
  173. return;
  174. }
  175. boolean negative = i < 0;
  176. if (negative) {
  177. out.write('-');
  178. } else {
  179. // negative range is bigger than positive range, so there is no risk
  180. // of overflow here.
  181. i = -i;
  182. }
  183. int start = 1000000000;
  184. while (i / start == 0) {
  185. start /= 10;
  186. }
  187. while (start > 0) {
  188. out.write('0' - (i / start % 10));
  189. start /= 10;
  190. }
  191. }
  192. public static void writeUTF8NoException(OutputStream out, int i) {
  193. try {
  194. writeUTF8(out, i);
  195. } catch (IOException e) {
  196. throw new RuntimeException(e);
  197. }
  198. }
  199. }