PageRenderTime 38ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/release-0.0.0-rc0/hive/external/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesInput.java

#
Java | 533 lines | 286 code | 40 blank | 207 comment | 92 complexity | d71f330de7bae6faefd5e77924a43376 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, JSON, CPL-1.0
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.contrib.util.typedbytes;
  19. import java.io.DataInput;
  20. import java.io.EOFException;
  21. import java.io.IOException;
  22. import java.util.ArrayList;
  23. import java.util.List;
  24. import java.util.TreeMap;
  25. import org.apache.hadoop.io.WritableUtils;
  26. import org.apache.hadoop.record.Buffer;
  27. /**
  28. * Provides functionality for reading typed bytes.
  29. */
  30. public class TypedBytesInput {
  31. private DataInput in;
  32. private TypedBytesInput() {
  33. }
  34. private void setDataInput(DataInput in) {
  35. this.in = in;
  36. }
  37. private static ThreadLocal tbIn = new ThreadLocal() {
  38. @Override
  39. protected synchronized Object initialValue() {
  40. return new TypedBytesInput();
  41. }
  42. };
  43. /**
  44. * Get a thread-local typed bytes input for the supplied {@link DataInput}.
  45. *
  46. * @param in
  47. * data input object
  48. * @return typed bytes input corresponding to the supplied {@link DataInput}.
  49. */
  50. public static TypedBytesInput get(DataInput in) {
  51. TypedBytesInput bin = (TypedBytesInput) tbIn.get();
  52. bin.setDataInput(in);
  53. return bin;
  54. }
  55. /** Creates a new instance of TypedBytesInput. */
  56. public TypedBytesInput(DataInput in) {
  57. this.in = in;
  58. }
  59. /**
  60. * Reads a typed bytes sequence and converts it to a Java object. The first
  61. * byte is interpreted as a type code, and then the right number of subsequent
  62. * bytes are read depending on the obtained type.
  63. *
  64. * @return the obtained object or null when the end of the file is reached
  65. * @throws IOException
  66. */
  67. public Object read() throws IOException {
  68. int code = 1;
  69. try {
  70. code = in.readUnsignedByte();
  71. } catch (EOFException eof) {
  72. return null;
  73. }
  74. if (code == Type.BYTES.code) {
  75. return new Buffer(readBytes());
  76. } else if (code == Type.BYTE.code) {
  77. return readByte();
  78. } else if (code == Type.BOOL.code) {
  79. return readBool();
  80. } else if (code == Type.INT.code) {
  81. return readInt();
  82. } else if (code == Type.SHORT.code) {
  83. return readShort();
  84. } else if (code == Type.LONG.code) {
  85. return readLong();
  86. } else if (code == Type.FLOAT.code) {
  87. return readFloat();
  88. } else if (code == Type.DOUBLE.code) {
  89. return readDouble();
  90. } else if (code == Type.STRING.code) {
  91. return readString();
  92. } else if (code == Type.VECTOR.code) {
  93. return readVector();
  94. } else if (code == Type.LIST.code) {
  95. return readList();
  96. } else if (code == Type.MAP.code) {
  97. return readMap();
  98. } else if (code == Type.MARKER.code) {
  99. return null;
  100. } else if (50 <= code && code <= 200) { // application-specific typecodes
  101. return new Buffer(readBytes());
  102. } else {
  103. throw new RuntimeException("unknown type");
  104. }
  105. }
  106. /**
  107. * Reads a typed bytes sequence. The first byte is interpreted as a type code,
  108. * and then the right number of subsequent bytes are read depending on the
  109. * obtained type.
  110. *
  111. * @return the obtained typed bytes sequence or null when the end of the file
  112. * is reached
  113. * @throws IOException
  114. */
  115. public byte[] readRaw() throws IOException {
  116. int code = -1;
  117. try {
  118. code = in.readUnsignedByte();
  119. } catch (EOFException eof) {
  120. return null;
  121. }
  122. if (code == Type.BYTES.code) {
  123. return readRawBytes();
  124. } else if (code == Type.BYTE.code) {
  125. return readRawByte();
  126. } else if (code == Type.BOOL.code) {
  127. return readRawBool();
  128. } else if (code == Type.INT.code) {
  129. return readRawInt();
  130. } else if (code == Type.LONG.code) {
  131. return readRawLong();
  132. } else if (code == Type.FLOAT.code) {
  133. return readRawFloat();
  134. } else if (code == Type.DOUBLE.code) {
  135. return readRawDouble();
  136. } else if (code == Type.STRING.code) {
  137. return readRawString();
  138. } else if (code == Type.VECTOR.code) {
  139. return readRawVector();
  140. } else if (code == Type.LIST.code) {
  141. return readRawList();
  142. } else if (code == Type.MAP.code) {
  143. return readRawMap();
  144. } else if (code == Type.MARKER.code) {
  145. return null;
  146. } else if (50 <= code && code <= 200) { // application-specific typecodes
  147. return readRawBytes();
  148. } else {
  149. throw new RuntimeException("unknown type");
  150. }
  151. }
  152. /**
  153. * Reads a type byte and returns the corresponding {@link Type}.
  154. *
  155. * @return the obtained Type or null when the end of the file is reached
  156. * @throws IOException
  157. */
  158. public Type readType() throws IOException {
  159. int code = -1;
  160. try {
  161. code = in.readUnsignedByte();
  162. } catch (EOFException eof) {
  163. return null;
  164. }
  165. for (Type type : Type.values()) {
  166. if (type.code == code) {
  167. return type;
  168. }
  169. }
  170. return null;
  171. }
  172. /**
  173. * Skips a type byte.
  174. *
  175. * @return true iff the end of the file was not reached
  176. * @throws IOException
  177. */
  178. public boolean skipType() throws IOException {
  179. try {
  180. in.readByte();
  181. return true;
  182. } catch (EOFException eof) {
  183. return false;
  184. }
  185. }
  186. /**
  187. * Reads the bytes following a <code>Type.BYTES</code> code.
  188. *
  189. * @return the obtained bytes sequence
  190. * @throws IOException
  191. */
  192. public byte[] readBytes() throws IOException {
  193. int length = in.readInt();
  194. byte[] bytes = new byte[length];
  195. in.readFully(bytes);
  196. return bytes;
  197. }
  198. /**
  199. * Reads the raw bytes following a <code>Type.BYTES</code> code.
  200. *
  201. * @return the obtained bytes sequence
  202. * @throws IOException
  203. */
  204. public byte[] readRawBytes() throws IOException {
  205. int length = in.readInt();
  206. byte[] bytes = new byte[5 + length];
  207. bytes[0] = (byte) Type.BYTES.code;
  208. bytes[1] = (byte) (0xff & (length >> 24));
  209. bytes[2] = (byte) (0xff & (length >> 16));
  210. bytes[3] = (byte) (0xff & (length >> 8));
  211. bytes[4] = (byte) (0xff & length);
  212. in.readFully(bytes, 5, length);
  213. return bytes;
  214. }
  215. /**
  216. * Reads the byte following a <code>Type.BYTE</code> code.
  217. *
  218. * @return the obtained byte
  219. * @throws IOException
  220. */
  221. public byte readByte() throws IOException {
  222. return in.readByte();
  223. }
  224. /**
  225. * Reads the raw byte following a <code>Type.BYTE</code> code.
  226. *
  227. * @return the obtained byte
  228. * @throws IOException
  229. */
  230. public byte[] readRawByte() throws IOException {
  231. byte[] bytes = new byte[2];
  232. bytes[0] = (byte) Type.BYTE.code;
  233. in.readFully(bytes, 1, 1);
  234. return bytes;
  235. }
  236. /**
  237. * Reads the boolean following a <code>Type.BOOL</code> code.
  238. *
  239. * @return the obtained boolean
  240. * @throws IOException
  241. */
  242. public boolean readBool() throws IOException {
  243. return in.readBoolean();
  244. }
  245. /**
  246. * Reads the raw bytes following a <code>Type.BOOL</code> code.
  247. *
  248. * @return the obtained bytes sequence
  249. * @throws IOException
  250. */
  251. public byte[] readRawBool() throws IOException {
  252. byte[] bytes = new byte[2];
  253. bytes[0] = (byte) Type.BOOL.code;
  254. in.readFully(bytes, 1, 1);
  255. return bytes;
  256. }
  257. /**
  258. * Reads the integer following a <code>Type.INT</code> code.
  259. *
  260. * @return the obtained integer
  261. * @throws IOException
  262. */
  263. public int readInt() throws IOException {
  264. return in.readInt();
  265. }
  266. /**
  267. * Reads the short following a <code>Type.SHORT</code> code.
  268. *
  269. * @return the obtained short
  270. * @throws IOException
  271. */
  272. public short readShort() throws IOException {
  273. return in.readShort();
  274. }
  275. /**
  276. * Reads the raw bytes following a <code>Type.INT</code> code.
  277. *
  278. * @return the obtained bytes sequence
  279. * @throws IOException
  280. */
  281. public byte[] readRawInt() throws IOException {
  282. byte[] bytes = new byte[5];
  283. bytes[0] = (byte) Type.INT.code;
  284. in.readFully(bytes, 1, 4);
  285. return bytes;
  286. }
  287. /**
  288. * Reads the long following a <code>Type.LONG</code> code.
  289. *
  290. * @return the obtained long
  291. * @throws IOException
  292. */
  293. public long readLong() throws IOException {
  294. return in.readLong();
  295. }
  296. /**
  297. * Reads the raw bytes following a <code>Type.LONG</code> code.
  298. *
  299. * @return the obtained bytes sequence
  300. * @throws IOException
  301. */
  302. public byte[] readRawLong() throws IOException {
  303. byte[] bytes = new byte[9];
  304. bytes[0] = (byte) Type.LONG.code;
  305. in.readFully(bytes, 1, 8);
  306. return bytes;
  307. }
  308. /**
  309. * Reads the float following a <code>Type.FLOAT</code> code.
  310. *
  311. * @return the obtained float
  312. * @throws IOException
  313. */
  314. public float readFloat() throws IOException {
  315. return in.readFloat();
  316. }
  317. /**
  318. * Reads the raw bytes following a <code>Type.FLOAT</code> code.
  319. *
  320. * @return the obtained bytes sequence
  321. * @throws IOException
  322. */
  323. public byte[] readRawFloat() throws IOException {
  324. byte[] bytes = new byte[5];
  325. bytes[0] = (byte) Type.FLOAT.code;
  326. in.readFully(bytes, 1, 4);
  327. return bytes;
  328. }
  329. /**
  330. * Reads the double following a <code>Type.DOUBLE</code> code.
  331. *
  332. * @return the obtained double
  333. * @throws IOException
  334. */
  335. public double readDouble() throws IOException {
  336. return in.readDouble();
  337. }
  338. /**
  339. * Reads the raw bytes following a <code>Type.DOUBLE</code> code.
  340. *
  341. * @return the obtained bytes sequence
  342. * @throws IOException
  343. */
  344. public byte[] readRawDouble() throws IOException {
  345. byte[] bytes = new byte[9];
  346. bytes[0] = (byte) Type.DOUBLE.code;
  347. in.readFully(bytes, 1, 8);
  348. return bytes;
  349. }
  350. /**
  351. * Reads the string following a <code>Type.STRING</code> code.
  352. *
  353. * @return the obtained string
  354. * @throws IOException
  355. */
  356. public String readString() throws IOException {
  357. return WritableUtils.readString(in);
  358. }
  359. /**
  360. * Reads the raw bytes following a <code>Type.STRING</code> code.
  361. *
  362. * @return the obtained bytes sequence
  363. * @throws IOException
  364. */
  365. public byte[] readRawString() throws IOException {
  366. int length = in.readInt();
  367. byte[] bytes = new byte[5 + length];
  368. bytes[0] = (byte) Type.STRING.code;
  369. bytes[1] = (byte) (0xff & (length >> 24));
  370. bytes[2] = (byte) (0xff & (length >> 16));
  371. bytes[3] = (byte) (0xff & (length >> 8));
  372. bytes[4] = (byte) (0xff & length);
  373. in.readFully(bytes, 5, length);
  374. return bytes;
  375. }
  376. /**
  377. * Reads the vector following a <code>Type.VECTOR</code> code.
  378. *
  379. * @return the obtained vector
  380. * @throws IOException
  381. */
  382. @SuppressWarnings("unchecked")
  383. public ArrayList readVector() throws IOException {
  384. int length = readVectorHeader();
  385. ArrayList result = new ArrayList(length);
  386. for (int i = 0; i < length; i++) {
  387. result.add(read());
  388. }
  389. return result;
  390. }
  391. /**
  392. * Reads the raw bytes following a <code>Type.VECTOR</code> code.
  393. *
  394. * @return the obtained bytes sequence
  395. * @throws IOException
  396. */
  397. public byte[] readRawVector() throws IOException {
  398. Buffer buffer = new Buffer();
  399. int length = readVectorHeader();
  400. buffer.append(new byte[] {(byte) Type.VECTOR.code,
  401. (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)),
  402. (byte) (0xff & (length >> 8)), (byte) (0xff & length)});
  403. for (int i = 0; i < length; i++) {
  404. buffer.append(readRaw());
  405. }
  406. return buffer.get();
  407. }
  408. /**
  409. * Reads the header following a <code>Type.VECTOR</code> code.
  410. *
  411. * @return the number of elements in the vector
  412. * @throws IOException
  413. */
  414. public int readVectorHeader() throws IOException {
  415. return in.readInt();
  416. }
  417. /**
  418. * Reads the list following a <code>Type.LIST</code> code.
  419. *
  420. * @return the obtained list
  421. * @throws IOException
  422. */
  423. @SuppressWarnings("unchecked")
  424. public List readList() throws IOException {
  425. List list = new ArrayList();
  426. Object obj = read();
  427. while (obj != null) {
  428. list.add(obj);
  429. obj = read();
  430. }
  431. return list;
  432. }
  433. /**
  434. * Reads the raw bytes following a <code>Type.LIST</code> code.
  435. *
  436. * @return the obtained bytes sequence
  437. * @throws IOException
  438. */
  439. public byte[] readRawList() throws IOException {
  440. Buffer buffer = new Buffer(new byte[] {(byte) Type.LIST.code});
  441. byte[] bytes = readRaw();
  442. while (bytes != null) {
  443. buffer.append(bytes);
  444. bytes = readRaw();
  445. }
  446. buffer.append(new byte[] {(byte) Type.MARKER.code});
  447. return buffer.get();
  448. }
  449. /**
  450. * Reads the map following a <code>Type.MAP</code> code.
  451. *
  452. * @return the obtained map
  453. * @throws IOException
  454. */
  455. @SuppressWarnings("unchecked")
  456. public TreeMap readMap() throws IOException {
  457. int length = readMapHeader();
  458. TreeMap result = new TreeMap();
  459. for (int i = 0; i < length; i++) {
  460. Object key = read();
  461. Object value = read();
  462. result.put(key, value);
  463. }
  464. return result;
  465. }
  466. /**
  467. * Reads the raw bytes following a <code>Type.MAP</code> code.
  468. *
  469. * @return the obtained bytes sequence
  470. * @throws IOException
  471. */
  472. public byte[] readRawMap() throws IOException {
  473. Buffer buffer = new Buffer();
  474. int length = readMapHeader();
  475. buffer.append(new byte[] {(byte) Type.MAP.code,
  476. (byte) (0xff & (length >> 24)), (byte) (0xff & (length >> 16)),
  477. (byte) (0xff & (length >> 8)), (byte) (0xff & length)});
  478. for (int i = 0; i < length; i++) {
  479. buffer.append(readRaw());
  480. buffer.append(readRaw());
  481. }
  482. return buffer.get();
  483. }
  484. /**
  485. * Reads the header following a <code>Type.MAP</code> code.
  486. *
  487. * @return the number of key-value pairs in the map
  488. * @throws IOException
  489. */
  490. public int readMapHeader() throws IOException {
  491. return in.readInt();
  492. }
  493. }