PageRenderTime 58ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

https://github.com/rbodkin/hadoop-common
Java | 414 lines | 280 code | 44 blank | 90 comment | 38 complexity | 4e16009fbf89274ea5da1679fb4bf69e MD5 | raw file
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.util;
  19. import java.io.DataInputStream;
  20. import java.io.DataOutputStream;
  21. import java.io.IOException;
  22. import java.nio.ByteBuffer;
  23. import java.util.zip.Checksum;
  24. import org.apache.hadoop.classification.InterfaceAudience;
  25. import org.apache.hadoop.classification.InterfaceStability;
  26. import org.apache.hadoop.fs.ChecksumException;
  27. /**
  28. * This class provides inteface and utilities for processing checksums for
  29. * DFS data transfers.
  30. */
  31. @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  32. @InterfaceStability.Evolving
  33. public class DataChecksum implements Checksum {
  34. // Misc constants
  35. public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
  36. // checksum types
  37. public static final int CHECKSUM_NULL = 0;
  38. public static final int CHECKSUM_CRC32 = 1;
  39. public static final int CHECKSUM_CRC32C = 2;
  40. private static final int CHECKSUM_NULL_SIZE = 0;
  41. private static final int CHECKSUM_CRC32_SIZE = 4;
  42. private static final int CHECKSUM_CRC32C_SIZE = 4;
  43. public static DataChecksum newDataChecksum( int type, int bytesPerChecksum ) {
  44. if ( bytesPerChecksum <= 0 ) {
  45. return null;
  46. }
  47. switch ( type ) {
  48. case CHECKSUM_NULL :
  49. return new DataChecksum( CHECKSUM_NULL, new ChecksumNull(),
  50. CHECKSUM_NULL_SIZE, bytesPerChecksum );
  51. case CHECKSUM_CRC32 :
  52. return new DataChecksum( CHECKSUM_CRC32, new PureJavaCrc32(),
  53. CHECKSUM_CRC32_SIZE, bytesPerChecksum );
  54. case CHECKSUM_CRC32C:
  55. return new DataChecksum( CHECKSUM_CRC32C, new PureJavaCrc32C(),
  56. CHECKSUM_CRC32C_SIZE, bytesPerChecksum);
  57. default:
  58. return null;
  59. }
  60. }
  61. /**
  62. * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
  63. * @return DataChecksum of the type in the array or null in case of an error.
  64. */
  65. public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
  66. if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
  67. return null;
  68. }
  69. // like readInt():
  70. int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) |
  71. ( (bytes[offset+2] & 0xff) << 16 ) |
  72. ( (bytes[offset+3] & 0xff) << 8 ) |
  73. ( (bytes[offset+4] & 0xff) );
  74. return newDataChecksum( bytes[0], bytesPerChecksum );
  75. }
  76. /**
  77. * This constructucts a DataChecksum by reading HEADER_LEN bytes from
  78. * input stream <i>in</i>
  79. */
  80. public static DataChecksum newDataChecksum( DataInputStream in )
  81. throws IOException {
  82. int type = in.readByte();
  83. int bpc = in.readInt();
  84. DataChecksum summer = newDataChecksum( type, bpc );
  85. if ( summer == null ) {
  86. throw new IOException( "Could not create DataChecksum of type " +
  87. type + " with bytesPerChecksum " + bpc );
  88. }
  89. return summer;
  90. }
  91. /**
  92. * Writes the checksum header to the output stream <i>out</i>.
  93. */
  94. public void writeHeader( DataOutputStream out )
  95. throws IOException {
  96. out.writeByte( type );
  97. out.writeInt( bytesPerChecksum );
  98. }
  99. public byte[] getHeader() {
  100. byte[] header = new byte[DataChecksum.HEADER_LEN];
  101. header[0] = (byte) (type & 0xff);
  102. // Writing in buffer just like DataOutput.WriteInt()
  103. header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
  104. header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
  105. header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
  106. header[1+3] = (byte) (bytesPerChecksum & 0xff);
  107. return header;
  108. }
  109. /**
  110. * Writes the current checksum to the stream.
  111. * If <i>reset</i> is true, then resets the checksum.
  112. * @return number of bytes written. Will be equal to getChecksumSize();
  113. */
  114. public int writeValue( DataOutputStream out, boolean reset )
  115. throws IOException {
  116. if ( size <= 0 ) {
  117. return 0;
  118. }
  119. if ( size == 4 ) {
  120. out.writeInt( (int) summer.getValue() );
  121. } else {
  122. throw new IOException( "Unknown Checksum " + type );
  123. }
  124. if ( reset ) {
  125. reset();
  126. }
  127. return size;
  128. }
  129. /**
  130. * Writes the current checksum to a buffer.
  131. * If <i>reset</i> is true, then resets the checksum.
  132. * @return number of bytes written. Will be equal to getChecksumSize();
  133. */
  134. public int writeValue( byte[] buf, int offset, boolean reset )
  135. throws IOException {
  136. if ( size <= 0 ) {
  137. return 0;
  138. }
  139. if ( size == 4 ) {
  140. int checksum = (int) summer.getValue();
  141. buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
  142. buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
  143. buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
  144. buf[offset+3] = (byte) (checksum & 0xff);
  145. } else {
  146. throw new IOException( "Unknown Checksum " + type );
  147. }
  148. if ( reset ) {
  149. reset();
  150. }
  151. return size;
  152. }
  153. /**
  154. * Compares the checksum located at buf[offset] with the current checksum.
  155. * @return true if the checksum matches and false otherwise.
  156. */
  157. public boolean compare( byte buf[], int offset ) {
  158. if ( size == 4 ) {
  159. int checksum = ( (buf[offset+0] & 0xff) << 24 ) |
  160. ( (buf[offset+1] & 0xff) << 16 ) |
  161. ( (buf[offset+2] & 0xff) << 8 ) |
  162. ( (buf[offset+3] & 0xff) );
  163. return checksum == (int) summer.getValue();
  164. }
  165. return size == 0;
  166. }
  167. private final int type;
  168. private final int size;
  169. private final Checksum summer;
  170. private final int bytesPerChecksum;
  171. private int inSum = 0;
  172. private DataChecksum( int checksumType, Checksum checksum,
  173. int sumSize, int chunkSize ) {
  174. type = checksumType;
  175. summer = checksum;
  176. size = sumSize;
  177. bytesPerChecksum = chunkSize;
  178. }
  179. // Accessors
  180. public int getChecksumType() {
  181. return type;
  182. }
  183. public int getChecksumSize() {
  184. return size;
  185. }
  186. public int getBytesPerChecksum() {
  187. return bytesPerChecksum;
  188. }
  189. public int getNumBytesInSum() {
  190. return inSum;
  191. }
  192. public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
  193. static public int getChecksumHeaderSize() {
  194. return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
  195. }
  196. //Checksum Interface. Just a wrapper around member summer.
  197. public long getValue() {
  198. return summer.getValue();
  199. }
  200. public void reset() {
  201. summer.reset();
  202. inSum = 0;
  203. }
  204. public void update( byte[] b, int off, int len ) {
  205. if ( len > 0 ) {
  206. summer.update( b, off, len );
  207. inSum += len;
  208. }
  209. }
  210. public void update( int b ) {
  211. summer.update( b );
  212. inSum += 1;
  213. }
  214. /**
  215. * Verify that the given checksums match the given data.
  216. *
  217. * The 'mark' of the ByteBuffer parameters may be modified by this function,.
  218. * but the position is maintained.
  219. *
  220. * @param data the DirectByteBuffer pointing to the data to verify.
  221. * @param checksums the DirectByteBuffer pointing to a series of stored
  222. * checksums
  223. * @param fileName the name of the file being read, for error-reporting
  224. * @param basePos the file position to which the start of 'data' corresponds
  225. * @throws ChecksumException if the checksums do not match
  226. */
  227. public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
  228. String fileName, long basePos)
  229. throws ChecksumException {
  230. if (size == 0) return;
  231. if (data.hasArray() && checksums.hasArray()) {
  232. verifyChunkedSums(
  233. data.array(), data.arrayOffset() + data.position(), data.remaining(),
  234. checksums.array(), checksums.arrayOffset() + checksums.position(),
  235. fileName, basePos);
  236. return;
  237. }
  238. if (NativeCrc32.isAvailable()) {
  239. NativeCrc32.verifyChunkedSums(bytesPerChecksum, type, checksums, data,
  240. fileName, basePos);
  241. return;
  242. }
  243. int startDataPos = data.position();
  244. data.mark();
  245. checksums.mark();
  246. try {
  247. byte[] buf = new byte[bytesPerChecksum];
  248. byte[] sum = new byte[size];
  249. while (data.remaining() > 0) {
  250. int n = Math.min(data.remaining(), bytesPerChecksum);
  251. checksums.get(sum);
  252. data.get(buf, 0, n);
  253. summer.reset();
  254. summer.update(buf, 0, n);
  255. int calculated = (int)summer.getValue();
  256. int stored = (sum[0] << 24 & 0xff000000) |
  257. (sum[1] << 16 & 0xff0000) |
  258. (sum[2] << 8 & 0xff00) |
  259. sum[3] & 0xff;
  260. if (calculated != stored) {
  261. long errPos = basePos + data.position() - startDataPos - n;
  262. throw new ChecksumException(
  263. "Checksum error: "+ fileName + " at "+ errPos +
  264. " exp: " + stored + " got: " + calculated, errPos);
  265. }
  266. }
  267. } finally {
  268. data.reset();
  269. checksums.reset();
  270. }
  271. }
  272. /**
  273. * Implementation of chunked verification specifically on byte arrays. This
  274. * is to avoid the copy when dealing with ByteBuffers that have array backing.
  275. */
  276. private void verifyChunkedSums(
  277. byte[] data, int dataOff, int dataLen,
  278. byte[] checksums, int checksumsOff, String fileName,
  279. long basePos) throws ChecksumException {
  280. int remaining = dataLen;
  281. int dataPos = 0;
  282. while (remaining > 0) {
  283. int n = Math.min(remaining, bytesPerChecksum);
  284. summer.reset();
  285. summer.update(data, dataOff + dataPos, n);
  286. dataPos += n;
  287. remaining -= n;
  288. int calculated = (int)summer.getValue();
  289. int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
  290. (checksums[checksumsOff + 1] << 16 & 0xff0000) |
  291. (checksums[checksumsOff + 2] << 8 & 0xff00) |
  292. checksums[checksumsOff + 3] & 0xff;
  293. checksumsOff += 4;
  294. if (calculated != stored) {
  295. long errPos = basePos + dataPos - n;
  296. throw new ChecksumException(
  297. "Checksum error: "+ fileName + " at "+ errPos +
  298. " exp: " + stored + " got: " + calculated, errPos);
  299. }
  300. }
  301. }
  302. /**
  303. * Calculate checksums for the given data.
  304. *
  305. * The 'mark' of the ByteBuffer parameters may be modified by this function,
  306. * but the position is maintained.
  307. *
  308. * @param data the DirectByteBuffer pointing to the data to checksum.
  309. * @param checksums the DirectByteBuffer into which checksums will be
  310. * stored. Enough space must be available in this
  311. * buffer to put the checksums.
  312. */
  313. public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
  314. if (size == 0) return;
  315. if (data.hasArray() && checksums.hasArray()) {
  316. calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
  317. checksums.array(), checksums.arrayOffset() + checksums.position());
  318. return;
  319. }
  320. data.mark();
  321. checksums.mark();
  322. try {
  323. byte[] buf = new byte[bytesPerChecksum];
  324. while (data.remaining() > 0) {
  325. int n = Math.min(data.remaining(), bytesPerChecksum);
  326. data.get(buf, 0, n);
  327. summer.reset();
  328. summer.update(buf, 0, n);
  329. checksums.putInt((int)summer.getValue());
  330. }
  331. } finally {
  332. data.reset();
  333. checksums.reset();
  334. }
  335. }
  336. /**
  337. * Implementation of chunked calculation specifically on byte arrays. This
  338. * is to avoid the copy when dealing with ByteBuffers that have array backing.
  339. */
  340. private void calculateChunkedSums(
  341. byte[] data, int dataOffset, int dataLength,
  342. byte[] sums, int sumsOffset) {
  343. int remaining = dataLength;
  344. while (remaining > 0) {
  345. int n = Math.min(remaining, bytesPerChecksum);
  346. summer.reset();
  347. summer.update(data, dataOffset, n);
  348. dataOffset += n;
  349. remaining -= n;
  350. long calculated = summer.getValue();
  351. sums[sumsOffset++] = (byte) (calculated >> 24);
  352. sums[sumsOffset++] = (byte) (calculated >> 16);
  353. sums[sumsOffset++] = (byte) (calculated >> 8);
  354. sums[sumsOffset++] = (byte) (calculated);
  355. }
  356. }
  357. /**
  358. * This just provides a dummy implimentation for Checksum class
  359. * This is used when there is no checksum available or required for
  360. * data
  361. */
  362. static class ChecksumNull implements Checksum {
  363. public ChecksumNull() {}
  364. //Dummy interface
  365. public long getValue() { return 0; }
  366. public void reset() {}
  367. public void update(byte[] b, int off, int len) {}
  368. public void update(int b) {}
  369. };
  370. }