PageRenderTime 52ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/arch/src/org/arch/misc/crypto/base64/Base64.java

http://snova.googlecode.com/
Java | 771 lines | 362 code | 88 blank | 321 comment | 128 complexity | 98501a0c521127b78ea366b234a49ca3 MD5 | raw file
Possible License(s): GPL-3.0
  1. package org.arch.misc.crypto.base64;
  2. import java.util.Arrays;
  3. /**
  4. * A very fast and memory efficient class to encode and decode to and from
  5. * BASE64 in full accordance with RFC 2045.<br>
  6. * <br>
  7. * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is
  8. * about 10 times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
  9. * on larger arrays (10000 - 1000000 bytes) compared to
  10. * <code>sun.misc.Encoder()/Decoder()</code>.<br>
  11. * <br>
  12. *
  13. * On byte arrays the encoder is about 20% faster than Jakarta Commons Base64
  14. * Codec for encode and about 50% faster for decoding large arrays. This
  15. * implementation is about twice as fast on very small arrays (&lt; 30 bytes). If
  16. * source/destination is a <code>String</code> this version is about three times
  17. * as fast due to the fact that the Commons Codec result has to be recoded to a
  18. * <code>String</code> from <code>byte[]</code>, which is very expensive.<br>
  19. * <br>
  20. *
  21. * This encode/decode algorithm doesn't create any temporary arrays as many
  22. * other codecs do, it only allocates the resulting array. This produces less
  23. * garbage and it is possible to handle arrays twice as large as algorithms that
  24. * create a temporary array. (E.g. Jakarta Commons Codec). It is unknown whether
  25. * Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but
  26. * since performance is quite low it probably does.<br>
  27. * <br>
  28. *
  29. * The encoder produces the same output as the Sun one except that the Sun's
  30. * encoder appends a trailing line separator if the last character isn't a pad.
  31. * Unclear why but it only adds to the length and is probably a side effect.
  32. * Both are in conformance with RFC 2045 though.<br>
  33. * Commons Codec seems to always add a trailing line separator.<br>
  34. * <br>
  35. *
  36. * <b>Note!</b> The encode/decode method pairs (types) come in three versions
  37. * with the <b>exact</b> same algorithm and thus a lot of code redundancy. This
  38. * is to not create any temporary arrays for transcoding to/from different
  39. * format types. The methods not used can simply be commented out.<br>
  40. * <br>
  41. *
  42. * There is also a "fast" version of all decode methods that works the same way
  43. * as the normal ones, but has a few demands on the decoded input. Normally
  44. * though, these fast versions should be used if the source of the input is known
  45. * and it hasn't been tampered with.<br>
  46. * <br>
  47. *
  48. * If you find the code useful or you find a bug, please send me a note at
  49. * base64 @ miginfocom . com.
  50. *
  51. * Licence (BSD): ==============
  52. *
  53. * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
  54. * All rights reserved.
  55. *
  56. * Redistribution and use in source and binary forms, with or without
  57. * modification, are permitted provided that the following conditions are met:
  58. * Redistributions of source code must retain the above copyright notice, this
  59. * list of conditions and the following disclaimer. Redistributions in binary
  60. * form must reproduce the above copyright notice, this list of conditions and
  61. * the following disclaimer in the documentation and/or other materials provided
  62. * with the distribution. Neither the name of the MiG InfoCom AB nor the names
  63. * of its contributors may be used to endorse or promote products derived from
  64. * this software without specific prior written permission.
  65. *
  66. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  67. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  68. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  69. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  70. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  71. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  72. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  73. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  74. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  75. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  76. * POSSIBILITY OF SUCH DAMAGE.
  77. *
  78. * @version 2.2
  79. * @author Mikael Grev Date: 2004-aug-02 Time: 11:31:11
  80. */
  81. public class Base64
  82. {
  83. private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  84. .toCharArray();
  85. private static final int[] IA = new int[256];
  86. static
  87. {
  88. Arrays.fill(IA, -1);
  89. for (int i = 0, iS = CA.length; i < iS; i++)
  90. IA[CA[i]] = i;
  91. IA['='] = 0;
  92. }
  93. // ****************************************************************************************
  94. // * char[] version
  95. // ****************************************************************************************
  96. /**
  97. * Encodes a raw byte array into a BASE64 <code>char[]</code> representation
  98. * i accordance with RFC 2045.
  99. *
  100. * @param sArr
  101. * The bytes to convert. If <code>null</code> or length 0 an
  102. * empty array will be returned.
  103. * @param lineSep
  104. * Optional "\r\n" after 76 characters, unless end of file.<br>
  105. * No line separator will be in breach of RFC 2045 which
  106. * specifies max 76 per line but will be a little faster.
  107. * @return A BASE64 encoded array. Never <code>null</code>.
  108. */
  109. public final static char[] encodeToChar(byte[] sArr, boolean lineSep)
  110. {
  111. // Check special case
  112. int sLen = sArr != null ? sArr.length : 0;
  113. if (sLen == 0)
  114. return new char[0];
  115. int eLen = (sLen / 3) * 3; // Length of even 24-bits.
  116. int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
  117. int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of
  118. // returned
  119. // array
  120. char[] dArr = new char[dLen];
  121. // Encode even 24-bits
  122. for (int s = 0, d = 0, cc = 0; s < eLen;)
  123. {
  124. // Copy next three bytes into lower 24 bits of int, paying attension
  125. // to sign.
  126. int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
  127. | (sArr[s++] & 0xff);
  128. // Encode the int into four chars
  129. dArr[d++] = CA[(i >>> 18) & 0x3f];
  130. dArr[d++] = CA[(i >>> 12) & 0x3f];
  131. dArr[d++] = CA[(i >>> 6) & 0x3f];
  132. dArr[d++] = CA[i & 0x3f];
  133. // Add optional line separator
  134. if (lineSep && ++cc == 19 && d < dLen - 2)
  135. {
  136. dArr[d++] = '\r';
  137. dArr[d++] = '\n';
  138. cc = 0;
  139. }
  140. }
  141. // Pad and encode last bits if source isn't even 24 bits.
  142. int left = sLen - eLen; // 0 - 2.
  143. if (left > 0)
  144. {
  145. // Prepare the int
  146. int i = ((sArr[eLen] & 0xff) << 10)
  147. | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
  148. // Set last four chars
  149. dArr[dLen - 4] = CA[i >> 12];
  150. dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
  151. dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
  152. dArr[dLen - 1] = '=';
  153. }
  154. return dArr;
  155. }
  156. public final static char[] encodeToChar(byte[] sArr, int off, int len,
  157. boolean lineSep)
  158. {
  159. // Check special case
  160. int sLen = (sArr != null && len > 0) ? len : 0;
  161. if (sLen == 0)
  162. return new char[0];
  163. int eLen = (sLen / 3) * 3; // Length of even 24-bits.
  164. int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
  165. int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of
  166. // returned
  167. // array
  168. char[] dArr = new char[dLen];
  169. // Encode even 24-bits
  170. for (int s = 0, d = 0, cc = 0; s < eLen;)
  171. {
  172. // Copy next three bytes into lower 24 bits of int, paying attension
  173. // to sign.
  174. int i = (sArr[off + s++] & 0xff) << 16
  175. | (sArr[off + s++] & 0xff) << 8 | (sArr[off + s++] & 0xff);
  176. // Encode the int into four chars
  177. dArr[d++] = CA[(i >>> 18) & 0x3f];
  178. dArr[d++] = CA[(i >>> 12) & 0x3f];
  179. dArr[d++] = CA[(i >>> 6) & 0x3f];
  180. dArr[d++] = CA[i & 0x3f];
  181. // Add optional line separator
  182. if (lineSep && ++cc == 19 && d < dLen - 2)
  183. {
  184. dArr[d++] = '\r';
  185. dArr[d++] = '\n';
  186. cc = 0;
  187. }
  188. }
  189. // Pad and encode last bits if source isn't even 24 bits.
  190. int left = sLen - eLen; // 0 - 2.
  191. if (left > 0)
  192. {
  193. // Prepare the int
  194. int i = ((sArr[off + eLen] & 0xff) << 10)
  195. | (left == 2 ? ((sArr[off + sLen - 1] & 0xff) << 2) : 0);
  196. // Set last four chars
  197. dArr[dLen - 4] = CA[i >> 12];
  198. dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
  199. dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
  200. dArr[dLen - 1] = '=';
  201. }
  202. return dArr;
  203. }
  204. /**
  205. * Decodes a BASE64 encoded char array. All illegal characters will be
  206. * ignored and can handle both arrays with and without line separators.
  207. *
  208. * @param sArr
  209. * The source array. <code>null</code> or length 0 will return an
  210. * empty array.
  211. * @return The decoded array of bytes. May be of length 0. Will be
  212. * <code>null</code> if the legal characters (including '=') isn't
  213. * divideable by 4. (I.e. definitely corrupted).
  214. */
  215. public final static byte[] decode(char[] sArr)
  216. {
  217. // Check special case
  218. int sLen = sArr != null ? sArr.length : 0;
  219. if (sLen == 0)
  220. return new byte[0];
  221. // Count illegal characters (including '\r', '\n') to know what size the
  222. // returned array will be,
  223. // so we don't have to reallocate & copy it later.
  224. int sepCnt = 0; // Number of separator characters. (Actually illegal
  225. // characters, but that's a bonus...)
  226. for (int i = 0; i < sLen; i++)
  227. // If input is "pure" (I.e. no line separators or illegal chars)
  228. // base64 this loop can be commented out.
  229. if (IA[sArr[i]] < 0)
  230. sepCnt++;
  231. // Check so that legal chars (including '=') are evenly divideable by 4
  232. // as specified in RFC 2045.
  233. if ((sLen - sepCnt) % 4 != 0)
  234. return null;
  235. int pad = 0;
  236. for (int i = sLen; i > 1 && IA[sArr[--i]] <= 0;)
  237. if (sArr[i] == '=')
  238. pad++;
  239. int len = ((sLen - sepCnt) * 6 >> 3) - pad;
  240. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  241. for (int s = 0, d = 0; d < len;)
  242. {
  243. // Assemble three bytes into an int from four "valid" characters.
  244. int i = 0;
  245. for (int j = 0; j < 4; j++)
  246. { // j only increased if a valid char was found.
  247. int c = IA[sArr[s++]];
  248. if (c >= 0)
  249. i |= c << (18 - j * 6);
  250. else
  251. j--;
  252. }
  253. // Add the bytes
  254. dArr[d++] = (byte) (i >> 16);
  255. if (d < len)
  256. {
  257. dArr[d++] = (byte) (i >> 8);
  258. if (d < len)
  259. dArr[d++] = (byte) i;
  260. }
  261. }
  262. return dArr;
  263. }
  264. /**
  265. * Decodes a BASE64 encoded char array that is known to be resonably well
  266. * formatted. The method is about twice as fast as {@link #decode(char[])}.
  267. * The preconditions are:<br>
  268. * + The array must have a line length of 76 chars OR no line separators at
  269. * all (one line).<br>
  270. * + Line separator must be "\r\n", as specified in RFC 2045 + The array
  271. * must not contain illegal characters within the encoded string<br>
  272. * + The array CAN have illegal characters at the beginning and end, those
  273. * will be dealt with appropriately.<br>
  274. *
  275. * @param sArr
  276. * The source array. Length 0 will return an empty array.
  277. * <code>null</code> will throw an exception.
  278. * @return The decoded array of bytes. May be of length 0.
  279. */
  280. public final static byte[] decodeFast(char[] sArr)
  281. {
  282. // Check special case
  283. int sLen = sArr.length;
  284. if (sLen == 0)
  285. return new byte[0];
  286. int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
  287. // Trim illegal chars from start
  288. while (sIx < eIx && IA[sArr[sIx]] < 0)
  289. sIx++;
  290. // Trim illegal chars from end
  291. while (eIx > 0 && IA[sArr[eIx]] < 0)
  292. eIx--;
  293. // get the padding count (=) (0, 1 or 2)
  294. int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count
  295. // '='
  296. // at
  297. // end.
  298. int cCnt = eIx - sIx + 1; // Content count including possible separators
  299. int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
  300. int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
  301. // bytes
  302. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  303. // Decode all but the last 0 - 2 bytes.
  304. int d = 0;
  305. for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
  306. {
  307. // Assemble three bytes into an int from four "valid" characters.
  308. int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12
  309. | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
  310. // Add the bytes
  311. dArr[d++] = (byte) (i >> 16);
  312. dArr[d++] = (byte) (i >> 8);
  313. dArr[d++] = (byte) i;
  314. // If line separator, jump over it.
  315. if (sepCnt > 0 && ++cc == 19)
  316. {
  317. sIx += 2;
  318. cc = 0;
  319. }
  320. }
  321. if (d < len)
  322. {
  323. // Decode last 1-3 bytes (incl '=') into 1-3 bytes
  324. int i = 0;
  325. for (int j = 0; sIx <= eIx - pad; j++)
  326. i |= IA[sArr[sIx++]] << (18 - j * 6);
  327. for (int r = 16; d < len; r -= 8)
  328. dArr[d++] = (byte) (i >> r);
  329. }
  330. return dArr;
  331. }
  332. // ****************************************************************************************
  333. // * byte[] version
  334. // ****************************************************************************************
  335. /**
  336. * Encodes a raw byte array into a BASE64 <code>byte[]</code> representation
  337. * i accordance with RFC 2045.
  338. *
  339. * @param sArr
  340. * The bytes to convert. If <code>null</code> or length 0 an
  341. * empty array will be returned.
  342. * @param lineSep
  343. * Optional "\r\n" after 76 characters, unless end of file.<br>
  344. * No line separator will be in breach of RFC 2045 which
  345. * specifies max 76 per line but will be a little faster.
  346. * @return A BASE64 encoded array. Never <code>null</code>.
  347. */
  348. public final static byte[] encodeToByte(byte[] sArr, boolean lineSep)
  349. {
  350. // Check special case
  351. int sLen = sArr != null ? sArr.length : 0;
  352. if (sLen == 0)
  353. return new byte[0];
  354. int eLen = (sLen / 3) * 3; // Length of even 24-bits.
  355. int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
  356. int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of
  357. // returned
  358. // array
  359. byte[] dArr = new byte[dLen];
  360. // Encode even 24-bits
  361. for (int s = 0, d = 0, cc = 0; s < eLen;)
  362. {
  363. // Copy next three bytes into lower 24 bits of int, paying attension
  364. // to sign.
  365. int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8
  366. | (sArr[s++] & 0xff);
  367. // Encode the int into four chars
  368. dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
  369. dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
  370. dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
  371. dArr[d++] = (byte) CA[i & 0x3f];
  372. // Add optional line separator
  373. if (lineSep && ++cc == 19 && d < dLen - 2)
  374. {
  375. dArr[d++] = '\r';
  376. dArr[d++] = '\n';
  377. cc = 0;
  378. }
  379. }
  380. // Pad and encode last bits if source isn't an even 24 bits.
  381. int left = sLen - eLen; // 0 - 2.
  382. if (left > 0)
  383. {
  384. // Prepare the int
  385. int i = ((sArr[eLen] & 0xff) << 10)
  386. | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
  387. // Set last four chars
  388. dArr[dLen - 4] = (byte) CA[i >> 12];
  389. dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
  390. dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
  391. dArr[dLen - 1] = '=';
  392. }
  393. return dArr;
  394. }
  395. /**
  396. * Decodes a BASE64 encoded byte array. All illegal characters will be
  397. * ignored and can handle both arrays with and without line separators.
  398. *
  399. * @param sArr
  400. * The source array. Length 0 will return an empty array.
  401. * <code>null</code> will throw an exception.
  402. * @return The decoded array of bytes. May be of length 0. Will be
  403. * <code>null</code> if the legal characters (including '=') isn't
  404. * divideable by 4. (I.e. definitely corrupted).
  405. */
  406. public final static byte[] decode(byte[] sArr)
  407. {
  408. // Check special case
  409. int sLen = sArr.length;
  410. // Count illegal characters (including '\r', '\n') to know what size the
  411. // returned array will be,
  412. // so we don't have to reallocate & copy it later.
  413. int sepCnt = 0; // Number of separator characters. (Actually illegal
  414. // characters, but that's a bonus...)
  415. for (int i = 0; i < sLen; i++)
  416. // If input is "pure" (I.e. no line separators or illegal chars)
  417. // base64 this loop can be commented out.
  418. if (IA[sArr[i] & 0xff] < 0)
  419. sepCnt++;
  420. // Check so that legal chars (including '=') are evenly divideable by 4
  421. // as specified in RFC 2045.
  422. if ((sLen - sepCnt) % 4 != 0)
  423. return null;
  424. int pad = 0;
  425. for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;)
  426. if (sArr[i] == '=')
  427. pad++;
  428. int len = ((sLen - sepCnt) * 6 >> 3) - pad;
  429. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  430. for (int s = 0, d = 0; d < len;)
  431. {
  432. // Assemble three bytes into an int from four "valid" characters.
  433. int i = 0;
  434. for (int j = 0; j < 4; j++)
  435. { // j only increased if a valid char was found.
  436. int c = IA[sArr[s++] & 0xff];
  437. if (c >= 0)
  438. i |= c << (18 - j * 6);
  439. else
  440. j--;
  441. }
  442. // Add the bytes
  443. dArr[d++] = (byte) (i >> 16);
  444. if (d < len)
  445. {
  446. dArr[d++] = (byte) (i >> 8);
  447. if (d < len)
  448. dArr[d++] = (byte) i;
  449. }
  450. }
  451. return dArr;
  452. }
  453. /**
  454. * Decodes a BASE64 encoded byte array that is known to be resonably well
  455. * formatted. The method is about twice as fast as {@link #decode(byte[])}.
  456. * The preconditions are:<br>
  457. * + The array must have a line length of 76 chars OR no line separators at
  458. * all (one line).<br>
  459. * + Line separator must be "\r\n", as specified in RFC 2045 + The array
  460. * must not contain illegal characters within the encoded string<br>
  461. * + The array CAN have illegal characters at the beginning and end, those
  462. * will be dealt with appropriately.<br>
  463. *
  464. * @param sArr
  465. * The source array. Length 0 will return an empty array.
  466. * <code>null</code> will throw an exception.
  467. * @return The decoded array of bytes. May be of length 0.
  468. */
  469. public final static byte[] decodeFast(byte[] sArr)
  470. {
  471. // Check special case
  472. int sLen = sArr.length;
  473. if (sLen == 0)
  474. return new byte[0];
  475. int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
  476. // Trim illegal chars from start
  477. while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
  478. sIx++;
  479. // Trim illegal chars from end
  480. while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
  481. eIx--;
  482. // get the padding count (=) (0, 1 or 2)
  483. int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count
  484. // '='
  485. // at
  486. // end.
  487. int cCnt = eIx - sIx + 1; // Content count including possible separators
  488. int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
  489. int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
  490. // bytes
  491. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  492. // Decode all but the last 0 - 2 bytes.
  493. int d = 0;
  494. for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
  495. {
  496. // Assemble three bytes into an int from four "valid" characters.
  497. int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12
  498. | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
  499. // Add the bytes
  500. dArr[d++] = (byte) (i >> 16);
  501. dArr[d++] = (byte) (i >> 8);
  502. dArr[d++] = (byte) i;
  503. // If line separator, jump over it.
  504. if (sepCnt > 0 && ++cc == 19)
  505. {
  506. sIx += 2;
  507. cc = 0;
  508. }
  509. }
  510. if (d < len)
  511. {
  512. // Decode last 1-3 bytes (incl '=') into 1-3 bytes
  513. int i = 0;
  514. for (int j = 0; sIx <= eIx - pad; j++)
  515. i |= IA[sArr[sIx++]] << (18 - j * 6);
  516. for (int r = 16; d < len; r -= 8)
  517. dArr[d++] = (byte) (i >> r);
  518. }
  519. return dArr;
  520. }
  521. // ****************************************************************************************
  522. // * String version
  523. // ****************************************************************************************
  524. /**
  525. * Encodes a raw byte array into a BASE64 <code>String</code> representation
  526. * i accordance with RFC 2045.
  527. *
  528. * @param sArr
  529. * The bytes to convert. If <code>null</code> or length 0 an
  530. * empty array will be returned.
  531. * @param lineSep
  532. * Optional "\r\n" after 76 characters, unless end of file.<br>
  533. * No line separator will be in breach of RFC 2045 which
  534. * specifies max 76 per line but will be a little faster.
  535. * @return A BASE64 encoded array. Never <code>null</code>.
  536. */
  537. public final static String encodeToString(byte[] sArr, boolean lineSep)
  538. {
  539. // Reuse char[] since we can't create a String incrementally anyway and
  540. // StringBuffer/Builder would be slower.
  541. return new String(encodeToChar(sArr, lineSep));
  542. }
  543. public final static String encodeToString(byte[] sArr, int off, int len,boolean lineSep)
  544. {
  545. // Reuse char[] since we can't create a String incrementally anyway and
  546. // StringBuffer/Builder would be slower.
  547. return new String(encodeToChar(sArr,off, len, lineSep));
  548. }
  549. /**
  550. * Decodes a BASE64 encoded <code>String</code>. All illegal characters will
  551. * be ignored and can handle both strings with and without line separators.<br>
  552. * <b>Note!</b> It can be up to about 2x the speed to call
  553. * <code>decode(str.toCharArray())</code> instead. That will create a
  554. * temporary array though. This version will use <code>str.charAt(i)</code>
  555. * to iterate the string.
  556. *
  557. * @param str
  558. * The source string. <code>null</code> or length 0 will return
  559. * an empty array.
  560. * @return The decoded array of bytes. May be of length 0. Will be
  561. * <code>null</code> if the legal characters (including '=') isn't
  562. * divideable by 4. (I.e. definitely corrupted).
  563. */
  564. public final static byte[] decode(String str)
  565. {
  566. // Check special case
  567. int sLen = str != null ? str.length() : 0;
  568. if (sLen == 0)
  569. return new byte[0];
  570. // Count illegal characters (including '\r', '\n') to know what size the
  571. // returned array will be,
  572. // so we don't have to reallocate & copy it later.
  573. int sepCnt = 0; // Number of separator characters. (Actually illegal
  574. // characters, but that's a bonus...)
  575. for (int i = 0; i < sLen; i++)
  576. // If input is "pure" (I.e. no line separators or illegal chars)
  577. // base64 this loop can be commented out.
  578. if (IA[str.charAt(i)] < 0)
  579. sepCnt++;
  580. // Check so that legal chars (including '=') are evenly divideable by 4
  581. // as specified in RFC 2045.
  582. if ((sLen - sepCnt) % 4 != 0)
  583. return null;
  584. // Count '=' at end
  585. int pad = 0;
  586. for (int i = sLen; i > 1 && IA[str.charAt(--i)] <= 0;)
  587. if (str.charAt(i) == '=')
  588. pad++;
  589. int len = ((sLen - sepCnt) * 6 >> 3) - pad;
  590. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  591. for (int s = 0, d = 0; d < len;)
  592. {
  593. // Assemble three bytes into an int from four "valid" characters.
  594. int i = 0;
  595. for (int j = 0; j < 4; j++)
  596. { // j only increased if a valid char was found.
  597. int c = IA[str.charAt(s++)];
  598. if (c >= 0)
  599. i |= c << (18 - j * 6);
  600. else
  601. j--;
  602. }
  603. // Add the bytes
  604. dArr[d++] = (byte) (i >> 16);
  605. if (d < len)
  606. {
  607. dArr[d++] = (byte) (i >> 8);
  608. if (d < len)
  609. dArr[d++] = (byte) i;
  610. }
  611. }
  612. return dArr;
  613. }
  614. /**
  615. * Decodes a BASE64 encoded string that is known to be resonably well
  616. * formatted. The method is about twice as fast as {@link #decode(String)}.
  617. * The preconditions are:<br>
  618. * + The array must have a line length of 76 chars OR no line separators at
  619. * all (one line).<br>
  620. * + Line separator must be "\r\n", as specified in RFC 2045 + The array
  621. * must not contain illegal characters within the encoded string<br>
  622. * + The array CAN have illegal characters at the beginning and end, those
  623. * will be dealt with appropriately.<br>
  624. *
  625. * @param s
  626. * The source string. Length 0 will return an empty array.
  627. * <code>null</code> will throw an exception.
  628. * @return The decoded array of bytes. May be of length 0.
  629. */
  630. public final static byte[] decodeFast(String s)
  631. {
  632. // Check special case
  633. int sLen = s.length();
  634. if (sLen == 0)
  635. return new byte[0];
  636. int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
  637. // Trim illegal chars from start
  638. while (sIx < eIx && IA[s.charAt(sIx) & 0xff] < 0)
  639. sIx++;
  640. // Trim illegal chars from end
  641. while (eIx > 0 && IA[s.charAt(eIx) & 0xff] < 0)
  642. eIx--;
  643. // get the padding count (=) (0, 1 or 2)
  644. int pad = s.charAt(eIx) == '=' ? (s.charAt(eIx - 1) == '=' ? 2 : 1) : 0; // Count
  645. // '='
  646. // at
  647. // end.
  648. int cCnt = eIx - sIx + 1; // Content count including possible separators
  649. int sepCnt = sLen > 76 ? (s.charAt(76) == '\r' ? cCnt / 78 : 0) << 1
  650. : 0;
  651. int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded
  652. // bytes
  653. byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
  654. // Decode all but the last 0 - 2 bytes.
  655. int d = 0;
  656. for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
  657. {
  658. // Assemble three bytes into an int from four "valid" characters.
  659. int i = IA[s.charAt(sIx++)] << 18 | IA[s.charAt(sIx++)] << 12
  660. | IA[s.charAt(sIx++)] << 6 | IA[s.charAt(sIx++)];
  661. // Add the bytes
  662. dArr[d++] = (byte) (i >> 16);
  663. dArr[d++] = (byte) (i >> 8);
  664. dArr[d++] = (byte) i;
  665. // If line separator, jump over it.
  666. if (sepCnt > 0 && ++cc == 19)
  667. {
  668. sIx += 2;
  669. cc = 0;
  670. }
  671. }
  672. if (d < len)
  673. {
  674. // Decode last 1-3 bytes (incl '=') into 1-3 bytes
  675. int i = 0;
  676. for (int j = 0; sIx <= eIx - pad; j++)
  677. i |= IA[s.charAt(sIx++)] << (18 - j * 6);
  678. for (int r = 16; d < len; r -= 8)
  679. dArr[d++] = (byte) (i >> r);
  680. }
  681. return dArr;
  682. }
  683. }