PageRenderTime 52ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/jandcode-utils/src/jandcode/utils/impl/Base64Internal.java

https://bitbucket.org/gkraser/jandcode-core
Java | 516 lines | 277 code | 81 blank | 158 comment | 116 complexity | f704a0cf9939f9fa079cb2a51f410c9a MD5 | raw file
  1. package jandcode.utils.impl;
  2. import java.util.*;
  /**
   * Fast BASE64 encoder/decoder working on <code>char[]</code>, <code>byte[]</code> and
   * <code>String</code> representations, following RFC 2045.
   * <p>
   * The three decode/decodeFast families are intentionally kept as parallel, hand-unrolled
   * implementations so that no temporary copies of the input are created.
   */
  public class Base64Internal {

      /** The BASE64 alphabet; the array index is the 6-bit value of the character. */
      private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); //NON-NLS

      /**
       * Reverse lookup table: character code (0-255) to 6-bit value, or -1 when the character is
       * not part of the alphabet. The padding character '=' maps to 0 so it counts as "legal".
       */
      private static final int[] IA = new int[256];

      static {
          Arrays.fill(IA, -1);
          for (int i = 0, iS = CA.length; i < iS; i++) {
              IA[CA[i]] = i;
          }
          IA['='] = 0;
      }

      /**
       * Range-checked lookup into {@link #IA}.
       *
       * @param ch The character code to look up. Callers holding a <code>byte</code> must mask it
       * with <code>0xff</code> first.
       * @return The 6-bit value of the character (0 for '='), or -1 if the code is outside the
       * table or not a BASE64 character.
       */
      private static int valueOf(int ch) {
          return ch >= 0 && ch < IA.length ? IA[ch] : -1;
      }

      // ****************************************************************************************
      // * char[] version
      // ****************************************************************************************

      /**
       * Encodes a raw byte array into a BASE64 <code>char[]</code> representation in accordance with RFC 2045.
       *
       * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
       * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
       * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
       * little faster.
       * @return A BASE64 encoded array. Never <code>null</code>.
       */
      public static final char[] encodeToChar(byte[] sArr, boolean lineSep) {
          // Check special case
          int sLen = sArr != null ? sArr.length : 0;
          if (sLen == 0) {
              return new char[0];
          }

          int eLen = (sLen / 3) * 3; // Length of even 24-bits.
          int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
          int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
          char[] dArr = new char[dLen];

          // Encode even 24-bits
          for (int s = 0, d = 0, cc = 0; s < eLen; ) {
              // Copy next three bytes into lower 24 bits of int, paying attention to sign.
              int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

              // Encode the int into four chars
              dArr[d++] = CA[(i >>> 18) & 0x3f];
              dArr[d++] = CA[(i >>> 12) & 0x3f];
              dArr[d++] = CA[(i >>> 6) & 0x3f];
              dArr[d++] = CA[i & 0x3f];

              // Add optional line separator: 19 groups of 4 chars make one 76 char line.
              if (lineSep && ++cc == 19 && d < dLen - 2) {
                  dArr[d++] = '\r';
                  dArr[d++] = '\n';
                  cc = 0;
              }
          }

          // Pad and encode last bits if source isn't even 24 bits.
          int left = sLen - eLen; // 0 - 2.
          if (left > 0) {
              // Prepare the int: the remaining 8 or 16 bits, left-aligned into 18 bits.
              int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

              // Set last four chars
              dArr[dLen - 4] = CA[i >> 12];
              dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
              dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
              dArr[dLen - 1] = '=';
          }
          return dArr;
      }

      /**
       * Decodes a BASE64 encoded char array. All illegal characters (including characters outside
       * the 8-bit range) will be ignored and can handle both arrays with and without line separators.
       *
       * @param sArr The source array. <code>null</code> or length 0 will return an empty array.
       * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
       * (including '=') aren't divisible by 4, or if there is more padding than data. (I.e. definitely corrupted).
       */
      public static final byte[] decode(char[] sArr) {
          // Check special case
          int sLen = sArr != null ? sArr.length : 0;
          if (sLen == 0) {
              return new byte[0];
          }

          // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
          // so we don't have to reallocate & copy it later.
          int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
          for (int i = 0; i < sLen; i++) {
              if (valueOf(sArr[i]) < 0) {
                  sepCnt++;
              }
          }

          // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
          if ((sLen - sepCnt) % 4 != 0) {
              return null;
          }

          // Count '=' at end, skipping over trailing illegal characters.
          int pad = 0;
          for (int i = sLen; i > 1 && valueOf(sArr[--i]) <= 0; ) {
              if (sArr[i] == '=') {
                  pad++;
              }
          }

          int len = ((sLen - sepCnt) * 6 >> 3) - pad;
          if (len < 0) {
              return null; // More padding than data: definitely corrupted.
          }

          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          for (int s = 0, d = 0; d < len; ) {
              // Assemble three bytes into an int from four "valid" characters.
              int i = 0;
              for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                  int c = valueOf(sArr[s++]);
                  if (c >= 0) {
                      i |= c << (18 - j * 6);
                  } else {
                      j--;
                  }
              }
              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              if (d < len) {
                  dArr[d++] = (byte) (i >> 8);
                  if (d < len) {
                      dArr[d++] = (byte) i;
                  }
              }
          }
          return dArr;
      }

      /**
       * Decodes a BASE64 encoded char array that is known to be reasonably well formatted. The method is about twice as
       * fast as {@link #decode(char[])}. The preconditions are:<br>
       * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
       * + Line separator must be "\r\n", as specified in RFC 2045
       * + The array must not contain illegal characters within the encoded string<br>
       * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
       *
       * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
       * @return The decoded array of bytes. May be of length 0 (also when the array holds no decodable content).
       */
      public static final byte[] decodeFast(char[] sArr) {
          // Check special case
          int sLen = sArr.length;
          if (sLen == 0) {
              return new byte[0];
          }

          int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

          // Trim illegal chars from start
          while (sIx < eIx && valueOf(sArr[sIx]) < 0) {
              sIx++;
          }

          // Trim illegal chars from end
          while (eIx > 0 && valueOf(sArr[eIx]) < 0) {
              eIx--;
          }

          // get the padding count (=) (0, 1 or 2)
          int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count '=' at end.
          int cCnt = eIx - sIx + 1; // Content count including possible separators
          // The first separator sits 76 chars after the first content char, not at absolute index 76.
          int sepCnt = cCnt > 76 && sArr[sIx + 76] == '\r' ? (cCnt / 78) << 1 : 0;

          int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
          if (len <= 0) {
              return new byte[0]; // Nothing but junk and/or padding.
          }
          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          // Decode all but the last 0 - 2 bytes.
          int d = 0;
          for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
              // Assemble three bytes into an int from four "valid" characters.
              int i = valueOf(sArr[sIx++]) << 18 | valueOf(sArr[sIx++]) << 12
                      | valueOf(sArr[sIx++]) << 6 | valueOf(sArr[sIx++]);

              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              dArr[d++] = (byte) (i >> 8);
              dArr[d++] = (byte) i;

              // If line separator, jump over it.
              if (sepCnt > 0 && ++cc == 19) {
                  sIx += 2;
                  cc = 0;
              }
          }

          if (d < len) {
              // Decode last 1-3 bytes (incl '=') into 1-3 bytes
              int i = 0;
              for (int j = 0; sIx <= eIx - pad; j++) {
                  i |= valueOf(sArr[sIx++]) << (18 - j * 6);
              }
              for (int r = 16; d < len; r -= 8) {
                  dArr[d++] = (byte) (i >> r);
              }
          }
          return dArr;
      }

      // ****************************************************************************************
      // * byte[] version
      // ****************************************************************************************

      /**
       * Encodes a raw byte array into a BASE64 <code>byte[]</code> representation in accordance with RFC 2045.
       *
       * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
       * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
       * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
       * little faster.
       * @return A BASE64 encoded array. Never <code>null</code>.
       */
      public static final byte[] encodeToByte(byte[] sArr, boolean lineSep) {
          // Check special case
          int sLen = sArr != null ? sArr.length : 0;
          if (sLen == 0) {
              return new byte[0];
          }

          int eLen = (sLen / 3) * 3; // Length of even 24-bits.
          int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
          int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
          byte[] dArr = new byte[dLen];

          // Encode even 24-bits
          for (int s = 0, d = 0, cc = 0; s < eLen; ) {
              // Copy next three bytes into lower 24 bits of int, paying attention to sign.
              int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

              // Encode the int into four chars
              dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
              dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
              dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
              dArr[d++] = (byte) CA[i & 0x3f];

              // Add optional line separator: 19 groups of 4 chars make one 76 char line.
              if (lineSep && ++cc == 19 && d < dLen - 2) {
                  dArr[d++] = '\r';
                  dArr[d++] = '\n';
                  cc = 0;
              }
          }

          // Pad and encode last bits if source isn't an even 24 bits.
          int left = sLen - eLen; // 0 - 2.
          if (left > 0) {
              // Prepare the int: the remaining 8 or 16 bits, left-aligned into 18 bits.
              int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

              // Set last four chars
              dArr[dLen - 4] = (byte) CA[i >> 12];
              dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
              dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
              dArr[dLen - 1] = '=';
          }
          return dArr;
      }

      /**
       * Decodes a BASE64 encoded byte array. All illegal characters will be ignored and can handle both arrays with
       * and without line separators.
       *
       * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
       * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
       * (including '=') aren't divisible by 4, or if there is more padding than data. (I.e. definitely corrupted).
       */
      public static final byte[] decode(byte[] sArr) {
          // Check special case
          int sLen = sArr.length;

          // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
          // so we don't have to reallocate & copy it later.
          int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
          for (int i = 0; i < sLen; i++) {
              if (valueOf(sArr[i] & 0xff) < 0) {
                  sepCnt++;
              }
          }

          // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
          if ((sLen - sepCnt) % 4 != 0) {
              return null;
          }

          // Count '=' at end, skipping over trailing illegal characters.
          int pad = 0;
          for (int i = sLen; i > 1 && valueOf(sArr[--i] & 0xff) <= 0; ) {
              if (sArr[i] == '=') {
                  pad++;
              }
          }

          int len = ((sLen - sepCnt) * 6 >> 3) - pad;
          if (len < 0) {
              return null; // More padding than data: definitely corrupted.
          }

          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          for (int s = 0, d = 0; d < len; ) {
              // Assemble three bytes into an int from four "valid" characters.
              int i = 0;
              for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                  int c = valueOf(sArr[s++] & 0xff);
                  if (c >= 0) {
                      i |= c << (18 - j * 6);
                  } else {
                      j--;
                  }
              }

              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              if (d < len) {
                  dArr[d++] = (byte) (i >> 8);
                  if (d < len) {
                      dArr[d++] = (byte) i;
                  }
              }
          }
          return dArr;
      }

      /**
       * Decodes a BASE64 encoded byte array that is known to be reasonably well formatted. The method is about twice as
       * fast as {@link #decode(byte[])}. The preconditions are:<br>
       * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
       * + Line separator must be "\r\n", as specified in RFC 2045
       * + The array must not contain illegal characters within the encoded string<br>
       * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
       *
       * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
       * @return The decoded array of bytes. May be of length 0 (also when the array holds no decodable content).
       */
      public static final byte[] decodeFast(byte[] sArr) {
          // Check special case
          int sLen = sArr.length;
          if (sLen == 0) {
              return new byte[0];
          }

          int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

          // Trim illegal chars from start
          while (sIx < eIx && valueOf(sArr[sIx] & 0xff) < 0) {
              sIx++;
          }

          // Trim illegal chars from end
          while (eIx > 0 && valueOf(sArr[eIx] & 0xff) < 0) {
              eIx--;
          }

          // get the padding count (=) (0, 1 or 2)
          int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count '=' at end.
          int cCnt = eIx - sIx + 1; // Content count including possible separators
          // The first separator sits 76 chars after the first content char, not at absolute index 76.
          int sepCnt = cCnt > 76 && sArr[sIx + 76] == '\r' ? (cCnt / 78) << 1 : 0;

          int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
          if (len <= 0) {
              return new byte[0]; // Nothing but junk and/or padding.
          }
          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          // Decode all but the last 0 - 2 bytes.
          int d = 0;
          for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
              // Assemble three bytes into an int from four "valid" characters.
              // Bytes are masked so that values >= 0x80 cannot produce a negative table index.
              int i = valueOf(sArr[sIx++] & 0xff) << 18 | valueOf(sArr[sIx++] & 0xff) << 12
                      | valueOf(sArr[sIx++] & 0xff) << 6 | valueOf(sArr[sIx++] & 0xff);

              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              dArr[d++] = (byte) (i >> 8);
              dArr[d++] = (byte) i;

              // If line separator, jump over it.
              if (sepCnt > 0 && ++cc == 19) {
                  sIx += 2;
                  cc = 0;
              }
          }

          if (d < len) {
              // Decode last 1-3 bytes (incl '=') into 1-3 bytes
              int i = 0;
              for (int j = 0; sIx <= eIx - pad; j++) {
                  i |= valueOf(sArr[sIx++] & 0xff) << (18 - j * 6);
              }
              for (int r = 16; d < len; r -= 8) {
                  dArr[d++] = (byte) (i >> r);
              }
          }
          return dArr;
      }

      // ****************************************************************************************
      // * String version
      // ****************************************************************************************

      /**
       * Encodes a raw byte array into a BASE64 <code>String</code> representation in accordance with RFC 2045.
       *
       * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty string will be returned.
       * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
       * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
       * little faster.
       * @return A BASE64 encoded string. Never <code>null</code>.
       */
      public static final String encodeToString(byte[] sArr, boolean lineSep) {
          // Reuse char[] since we can't create a String incrementally anyway and StringBuffer/Builder would be slower.
          return new String(encodeToChar(sArr, lineSep));
      }

      /**
       * Decodes a BASE64 encoded <code>String</code>. All illegal characters (including characters outside
       * the 8-bit range) will be ignored and can handle both strings with and without line separators.<br>
       * <b>Note!</b> It can be up to about 2x the speed to call <code>decode(str.toCharArray())</code> instead. That
       * will create a temporary array though. This version will use <code>str.charAt(i)</code> to iterate the string.
       *
       * @param str The source string. <code>null</code> or length 0 will return an empty array.
       * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
       * (including '=') aren't divisible by 4, or if there is more padding than data. (I.e. definitely corrupted).
       */
      public static final byte[] decode(String str) {
          // Check special case
          int sLen = str != null ? str.length() : 0;
          if (sLen == 0) {
              return new byte[0];
          }

          // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
          // so we don't have to reallocate & copy it later.
          int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
          for (int i = 0; i < sLen; i++) {
              if (valueOf(str.charAt(i)) < 0) {
                  sepCnt++;
              }
          }

          // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
          if ((sLen - sepCnt) % 4 != 0) {
              return null;
          }

          // Count '=' at end, skipping over trailing illegal characters.
          int pad = 0;
          for (int i = sLen; i > 1 && valueOf(str.charAt(--i)) <= 0; ) {
              if (str.charAt(i) == '=') {
                  pad++;
              }
          }

          int len = ((sLen - sepCnt) * 6 >> 3) - pad;
          if (len < 0) {
              return null; // More padding than data: definitely corrupted.
          }

          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          for (int s = 0, d = 0; d < len; ) {
              // Assemble three bytes into an int from four "valid" characters.
              int i = 0;
              for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                  int c = valueOf(str.charAt(s++));
                  if (c >= 0) {
                      i |= c << (18 - j * 6);
                  } else {
                      j--;
                  }
              }
              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              if (d < len) {
                  dArr[d++] = (byte) (i >> 8);
                  if (d < len) {
                      dArr[d++] = (byte) i;
                  }
              }
          }
          return dArr;
      }

      /**
       * Decodes a BASE64 encoded string that is known to be reasonably well formatted. The method is about twice as
       * fast as {@link #decode(String)}. The preconditions are:<br>
       * + The string must have a line length of 76 chars OR no line separators at all (one line).<br>
       * + Line separator must be "\r\n", as specified in RFC 2045
       * + The string must not contain illegal characters within the encoded string<br>
       * + The string CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
       *
       * @param s The source string. Length 0 will return an empty array. <code>null</code> will throw an exception.
       * @return The decoded array of bytes. May be of length 0 (also when the string holds no decodable content).
       */
      public static final byte[] decodeFast(String s) {
          // Check special case
          int sLen = s.length();
          if (sLen == 0) {
              return new byte[0];
          }

          int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

          // Trim illegal chars from start. No "& 0xff" here: masking would alias chars >= 256 onto legal ones.
          while (sIx < eIx && valueOf(s.charAt(sIx)) < 0) {
              sIx++;
          }

          // Trim illegal chars from end
          while (eIx > 0 && valueOf(s.charAt(eIx)) < 0) {
              eIx--;
          }

          // get the padding count (=) (0, 1 or 2)
          int pad = s.charAt(eIx) == '=' ? (eIx > 0 && s.charAt(eIx - 1) == '=' ? 2 : 1) : 0; // Count '=' at end.
          int cCnt = eIx - sIx + 1; // Content count including possible separators
          // The first separator sits 76 chars after the first content char, not at absolute index 76.
          int sepCnt = cCnt > 76 && s.charAt(sIx + 76) == '\r' ? (cCnt / 78) << 1 : 0;

          int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
          if (len <= 0) {
              return new byte[0]; // Nothing but junk and/or padding.
          }
          byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

          // Decode all but the last 0 - 2 bytes.
          int d = 0;
          for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
              // Assemble three bytes into an int from four "valid" characters.
              int i = valueOf(s.charAt(sIx++)) << 18 | valueOf(s.charAt(sIx++)) << 12
                      | valueOf(s.charAt(sIx++)) << 6 | valueOf(s.charAt(sIx++));

              // Add the bytes
              dArr[d++] = (byte) (i >> 16);
              dArr[d++] = (byte) (i >> 8);
              dArr[d++] = (byte) i;

              // If line separator, jump over it.
              if (sepCnt > 0 && ++cc == 19) {
                  sIx += 2;
                  cc = 0;
              }
          }

          if (d < len) {
              // Decode last 1-3 bytes (incl '=') into 1-3 bytes
              int i = 0;
              for (int j = 0; sIx <= eIx - pad; j++) {
                  i |= valueOf(s.charAt(sIx++)) << (18 - j * 6);
              }
              for (int r = 16; d < len; r -= 8) {
                  dArr[d++] = (byte) (i >> r);
              }
          }
          return dArr;
      }
  }