/plugins/Archive/tags/release-0-4-2/com/aftexsw/util/bzip/CBZip2InputStream.java
# · Java · 808 lines · 626 code · 102 blank · 80 comment · 154 complexity · 01d001a1c16f2b21a27b2a38a6942726 MD5 · raw file
- /*
- * This file is a part of bzip2 and/or libbzip2, a program and
- * library for lossless, block-sorting data compression.
- *
- * Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. The origin of this software must not be misrepresented; you must
- * not claim that you wrote the original software. If you use this
- * software in a product, an acknowledgment in the product
- * documentation would be appreciated but is not required.
- *
- * 3. Altered source versions must be plainly marked as such, and must
- * not be misrepresented as being the original software.
- *
- * 4. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Java version ported by Keiron Liddle, Aftex Software <keiron@aftexsw.com> 1999-2001
- *
- */
- /**
- * An input stream that decompresses from the BZip2 format (without the file
- * header chars) to be read as any other stream.
- */
- package com.aftexsw.util.bzip;
- import java.io.*;
- public class CBZip2InputStream extends InputStream implements BZip2Constants {
- private static void cadvise() {
- System.out.println("CRC Error");
- //throw new CCoruptionError();
- }
- private static void badBGLengths() {
- cadvise();
- }
- private static void bitStreamEOF() {
- cadvise();
- }
- private static void compressedStreamEOF() {
- cadvise();
- }
- private void makeMaps() {
- int i;
- nInUse = 0;
- for (i = 0; i < 256; i++)
- if (inUse[i]) {
- seqToUnseq[nInUse] = (char)i;
- unseqToSeq[i] = (char)nInUse;
- nInUse++;
- }
- }
- /*--
- index of the last char in the block, so
- the block size == last + 1.
- --*/
- private int last;
- /*--
- index in zptr[] of original string after sorting.
- --*/
- private int origPtr;
- /*--
- always: in the range 0 .. 9.
- The current block size is 100000 * this number.
- --*/
- private int blockSize100k;
- private boolean blockRandomised;
- private int bytesIn;
- private int bytesOut;
- private int bsBuff;
- private int bsLive;
- private CRC mCrc = new CRC();
- private boolean inUse[] = new boolean[256];
- private int nInUse;
- private char seqToUnseq[] = new char[256];
- private char unseqToSeq[] = new char[256];
- private char selector[] = new char[MAX_SELECTORS];
- private char selectorMtf[] = new char[MAX_SELECTORS];
- private int[] tt;
- private char[] ll8;
- /*--
- freq table collected to save a pass over the data
- during decompression.
- --*/
- private int unzftab[] = new int[256];
- private int limit[][] = new int[N_GROUPS][MAX_ALPHA_SIZE];
- private int base[][] = new int[N_GROUPS][MAX_ALPHA_SIZE];
- private int perm[][] = new int[N_GROUPS][MAX_ALPHA_SIZE];
- private int minLens[] = new int[N_GROUPS];
- private InputStream bsStream;
- private boolean streamEnd = false;
- private int currentChar = -1;
- private static final int START_BLOCK_STATE = 1;
- private static final int RAND_PART_A_STATE = 2;
- private static final int RAND_PART_B_STATE = 3;
- private static final int RAND_PART_C_STATE = 4;
- private static final int NO_RAND_PART_A_STATE = 5;
- private static final int NO_RAND_PART_B_STATE = 6;
- private static final int NO_RAND_PART_C_STATE = 7;
- private int currentState = START_BLOCK_STATE;
- private int storedBlockCRC, storedCombinedCRC;
- private int computedBlockCRC, computedCombinedCRC;
- int i2, count, chPrev, ch2;
- int i, tPos;
- int rNToGo = 0;
- int rTPos = 0;
- int j2;
- char z;
- public CBZip2InputStream(InputStream zStream) {
- ll8 = null;
- tt = null;
- bsSetStream(zStream);
- initialize();
- initBlock();
- setupBlock();
- }
- public int read() {
- if(streamEnd) {
- return -1;
- } else {
- int retChar = currentChar;
- switch(currentState) {
- case START_BLOCK_STATE:
- break;
- case RAND_PART_A_STATE:
- break;
- case RAND_PART_B_STATE:
- setupRandPartB();
- break;
- case RAND_PART_C_STATE:
- setupRandPartC();
- break;
- case NO_RAND_PART_A_STATE:
- break;
- case NO_RAND_PART_B_STATE:
- setupNoRandPartB();
- break;
- case NO_RAND_PART_C_STATE:
- setupNoRandPartC();
- break;
- default:
- break;
- }
- return retChar;
- }
- }
- private void initialize() {
- char magic3, magic4;
- magic3 = bsGetUChar();
- magic4 = bsGetUChar();
- if(magic3 != 'h' || magic4 < '1' || magic4 > '9') {
- bsFinishedWithStream();
- streamEnd = true;
- return;
- }
- setDecompressStructureSizes(magic4 - '0');
- computedCombinedCRC = 0;
- }
- private void initBlock() {
- char magic1, magic2, magic3, magic4;
- char magic5, magic6;
- magic1 = bsGetUChar();
- magic2 = bsGetUChar();
- magic3 = bsGetUChar();
- magic4 = bsGetUChar();
- magic5 = bsGetUChar();
- magic6 = bsGetUChar();
- if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
- complete();
- return;
- }
- if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
- badBlockHeader();
- streamEnd = true;
- return;
- }
- storedBlockCRC = bsGetInt32();
- if (bsR(1) == 1)
- blockRandomised = true;
- else
- blockRandomised = false;
- // currBlockNo++;
- getAndMoveToFrontDecode();
- mCrc.initialiseCRC();
- currentState = START_BLOCK_STATE;
- }
- private void endBlock() {
- computedBlockCRC = mCrc.getFinalCRC();
- /*-- A bad CRC is considered a fatal error. --*/
- if (storedBlockCRC != computedBlockCRC)
- crcError();
- computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
- computedCombinedCRC ^= computedBlockCRC;
- }
- private void complete() {
- storedCombinedCRC = bsGetInt32();
- if (storedCombinedCRC != computedCombinedCRC)
- crcError();
- bsFinishedWithStream();
- streamEnd = true;
- }
- private static void blockOverrun() {
- cadvise();
- }
- private static void badBlockHeader() {
- cadvise();
- }
- private static void crcError() {
- cadvise();
- }
- private void bsFinishedWithStream() {
- bsStream = null;
- }
- private void bsSetStream(InputStream f) {
- bsStream = f;
- bsLive = 0;
- bsBuff = 0;
- bytesOut = 0;
- bytesIn = 0;
- }
- private int bsR(int n) {
- int v;
- {
- while (bsLive < n) {
- int zzi;
- char thech = 0;
- try {
- thech = (char)bsStream.read();
- } catch(IOException e) {
- compressedStreamEOF();
- }
- if(thech == -1) {
- compressedStreamEOF();
- }
- zzi = thech;
- bsBuff = (bsBuff << 8) | (zzi & 0xff);
- bsLive += 8;
- }
- }
- v = (bsBuff >> (bsLive-n)) & ((1 << n)-1);
- bsLive -= n;
- return v;
- }
- private char bsGetUChar() {
- return (char)bsR(8);
- }
- private int bsGetint() {
- int u = 0;
- u = (u << 8) | bsR(8);
- u = (u << 8) | bsR(8);
- u = (u << 8) | bsR(8);
- u = (u << 8) | bsR(8);
- return u;
- }
- private int bsGetIntVS(int numBits) {
- return (int)bsR(numBits);
- }
- private int bsGetInt32() {
- return (int)bsGetint();
- }
- private void hbCreateDecodeTables(int[] limit, int[] base, int[] perm, char[] length, int minLen, int maxLen, int alphaSize) {
- int pp, i, j, vec;
- pp = 0;
- for(i = minLen; i <= maxLen; i++)
- for(j = 0; j < alphaSize; j++)
- if (length[j] == i) {
- perm[pp] = j;
- pp++;
- };
- for(i = 0; i < MAX_CODE_LEN; i++)
- base[i] = 0;
- for(i = 0; i < alphaSize; i++)
- base[length[i]+1]++;
- for(i = 1; i < MAX_CODE_LEN; i++)
- base[i] += base[i-1];
- for (i = 0; i < MAX_CODE_LEN; i++)
- limit[i] = 0;
- vec = 0;
- for (i = minLen; i <= maxLen; i++) {
- vec += (base[i+1] - base[i]);
- limit[i] = vec-1;
- vec <<= 1;
- }
- for (i = minLen + 1; i <= maxLen; i++)
- base[i] = ((limit[i-1] + 1) << 1) - base[i];
- }
- private void recvDecodingTables() {
- char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
- int i, j, t, nGroups, nSelectors, alphaSize;
- int minLen, maxLen;
- boolean inUse16[] = new boolean[16];
- /*--- Receive the mapping table ---*/
- for (i = 0; i < 16; i++)
- if (bsR(1) == 1)
- inUse16[i] = true;
- else
- inUse16[i] = false;
- for (i = 0; i < 256; i++)
- inUse[i] = false;
- for (i = 0; i < 16; i++)
- if (inUse16[i])
- for (j = 0; j < 16; j++)
- if (bsR(1) == 1)
- inUse[i * 16 + j] = true;
- makeMaps();
- alphaSize = nInUse+2;
- /*--- Now the selectors ---*/
- nGroups = bsR(3);
- nSelectors = bsR(15);
- for (i = 0; i < nSelectors; i++) {
- j = 0;
- while (bsR(1) == 1)
- j++;
- selectorMtf[i] = (char)j;
- }
- /*--- Undo the MTF values for the selectors. ---*/
- {
- char pos[] = new char[N_GROUPS];
- char tmp, v;
- for (v = 0; v < nGroups; v++)
- pos[v] = v;
- for (i = 0; i < nSelectors; i++) {
- v = selectorMtf[i];
- tmp = pos[v];
- while (v > 0) {
- pos[v] = pos[v-1];
- v--;
- }
- pos[0] = tmp;
- selector[i] = tmp;
- }
- }
- /*--- Now the coding tables ---*/
- for (t = 0; t < nGroups; t++) {
- int curr = bsR ( 5 );
- for (i = 0; i < alphaSize; i++) {
- while (bsR(1) == 1) {
- if (bsR(1) == 0)
- curr++;
- else
- curr--;
- }
- len[t][i] = (char)curr;
- }
- }
- /*--- Create the Huffman decoding tables ---*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (len[t][i] > maxLen)
- maxLen = len[t][i];
- if (len[t][i] < minLen)
- minLen = len[t][i];
- }
- hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
- minLens[t] = minLen;
- }
- }
- private void getAndMoveToFrontDecode() {
- char yy[] = new char[256];
- int i, j, nextSym, limitLast;
- int EOB, groupNo, groupPos;
- limitLast = baseBlockSize * blockSize100k;
- origPtr = bsGetIntVS(24);
- recvDecodingTables();
- EOB = nInUse+1;
- groupNo = -1;
- groupPos = 0;
- /*--
- Setting up the unzftab entries here is not strictly
- necessary, but it does save having to do it later
- in a separate pass, and so saves a block's worth of
- cache misses.
- --*/
- for (i = 0; i <= 255; i++)
- unzftab[i] = 0;
- for (i = 0; i <= 255; i++)
- yy[i] = (char) i;
- last = -1;
- {
- int zt, zn, zvec, zj;
- if (groupPos == 0) {
- groupNo++;
- groupPos = G_SIZE;
- }
- groupPos--;
- zt = selector[groupNo];
- zn = minLens[zt];
- zvec = bsR(zn);
- while (zvec > limit[zt][zn]) {
- zn++;
- {
- {
- while (bsLive < 1) {
- int zzi;
- char thech = 0;
- try {
- thech = (char)bsStream.read();
- } catch(IOException e) {
- compressedStreamEOF();
- }
- if(thech == -1) {
- compressedStreamEOF();
- }
- zzi = thech;
- bsBuff = (bsBuff << 8) | (zzi & 0xff);
- bsLive += 8;
- }
- }
- zj = (bsBuff >> (bsLive-1)) & 1;
- bsLive--;
- }
- zvec = (zvec << 1) | zj;
- }
- nextSym = perm[zt][zvec - base[zt][zn]];
- }
- while(true) {
- if (nextSym == EOB)
- break;
- if (nextSym == RUNA || nextSym == RUNB) {
- char ch;
- int s = -1;
- int N = 1;
- do {
- if (nextSym == RUNA)
- s = s + (0+1) * N;
- else if (nextSym == RUNB)
- s = s + (1+1) * N;
- N = N * 2;
- {
- int zt, zn, zvec, zj;
- if (groupPos == 0) {
- groupNo++;
- groupPos = G_SIZE;
- }
- groupPos--;
- zt = selector[groupNo];
- zn = minLens[zt];
- zvec = bsR(zn);
- while (zvec > limit[zt][zn]) {
- zn++;
- {
- {
- while (bsLive < 1) {
- int zzi;
- char thech = 0;
- try {
- thech = (char)bsStream.read();
- } catch(IOException e) {
- compressedStreamEOF();
- }
- if(thech == -1) {
- compressedStreamEOF();
- }
- zzi = thech;
- bsBuff = (bsBuff << 8) | (zzi & 0xff);
- bsLive += 8;
- }
- }
- zj = (bsBuff >> (bsLive-1)) & 1;
- bsLive--;
- }
- zvec = (zvec << 1) | zj;
- };
- nextSym = perm[zt][zvec - base[zt][zn]];
- }
- } while (nextSym == RUNA || nextSym == RUNB);
- s++;
- ch = seqToUnseq[yy[0]];
- unzftab[ch] += s;
- while (s > 0) {
- last++;
- ll8[last] = ch;
- s--;
- };
- if (last >= limitLast)
- blockOverrun();
- continue;
- } else {
- char tmp;
- last++;
- if (last >= limitLast)
- blockOverrun();
- tmp = yy[nextSym-1];
- unzftab[seqToUnseq[tmp]]++;
- ll8[last] = seqToUnseq[tmp];
- /*--
- This loop is hammered during decompression,
- hence the unrolling.
- for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
- --*/
- j = nextSym-1;
- for (; j > 3; j -= 4) {
- yy[j] = yy[j-1];
- yy[j-1] = yy[j-2];
- yy[j-2] = yy[j-3];
- yy[j-3] = yy[j-4];
- }
- for (; j > 0; j--)
- yy[j] = yy[j-1];
- yy[0] = tmp;
- {
- int zt, zn, zvec, zj;
- if (groupPos == 0) {
- groupNo++;
- groupPos = G_SIZE;
- }
- groupPos--;
- zt = selector[groupNo];
- zn = minLens[zt];
- zvec = bsR(zn);
- while (zvec > limit[zt][zn]) {
- zn++;
- {
- {
- while (bsLive < 1) {
- int zzi;
- char thech = 0;
- try {
- thech = (char)bsStream.read();
- } catch(IOException e) {
- compressedStreamEOF();
- }
- zzi = thech;
- bsBuff = (bsBuff << 8) | (zzi & 0xff);
- bsLive += 8;
- }
- }
- zj = (bsBuff >> (bsLive-1)) & 1;
- bsLive--;
- }
- zvec = (zvec << 1) | zj;
- };
- nextSym = perm[zt][zvec - base[zt][zn]];
- }
- continue;
- }
- }
- }
- private void setupBlock() {
- int cftab[] = new int[257];
- char ch;
- cftab[0] = 0;
- for (i = 1; i <= 256; i++)
- cftab[i] = unzftab[i-1];
- for (i = 1; i <= 256; i++)
- cftab[i] += cftab[i-1];
- for (i = 0; i <= last; i++) {
- ch = (char)ll8[i];
- tt[cftab[ch]] = i;
- cftab[ch]++;
- }
- cftab = null;
- tPos = tt[origPtr];
- count = 0;
- i2 = 0;
- ch2 = 256; /*-- not a char and not EOF --*/
- if (blockRandomised) {
- rNToGo = 0;
- rTPos = 0;
- setupRandPartA();
- } else {
- setupNoRandPartA();
- }
- }
- private void setupRandPartA() {
- if(i2 <= last) {
- chPrev = ch2;
- ch2 = ll8[tPos];
- tPos = tt[tPos];
- if (rNToGo == 0) {
- rNToGo = rNums[rTPos];
- rTPos++;
- if(rTPos == 512)
- rTPos = 0;
- }
- rNToGo--;
- ch2 ^= (int)((rNToGo == 1) ? 1 : 0);
- i2++;
- currentChar = ch2;
- currentState = RAND_PART_B_STATE;
- mCrc.updateCRC(ch2);
- } else {
- endBlock();
- initBlock();
- setupBlock();
- }
- }
- private void setupNoRandPartA() {
- if(i2 <= last) {
- chPrev = ch2;
- ch2 = ll8[tPos];
- tPos = tt[tPos];
- i2++;
- currentChar = ch2;
- currentState = NO_RAND_PART_B_STATE;
- mCrc.updateCRC(ch2);
- } else {
- endBlock();
- initBlock();
- setupBlock();
- }
- }
- private void setupRandPartB() {
- if (ch2 != chPrev) {
- currentState = RAND_PART_A_STATE;
- count = 1;
- setupRandPartA();
- } else {
- count++;
- if (count >= 4) {
- z = ll8[tPos];
- tPos = tt[tPos];
- if (rNToGo == 0) {
- rNToGo = rNums[rTPos];
- rTPos++;
- if(rTPos == 512)
- rTPos = 0;
- }
- rNToGo--;
- z ^= ((rNToGo == 1) ? 1 : 0);
- j2 = 0;
- currentState = RAND_PART_C_STATE;
- setupRandPartC();
- } else {
- currentState = RAND_PART_A_STATE;
- setupRandPartA();
- }
- }
- }
- private void setupRandPartC() {
- if(j2 < (int)z) {
- currentChar = ch2;
- mCrc.updateCRC(ch2);
- j2++;
- } else {
- currentState = RAND_PART_A_STATE;
- i2++;
- count = 0;
- setupRandPartA();
- }
- }
- private void setupNoRandPartB() {
- if (ch2 != chPrev) {
- currentState = NO_RAND_PART_A_STATE;
- count = 1;
- setupNoRandPartA();
- } else {
- count++;
- if (count >= 4) {
- z = ll8[tPos];
- tPos = tt[tPos];
- currentState = NO_RAND_PART_C_STATE;
- j2 = 0;
- setupNoRandPartC();
- } else {
- currentState = NO_RAND_PART_A_STATE;
- setupNoRandPartA();
- }
- }
- }
- private void setupNoRandPartC() {
- if(j2 < (int)z) {
- currentChar = ch2;
- mCrc.updateCRC(ch2);
- j2++;
- } else {
- currentState = NO_RAND_PART_A_STATE;
- i2++;
- count = 0;
- setupNoRandPartA();
- }
- }
- private void setDecompressStructureSizes(int newSize100k) {
- if (! (0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k
- && blockSize100k <= 9)) {
- // throw new IOException("Invalid block size");
- }
- blockSize100k = newSize100k;
- if(newSize100k == 0)
- return;
- int n = baseBlockSize * newSize100k;
- ll8 = new char[n];
- tt = new int[n];
- }
- }