/Utilities/Compression/Inflaters/Inflater.cs
# · C# · 873 lines · 496 code · 84 blank · 293 comment · 59 complexity · 572f27cac0a398b8f8780cc5d3ce924c MD5 · raw file
- // Based on Mike Krueger's SharpZipLib, Copyright (C) 2001 (GNU license).
- // Authors of the original java version: Jochen Hoenicke, John Leuner
- // See http://www.ISeeSharpCode.com for more information.
-
- using System;
- using Delta.Utilities.Compression.Checksums;
- using Delta.Utilities.Compression.Deflaters;
- using Delta.Utilities.Compression.Streams;
-
- namespace Delta.Utilities.Compression.Inflaters
- {
/// <summary>
/// Inflater is used to decompress data that has been compressed according
/// to the "deflate" standard described in RFC 1951, optionally wrapped in
/// the zlib container described in RFC 1950.
/// <para />
/// The usage is as follows. First you have to set some input with
/// <code>SetInput()</code>, then <code>Inflate()</code> it. If Inflate()
/// doesn't inflate any bytes there may be three reasons:
/// <ul>
/// <li>IsNeedingInput returns true because the input buffer is empty.
/// You have to provide more input with <code>SetInput()</code>.
/// NOTE: IsNeedingInput also returns true when the stream is finished.
/// </li>
/// <li>IsNeedingDictionary returns true, you have to provide a preset
/// dictionary with <code>SetDictionary()</code>.</li>
/// <li>IsFinished returns true, the inflater has finished.</li>
/// </ul>
/// Once the first output byte is produced, a dictionary will not be
/// needed at a later stage.
/// </summary>
- public class Inflater
- {
- #region Constants
/// <summary>
/// Base match lengths for the length symbols 257..285. The table is
/// indexed with (symbol - 257); each row below groups the entries that
/// share the same number of extra bits in CopyLengthExtraBits.
/// </summary>
private static readonly int[] CopyLengths = new int[]
{
    3, 4, 5, 6, 7, 8, 9, 10,
    11, 13, 15, 17,
    19, 23, 27, 31,
    35, 43, 51, 59,
    67, 83, 99, 115,
    131, 163, 195, 227,
    258
};
-
/// <summary>
/// Number of extra input bits that follow each length symbol 257..285.
/// Indexed with (symbol - 257), in parallel with CopyLengths.
/// </summary>
private static readonly int[] CopyLengthExtraBits = new int[]
{
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
};
-
/// <summary>
/// Base match distances for the distance symbols 0..29, indexed directly
/// by the symbol. Each row holds the entries that share the same number
/// of extra bits in CopyDistanceExtraBits.
/// </summary>
private static readonly int[] CopyDistanceOffsets = new int[]
{
    1, 2, 3, 4,
    5, 7,
    9, 13,
    17, 25,
    33, 49,
    65, 97,
    129, 193,
    257, 385,
    513, 769,
    1025, 1537,
    2049, 3073,
    4097, 6145,
    8193, 12289,
    16385, 24577
};
-
/// <summary>
/// Number of extra input bits that follow each distance symbol 0..29.
/// Indexed by the symbol, in parallel with CopyDistanceOffsets.
/// </summary>
private static readonly int[] CopyDistanceExtraBits = new int[]
{
    0, 0, 0, 0,
    1, 1,
    2, 2,
    3, 3,
    4, 4,
    5, 5,
    6, 6,
    7, 7,
    8, 8,
    9, 9,
    10, 10,
    11, 11,
    12, 12,
    13, 13
};
-
/// <summary>
/// Values of the <c>mode</c> field: the states the decoder steps through
/// while inflating a stream. Decode() and DecodeHuffman() dispatch on them.
/// </summary>
private const int DecodeHeaderState = 0;
private const int DecodeDictionaryState = 1;
private const int DecodeBlocksState = 2;
private const int DecodeStoredLength1 = 3;
private const int DecodeStoredLength2 = 4;
private const int DecodeStored = 5;
private const int DecodeDynamicHeader = 6;
private const int DecodeHuffmanState = 7;
private const int DecodeHuffmanLengthBits = 8;
private const int DecodeHuffmanDistance = 9;
private const int DecodeHuffmanDistanceBits = 10;
private const int DecodeChecksum = 11;
private const int DecodeFinished = 12;
- #endregion
-
- #region Public
/// <summary>
/// Gets a value indicating whether the input buffer has been used up.
/// When this is true, supply more data with SetInput().
/// NOTE: This property also returns true when the stream is finished.
/// </summary>
public bool IsNeedingInput
{
    // Simply forwards the state of the underlying input reader.
    get { return input.IsNeedingInput; }
}
-
/// <summary>
/// Gets a value indicating whether a preset dictionary has to be supplied
/// before any data can be inflated.
/// </summary>
public bool IsNeedingDictionary
{
    get
    {
        if (mode != DecodeDictionaryState)
        {
            return false;
        }

        // The dictionary is only requested once all bits of its checksum
        // have been read from the input.
        return neededBits == 0;
    }
}
-
/// <summary>
/// Gets a value indicating whether the inflater is done: the decoder has
/// reached its final state and the output window holds no more bytes, so
/// no input is needed and no output can be produced.
/// </summary>
public bool IsFinished
{
    get
    {
        if (mode != DecodeFinished)
        {
            return false;
        }

        // Decoded bytes may still be waiting in the window to be fetched.
        return outputWindow.GetAvailable() == 0;
    }
}
-
/// <summary>
/// Gets the Adler checksum. While IsNeedingDictionary is true (and thus no
/// output was produced yet) this is the checksum of the expected
/// dictionary as read from the stream; otherwise it is the checksum of
/// all uncompressed bytes returned by Inflate() so far.
/// </summary>
public int Adler
{
    get
    {
        if (IsNeedingDictionary)
        {
            return readAdler;
        }

        return (int)adler.Value;
    }
}
-
/// <summary>
/// Gets the total number of output bytes returned by Inflate().
/// </summary>
/// <returns>
/// The running count of bytes handed to callers.
/// </returns>
public int TotalOut
{
    get { return totalOut; }
}
-
/// <summary>
/// Gets the total number of compressed input bytes processed so far.
/// </summary>
/// <returns>
/// Everything passed to SetInput() minus the bytes still waiting in the
/// input buffer.
/// </returns>
public int TotalIn
{
    get
    {
        int unprocessed = RemainingInput;
        return totalIn - unprocessed;
    }
}
-
/// <summary>
/// Gets the number of input bytes that have not been processed yet. This
/// is useful when the end of the deflate stream has been reached and the
/// bytes following it are to be processed further.
/// </summary>
/// <returns>
/// The number of bytes of the input which have not been processed.
/// </returns>
public int RemainingInput
{
    get { return input.AvailableBytes; }
}
- #endregion
-
- #region Private
/// <summary>
/// The current decoder state; holds one of the Decode* state constants.
/// </summary>
private int mode;

/// <summary>
/// The adler checksum of the dictionary or of the decompressed stream, as
/// it is written in the header resp. footer of the compressed stream.
/// Only valid if mode is DecodeDictionaryState or DecodeChecksum.
/// </summary>
private int readAdler;

/// <summary>
/// The number of bits still needed to complete the current state. Valid
/// if mode is DecodeDictionaryState, DecodeChecksum,
/// DecodeHuffmanLengthBits or DecodeHuffmanDistanceBits.
/// </summary>
private int neededBits;

/// <summary>
/// Length of the match being decoded: the CopyLengths base value plus the
/// extra bits read for it. Passed to the output window's Repeat().
/// </summary>
private int repLength;

/// <summary>
/// Distance of the match being decoded: the CopyDistanceOffsets base
/// value plus the extra bits read for it. Passed to Repeat().
/// </summary>
private int repDist;

/// <summary>
/// Number of bytes of the current stored block that still have to be
/// copied to the output window.
/// </summary>
private int uncomprLen;

/// <summary>
/// True, if the last block flag was set in the last block of the inflated
/// stream. This means that the stream ends after the current block.
/// </summary>
private bool isLastBlock;

/// <summary>
/// The total number of inflated bytes handed out by Inflate().
/// </summary>
private int totalOut;

/// <summary>
/// The total number of bytes set with SetInput(). This is not the value
/// returned by the TotalIn property, since this also includes the
/// unprocessed input.
/// </summary>
private int totalIn;

/// <summary>
/// The noHeader flag that was given to the constructor. True means that
/// the inflated stream doesn't contain a Zlib header or footer.
/// </summary>
private readonly bool noHeader;

/// <summary>
/// Bit-level reader over the compressed input supplied via SetInput().
/// </summary>
private readonly StreamManipulator input;

/// <summary>
/// Window that receives decoded bytes until Inflate() copies them out.
/// </summary>
private readonly OutputWindow outputWindow;

/// <summary>
/// Header decoder of the current dynamic-trees block; created when such a
/// block starts and cleared by Reset().
/// </summary>
private InflaterDynHeader dynHeader;

/// <summary>
/// Huffman tree used for literal/length symbols of the current block.
/// </summary>
private InflaterHuffmanTree litlenTree;

/// <summary>
/// Huffman tree used for distance symbols of the current block.
/// </summary>
private InflaterHuffmanTree distTree;

/// <summary>
/// Running Adler-32 checksum over the bytes returned by Inflate(); also
/// used to verify a preset dictionary in SetDictionary().
/// </summary>
private readonly Adler32 adler;
- #endregion
-
- #region Constructors
/// <summary>
/// Creates a new inflater that expects RFC1950/Zlib header and footer
/// fields around the deflated data. Equivalent to passing
/// <c>false</c> for <c>noHeader</c> to the other constructor.
/// </summary>
public Inflater()
    : this(false)
{
}
-
- // Inflater()
-
/// <summary>
/// Creates a new inflater.
/// </summary>
/// <param name="noHeader">
/// True if no RFC1950/Zlib header and footer fields are expected in the
/// input data. This is used for GZIPed/Zipped input. For compatibility
/// with Sun JDK you should provide one byte of input more than needed in
/// this case.
/// </param>
public Inflater(bool noHeader)
{
    this.noHeader = noHeader;
    adler = new Adler32();
    input = new StreamManipulator();
    outputWindow = new OutputWindow();

    // Without a zlib wrapper there is no header to parse, so decoding
    // starts directly at the first deflate block.
    if (noHeader)
    {
        mode = DecodeBlocksState;
    }
    else
    {
        mode = DecodeHeaderState;
    }
}
-
- // Inflater(noHeader)
- #endregion
-
- #region Methods
/// <summary>
/// Puts the inflater back into its initial state so that a new stream can
/// be decompressed. All pending input and output is discarded.
/// </summary>
public void Reset()
{
    // Same starting state the constructor chooses.
    if (noHeader)
    {
        mode = DecodeBlocksState;
    }
    else
    {
        mode = DecodeHeaderState;
    }

    totalOut = 0;
    totalIn = 0;

    input.Reset();
    outputWindow.Reset();

    // Forget everything that belonged to the previous stream's blocks.
    dynHeader = null;
    litlenTree = null;
    distTree = null;
    isLastBlock = false;

    adler.Reset();
}
-
- // Reset()
-
/// <summary>
/// Reads and validates the two byte zlib/RFC1950 header and selects the
/// next decoder state.
/// </summary>
/// <returns>False if more input is needed, true once the header has been
/// consumed.</returns>
/// <exception cref="CompressionException">The header is invalid.
/// </exception>
private bool DecodeHeader()
{
    int rawHeader = input.PeekBits(16);
    if (rawHeader < 0)
    {
        // Fewer than 16 bits are available; nothing has been consumed.
        return false;
    }
    input.DropBits(16);

    // The two header bytes arrive in the "wrong" order for the checks
    // below, so swap them.
    int header = ((rawHeader << 8) | (rawHeader >> 8)) & 0xffff;

    bool isMultipleOf31 = header % 31 == 0;
    if (!isMultipleOf31)
    {
        throw new CompressionException("Header checksum illegal");
    }

    int compressionMethod = header & 0x0f00;
    if (compressionMethod != (Deflater.Deflated << 8))
    {
        throw new CompressionException("Compression Method unknown");
    }

    // Bits 12..14 hold the size of the backwards window. It is ignored
    // here; it could be used to make the inflater window more space
    // efficient, but the full window (15 bits) is needed most times anyway.

    bool hasPresetDictionary = (header & 0x0020) != 0;
    if (hasPresetDictionary)
    {
        // A 32 bit dictionary checksum follows the header.
        neededBits = 32;
        mode = DecodeDictionaryState;
    }
    else
    {
        mode = DecodeBlocksState;
    }

    return true;
}
-
- // DecodeHeader()
-
/// <summary>
/// Reads the dictionary checksum that follows the zlib header, one byte
/// at a time, into <c>readAdler</c>.
/// </summary>
/// <returns>
/// Always false: either more input is needed to complete the checksum, or
/// the checksum is complete and decoding cannot continue until a
/// dictionary has been supplied through SetDictionary().
/// </returns>
private bool DecodeDict()
{
    for (; neededBits > 0; neededBits -= 8)
    {
        int nextByte = input.PeekBits(8);
        if (nextByte < 0)
        {
            return false;
        }

        input.DropBits(8);
        readAdler = (readAdler << 8) | nextByte;
    }

    return false;
}
-
- // DecodeDict()
-
/// <summary>
/// Decodes the huffman encoded symbols of the current block into the
/// output window. The method is resumable: <c>mode</c>, <c>repLength</c>,
/// <c>repDist</c> and <c>neededBits</c> record how far the current match
/// had been decoded when the input ran out.
/// </summary>
/// <returns>
/// false if more input is needed, true if output window is
/// full or the current block ends.
/// </returns>
/// <exception cref="CompressionException">
/// if deflated stream is invalid.
/// </exception>
private bool DecodeHuffman()
{
    // 258 is the largest entry of CopyLengths, i.e. the longest match that
    // can be produced; only decode while such a match is sure to fit.
    int free = outputWindow.GetFreeSpace();
    while (free >= 258)
    {
        int symbol;
        switch (mode)
        {
            case DecodeHuffmanState:
                // This is the inner loop so it is optimized a bit: symbols
                // 0..255 are literals and are written straight through.
                while (((symbol = litlenTree.GetSymbol(input)) & ~0xff) == 0)
                {
                    outputWindow.Write(symbol);
                    if (--free < 258)
                    {
                        return true;
                    }
                }

                if (symbol < 257)
                {
                    if (symbol < 0)
                    {
                        // Not enough input to decode a complete symbol.
                        return false;
                    }

                    // symbol == 256: end of block
                    distTree = null;
                    litlenTree = null;
                    mode = DecodeBlocksState;
                    return true;
                }

                // symbol >= 257 here, so the index cannot be negative; only
                // the upper bound has to be checked explicitly.
                int lengthIndex = symbol - 257;
                if (lengthIndex >= CopyLengths.Length)
                {
                    throw new CompressionException(
                        "Illegal rep length code");
                }
                repLength = CopyLengths[lengthIndex];
                neededBits = CopyLengthExtraBits[lengthIndex];
                // fall through
                goto case DecodeHuffmanLengthBits;

            case DecodeHuffmanLengthBits:
                if (neededBits > 0)
                {
                    // Remember the state first so that decoding resumes
                    // here if the extra bits are not available yet.
                    mode = DecodeHuffmanLengthBits;
                    int lengthExtra = input.PeekBits(neededBits);
                    if (lengthExtra < 0)
                    {
                        return false;
                    }
                    input.DropBits(neededBits);
                    repLength += lengthExtra;
                }
                mode = DecodeHuffmanDistance;
                // fall through
                goto case DecodeHuffmanDistance;

            case DecodeHuffmanDistance:
                symbol = distTree.GetSymbol(input);
                if (symbol < 0)
                {
                    return false;
                }

                // symbol is non-negative here; reject codes beyond the
                // distance tables.
                if (symbol >= CopyDistanceOffsets.Length)
                {
                    throw new CompressionException("Illegal rep dist code");
                }
                repDist = CopyDistanceOffsets[symbol];
                neededBits = CopyDistanceExtraBits[symbol];

                // fall through
                goto case DecodeHuffmanDistanceBits;

            case DecodeHuffmanDistanceBits:
                if (neededBits > 0)
                {
                    mode = DecodeHuffmanDistanceBits;
                    int distanceExtra = input.PeekBits(neededBits);
                    if (distanceExtra < 0)
                    {
                        return false;
                    }
                    input.DropBits(neededBits);
                    repDist += distanceExtra;
                }

                // The match is complete: copy it and go back to decoding
                // literal/length symbols.
                outputWindow.Repeat(repLength, repDist);
                free -= repLength;
                mode = DecodeHuffmanState;
                break;

            default:
                throw new CompressionException("Inflater unknown mode");
        }
    }

    return true;
}
-
- // DecodeHuffman()
-
/// <summary>
/// Reads the adler checksum that follows the deflate stream and compares
/// it with the checksum computed over the inflated output.
/// </summary>
/// <returns>Always false: either more input is needed, or the checksum
/// matched and the decoder is finished.</returns>
/// <exception cref="CompressionException">If checksum doesn't match.
/// </exception>
private bool DecodeChksum()
{
    for (; neededBits > 0; neededBits -= 8)
    {
        int nextByte = input.PeekBits(8);
        if (nextByte < 0)
        {
            return false;
        }

        input.DropBits(8);
        readAdler = (readAdler << 8) | nextByte;
    }

    int computedAdler = (int)adler.Value;
    if (computedAdler != readAdler)
    {
        throw new CompressionException(
            "Adler chksum doesn't match: " + computedAdler +
            " vs. " + readAdler);
    }

    mode = DecodeFinished;
    return false;
}
-
- // DecodeChksum()
-
/// <summary>
/// Performs one step of the decoder state machine, dispatching on the
/// current <c>mode</c>.
/// </summary>
/// <returns>False if more input is needed, or if finished.</returns>
/// <exception cref="CompressionException">If deflated stream is invalid.
/// </exception>
private bool Decode()
{
    switch (mode)
    {
        case DecodeHeaderState:
            return DecodeHeader();

        case DecodeDictionaryState:
            return DecodeDict();

        case DecodeChecksum:
            return DecodeChksum();

        case DecodeBlocksState:
        {
            if (isLastBlock)
            {
                if (noHeader)
                {
                    // No zlib footer to verify: the stream ends here.
                    mode = DecodeFinished;
                    return false;
                }

                // The 32 bit checksum starts on the next byte boundary.
                input.SkipToByteBoundary();
                neededBits = 32;
                mode = DecodeChecksum;
                return true;
            }

            // Three header bits start every block; DecodeLastBlockType
            // interprets them.
            int blockHeader = input.PeekBits(3);
            if (blockHeader < 0)
            {
                return false;
            }
            input.DropBits(3);
            DecodeLastBlockType(blockHeader);
            return true;
        }

        case DecodeStoredLength1:
            // Length of the stored block.
            uncomprLen = input.PeekBits(16);
            if (uncomprLen < 0)
            {
                return false;
            }
            input.DropBits(16);
            mode = DecodeStoredLength2;
            // fall through
            goto case DecodeStoredLength2;

        case DecodeStoredLength2:
        {
            // The next 16 bits must be the bitwise complement of the length.
            int complement = input.PeekBits(16);
            if (complement < 0)
            {
                return false;
            }
            input.DropBits(16);
            if (complement != (uncomprLen ^ 0xffff))
            {
                throw new CompressionException(
                    "broken uncompressed block");
            }
            mode = DecodeStored;
            // fall through
            goto case DecodeStored;
        }

        case DecodeStored:
        {
            int copied = outputWindow.CopyStored(input, uncomprLen);
            uncomprLen -= copied;
            if (uncomprLen != 0)
            {
                // Block not complete yet; continue only if input remains.
                return !input.IsNeedingInput;
            }

            mode = DecodeBlocksState;
            return true;
        }

        case DecodeDynamicHeader:
            if (!dynHeader.Decode(input))
            {
                return false;
            }

            litlenTree = dynHeader.BuildLitLenTree();
            distTree = dynHeader.BuildDistTree();
            mode = DecodeHuffmanState;
            // fall through
            goto case DecodeHuffmanState;

        case DecodeHuffmanState:
        case DecodeHuffmanLengthBits:
        case DecodeHuffmanDistance:
        case DecodeHuffmanDistanceBits:
            return DecodeHuffman();

        case DecodeFinished:
            return false;

        default:
            throw new CompressionException(
                "Inflater.Decode unknown mode");
    }
}
-
- // Decode()
-
/// <summary>
/// Interprets the three header bits of a block: bit 0 marks the last
/// block of the stream, the remaining bits select the block type and with
/// it the next decoder state.
/// </summary>
/// <param name="type">The three block header bits.</param>
/// <exception cref="CompressionException">
/// The block type is none of stored, static trees or dynamic trees.
/// </exception>
private void DecodeLastBlockType(int type)
{
    bool finalBlockFlag = (type & 1) != 0;
    if (finalBlockFlag)
    {
        isLastBlock = true;
    }

    int blockType = type >> 1;
    if (blockType == DeflaterConstants.StoredBlock)
    {
        // The length fields of a stored block start on a byte boundary.
        input.SkipToByteBoundary();
        mode = DecodeStoredLength1;
    }
    else if (blockType == DeflaterConstants.StaticTrees)
    {
        litlenTree = InflaterHuffmanTree.defLitLenTree;
        distTree = InflaterHuffmanTree.defDistTree;
        mode = DecodeHuffmanState;
    }
    else if (blockType == DeflaterConstants.DynamicTrees)
    {
        dynHeader = new InflaterDynHeader();
        mode = DecodeDynamicHeader;
    }
    else
    {
        throw new CompressionException(
            "Unknown block type " + type);
    }
}
-
- // DecodeLastBlockType(type)
-
/// <summary>
/// Sets the preset dictionary from a whole array. This should only be
/// called if IsNeedingDictionary returns true, and it should set the same
/// dictionary that was used for deflating. The Adler property returns the
/// checksum of the dictionary needed.
/// </summary>
/// <param name="buffer">
/// The dictionary.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// buffer is null.
/// </exception>
public void SetDictionary(byte[] buffer)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // on buffer.Length.
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    SetDictionary(buffer, 0, buffer.Length);
}
-
- // SetDictionary(buffer)
-
/// <summary>
/// Sets the preset dictionary. This should only be called if
/// IsNeedingDictionary returns true, and it should set the same
/// dictionary that was used for deflating. The Adler property returns the
/// checksum of the dictionary needed.
/// </summary>
/// <param name="buffer">The dictionary.</param>
/// <param name="offset">The offset into buffer where the dictionary
/// starts.</param>
/// <param name="len">The length of the dictionary.</param>
/// <exception cref="System.InvalidOperationException">
/// No dictionary is needed.
/// </exception>
/// <exception cref="System.ArgumentNullException">
/// buffer is null.
/// </exception>
/// <exception cref="CompressionException">
/// The adler checksum for the buffer is invalid. The inflater keeps
/// waiting for a dictionary, so the call may be repeated with the correct
/// one.
/// </exception>
public void SetDictionary(byte[] buffer, int offset, int len)
{
    if (!IsNeedingDictionary)
    {
        throw new InvalidOperationException("No dictionary is needed");
    }

    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    // The running checksum is borrowed to hash the dictionary. It must be
    // cleared again in every case - also when the dictionary is rejected -
    // otherwise the stale bytes would corrupt a retry with the correct
    // dictionary and the checksum of the inflated data.
    int dictionaryAdler;
    try
    {
        adler.Update(buffer, offset, len);
        dictionaryAdler = (int)adler.Value;
    }
    finally
    {
        adler.Reset();
    }

    if (dictionaryAdler != readAdler)
    {
        throw new CompressionException("Wrong adler checksum");
    }

    outputWindow.CopyDict(buffer, offset, len);
    mode = DecodeBlocksState;
}
-
- // SetDictionary(buffer, offset, len)
-
/// <summary>
/// Sets a whole array as the input. This should only be called if
/// IsNeedingInput returns true.
/// </summary>
/// <param name="buffer">The input buffer.</param>
/// <exception cref="System.ArgumentNullException">
/// buffer is null.
/// </exception>
public void SetInput(byte[] buffer)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // on buffer.Length.
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    SetInput(buffer, 0, buffer.Length);
}
-
- // SetInput(buffer)
-
/// <summary>
/// Sets the input. This should only be called if IsNeedingInput returns
/// true.
/// </summary>
/// <param name="buffer">
/// The source of input data
/// </param>
/// <param name="offset">
/// The offset into buffer where the input starts.
/// </param>
/// <param name="length">
/// The number of bytes of input to use.
/// </param>
/// <exception cref="System.InvalidOperationException">
/// No input is needed. (Presumably raised by the underlying
/// StreamManipulator; this method performs no check of its own.)
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// The offset and/or length are wrong. (Presumably raised by the
/// underlying StreamManipulator as well.)
/// </exception>
public void SetInput(byte[] buffer, int offset, int length)
{
    // Hand the data over first: if the reader rejects the call by
    // throwing, the byte counter stays untouched.
    input.SetInput(buffer, offset, length);
    totalIn = totalIn + length;
}
-
- // SetInput(buffer, offset, length)
-
/// <summary>
/// Inflates the compressed stream into the whole output buffer. If this
/// returns 0, you should check whether IsNeedingDictionary,
/// IsNeedingInput or IsFinished returns true, to determine why no further
/// output is produced.
/// </summary>
/// <param name="buffer">The output buffer.</param>
/// <returns>
/// the number of bytes written to the buffer, 0 if no further
/// output can be produced.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// buffer is null.
/// </exception>
/// <exception cref="CompressionException">
/// if deflated stream is invalid.
/// </exception>
public int Inflate(byte[] buffer)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // on buffer.Length.
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    return Inflate(buffer, 0, buffer.Length);
}
-
- // Inflate(buffer)
-
/// <summary>
/// Inflates the compressed stream to the output buffer. If this
/// returns 0, you should check whether IsNeedingDictionary,
/// IsNeedingInput or IsFinished returns true, to determine why no
/// further output is produced.
/// </summary>
/// <param name="buffer">The output buffer.</param>
/// <param name="offset">The offset into buffer where the output should
/// start.</param>
/// <param name="len">The maximum length of the output. May be zero, in
/// which case nothing is written but the decoder still advances one
/// step.</param>
/// <returns>The number of bytes written to the buffer, 0 if no further
/// output can be produced.</returns>
/// <exception cref="System.ArgumentNullException">
/// buffer is null.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// if len is negative, or if offset and len do not describe a range
/// inside buffer.
/// </exception>
/// <exception cref="CompressionException">
/// if deflated stream is invalid.
/// </exception>
public int Inflate(byte[] buffer, int offset, int len)
{
    if (len < 0)
    {
        // The first constructor argument is the parameter name, the
        // second one the message.
        throw new ArgumentOutOfRangeException("len", "len < 0");
    }

    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    // Written as a subtraction so that offset + len cannot overflow.
    if (offset < 0 || offset > buffer.Length - len)
    {
        throw new ArgumentOutOfRangeException(
            "offset", "offset/len outside buffer bounds");
    }

    // Special case: len may be zero
    if (len == 0)
    {
        if (!IsFinished)
        {
            // -jr- 08-Nov-2003 INFLATE_BUG fix..
            // Let the decoder make progress even though no output can be
            // handed over.
            Decode();
        }
        return 0;
    }

    int count = 0;
    do
    {
        if (mode != DecodeChecksum)
        {
            // Don't give away any output, if we are waiting for the
            // checksum in the input stream.
            //
            // With this trick we have always:
            //   IsNeedingInput and not IsFinished
            //   implies more output can be produced.
            int more = outputWindow.CopyOutput(buffer, offset, len);
            adler.Update(buffer, offset, more);
            offset += more;
            count += more;
            totalOut += more;
            len -= more;
            if (len == 0)
            {
                return count;
            }
        }
    } while (Decode() ||
             (outputWindow.GetAvailable() > 0 &&
              mode != DecodeChecksum));

    return count;
}
-
- // Inflate(buf, offset, len)
- #endregion
- }
- }