/Utilities/Compression/Deflaters/DeflaterEngine.cs
C# | 937 lines | 606 code | 97 blank | 234 comment | 98 complexity | 2d1e4feae1e961ca51f7cc678255273e MD5 | raw file
Possible License(s): Apache-2.0
- // Based on Mike Krueger's SharpZipLib, Copyright (C) 2001 (GNU license).
- // Authors of the original java version: Jochen Hoenicke, John Leuner
- // See http://www.ISeeSharpCode.com for more information.
-
- using System;
- using Delta.Utilities.Compression.Checksums;
-
- namespace Delta.Utilities.Compression.Deflaters
- {
- /// <summary>
- /// Low level compression engine for deflate algorithm which uses a
- /// 32K sliding window with secondary compression from
- /// Huffman/Shannon-Fano codes.
- /// </summary>
- /// <remarks>
- /// DEFLATE ALGORITHM:
- ///
- /// The uncompressed stream is inserted into the window array. When
- /// the window array is full the first half is thrown away and the
- /// second half is copied to the beginning.
- ///
- /// The head array is a hash table. Three characters build a hash value,
- /// and the table entry for that value holds the index in window of the
- /// last string with this hash. The prev array implements a linked list
- /// of matches with the same hash: prev[index &amp; WMask] points to the
- /// previous index with the same hash.
- /// </remarks>
- public class DeflaterEngine : DeflaterConstants
- {
- #region Constants
- /// <summary>
- /// Distance limit (4096 bytes): a match of only MinMatch length that
- /// starts further back than this is discarded by the slow deflater.
- /// </summary>
- private const int LengthTooFar = 4096;
- #endregion
-
- #region Adler (Public)
/// <summary>
/// Gets the current value of the Adler checksum, cast to an int.
/// </summary>
public int Adler
{
    get { return (int)adler.Value; }
}
- #endregion
-
- #region TotalIn (Public)
/// <summary>
/// Gets the number of input bytes that have been copied into the window
/// since the last reset.
/// </summary>
public int TotalIn
{
    get { return totalIn; }
}
- #endregion
-
- #region Strategy (Public)
/// <summary>
/// Gets or sets the <see cref="DeflateStrategy">deflate strategy</see>
/// consulted by the fast and slow deflaters when looking for matches.
/// </summary>
public DeflateStrategy Strategy
{
    get { return strategy; }
    set { strategy = value; }
}
- #endregion
-
- #region Private
-
- #region ins_h (Private)
- /// <summary>
- /// Running hash of the window bytes at strstart; seeded by UpdateHash
- /// and advanced by InsertString.
- /// </summary>
- private int ins_h;
- #endregion
-
- #region head (Private)
- /// <summary>
- /// Hashtable, hashing three characters to an index for window, so
- /// that window[index]..window[index+2] have this hash code.
- /// Note that the array should really be unsigned short, so values
- /// read from it must be masked with 0xffff.
- /// </summary>
- private readonly short[] head;
- #endregion
-
- #region prev (Private)
- /// <summary>
- /// <code>prev[index &amp; WMask]</code> points to the previous index that
- /// has the same hash code as the string starting at index.
- /// This way entries with the same hash code are in a linked list.
- /// Note that the array should really be unsigned short, so values
- /// read from it must be masked with 0xffff.
- /// </summary>
- private readonly short[] prev;
- #endregion
-
- #region matchStart (Private)
- /// <summary>
- /// Window index at which the best match found by FindLongestMatch starts.
- /// </summary>
- private int matchStart;
- #endregion
-
- #region matchLen (Private)
- /// <summary>
- /// Length of the current match; MinMatch - 1 means no usable match.
- /// </summary>
- private int matchLen;
- #endregion
-
- #region prevAvailable (Private)
- /// <summary>
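- /// True while the byte at strstart - 1 is still waiting to be emitted
- /// as a literal (set and cleared by the slow deflater).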
- /// </summary>
- private bool prevAvailable;
- #endregion
-
- #region blockStart (Private)
- /// <summary>
- /// Window index of the first byte that has not been flushed in a block yet.
- /// </summary>
- private int blockStart;
- #endregion
-
- #region strstart (Private)
- /// <summary>
- /// Window index of the byte currently being processed.
- /// </summary>
- private int strstart;
- #endregion
-
- #region lookahead (Private)
- /// <summary>
- /// lookahead is the number of characters starting at strstart in
- /// window that are valid.
- /// So window[strstart] until window[strstart+lookahead-1] are valid
- /// characters.
- /// </summary>
- private int lookahead;
- #endregion
-
- #region window (Private)
- /// <summary>
- /// This array contains the part of the uncompressed stream that
- /// is of relevance. The current character is indexed by strstart.
- /// </summary>
- private readonly byte[] window;
- #endregion
-
- #region strategy (Private)
- /// <summary>
- /// Deflate Strategy
- /// </summary>
- private DeflateStrategy strategy;
- #endregion
-
- #region max_chain (Private)
- /// <summary>
- /// Maximum number of hash chain entries FindLongestMatch examines;
- /// taken from the MaxChain table by SetLevel.
- /// </summary>
- private int max_chain;
- #endregion
-
- #region max_lazy (Private)
- /// <summary>
- /// Longest match for which DeflateFast still inserts every matched
- /// position into the hash table; taken from the MaxLazy table by SetLevel.
- /// </summary>
- private int max_lazy;
- #endregion
-
- #region niceLength (Private)
- /// <summary>
- /// FindLongestMatch stops searching once a match of at least this
- /// length is found; taken from the NiceLength table by SetLevel.
- /// </summary>
- private int niceLength;
- #endregion
-
- #region goodLength (Private)
- /// <summary>
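- /// When the match carried in is at least this long, FindLongestMatch
- /// follows only a quarter of the hash chain; taken from the GoodLength
- /// table by SetLevel.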
- /// </summary>
- private int goodLength;
- #endregion
-
- #region comprFunc (Private)
- /// <summary>
- /// The current compression function.
- /// </summary>
- private int comprFunc;
- #endregion
-
- #region inputBuf (Private)
- /// <summary>
- /// The input data for compression.
- /// </summary>
- private byte[] inputBuf;
- #endregion
-
- #region totalIn (Private)
- /// <summary>
- /// The total bytes of input read.
- /// </summary>
- private int totalIn;
- #endregion
-
- #region inputOff (Private)
- /// <summary>
- /// The offset into inputBuf, where input data starts.
- /// </summary>
- private int inputOff;
- #endregion
-
- #region inputEnd (Private)
- /// <summary>
- /// The end offset of the input data.
- /// </summary>
- private int inputEnd;
- #endregion
-
- #region pending (Private)
- /// <summary>
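- /// Pending output buffer supplied to the constructor; Deflate keeps
- /// looping only while it reports IsFlushed.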
- /// </summary>
- private readonly DeflaterPending pending;
- #endregion
-
- #region huffman (Private)
- /// <summary>
- /// Huffman encoder that tallies literals and matches and flushes blocks.
- /// </summary>
- private readonly DeflaterHuffman huffman;
- #endregion
-
- #region adler (Private)
- /// <summary>
- /// The adler checksum
- /// </summary>
- private readonly Adler32 adler;
- #endregion
-
- #endregion
-
- #region Constructors
/// <summary>
/// Creates a deflater engine bound to the given pending buffer. The same
/// buffer is handed to the Huffman encoder created here.
/// </summary>
/// <param name="setPending">
/// Pending buffer to use; must not be null.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="setPending"/> is null.
/// </exception>
public DeflaterEngine(DeflaterPending setPending)
{
    // Fail fast: a null buffer would otherwise only surface later as a
    // NullReferenceException inside Deflate.
    if (setPending == null)
    {
        throw new ArgumentNullException("setPending");
    }

    pending = setPending;
    huffman = new DeflaterHuffman(setPending);
    adler = new Adler32();

    window = new byte[2 * WSize];
    head = new short[HashSize];
    prev = new short[WSize];

    // We start at index 1, to avoid an implementation deficiency, that
    // we cannot build a repeat pattern at index 0.
    blockStart = strstart = 1;
}
- #endregion
-
- #region Reset (Public)
/// <summary>
/// Resets the engine to its initial state: the Huffman encoder and the
/// checksum are reset, the window positions and counters are rewound and
/// both hash tables are cleared.
/// </summary>
public void Reset()
{
    huffman.Reset();
    adler.Reset();

    // Index 0 is never used as a string start (see the constructor).
    strstart = 1;
    blockStart = 1;
    lookahead = 0;
    totalIn = 0;
    prevAvailable = false;
    matchLen = MinMatch - 1;

    Array.Clear(head, 0, HashSize);
    Array.Clear(prev, 0, WSize);
}
- #endregion
-
- #region ResetAdler (Public)
/// <summary>
/// Resets only the Adler checksum; all other engine state is kept.
/// </summary>
public void ResetAdler()
{
    adler.Reset();
}
- #endregion
-
- #region SetLevel (Public)
/// <summary>
/// Sets the deflate level (0-9). The level selects the tuning parameters
/// (good length, max lazy, nice length, max chain) and the compression
/// function. When the compression function changes, the data collected
/// by the old function is flushed first.
/// </summary>
/// <param name="lvl">Compression level used as index into the level
/// tables.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="lvl"/> is not a valid table index.
/// </exception>
public void SetLevel(int lvl)
{
    // Validate before touching any state, so an invalid level is reported
    // as an argument error instead of an IndexOutOfRangeException.
    // NOTE(review): assumes all level tables have the same length as
    // CompressionFunction - confirm against DeflaterConstants.
    if (lvl < 0 ||
        lvl >= CompressionFunction.Length)
    {
        throw new ArgumentOutOfRangeException("lvl");
    }

    goodLength = GoodLength[lvl];
    max_lazy = MaxLazy[lvl];
    niceLength = NiceLength[lvl];
    max_chain = MaxChain[lvl];

    int newFunc = CompressionFunction[lvl];
    if (newFunc == comprFunc)
    {
        return;
    }

    // Finish whatever the previous compression function has collected.
    switch (comprFunc)
    {
        case CompressionDeflateStored:
            if (strstart > blockStart)
            {
                huffman.FlushStoredBlock(window, blockStart,
                    strstart - blockStart, false);
                blockStart = strstart;
            }
            // The stored deflater does not maintain the hash.
            UpdateHash();
            break;

        case CompressionDeflateFast:
            if (strstart > blockStart)
            {
                huffman.FlushBlock(window, blockStart,
                    strstart - blockStart, false);
                blockStart = strstart;
            }
            break;

        case CompressionDeflateSlow:
            // Emit the literal the lazy matcher was still holding back.
            if (prevAvailable)
            {
                huffman.TallyLit(window[strstart - 1] & 0xff);
            }
            if (strstart > blockStart)
            {
                huffman.FlushBlock(window, blockStart,
                    strstart - blockStart, false);
                blockStart = strstart;
            }
            prevAvailable = false;
            matchLen = MinMatch - 1;
            break;
    }

    comprFunc = newFunc;
}
- #endregion
-
- #region FillWindow (Public)
/// <summary>
/// Copies pending input into the window (sliding the window first when
/// strstart has moved too far), feeds the copied bytes to the checksum
/// and refreshes the running hash when enough lookahead is available.
/// </summary>
public void FillWindow()
{
    // If the window is almost full and there is insufficient lookahead,
    // move the upper half to the lower one to make room in the upper half.
    if (strstart >= WSize + MaxDistance)
    {
        SlideWindow();
    }

    // Read input as long as the lookahead is too small and input is left.
    while (lookahead < MinLookAhead && inputOff < inputEnd)
    {
        int freeSpace = 2 * WSize - lookahead - strstart;
        int available = inputEnd - inputOff;
        int count = Math.Min(freeSpace, available);

        Array.Copy(inputBuf, inputOff, window, strstart + lookahead, count);
        adler.Update(inputBuf, inputOff, count);

        inputOff += count;
        totalIn += count;
        lookahead += count;
    }

    if (lookahead >= MinMatch)
    {
        UpdateHash();
    }
}
- #endregion
-
- #region SetDictionary (Public)
/// <summary>
/// Sets the compression dictionary. The whole range is added to the
/// checksum; at most the last MaxDistance bytes are copied into the
/// window and entered into the hash table. Dictionaries shorter than
/// MinMatch only affect the checksum.
/// </summary>
/// <param name="buffer">Buffer containing the dictionary.</param>
/// <param name="offset">Index of the first dictionary byte.</param>
/// <param name="length">Number of dictionary bytes.</param>
public void SetDictionary(byte[] buffer, int offset, int length)
{
    adler.Update(buffer, offset, length);

    if (length < MinMatch)
    {
        return;
    }

    // Only the tail of an oversized dictionary can ever be referenced.
    int start = offset;
    int count = length;
    if (count > MaxDistance)
    {
        start += count - MaxDistance;
        count = MaxDistance;
    }

    Array.Copy(buffer, start, window, strstart, count);
    UpdateHash();

    // Insert every position that still has MinMatch bytes after it; the
    // last two positions are skipped over below.
    for (int remaining = count - 2; remaining > 0; remaining--)
    {
        InsertString();
        strstart++;
    }

    strstart += 2;
    blockStart = strstart;
}
- #endregion
-
- #region Deflate (Public)
/// <summary>
/// Drives the actual compression: fills the window and runs the current
/// compression function repeatedly while progress is made and the
/// pending buffer is flushed.
/// </summary>
/// <param name="flush">True to flush once all input has been read.</param>
/// <param name="finish">True if this is the end of the stream.</param>
/// <returns>The progress flag of the last compression step.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the current compression function is unknown.
/// </exception>
public bool Deflate(bool flush, bool finish)
{
    bool madeProgress;
    do
    {
        FillWindow();

        // Flushing is only allowed once the input is used up.
        bool flushNow = flush && inputOff == inputEnd;

        if (comprFunc == CompressionDeflateStored)
        {
            madeProgress = DeflateStored(flushNow, finish);
        }
        else if (comprFunc == CompressionDeflateFast)
        {
            madeProgress = DeflateFast(flushNow, finish);
        }
        else if (comprFunc == CompressionDeflateSlow)
        {
            madeProgress = DeflateSlow(flushNow, finish);
        }
        else
        {
            throw new InvalidOperationException("unknown comprFunc");
        }
        // Repeat while we have no pending output and progress was made
    } while (pending.IsFlushed && madeProgress);

    return madeProgress;
}
- #endregion
-
- #region SetInput (Public)
/// <summary>
/// Sets input data to be deflated.
/// Should only be called when <code>NeedsInput()</code> returns true.
/// </summary>
/// <param name="buf">The buffer containing input data.</param>
/// <param name="off">The index of the first byte of data.</param>
/// <param name="len">The number of bytes of data to use as input.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the previous input has not been completely processed.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="buf"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="off"/> or <paramref name="len"/> is
/// negative or the range does not fit into <paramref name="buf"/>.
/// </exception>
public void SetInput(byte[] buf, int off, int len)
{
    if (inputOff < inputEnd)
    {
        throw new InvalidOperationException(
            "Old input was not completely processed");
    }

    if (buf == null)
    {
        throw new ArgumentNullException("buf");
    }

    if (off < 0)
    {
        throw new ArgumentOutOfRangeException("off");
    }

    // Compare against the remaining space instead of computing off + len
    // first, so the check cannot overflow and does not depend on the
    // checked/unchecked setting of the build.
    if (len < 0 ||
        len > buf.Length - off)
    {
        throw new ArgumentOutOfRangeException("len");
    }

    inputBuf = buf;
    inputOff = off;
    inputEnd = off + len;
}
- #endregion
-
- #region NeedsInput (Public)
/// <summary>
/// Returns true when all input passed to <see cref="SetInput">SetInput</see>
/// has been consumed, i.e. new input can be set.
/// </summary>
public bool NeedsInput()
{
    return inputOff == inputEnd;
}
- #endregion
-
- #region Methods (Private)
-
- #region UpdateHash
/// <summary>
/// Seeds the running hash from the two window bytes at strstart and
/// strstart + 1.
/// </summary>
private void UpdateHash()
{
    int first = window[strstart];
    int second = window[strstart + 1];
    ins_h = (first << HashShift) ^ second;
}
- #endregion
-
- #region InsertString
/// <summary>
/// Inserts the string starting at strstart into the hash table: the
/// previous head of its hash chain is linked into prev and strstart
/// becomes the new head.
/// </summary>
/// <returns>
/// The window index that was stored for this hash before the insertion
/// (0 when the slot was still cleared).
/// </returns>
private int InsertString()
{
    int hash =
        ((ins_h << HashShift) ^ window[strstart + (MinMatch - 1)]) &
        HashMask;

    short previousHead = head[hash];
    prev[strstart & WMask] = previousHead;

    // The tables hold unsigned 16 bit indices in signed shorts. Window
    // indices above short.MaxValue must wrap instead of throwing when the
    // assembly is compiled with overflow checking.
    head[hash] = unchecked((short)strstart);

    ins_h = hash;
    return previousHead & 0xffff;
}
- #endregion
-
- #region SlideWindow
/// <summary>
/// Moves the upper half of the window to the lower half and shifts all
/// window positions and hash table entries down by WSize. Entries that
/// pointed into the discarded half are reset to 0.
/// </summary>
private void SlideWindow()
{
    Array.Copy(window, WSize, window, 0, WSize);

    strstart -= WSize;
    blockStart -= WSize;
    matchStart -= WSize;

    // Slide the hash table (could be avoided with 32 bit values
    // at the expense of memory usage).
    for (int slot = 0; slot < HashSize; slot++)
    {
        int index = head[slot] & 0xffff;
        if (index >= WSize)
        {
            head[slot] = (short)(index - WSize);
        }
        else
        {
            head[slot] = 0;
        }
    }

    // Slide the prev table the same way.
    for (int slot = 0; slot < WSize; slot++)
    {
        int index = prev[slot] & 0xffff;
        if (index >= WSize)
        {
            prev[slot] = (short)(index - WSize);
        }
        else
        {
            prev[slot] = 0;
        }
    }
}
- #endregion
-
- #region FindLongestMatch
/// <summary>
/// Walks the hash chain starting at <paramref name="curMatch"/> and looks
/// for the longest string in the window matching the string at strstart.
/// On success matchStart and matchLen describe the best match.
///
/// Preconditions:
/// <code>
/// strstart + MaxMatch &lt;= window.Length.</code>
/// </summary>
/// <param name="curMatch">
/// Window index of the first candidate, i.e. the head of the hash chain.
/// </param>
/// <returns>
/// True if a match of at least the minimum length was found.
/// </returns>
private bool FindLongestMatch(int curMatch)
{
    short[] chain = prev;
    int remainingChain = max_chain;
    int niceLimit = niceLength;

    int scan = strstart;
    int bestEnd = strstart + matchLen;
    int bestLength = Math.Max(matchLen, MinMatch - 1);
    int limit = Math.Max(strstart - MaxDistance, 0);
    int scanStop = strstart + MaxMatch - 1;

    // The two bytes a candidate has to equal to beat the current best.
    byte endPrevious = window[bestEnd - 1];
    byte endByte = window[bestEnd];

    // Do not waste too much time if we already have a good match:
    if (bestLength >= goodLength)
    {
        remainingChain >>= 2;
    }

    // Do not look for matches beyond the end of the input.
    // This is necessary to make deflate deterministic.
    if (niceLimit > lookahead)
    {
        niceLimit = lookahead;
    }

    do
    {
        // Cheap filter: the bytes at the end of the current best length
        // and the first two bytes must agree.
        if (window[curMatch + bestLength] == endByte &&
            window[curMatch + bestLength - 1] == endPrevious &&
            window[curMatch] == window[scan] &&
            window[curMatch + 1] == window[scan + 1])
        {
            int match = curMatch + 2;
            scan += 2;

            // The end of the scan range is only checked every 8th
            // comparison; the unrolled form must be kept as it is.
            while (window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   window[++scan] == window[++match] &&
                   scan < scanStop)
            {
            }

            if (scan > bestEnd)
            {
                matchStart = curMatch;
                bestEnd = scan;
                bestLength = scan - strstart;

                if (bestLength >= niceLimit)
                {
                    break;
                }

                endPrevious = window[bestEnd - 1];
                endByte = window[bestEnd];
            }

            scan = strstart;
        }
    } while ((curMatch = (chain[curMatch & WMask] & 0xffff)) > limit &&
             --remainingChain != 0);

    // Never report more than the available lookahead.
    matchLen = Math.Min(bestLength, lookahead);
    return matchLen >= MinMatch;
}
- #endregion
-
- #region DeflateStored
/// <summary>
/// Compression function for stored (uncompressed) blocks: consumes the
/// whole lookahead and writes a stored block when the block is full, may
/// move out of the window, or a flush is requested.
/// </summary>
/// <param name="flush">True to write out everything collected.</param>
/// <param name="finish">True if this is the end of the stream.</param>
/// <returns>
/// False if there was nothing to do or the last block was written,
/// otherwise true.
/// </returns>
private bool DeflateStored(bool flush, bool finish)
{
    if (lookahead == 0 && !flush)
    {
        return false;
    }

    strstart += lookahead;
    lookahead = 0;

    int pendingLength = strstart - blockStart;

    bool blockIsFull = pendingLength >= MaxBlockSize;
    bool mayLeaveWindow = blockStart < WSize && pendingLength >= MaxDistance;
    if (!(blockIsFull || mayLeaveWindow || flush))
    {
        return true;
    }

    // A stored block cannot exceed MaxBlockSize; the rest stays for the
    // next call, so this cannot be the last block.
    bool isLastBlock = finish;
    if (pendingLength > MaxBlockSize)
    {
        pendingLength = MaxBlockSize;
        isLastBlock = false;
    }

    huffman.FlushStoredBlock(window, blockStart, pendingLength, isLastBlock);
    blockStart += pendingLength;
    return !isLastBlock;
}
- #endregion
-
- #region DeflateFast
/// <summary>
/// Fast compression function: emits a match as soon as one is found at
/// the current position, otherwise a literal, and flushes a block
/// whenever the Huffman buffer is full.
/// </summary>
/// <param name="flush">True to write out everything collected.</param>
/// <param name="finish">True if this is the end of the stream.</param>
/// <returns>
/// False if more input is needed, everything was flushed or the last
/// block was written; otherwise true.
/// </returns>
private bool DeflateFast(bool flush, bool finish)
{
    if (lookahead < MinLookAhead && !flush)
    {
        return false;
    }

    while (lookahead >= MinLookAhead || flush)
    {
        if (lookahead == 0)
        {
            // We are flushing everything
            huffman.FlushBlock(window, blockStart, strstart - blockStart,
                finish);
            blockStart = strstart;
            return false;
        }

        if (strstart > 2 * WSize - MinLookAhead)
        {
            // slide window, as findLongestMatch needs this. This should only
            // happen when flushing and the window is almost full.
            SlideWindow();
        }

        bool blockFull;
        int hashHead;
        if (lookahead >= MinMatch &&
            (hashHead = InsertString()) != 0 &&
            strategy != DeflateStrategy.HuffmanOnly &&
            strstart - hashHead <= MaxDistance &&
            FindLongestMatch(hashHead))
        {
            // FindLongestMatch sets matchStart and matchLen.
            // Only remember whether the buffer filled up here. The block
            // must not be flushed before strstart has moved past the
            // match, otherwise the flushed range [blockStart, strstart)
            // would not cover the bytes of the match just tallied.
            blockFull = huffman.TallyDist(strstart - matchStart, matchLen);

            lookahead -= matchLen;
            if (matchLen <= max_lazy && lookahead >= MinMatch)
            {
                // Short match: enter every matched position into the hash.
                while (--matchLen > 0)
                {
                    ++strstart;
                    InsertString();
                }
                ++strstart;
            }
            else
            {
                strstart += matchLen;
                if (lookahead >= MinMatch - 1)
                {
                    UpdateHash();
                }
            }
            matchLen = MinMatch - 1;
        }
        else
        {
            // No match found
            huffman.TallyLit(window[strstart] & 0xff);
            ++strstart;
            --lookahead;
            blockFull = huffman.IsFull();
        }

        if (blockFull)
        {
            bool lastBlock = finish && lookahead == 0;
            huffman.FlushBlock(window, blockStart, strstart - blockStart,
                lastBlock);
            blockStart = strstart;
            return !lastBlock;
        }
    }

    return true;
}
- #endregion
-
- #region DeflateSlow
/// <summary>
/// Slow compression function with lazy matching: the match found at the
/// previous position is only emitted if the current position does not
/// yield a longer one. Flushes a block whenever the Huffman buffer is
/// full.
/// </summary>
/// <param name="flush">True to write out everything collected.</param>
/// <param name="finish">True if this is the end of the stream.</param>
/// <returns>
/// False if more input is needed, everything was flushed or the last
/// block was written; otherwise true.
/// </returns>
private bool DeflateSlow(bool flush, bool finish)
{
    if (!flush && lookahead < MinLookAhead)
    {
        return false;
    }

    while (flush || lookahead >= MinLookAhead)
    {
        if (lookahead == 0)
        {
            // Emit the literal that was still held back.
            if (prevAvailable)
            {
                huffman.TallyLit(window[strstart - 1] & 0xff);
            }
            prevAvailable = false;

            // We are flushing everything
            huffman.FlushBlock(window, blockStart, strstart - blockStart,
                finish);
            blockStart = strstart;
            return false;
        }

        if (strstart >= 2 * WSize - MinLookAhead)
        {
            // slide window, as findLongestMatch need this. This should only
            // happen when flushing and the window is almost full.
            SlideWindow();
        }

        // Remember the match of the previous position before searching
        // at the current one.
        int previousStart = matchStart;
        int previousLength = matchLen;
        DeflateSlowCheckLookAhead();
        DeflateSlowCheckPreviousMatch(previousStart, previousLength);

        if (huffman.IsFull())
        {
            // A literal that is still held back belongs to the next block.
            int blockLength = strstart - blockStart - (prevAvailable ? 1 : 0);
            bool isLastBlock = finish && lookahead == 0 && !prevAvailable;

            huffman.FlushBlock(window, blockStart, blockLength, isLastBlock);
            blockStart += blockLength;
            return !isLastBlock;
        }
    }

    return true;
}
- #endregion
-
- #region DeflateSlowCheckLookAhead
/// <summary>
/// Searches for a match at the current position (when at least MinMatch
/// bytes of lookahead exist) and discards it again if it is short and
/// either the Filtered strategy is active or it is a minimum-length
/// match lying too far back.
/// </summary>
private void DeflateSlowCheckLookAhead()
{
    if (lookahead < MinMatch)
    {
        return;
    }

    int hashHead = InsertString();
    if (strategy == DeflateStrategy.HuffmanOnly ||
        hashHead == 0 ||
        strstart - hashHead > MaxDistance ||
        !FindLongestMatch(hashHead))
    {
        return;
    }

    // FindLongestMatch has set matchStart and matchLen.
    if (matchLen > 5)
    {
        return;
    }

    // Discard match if too small and too far away
    bool tooFarForMinimum =
        matchLen == MinMatch && strstart - matchStart > LengthTooFar;
    if (strategy == DeflateStrategy.Filtered || tooFarForMinimum)
    {
        matchLen = MinMatch - 1;
    }
}
- #endregion
-
- #region DeflateSlowCheckPreviousMatch
/// <summary>
/// Decides between the match of the previous position and the current
/// one. If the previous match is at least as long, it is emitted and the
/// matched bytes are skipped (and hashed); otherwise the held-back
/// literal is emitted and the current byte becomes the held-back one.
/// </summary>
/// <param name="prevMatch">Window index where the previous match starts.
/// </param>
/// <param name="prevLen">Length of the previous match.</param>
/// <returns>
/// The value of <paramref name="prevLen"/> after processing: unchanged
/// when no match was emitted, otherwise the run-down skip counter.
/// </returns>
private int DeflateSlowCheckPreviousMatch(int prevMatch, int prevLen)
{
    bool previousWins = prevLen >= MinMatch && matchLen <= prevLen;

    if (!previousWins)
    {
        if (prevAvailable)
        {
            huffman.TallyLit(window[strstart - 1] & 0xff);
        }
        prevAvailable = true;
        strstart++;
        lookahead--;
        return prevLen;
    }

    // previous match was better; it started one byte before strstart.
    huffman.TallyDist(strstart - 1 - prevMatch, prevLen);

    // One matched byte lies behind strstart already and the last step is
    // done after the loop, hence two fewer iterations.
    prevLen -= 2;
    do
    {
        strstart++;
        lookahead--;
        if (lookahead >= MinMatch)
        {
            InsertString();
        }
    } while (--prevLen > 0);

    strstart++;
    lookahead--;
    prevAvailable = false;
    matchLen = MinMatch - 1;

    return prevLen;
}
- #endregion
-
- #endregion
- }
- }