/packages/chm/src/paslzxcomp.pas
https://github.com/slibre/freepascal · Pascal · 1160 lines · 892 code · 119 blank · 149 comment · 132 complexity · cd4e51b609092e1e6f0aec07bb0a0c8f MD5 · raw file
- { Copyright (C) <2005> <Andrew Haines> paslzxcomp.pas
- This library is free software; you can redistribute it and/or modify it
- under the terms of the GNU Library General Public License as published by
- the Free Software Foundation; either version 2 of the License, or (at your
- option) any later version.
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
- for more details.
- You should have received a copy of the GNU Library General Public License
- along with this library; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- }
- {
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- }
- unit paslzxcomp;
- {$MODE OBJFPC}
- {$GOTO ON}
- interface
- uses paslznonslide;
- const
- MIN_MATCH = 2;
- MAX_MATCH = 257;
- NUM_CHARS = 256;
- NUM_PRIMARY_LENGTHS = 7;
- NUM_SECONDARY_LENGTHS = 249;
- { the names of these constants are specific to this library }
- LZX_MAX_CODE_LENGTH = 16;
- LZX_FRAME_SIZE = 32768;
- LZX_PRETREE_SIZE = 20;
- LZX_ALIGNED_BITS = 3;
- LZX_ALIGNED_SIZE = 8;
- LZX_VERBATIM_BLOCK = 1;
- LZX_ALIGNED_OFFSET_BLOCK = 2;
- {$IFDEF FPC}
- {$PACKRECORDS C}
- {$ENDIF}
- {
- File lzx_compress.h, part of lzxcomp library
- Copyright (C) 2002 Matthew T. Russotto
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; version 2.1 only
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- }
- type
- PPlzx_data = ^Plzx_data;
- Plzx_data = ^lzx_data;
- TGetBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl;
- TWriteBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl;
- TMarkFrameFunc = procedure (arg:pointer; uncomp:dword; comp:dword); cdecl;
- TIsEndOfFileFunc = function (arg:pointer): longbool; cdecl;
- { add more here? Error codes, # blocks, # frames, etc? }
- lzx_results = record
- len_compressed_output : longint;
- len_uncompressed_input : longint;
- end;
-
- phuff_entry = ^huff_entry;
- huff_entry = record
- codelength: smallint;
- code: word;
- end;
-
- lzx_data = record
- in_arg : pointer;
- out_arg: pointer;
- mark_frame_arg: pointer;
- get_bytes: TGetBytesFunc;
- at_eof: TIsEndOfFileFunc;
- put_bytes: TWriteBytesFunc;
- mark_frame: TMarkFrameFunc;
- lzi: plz_info;
- {/* a 'frame' is an 0x8000 byte thing. Called that because otherwise
- I'd confuse myself overloading 'block' */}
- left_in_frame: longint;
- left_in_block: longint;
- R0, R1, R2: longint;
- num_position_slots: longint;
- //* this is the LZX block size */
- block_size: longint;
- main_freq_table: plongint;
- length_freq_table: array [0..NUM_SECONDARY_LENGTHS-1] of longint;
- aligned_freq_table: array [0..LZX_ALIGNED_SIZE-1] of longint;
- block_codes: plongword;
- block_codesp: plongword;
- main_tree: phuff_entry;
- length_tree: array[0..NUM_SECONDARY_LENGTHS-1] of huff_entry;
- aligned_tree: array[0..LZX_ALIGNED_SIZE-1] of huff_entry;
- main_tree_size: longint;
- bit_buf: word;
- bits_in_buf: longint;
- main_entropy: double;
- last_ratio: double;
- prev_main_treelengths: pbyte;
- prev_length_treelengths: array [0..NUM_SECONDARY_LENGTHS-1] of byte;
- len_uncompressed_input: longword;
- len_compressed_output: longword;
- need_1bit_header: smallint;
- subdivide: smallint; //* 0 = don't subdivide, 1 = allowed, -1 = requested */
- end;
- Plzx_results = ^lzx_results;
- function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc;
- put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint;
- procedure lzx_reset(lzxd:plzx_data);
- function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide: LongBool):longint;
- function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint;
- implementation
- uses math, sysutils;
- var
- rloge2: double; // set in initialization section
-
- const
- num_position_slots: array [0..6] of smallint = (30, 32, 34, 36, 38, 42, 50);
-
- extra_bits: array [0..50] of Byte = (
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
- 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
- 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
- 17, 17, 17
- );
- position_base: array [0..50] of dword = (
- 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
- 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
- 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
- 1835008, 1966080, 2097152
- );
- type
- pih_elem = ^ih_elem;
- ih_elem = record
- freq: longint;
- sym: smallint;
- pathlength: smallint;
- parent: pih_elem;
- left: pih_elem;
- right: pih_elem;
- end;
- ph_elem = ^h_elem;
- h_elem = record
- freq: longint;
- sym: smallint;
- pathlength: smallint;
- parent: pih_elem;
- code: word;
- end;
- function cmp_leaves(const in_a: ph_elem; const in_b: ph_elem): longint;
- begin
- if (in_a^.freq = 0) and (in_b^.freq <> 0) then
- Exit(1);
- if (in_a^.freq <> 0) and (in_b^.freq = 0) then
- Exit(-1);
- if (in_a^.freq = in_b^.freq) then
- Exit(in_a^.sym - in_b^.sym);
- Exit(in_a^.freq - in_b^.freq);
- end;
- function cmp_pathlengths(const in_a: ph_elem; const in_b: ph_elem): longint;
- begin
- if (in_a^.pathlength = in_b^.pathlength) then
- //* see note on canonical pathlengths */
- Exit(in_b^.sym - in_a^.sym);
- Exit(in_b^.pathlength - in_a^.pathlength);
- end;
- type
- TQSortCompFunc = function(const in_a: ph_elem; const in_b: ph_elem): longint;
- procedure qsort(a_array: ph_elem; nelem: integer; cmpfunc: TQSortCompFunc);
- var
- tmp: h_elem;
- procedure QuickSort(L, R: Integer);
- var
- I, J, Pivot: Integer;
- begin
- repeat
- I := L;
- J := R;
- Pivot := (L + R) div 2;
- repeat
- while cmpfunc(@a_array[I], @a_array[Pivot]) < 0 do Inc(I);
- while cmpfunc(@a_array[J], @a_array[Pivot]) > 0 do Dec(J);
- if I <= J then
- begin
- // exchange I and J
- tmp := a_array[I];
- a_array[I] := a_array[J];
- a_array[J] := tmp;
- if Pivot = I then
- Pivot := J
- else if Pivot = J then
- Pivot := I;
- Inc(I);
- Dec(j);
- end;
- until I > J;
- if L < J then
- QuickSort(L,J);
- L := I;
- until I >= R;
- end;
- begin
- QuickSort(0, nelem - 1);
- end;
- procedure build_huffman_tree(nelem: longint; max_code_length: longint; freq: plongint; tree: phuff_entry);
- var
- leaves: ph_elem;
- inodes: pih_elem;
- next_inode: pih_elem;
- cur_inode: pih_elem;
- cur_leaf :ph_elem;
- leaves_left,
- nleaves,
- pathlength: longint;
- cur_code: word;
- codes_too_long: smallint = 0;
- f1, f2: pih_elem;
- i: longint;
- begin
- leaves := GetMem(nelem * sizeof(h_elem));
- for i := 0 to nelem-1 do begin
- leaves[i].freq := freq[i];
- leaves[i].sym := i;
- leaves[i].pathlength := 0;
- end;
- qsort(leaves, nelem, @cmp_leaves);
- leaves_left := 0;
- while leaves_left < nelem do begin
- if (leaves[leaves_left].freq) = 0 then break;
- Inc(leaves_left);
- end;
- nleaves := leaves_left;
- if (nleaves >= 2) then begin
- inodes := AllocMem((nelem-1) * sizeof(ih_elem));
- repeat
- if (codes_too_long <> 0) then begin
- leaves_left := 0;
- while leaves_left < nelem do begin
- if (leaves[leaves_left].freq = 0) then break;
- if (leaves[leaves_left].freq <> 1) then begin
- leaves[leaves_left].freq := leaves[leaves_left].freq shr 1;
- codes_too_long := 0;
- end;
- Inc(leaves_left);
- end;
- if codes_too_long <> 0 then
- raise Exception.Create('!codes_too_long');
- end;
- cur_leaf := leaves;
- cur_inode := inodes;
- next_inode := cur_inode;
- repeat
- f1 := nil;
- f2 := nil;
- if (leaves_left <> 0) and
- ((cur_inode = next_inode) or
- (cur_leaf^.freq <= cur_inode^.freq)) then begin
- f1 := pih_elem(cur_leaf);
- Inc(cur_leaf);
- Dec(leaves_left);
- end
- else if (cur_inode <> next_inode) then begin
- f1 := cur_inode;
- Inc(cur_inode);
- end;
- if ((leaves_left <> 0) and
- ((cur_inode = next_inode) or
- (cur_leaf^.freq <= cur_inode^.freq))) then begin
- f2 := pih_elem(cur_leaf);
- Inc(cur_leaf);
- Dec(leaves_left);
- end
- else if (cur_inode <> next_inode) then begin
- f2 := cur_inode;
- Inc(cur_inode);
- end;
- if (f1 <> nil) and (f2 <> nil) then begin
- next_inode^.freq := f1^.freq + f2^.freq;
- next_inode^.sym := -1;
- next_inode^.left := f1;
- next_inode^.right := f2;
- next_inode^.parent := nil;
- f1^.parent := next_inode;
- f2^.parent := next_inode;
- if (f1^.pathlength > f2^.pathlength) then
- next_inode^.pathlength := f1^.pathlength + 1
- else
- next_inode^.pathlength := f2^.pathlength + 1;
- if (next_inode^.pathlength > max_code_length) then begin
- codes_too_long := 1;
- break;
- end;
- Inc(next_inode);
- end;
- until (f1 = nil) and (f2 = nil);
- until codes_too_long = 0;
- //* now traverse tree depth-first */
- cur_inode := next_inode - 1;
- pathlength := 0;
- cur_inode^.pathlength := -1;
- repeat
- //* precondition: at unmarked node*/
- if (cur_inode^.sym = -1) then begin //*&& (cur_inode^.left)*/
- //* left node of unmarked node is unmarked */
- cur_inode := cur_inode^.left;
- cur_inode^.pathlength := -1;
- Inc(pathlength);
- end
- else begin
- //* mark node */
- cur_inode^.pathlength := pathlength;
- //#if 0
- // if (cur_inode^.right) {
- // /* right node of previously unmarked node is unmarked */
- // cur_inode = cur_inode^.right;
- // cur_inode^.pathlength = -1;
- // pathlength++;
- // }
- // else
- //#endif
- begin
- //* time to come up. Keep coming up until an unmarked node is reached */
- //* or the tree is exhausted */
- repeat
- cur_inode := cur_inode^.parent;
- Dec(pathlength);
- //while (cur_inode && (cur_inode^.pathlength != -1));
- until (cur_inode = nil) or (cur_inode^.pathlength = -1);
- if (cur_inode <> nil) then begin
- //* found unmarked node; mark it and go right */
- cur_inode^.pathlength := pathlength;
- cur_inode := cur_inode^.right;
- cur_inode^.pathlength := -1;
- Inc(pathlength);
- //* would be complex if cur_inode could be null here. It can't */
- end
- end;
- end;
- until cur_inode = nil;
- freemem(inodes);
- ///* the pathlengths are already in order, so this sorts by symbol */
- qsort(leaves, nelem, @cmp_pathlengths);
- //#if 0
- // pathlength = leaves[0].pathlength;
- // cur_code = 0;
- // for (i = 0; i < nleaves; i++) {
- // while (leaves[i].pathlength < pathlength) {
- // (!(cur_code & 1));
- // cur_code >>= 1;
- // pathlength--;
- // }
- // leaves[i].code = cur_code;
- // cur_code++;
- // }
- //#else
- pathlength := leaves[nleaves-1].pathlength;
- if leaves[0].pathlength > 16 then
- raise Exception.Create('leaves[0].pathlength <= 16');
- //* this method cannot deal with bigger codes, though
- // the other canonical method can in some cases
- // (because it starts with zeros ) */
- cur_code := 0;
- for i := nleaves-1 downto 0 do begin
- while (leaves[i].pathlength > pathlength) do begin
- cur_code := cur_code shl 1;
- Inc(pathlength);
- end;
- leaves[i].code := cur_code;
- {$PUSH}
- {$R-}
- Inc(cur_code); // range error but i = 0 so it's harmless
- {$POP}
- end;
- //#endif
- end
- else if (nleaves = 1) then begin
- //* 0 symbols is OK (not according to doc, but according to Caie) */
- //* but if only one symbol is present, two symbols are required */
- nleaves := 2;
- leaves[0].pathlength := 1;
- leaves[1].pathlength := 1;
- if (leaves[1].sym > leaves[0].sym) then begin
- leaves[1].code := 1;
- leaves[0].code := 0;
- end
- else begin
- leaves[0].code := 1;
- leaves[1].code := 0;
- end;
- end;
- Fillchar(tree^, nelem * sizeof(huff_entry), 0);
- for i := 0 to nleaves-1 do begin
- tree[leaves[i].sym].codelength := leaves[i].pathlength;
- tree[leaves[i].sym].code := leaves[i].code;
- end;
- freemem(leaves);
- end;
- function lzx_get_chars(lzi: plz_info; n: longint; buf: pbyte): longint; cdecl;
- var
- //* force lz compression to stop after every block */
- chars_read,
- chars_pad: longint;
- lzud: plzx_data;
- begin
- lzud := plzx_data(lzi^.user_data);
-
- chars_read := lzud^.get_bytes(lzud^.in_arg, n, buf);
- Dec(lzud^.left_in_frame, chars_read mod LZX_FRAME_SIZE);
- if (lzud^.left_in_frame < 0) then
- Inc(lzud^.left_in_frame, LZX_FRAME_SIZE);
- if ((chars_read < n) and (lzud^.left_in_frame <> 0)) then begin
- chars_pad := n - chars_read;
- if (chars_pad > lzud^.left_in_frame) then chars_pad := lzud^.left_in_frame;
- //* never emit a full frame of padding. This prevents silliness when
- // lzx_compress is called when at EOF but EOF not yet detected */
- if (chars_pad = LZX_FRAME_SIZE) then chars_pad := 0;
- FillChar(buf[chars_read], chars_pad, 0);
- Dec(lzud^.left_in_frame, chars_pad);
- Inc(chars_read, chars_pad);
- end;
- lzx_get_chars := chars_read;
- end;
- function find_match_at(lzi: plz_info; loc: longint; match_len: longint; match_locp: plongint): longint;
- var
- matchb,
- nmatchb,
- c1, c2: pbyte;
- j: longint;
- begin
- if -match_locp^ = loc then Exit(-1);
- if loc < match_len then Exit(-1);
- matchb := lzi^.block_buf + lzi^.block_loc + match_locp^;
- nmatchb := lzi^.block_buf + lzi^.block_loc - loc;
- c1 := matchb;
- c2 := nmatchb;
- j := 0;
- while j < match_len do begin
- if c1^ <> c2^ then begin
- break;
- end;
- Inc(c1);
- Inc(c2);
- Inc(j);
- end;
-
- if (j = match_len) then begin
- match_locp^ := -loc;
- Exit(0);
- end;
- Exit(-1);
- end;
- procedure check_entropy(lzud: plzx_data; main_index: longint);
- var
- freq,
- n_ln_n,
- rn_ln2,
- cur_ratio: double;
- n: longint;
- begin
- //* delete old entropy accumulation */
- if (lzud^.main_freq_table[main_index] <> 1) then begin
- freq := double(lzud^.main_freq_table[main_index])-1;
- lzud^.main_entropy := lzud^.main_entropy + (freq * ln(freq));
- end;
- //* add new entropy accumulation */
- freq := double(lzud^.main_freq_table[main_index]);
- lzud^.main_entropy := lzud^.main_entropy - (freq * ln(freq));
- n := lzud^.block_codesp - lzud^.block_codes;
- if (((n and $0FFF) = 0) and (lzud^.left_in_block >= $1000)) then begin
- n_ln_n := (double(n) * ln(double(n)));
- rn_ln2 := (rloge2 / double(n));
- cur_ratio := (n * rn_ln2 *(n_ln_n + lzud^.main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud^.main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / double(n);
- if (cur_ratio > lzud^.last_ratio) then begin
- lzud^.subdivide := -1;
- lz_stop_compressing(lzud^.lzi);
- end;
- lzud^.last_ratio := cur_ratio;
- end;
- end;
- function lzx_output_match(lzi: plz_info; match_pos, match_len: longint): longint; cdecl;
- var
- lzud: plzx_data;
- formatted_offset,
- position_footer: longword;
- length_footer,
- length_header: byte;
- len_pos_header: word;
- position_slot: longint;
- btdt: smallint;
- left, right, mid: longint;
- label testforr;
- begin
- lzud := plzx_data(lzi^.user_data);
- position_footer := 0;
- btdt := 0;
- testforr:
- if (match_pos = -lzud^.R0) then begin
- match_pos := 0;
- formatted_offset := 0;
- position_slot := 0;
- end
- else if (match_pos = -lzud^.R1) then begin
- lzud^.R1 := lzud^.R0;
- lzud^.R0 := -match_pos;
- match_pos := 1;
- formatted_offset := 1;
- position_slot := 1;
- end
- else if (match_pos = -lzud^.R2) then begin
- lzud^.R2 := lzud^.R0;
- lzud^.R0 := -match_pos;
- match_pos := 2;
- formatted_offset := 2;
- position_slot := 2;
- end
- else begin
- if (btdt = 0) then begin
- btdt := 1;
- if (find_match_at(lzi, lzud^.R0, match_len, @match_pos) = 0) then
- goto testforr;
- if (find_match_at(lzi, lzud^.R1, match_len, @match_pos) = 0) then
- goto testforr;
- if (find_match_at(lzi, lzud^.R2, match_len, @match_pos) = 0) then
- goto testforr;
- end;
- formatted_offset := -match_pos + 2;
- if ((match_len < 3) or
- ((formatted_offset >= 64) and (match_len < 4)) or
- ((formatted_offset >= 2048) and (match_len < 5)) or
- ((formatted_offset >= 65536) and (match_len < 6))) then begin
- //* reject matches where extra_bits will likely be bigger than just outputting
- // literals. The numbers are basically derived through guessing
- // and trial and error */
- Exit(-1); //* reject the match */
- end;
- lzud^.R2 := lzud^.R1;
- lzud^.R1 := lzud^.R0;
- lzud^.R0 := -match_pos;
- ///* calculate position base using binary search of table; if log2 can be
- // done in hardware, approximation might work;
- // trunc(log2(formatted_offset*formatted_offset)) gets either the proper
- // position slot or the next one, except for slots 0, 1, and 39-49
- // Slots 0-1 are handled by the R0-R1 procedures
- // Slots 36-49 (formatted_offset >= 262144) can be found by
- // (formatted_offset/131072) + 34 ==
- // (formatted_offset >> 17) + 34;
- //*/
- if (formatted_offset >= 262144) then begin
- position_slot := (formatted_offset shr 17) + 34;
- end
- else begin
- left := 3;
- right := lzud^.num_position_slots - 1;
- position_slot := -1;
- while (left <= right) do begin
- mid := (left + right) div 2;
- if (position_base[mid] <= formatted_offset) and
- (position_base[mid+1] > formatted_offset) then begin
- position_slot := mid;
- break;
- end;
- if (formatted_offset > position_base[mid]) then
- //* too low */
- left := mid + 1
- else //* too high */
- right := mid;
- end;
- if not(position_slot >= 0) then
- raise Exception.Create('position_slot >= 0');
- //* FIXME precalc extra_mask table */
- end;
- position_footer := ((LongWord(1) shl extra_bits[position_slot]) - 1) and formatted_offset;
- end;
- //* match length = 8 bits */
- //* position_slot = 6 bits */
- //* position_footer = 17 bits */
- //* total = 31 bits */
- //* plus one to say whether it's a literal or not */
- lzud^.block_codesp^ := $80000000 or //* bit 31 in intelligent bit ordering */
- (position_slot shl 25) or //* bits 30-25 */
- (position_footer shl 8) or //* bits 8-24 */
- (match_len - MIN_MATCH); //* bits 0-7 */
- Inc(lzud^.block_codesp);
- if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) then begin
- length_header := match_len - MIN_MATCH;
- //* length_footer = 255; */ /* not necessary */
- end
- else begin
- length_header := NUM_PRIMARY_LENGTHS;
- length_footer := match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH);
- Inc(lzud^.length_freq_table[length_footer]);
- end;
- len_pos_header := (position_slot shl 3) or length_header;
- Inc(lzud^.main_freq_table[len_pos_header + NUM_CHARS]);
- if (extra_bits[position_slot] >= 3) then begin
- Inc(lzud^.aligned_freq_table[position_footer and 7]);
- end;
- Dec(lzud^.left_in_block, match_len);
- if (lzud^.subdivide <> 0) then
- check_entropy(lzud, len_pos_header + NUM_CHARS);
- Exit(0); ///* accept the match */
- end;
- procedure lzx_output_literal(lzi: plz_info; ch: byte); cdecl;
- var
- lzud: plzx_data;
- begin
- lzud := plzx_data(lzi^.user_data);
- Dec(lzud^.left_in_block);
- lzud^.block_codesp^ := ch;
- Inc(lzud^.block_codesp);
- Inc(lzud^.main_freq_table[ch]);
- if (lzud^.subdivide <> 0) then
- check_entropy(lzud, ch);
- end;
- procedure lzx_write_bits(lzxd: plzx_data; nbits: longint; bits: longword); cdecl;
- var
- cur_bits,
- shift_bits,
- rshift_bits: longint;
- mask_bits: word;
- begin
- cur_bits := lzxd^.bits_in_buf;
- while ((cur_bits + nbits) >= 16) do begin
- shift_bits := 16 - cur_bits;
- rshift_bits := nbits - shift_bits;
- if (shift_bits = 16) then begin
- lzxd^.bit_buf := (bits shr rshift_bits) and $FFFF;
- end
- else begin
- mask_bits := (1 shl shift_bits) - 1;
- lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits);
- lzxd^.bit_buf := word(lzxd^.bit_buf or (bits shr rshift_bits) and mask_bits);
- end;
- {$IFDEF ENDIAN_BIG}
- lzxd^.bit_buf := word(((lzxd^.bit_buf and $FF)shl 8) or (lzxd^.bit_buf shr 8));
- {$ENDIF}
- lzxd^.put_bytes(lzxd^.out_arg, sizeof(lzxd^.bit_buf), @lzxd^.bit_buf);
- Inc(lzxd^.len_compressed_output, sizeof(lzxd^.bit_buf));
- lzxd^.bit_buf := 0;
- Dec(nbits, shift_bits);
- cur_bits := 0;
- end;
- //* (cur_bits + nbits) < 16. If nbits := 0, we're done.
- // otherwise move bits in */
- shift_bits := nbits;
- mask_bits := (1 shl shift_bits) - 1;
- lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits);
- lzxd^.bit_buf := word(lzxd^.bit_buf or bits and mask_bits);
- Inc(cur_bits, nbits);
- lzxd^.bits_in_buf := cur_bits;
- end;
- procedure lzx_align_output(lzxd: plzx_data);
- begin
- if (lzxd^.bits_in_buf <> 0) then begin
- lzx_write_bits(lzxd, 16 - lzxd^.bits_in_buf, 0);
- end;
- if (lzxd^.mark_frame <> nil) then
- lzxd^.mark_frame(lzxd^.mark_frame_arg, lzxd^.len_uncompressed_input, lzxd^.len_compressed_output);
- end;
- procedure lzx_write_compressed_literals(lzxd: plzx_data; block_type: longint);
- var
- cursor: plongword;
- endp: plongword;
- position_slot: word;
- position_footer,
- match_len_m2, //* match length minus 2, which is MIN_MATCH */
- verbatim_bits,
- block_code: longword;
- length_header,
- length_footer,
- len_pos_header: word;
- huffe: phuff_entry;
- frame_count: longint;
- begin
- cursor := lzxd^.block_codes;
- endp := lzxd^.block_codesp;
- frame_count := (lzxd^.len_uncompressed_input mod LZX_FRAME_SIZE);
- Dec(lzxd^.len_uncompressed_input, frame_count); //* will be added back in later */
- while (cursor < endp) do begin
- block_code := cursor^;
- Inc(cursor);
- if (block_code and $80000000) <> 0 then begin
- {*
- * 0x80000000 | bit 31 in intelligent bit ordering
- * (position_slot shl 25) | bits 30-25
- * (position_footer shl 8) | bits 8-24
- * (match_len - MIN_MATCH); bits 0-7
- *
- *}
- match_len_m2 := block_code and $FF; //* 8 bits */
- position_footer := (block_code shr 8)and $1FFFF; //* 17 bits */
- position_slot := (block_code shr 25) and $3F; //* 6 bits */
- if (match_len_m2 < NUM_PRIMARY_LENGTHS) then begin
- length_header := match_len_m2;
- length_footer := 255; //* personal encoding for NULL */
- end
- else begin
- length_header := NUM_PRIMARY_LENGTHS;
- length_footer := match_len_m2 - NUM_PRIMARY_LENGTHS;
- end;
- len_pos_header := (position_slot shl 3) or length_header;
- huffe := @lzxd^.main_tree[len_pos_header+NUM_CHARS];
- lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
- if (length_footer <> 255) then begin
- huffe := @lzxd^.length_tree[length_footer];
- lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
- end;
- if ((block_type = LZX_ALIGNED_OFFSET_BLOCK) and (extra_bits[position_slot] >= 3)) then begin
- //* aligned offset block and code */
- verbatim_bits := position_footer shr 3;
- lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits);
- huffe := @lzxd^.aligned_tree[position_footer and 7];
- lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
- end
- else begin
- verbatim_bits := position_footer;
- lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits);
- end;
- Inc(frame_count, match_len_m2 + 2);
- end
- else begin
- //* literal */
- if not(block_code < NUM_CHARS) then
- raise Exception.Create('block_code < NUM_CHARS');
-
- huffe := @lzxd^.main_tree[block_code];
- lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
- Inc(frame_count);
- end;
- if (frame_count = LZX_FRAME_SIZE) then begin
- Inc(lzxd^.len_uncompressed_input, frame_count);
- lzx_align_output(lzxd);
- frame_count := 0;
- end;
- if not(frame_count < LZX_FRAME_SIZE) then
- raise Exception.Create('frame_count < LZX_FRAME_SIZE');
- end;
- Inc(lzxd^.len_uncompressed_input, frame_count);
- end;
- function lzx_write_compressed_tree(lzxd: plzx_data; tree: phuff_entry; prevlengths: pbyte;
- treesize: longint): longint;
- var
- codes,
- runs: pbyte;
- freqs: array [0..LZX_PRETREE_SIZE-1] of longint;
- cur_run: longint;
- last_len: longint;
- pretree: array [0..19] of huff_entry;
- codep,
- codee,
- runp: pbyte;
- excess,
- i,
- cur_code: longint;
- begin
- codes := getmem(treesize*sizeof(byte));
- codep := codes;
- runs := getmem(treesize*sizeof(byte));
- runp := runs;
- Fillchar(freqs[0], sizeof(freqs), 0);
- cur_run := 1;
- last_len := tree[0].codelength;
- for i := 1 to treesize do begin
- if ((i = treesize) or (tree[i].codelength <> last_len)) then begin
- if (last_len = 0) then begin
- while (cur_run >= 20) do begin
- excess := cur_run - 20;
- if (excess > 31) then excess := 31;
- codep^ := 18;
- Inc(codep);
- runp^ := excess;
- Inc(runp);
- Dec(cur_run, excess + 20);
- Inc(freqs[18]);
- end;
- while (cur_run >= 4) do begin
- excess := cur_run - 4;
- if (excess > 15) then excess := 15;
- codep^ := 17;
- Inc(codep);
- runp^ := excess;
- Inc(runp);
- Dec(cur_run, excess + 4);
- Inc(freqs[17]);
- end;
- while (cur_run > 0) do begin
- codep^ := prevlengths[i - cur_run];
- Inc(freqs[codep^]);
- Inc(codep);
- runp^ := 0; //* not necessary */
- Inc(runp);
- Dec(cur_run);
- end;
- end
- else begin
- while (cur_run >= 4) do begin
- if (cur_run = 4) then excess := 0
- else excess := 1;
- codep^ := 19;
- Inc(codep);
- runp^ := excess;
- Inc(runp);
- Inc(freqs[19]);
- //* right, MS lies again. Code is NOT
- // prev_len + len (mod 17), it's prev_len - len (mod 17)*/
- codep^ := byte(prevlengths[i-cur_run] - last_len);
- if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, 17);
- Inc(freqs[codep^]);
- Inc(codep);
- runp^ := 0; //* not necessary */
- Inc(runp);
- Dec(cur_run, excess+4);
- end;
- while (cur_run > 0) do begin
- codep^ := byte(prevlengths[i-cur_run] - last_len);
- if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, byte(17));
- runp^ := 0; //* not necessary */
- Inc(runp);
- Dec(cur_run);
- Inc(freqs[codep^]);
- Inc(codep);
- end;
- end;
- if (i <> treesize) then
- last_len := tree[i].codelength;
- cur_run := 0;
- end;
- Inc(cur_run);
- end;
- codee := codep;
- //* now create the huffman table and write out the pretree */
- build_huffman_tree(LZX_PRETREE_SIZE, 16, @freqs[0], pretree);
- for i := 0 to LZX_PRETREE_SIZE-1 do begin
- lzx_write_bits(lzxd, 4, pretree[i].codelength);
- end;
- codep := codes;
- runp := runs;
- cur_run := 0;
- while (codep < codee) do begin
- cur_code := codep^;
- Inc(codep);
- lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
- if (cur_code = 17) then begin
- Inc(cur_run, runp^ + 4);
- lzx_write_bits(lzxd, 4, runp^);
- end
- else if (cur_code = 18) then begin
- Inc(cur_run, runp^ + 20);
- lzx_write_bits(lzxd, 5, runp^);
- end
- else if (cur_code = 19) then begin
- Inc(cur_run, runp^ + 4);
- lzx_write_bits(lzxd, 1, runp^);
- cur_code := codep^;
- Inc(codep);
- lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
- Inc(runp);
- end
- else begin
- Inc(cur_run);
- end;
- Inc(runp);
- end;
- freemem(codes);
- freemem(runs);
- Exit(0);
- end;
- procedure lzx_reset(lzxd:plzx_data);
- begin
- lzxd^.need_1bit_header := 1;
- lzxd^.R0 := 1;
- lzxd^.R1 := 1;
- lzxd^.R2 := 1;
- Fillchar(lzxd^.prev_main_treelengths[0], lzxd^.main_tree_size * sizeof(byte), 0);
- Fillchar(lzxd^.prev_length_treelengths[0], NUM_SECONDARY_LENGTHS * sizeof(byte), 0);
- lz_reset(lzxd^.lzi);
- end;
- function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide:longbool):longint;
- var
- i: longint;
- written_sofar: longword = 0;
- block_type: longint;
- uncomp_bits,
- comp_bits,
- comp_bits_ovh,
- uncomp_length: longword;
- begin
- if ((lzxd^.block_size <> block_size) or (lzxd^.block_codes = nil)) then begin
- if (lzxd^.block_codes <> nil) then freemem(lzxd^.block_codes);
- lzxd^.block_size := block_size;
- lzxd^.block_codes := GetMem(block_size * sizeof(longword));
- end;
- lzxd^.subdivide := Ord(subdivide);
- lzxd^.left_in_block := block_size;
- lzxd^.left_in_frame := LZX_FRAME_SIZE;
- lzxd^.main_entropy := 0.0;
- lzxd^.last_ratio := 9999999.0;
- lzxd^.block_codesp := lzxd^.block_codes;
- Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0);
- Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0);
- Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0);
- while ((lzxd^.left_in_block<>0) and ((lz_left_to_process(lzxd^.lzi)<>0) or not(lzxd^.at_eof(lzxd^.in_arg)))) do begin
- lz_compress(lzxd^.lzi, lzxd^.left_in_block);
- if (lzxd^.left_in_frame = 0) then begin
- lzxd^.left_in_frame := LZX_FRAME_SIZE;
- end;
-
- if ((lzxd^.subdivide<0)
- or (lzxd^.left_in_block = 0)
- or ((lz_left_to_process(lzxd^.lzi) = 0) and lzxd^.at_eof(lzxd^.in_arg))) then begin
- //* now one block is LZ-analyzed. */
- //* time to write it out */
- uncomp_length := lzxd^.block_size - lzxd^.left_in_block - written_sofar;
- //* uncomp_length will sometimes be 0 when input length is
- // an exact multiple of frame size */
- if (uncomp_length = 0) then
- continue;
- if (lzxd^.subdivide < 0) then begin
- lzxd^.subdivide := 1;
- end;
- if (lzxd^.need_1bit_header <> 0) then begin
- //* one bit Intel preprocessing header */
- //* always 0 because this implementation doesn't do Intel preprocessing */
- lzx_write_bits(lzxd, 1, 0);
- lzxd^.need_1bit_header := 0;
- end;
- //* handle extra bits */
- uncomp_bits := 0;
- comp_bits := 0;
-
- build_huffman_tree(LZX_ALIGNED_SIZE, 7, @lzxd^.aligned_freq_table[0], @lzxd^.aligned_tree[0]);
- for i := 0 to LZX_ALIGNED_SIZE-1 do begin
- Inc(uncomp_bits, lzxd^.aligned_freq_table[i]* 3);
- Inc(comp_bits, lzxd^.aligned_freq_table[i]* lzxd^.aligned_tree[i].codelength);
- end;
- comp_bits_ovh := comp_bits + LZX_ALIGNED_SIZE * 3;
- if (comp_bits_ovh < uncomp_bits) then
- block_type := LZX_ALIGNED_OFFSET_BLOCK
- else
- block_type := LZX_VERBATIM_BLOCK;
- //* block type */
- lzx_write_bits(lzxd, 3, block_type);
- //* uncompressed length */
- lzx_write_bits(lzxd, 24, uncomp_length);
- written_sofar := lzxd^.block_size - lzxd^.left_in_block;
- //* now write out the aligned offset trees if present */
- if (block_type = LZX_ALIGNED_OFFSET_BLOCK) then begin
- for i := 0 to LZX_ALIGNED_SIZE-1 do begin
- lzx_write_bits(lzxd, 3, lzxd^.aligned_tree[i].codelength);
- end;
- end;
- //* end extra bits */
- build_huffman_tree(lzxd^.main_tree_size, LZX_MAX_CODE_LENGTH,
- lzxd^.main_freq_table, lzxd^.main_tree);
- build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
- @lzxd^.length_freq_table[0], @lzxd^.length_tree[0]);
- //* now write the pre-tree and tree for main 1 */
- lzx_write_compressed_tree(lzxd, lzxd^.main_tree, lzxd^.prev_main_treelengths, NUM_CHARS);
- //* now write the pre-tree and tree for main 2*/
- lzx_write_compressed_tree(lzxd, lzxd^.main_tree + NUM_CHARS,
- lzxd^.prev_main_treelengths + NUM_CHARS,
- lzxd^.main_tree_size - NUM_CHARS);
- //* now write the pre tree and tree for length */
- lzx_write_compressed_tree(lzxd, @lzxd^.length_tree[0], @lzxd^.prev_length_treelengths[0],
- NUM_SECONDARY_LENGTHS);
- //* now write literals */
- lzx_write_compressed_literals(lzxd, block_type);
- //* copy treelengths somewhere safe to do delta compression */
- for i := 0 to lzxd^.main_tree_size-1 do begin
- lzxd^.prev_main_treelengths[i] := lzxd^.main_tree[i].codelength;
- end;
- for i := 0 to NUM_SECONDARY_LENGTHS-1 do begin
- lzxd^.prev_length_treelengths[i] := lzxd^.length_tree[i].codelength;
- end;
- lzxd^.main_entropy := 0.0;
- lzxd^.last_ratio := 9999999.0;
- lzxd^.block_codesp := lzxd^.block_codes;
- Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0);
- Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0);
- Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0);
- end;
- end;
- Exit(0);
- end;
- function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc;
- put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint;var
- wsize: longint;
- lzxd: plzx_data;
- begin
- if ((wsize_code < 15) or (wsize_code > 21)) then begin
- Exit(-1);
- end;
-
- //lzx_init_static(); I hardcoded this instead
- New(lzxd);
- FillChar(lzxd^, Sizeof(lzxd), 0);
- lzxdp^ := lzxd;
- if (lzxd = nil) then
- Exit(-2);
- lzxd^.in_arg := get_bytes_arg;
- lzxd^.out_arg := put_bytes_arg;
- lzxd^.mark_frame_arg := mark_frame_arg;
- lzxd^.get_bytes := get_bytes;
- lzxd^.put_bytes := put_bytes;
- lzxd^.at_eof := at_eof;
- lzxd^.mark_frame := mark_frame;
- wsize := 1 shl (wsize_code);
- lzxd^.bits_in_buf := 0;
- lzxd^.block_codes := nil;
- lzxd^.num_position_slots := num_position_slots[wsize_code-15];
- lzxd^.main_tree_size := (NUM_CHARS + 8 * lzxd^.num_position_slots);
- lzxd^.main_freq_table := GetMem(sizeof(longint) * lzxd^.main_tree_size);
- lzxd^.main_tree := GetMem(sizeof(huff_entry)* lzxd^.main_tree_size);
- lzxd^.prev_main_treelengths := GetMem(sizeof(byte)*lzxd^.main_tree_size);
- New(lzxd^.lzi);
- //* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
- lz_init(lzxd^.lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
- @lzx_get_chars, @lzx_output_match, @lzx_output_literal,lzxd);
- lzxd^.len_uncompressed_input := 0;
- lzxd^.len_compressed_output := 0;
- lzx_reset(lzxd);
- Exit(0);
- end;
- function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint;
- begin
- if (lzxr <> nil) then begin
- lzxr^.len_compressed_output := lzxd^.len_compressed_output;
- lzxr^.len_uncompressed_input := lzxd^.len_uncompressed_input;
- end;
- lz_release(lzxd^.lzi);
- Dispose(lzxd^.lzi);
- freemem(lzxd^.prev_main_treelengths);
- freemem(lzxd^.main_tree);
- freemem(lzxd^.main_freq_table);
- freemem(lzxd^.block_codes);
- dispose(lzxd);
- Exit(0);
- end;
- initialization
- rloge2 := 1.0 / ln(2);
- end.