/packages/chm/src/paslzxcomp.pas
Pascal | 1160 lines | 892 code | 119 blank | 149 comment | 132 complexity | cd4e51b609092e1e6f0aec07bb0a0c8f MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-2.1, LGPL-3.0
1{ Copyright (C) <2005> <Andrew Haines> paslzxcomp.pas 2 3 This library is free software; you can redistribute it and/or modify it 4 under the terms of the GNU Library General Public License as published by 5 the Free Software Foundation; either version 2 of the License, or (at your 6 option) any later version. 7 8 This program is distributed in the hope that it will be useful, but WITHOUT 9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License 11 for more details. 12 13 You should have received a copy of the GNU Library General Public License 14 along with this library; if not, write to the Free Software Foundation, 15 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16} 17{ 18 See the file COPYING.FPC, included in this distribution, 19 for details about the copyright. 20} 21unit paslzxcomp; 22{$MODE OBJFPC} 23{$GOTO ON} 24interface 25 26uses paslznonslide; 27 28 const 29 MIN_MATCH = 2; 30 MAX_MATCH = 257; 31 NUM_CHARS = 256; 32 NUM_PRIMARY_LENGTHS = 7; 33 NUM_SECONDARY_LENGTHS = 249; 34 { the names of these constants are specific to this library } 35 LZX_MAX_CODE_LENGTH = 16; 36 LZX_FRAME_SIZE = 32768; 37 LZX_PRETREE_SIZE = 20; 38 LZX_ALIGNED_BITS = 3; 39 LZX_ALIGNED_SIZE = 8; 40 LZX_VERBATIM_BLOCK = 1; 41 LZX_ALIGNED_OFFSET_BLOCK = 2; 42 43 44{$IFDEF FPC} 45{$PACKRECORDS C} 46{$ENDIF} 47 48 49 { 50 File lzx_compress.h, part of lzxcomp library 51 Copyright (C) 2002 Matthew T. Russotto 52 53 This program is free software; you can redistribute it and/or modify 54 it under the terms of the GNU Lesser General Public License as published by 55 the Free Software Foundation; version 2.1 only 56 57 This program is distributed in the hope that it will be useful, 58 but WITHOUT ANY WARRANTY; without even the implied warranty of 59 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 60 GNU Lesser General Public License for more details. 61 62 You should have received a copy of the GNU Lesser General Public License 63 along with this program; if not, write to the Free Software 64 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 65 } 66 type 67 PPlzx_data = ^Plzx_data; 68 Plzx_data = ^lzx_data; 69 70 71 TGetBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl; 72 73 TWriteBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl; 74 75 TMarkFrameFunc = procedure (arg:pointer; uncomp:dword; comp:dword); cdecl; 76 77 TIsEndOfFileFunc = function (arg:pointer): longbool; cdecl; 78 { add more here? Error codes, # blocks, # frames, etc? } 79 80 lzx_results = record 81 len_compressed_output : longint; 82 len_uncompressed_input : longint; 83 end; 84 85 phuff_entry = ^huff_entry; 86 huff_entry = record 87 codelength: smallint; 88 code: word; 89 end; 90 91 lzx_data = record 92 in_arg : pointer; 93 out_arg: pointer; 94 mark_frame_arg: pointer; 95 get_bytes: TGetBytesFunc; 96 at_eof: TIsEndOfFileFunc; 97 put_bytes: TWriteBytesFunc; 98 mark_frame: TMarkFrameFunc; 99 lzi: plz_info; 100 {/* a 'frame' is an 0x8000 byte thing. Called that because otherwise 101 I'd confuse myself overloading 'block' */} 102 left_in_frame: longint; 103 left_in_block: longint; 104 R0, R1, R2: longint; 105 num_position_slots: longint; 106 //* this is the LZX block size */ 107 block_size: longint; 108 main_freq_table: plongint; 109 length_freq_table: array [0..NUM_SECONDARY_LENGTHS-1] of longint; 110 aligned_freq_table: array [0..LZX_ALIGNED_SIZE-1] of longint; 111 block_codes: plongword; 112 block_codesp: plongword; 113 main_tree: phuff_entry; 114 length_tree: array[0..NUM_SECONDARY_LENGTHS-1] of huff_entry; 115 aligned_tree: array[0..LZX_ALIGNED_SIZE-1] of huff_entry; 116 main_tree_size: longint; 117 bit_buf: word; 118 bits_in_buf: longint; 119 main_entropy: double; 120 last_ratio: double; 121 prev_main_treelengths: pbyte; 122 prev_length_treelengths: array [0..NUM_SECONDARY_LENGTHS-1] of byte; 123 len_uncompressed_input: longword; 124 len_compressed_output: longword; 125 need_1bit_header: smallint; 126 subdivide: smallint; //* 0 = don't subdivide, 1 = allowed, -1 = requested */ 127 end; 128 Plzx_results = ^lzx_results; 129 130 function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc; 131 put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint; 132 133 procedure lzx_reset(lzxd:plzx_data); 134 135 function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide: LongBool):longint; 136 137 function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint; 138 139implementation 140uses math, sysutils; 141var 142 rloge2: double; // set in initialization section 143 144const 145 num_position_slots: array [0..6] of smallint = (30, 32, 34, 36, 38, 42, 50); 146 147 extra_bits: array [0..50] of Byte = ( 148 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 149 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 150 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 151 17, 17, 17 152 ); 153 154 position_base: array [0..50] of dword = ( 155 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 156 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 157 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 158 1835008, 1966080, 2097152 159 ); 160 161type 162 pih_elem = ^ih_elem; 163 ih_elem = record 164 freq: longint; 165 sym: smallint; 166 pathlength: smallint; 167 parent: pih_elem; 168 left: pih_elem; 169 right: pih_elem; 170 end; 171 ph_elem = ^h_elem; 172 h_elem = record 173 freq: longint; 174 sym: smallint; 175 pathlength: smallint; 176 parent: pih_elem; 177 code: word; 178 end; 179 180function cmp_leaves(const in_a: ph_elem; const in_b: ph_elem): longint; 181begin 182 183 if (in_a^.freq = 0) and (in_b^.freq <> 0) then 184 Exit(1); 185 if (in_a^.freq <> 0) and (in_b^.freq = 0) then 186 Exit(-1); 187 188 if (in_a^.freq = in_b^.freq) then 189 Exit(in_a^.sym - in_b^.sym); 190 191 Exit(in_a^.freq - in_b^.freq); 192end; 193 194function cmp_pathlengths(const in_a: ph_elem; const in_b: ph_elem): longint; 195begin 196 if (in_a^.pathlength = in_b^.pathlength) then 197 //* see note on canonical pathlengths */ 198 Exit(in_b^.sym - in_a^.sym); 199 200 Exit(in_b^.pathlength - in_a^.pathlength); 201end; 202 203type 204 TQSortCompFunc = function(const in_a: ph_elem; const in_b: ph_elem): longint; 205 206procedure qsort(a_array: ph_elem; nelem: integer; cmpfunc: TQSortCompFunc); 207 208var 209 tmp: h_elem; 210 211 procedure QuickSort(L, R: Integer); 212 var 213 I, J, Pivot: Integer; 214 begin 215 repeat 216 I := L; 217 J := R; 218 Pivot := (L + R) div 2; 219 repeat 220 while cmpfunc(@a_array[I], @a_array[Pivot]) < 0 do Inc(I); 221 while cmpfunc(@a_array[J], @a_array[Pivot]) > 0 do Dec(J); 222 if I <= J then 223 begin 224 // exchange I and J 225 tmp := a_array[I]; 226 a_array[I] := a_array[J]; 227 a_array[J] := tmp; 228 229 if Pivot = I then 230 Pivot := J 231 else if Pivot = J then 232 Pivot := I; 233 Inc(I); 234 Dec(j); 235 end; 236 until I > J; 237 if L < J then 238 QuickSort(L,J); 239 L := I; 240 until I >= R; 241 end; 242begin 243 QuickSort(0, nelem - 1); 244end; 245 246procedure build_huffman_tree(nelem: longint; max_code_length: longint; freq: plongint; tree: phuff_entry); 247var 248 leaves: ph_elem; 249 inodes: pih_elem; 250 next_inode: pih_elem; 251 cur_inode: pih_elem; 252 cur_leaf :ph_elem; 253 leaves_left, 254 nleaves, 255 pathlength: longint; 256 cur_code: word; 257 codes_too_long: smallint = 0; 258 f1, f2: pih_elem; 259 i: longint; 260begin 261 leaves := GetMem(nelem * sizeof(h_elem)); 262 for i := 0 to nelem-1 do begin 263 leaves[i].freq := freq[i]; 264 leaves[i].sym := i; 265 leaves[i].pathlength := 0; 266 end; 267 268 qsort(leaves, nelem, @cmp_leaves); 269 270 271 leaves_left := 0; 272 while leaves_left < nelem do begin 273 if (leaves[leaves_left].freq) = 0 then break; 274 Inc(leaves_left); 275 end; 276 nleaves := leaves_left; 277 278 if (nleaves >= 2) then begin 279 inodes := AllocMem((nelem-1) * sizeof(ih_elem)); 280 repeat 281 if (codes_too_long <> 0) then begin 282 leaves_left := 0; 283 while leaves_left < nelem do begin 284 if (leaves[leaves_left].freq = 0) then break; 285 if (leaves[leaves_left].freq <> 1) then begin 286 leaves[leaves_left].freq := leaves[leaves_left].freq shr 1; 287 codes_too_long := 0; 288 end; 289 Inc(leaves_left); 290 end; 291 if codes_too_long <> 0 then 292 raise Exception.Create('!codes_too_long'); 293 end; 294 295 cur_leaf := leaves; 296 cur_inode := inodes; 297 next_inode := cur_inode; 298 299 repeat 300 f1 := nil; 301 f2 := nil; 302 if (leaves_left <> 0) and 303 ((cur_inode = next_inode) or 304 (cur_leaf^.freq <= cur_inode^.freq)) then begin 305 f1 := pih_elem(cur_leaf); 306 Inc(cur_leaf); 307 Dec(leaves_left); 308 end 309 else if (cur_inode <> next_inode) then begin 310 f1 := cur_inode; 311 Inc(cur_inode); 312 end; 313 314 if ((leaves_left <> 0) and 315 ((cur_inode = next_inode) or 316 (cur_leaf^.freq <= cur_inode^.freq))) then begin 317 f2 := pih_elem(cur_leaf); 318 Inc(cur_leaf); 319 Dec(leaves_left); 320 end 321 else if (cur_inode <> next_inode) then begin 322 f2 := cur_inode; 323 Inc(cur_inode); 324 end; 325 326 if (f1 <> nil) and (f2 <> nil) then begin 327 next_inode^.freq := f1^.freq + f2^.freq; 328 next_inode^.sym := -1; 329 next_inode^.left := f1; 330 next_inode^.right := f2; 331 next_inode^.parent := nil; 332 f1^.parent := next_inode; 333 f2^.parent := next_inode; 334 if (f1^.pathlength > f2^.pathlength) then 335 next_inode^.pathlength := f1^.pathlength + 1 336 else 337 next_inode^.pathlength := f2^.pathlength + 1; 338 if (next_inode^.pathlength > max_code_length) then begin 339 codes_too_long := 1; 340 break; 341 end; 342 Inc(next_inode); 343 end; 344 until (f1 = nil) and (f2 = nil); 345 until codes_too_long = 0; 346 347 //* now traverse tree depth-first */ 348 cur_inode := next_inode - 1; 349 pathlength := 0; 350 cur_inode^.pathlength := -1; 351 repeat 352 //* precondition: at unmarked node*/ 353 if (cur_inode^.sym = -1) then begin //*&& (cur_inode^.left)*/ 354 //* left node of unmarked node is unmarked */ 355 cur_inode := cur_inode^.left; 356 cur_inode^.pathlength := -1; 357 Inc(pathlength); 358 end 359 else begin 360 //* mark node */ 361 cur_inode^.pathlength := pathlength; 362//#if 0 363// if (cur_inode^.right) { 364// /* right node of previously unmarked node is unmarked */ 365// cur_inode = cur_inode^.right; 366// cur_inode^.pathlength = -1; 367// pathlength++; 368// } 369// else 370//#endif 371 begin 372 373 //* time to come up. Keep coming up until an unmarked node is reached */ 374 //* or the tree is exhausted */ 375 repeat 376 cur_inode := cur_inode^.parent; 377 Dec(pathlength); 378 379 //while (cur_inode && (cur_inode^.pathlength != -1)); 380 until (cur_inode = nil) or (cur_inode^.pathlength = -1); 381 if (cur_inode <> nil) then begin 382 //* found unmarked node; mark it and go right */ 383 cur_inode^.pathlength := pathlength; 384 cur_inode := cur_inode^.right; 385 cur_inode^.pathlength := -1; 386 Inc(pathlength); 387 //* would be complex if cur_inode could be null here. It can't */ 388 end 389 end; 390 end; 391 until cur_inode = nil; 392 393 freemem(inodes); 394 395 ///* the pathlengths are already in order, so this sorts by symbol */ 396 qsort(leaves, nelem, @cmp_pathlengths); 397 398//#if 0 399// pathlength = leaves[0].pathlength; 400// cur_code = 0; 401// for (i = 0; i < nleaves; i++) { 402// while (leaves[i].pathlength < pathlength) { 403// (!(cur_code & 1)); 404// cur_code >>= 1; 405// pathlength--; 406// } 407// leaves[i].code = cur_code; 408// cur_code++; 409// } 410//#else 411 pathlength := leaves[nleaves-1].pathlength; 412 if leaves[0].pathlength > 16 then 413 raise Exception.Create('leaves[0].pathlength <= 16'); 414 //* this method cannot deal with bigger codes, though 415 // the other canonical method can in some cases 416 // (because it starts with zeros ) */ 417 cur_code := 0; 418 for i := nleaves-1 downto 0 do begin 419 while (leaves[i].pathlength > pathlength) do begin 420 cur_code := cur_code shl 1; 421 Inc(pathlength); 422 end; 423 leaves[i].code := cur_code; 424 {$PUSH} 425 {$R-} 426 Inc(cur_code); // range error but i = 0 so it's harmless 427 {$POP} 428 end; 429//#endif 430 431 end 432 else if (nleaves = 1) then begin 433 //* 0 symbols is OK (not according to doc, but according to Caie) */ 434 //* but if only one symbol is present, two symbols are required */ 435 nleaves := 2; 436 leaves[0].pathlength := 1; 437 leaves[1].pathlength := 1; 438 if (leaves[1].sym > leaves[0].sym) then begin 439 leaves[1].code := 1; 440 leaves[0].code := 0; 441 end 442 else begin 443 leaves[0].code := 1; 444 leaves[1].code := 0; 445 end; 446 end; 447 448 Fillchar(tree^, nelem * sizeof(huff_entry), 0); 449 for i := 0 to nleaves-1 do begin 450 tree[leaves[i].sym].codelength := leaves[i].pathlength; 451 tree[leaves[i].sym].code := leaves[i].code; 452 end; 453 454 freemem(leaves); 455end; 456 457function lzx_get_chars(lzi: plz_info; n: longint; buf: pbyte): longint; cdecl; 458var 459 //* force lz compression to stop after every block */ 460 chars_read, 461 chars_pad: longint; 462 463 lzud: plzx_data; 464begin 465 lzud := plzx_data(lzi^.user_data); 466 467 chars_read := lzud^.get_bytes(lzud^.in_arg, n, buf); 468 Dec(lzud^.left_in_frame, chars_read mod LZX_FRAME_SIZE); 469 if (lzud^.left_in_frame < 0) then 470 Inc(lzud^.left_in_frame, LZX_FRAME_SIZE); 471 472 if ((chars_read < n) and (lzud^.left_in_frame <> 0)) then begin 473 chars_pad := n - chars_read; 474 if (chars_pad > lzud^.left_in_frame) then chars_pad := lzud^.left_in_frame; 475 //* never emit a full frame of padding. This prevents silliness when 476 // lzx_compress is called when at EOF but EOF not yet detected */ 477 if (chars_pad = LZX_FRAME_SIZE) then chars_pad := 0; 478 FillChar(buf[chars_read], chars_pad, 0); 479 Dec(lzud^.left_in_frame, chars_pad); 480 Inc(chars_read, chars_pad); 481 end; 482 lzx_get_chars := chars_read; 483end; 484 485function find_match_at(lzi: plz_info; loc: longint; match_len: longint; match_locp: plongint): longint; 486var 487 matchb, 488 nmatchb, 489 c1, c2: pbyte; 490 j: longint; 491begin 492 if -match_locp^ = loc then Exit(-1); 493 if loc < match_len then Exit(-1); 494 495 matchb := lzi^.block_buf + lzi^.block_loc + match_locp^; 496 nmatchb := lzi^.block_buf + lzi^.block_loc - loc; 497 c1 := matchb; 498 c2 := nmatchb; 499 j := 0; 500 while j < match_len do begin 501 if c1^ <> c2^ then begin 502 break; 503 end; 504 Inc(c1); 505 Inc(c2); 506 Inc(j); 507 end; 508 509 if (j = match_len) then begin 510 match_locp^ := -loc; 511 Exit(0); 512 end; 513 Exit(-1); 514end; 515 516procedure check_entropy(lzud: plzx_data; main_index: longint); 517var 518 freq, 519 n_ln_n, 520 rn_ln2, 521 cur_ratio: double; 522 n: longint; 523begin 524 //* delete old entropy accumulation */ 525 if (lzud^.main_freq_table[main_index] <> 1) then begin 526 freq := double(lzud^.main_freq_table[main_index])-1; 527 lzud^.main_entropy := lzud^.main_entropy + (freq * ln(freq)); 528 end; 529 //* add new entropy accumulation */ 530 freq := double(lzud^.main_freq_table[main_index]); 531 lzud^.main_entropy := lzud^.main_entropy - (freq * ln(freq)); 532 n := lzud^.block_codesp - lzud^.block_codes; 533 534 if (((n and $0FFF) = 0) and (lzud^.left_in_block >= $1000)) then begin 535 n_ln_n := (double(n) * ln(double(n))); 536 rn_ln2 := (rloge2 / double(n)); 537 cur_ratio := (n * rn_ln2 *(n_ln_n + lzud^.main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud^.main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / double(n); 538 539 if (cur_ratio > lzud^.last_ratio) then begin 540 lzud^.subdivide := -1; 541 lz_stop_compressing(lzud^.lzi); 542 end; 543 lzud^.last_ratio := cur_ratio; 544 545 end; 546 547end; 548 549function lzx_output_match(lzi: plz_info; match_pos, match_len: longint): longint; cdecl; 550var 551 lzud: plzx_data; 552 formatted_offset, 553 position_footer: longword; 554 length_footer, 555 length_header: byte; 556 len_pos_header: word; 557 position_slot: longint; 558 btdt: smallint; 559 left, right, mid: longint; 560label testforr; 561begin 562 lzud := plzx_data(lzi^.user_data); 563 564 position_footer := 0; 565 btdt := 0; 566 testforr: 567 if (match_pos = -lzud^.R0) then begin 568 match_pos := 0; 569 formatted_offset := 0; 570 position_slot := 0; 571 end 572 else if (match_pos = -lzud^.R1) then begin 573 lzud^.R1 := lzud^.R0; 574 lzud^.R0 := -match_pos; 575 match_pos := 1; 576 formatted_offset := 1; 577 position_slot := 1; 578 end 579 else if (match_pos = -lzud^.R2) then begin 580 lzud^.R2 := lzud^.R0; 581 lzud^.R0 := -match_pos; 582 match_pos := 2; 583 formatted_offset := 2; 584 position_slot := 2; 585 end 586 else begin 587 if (btdt = 0) then begin 588 btdt := 1; 589 if (find_match_at(lzi, lzud^.R0, match_len, @match_pos) = 0) then 590 goto testforr; 591 if (find_match_at(lzi, lzud^.R1, match_len, @match_pos) = 0) then 592 goto testforr; 593 if (find_match_at(lzi, lzud^.R2, match_len, @match_pos) = 0) then 594 goto testforr; 595 end; 596 597 formatted_offset := -match_pos + 2; 598 599 if ((match_len < 3) or 600 ((formatted_offset >= 64) and (match_len < 4)) or 601 ((formatted_offset >= 2048) and (match_len < 5)) or 602 ((formatted_offset >= 65536) and (match_len < 6))) then begin 603 //* reject matches where extra_bits will likely be bigger than just outputting 604 // literals. The numbers are basically derived through guessing 605 // and trial and error */ 606 Exit(-1); //* reject the match */ 607 end; 608 609 lzud^.R2 := lzud^.R1; 610 lzud^.R1 := lzud^.R0; 611 lzud^.R0 := -match_pos; 612 613 ///* calculate position base using binary search of table; if log2 can be 614 // done in hardware, approximation might work; 615 // trunc(log2(formatted_offset*formatted_offset)) gets either the proper 616 // position slot or the next one, except for slots 0, 1, and 39-49 617 618 // Slots 0-1 are handled by the R0-R1 procedures 619 620 // Slots 36-49 (formatted_offset >= 262144) can be found by 621 // (formatted_offset/131072) + 34 == 622 // (formatted_offset >> 17) + 34; 623 //*/ 624 if (formatted_offset >= 262144) then begin 625 position_slot := (formatted_offset shr 17) + 34; 626 end 627 else begin 628 left := 3; 629 right := lzud^.num_position_slots - 1; 630 position_slot := -1; 631 while (left <= right) do begin 632 mid := (left + right) div 2; 633 if (position_base[mid] <= formatted_offset) and 634 (position_base[mid+1] > formatted_offset) then begin 635 position_slot := mid; 636 break; 637 end; 638 if (formatted_offset > position_base[mid]) then 639 //* too low */ 640 left := mid + 1 641 else //* too high */ 642 right := mid; 643 end; 644 if not(position_slot >= 0) then 645 raise Exception.Create('position_slot >= 0'); 646 647 //* FIXME precalc extra_mask table */ 648 end; 649 position_footer := ((LongWord(1) shl extra_bits[position_slot]) - 1) and formatted_offset; 650 end; 651 652 //* match length = 8 bits */ 653 //* position_slot = 6 bits */ 654 //* position_footer = 17 bits */ 655 //* total = 31 bits */ 656 //* plus one to say whether it's a literal or not */ 657 lzud^.block_codesp^ := $80000000 or //* bit 31 in intelligent bit ordering */ 658 (position_slot shl 25) or //* bits 30-25 */ 659 (position_footer shl 8) or //* bits 8-24 */ 660 (match_len - MIN_MATCH); //* bits 0-7 */ 661 Inc(lzud^.block_codesp); 662 663 if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) then begin 664 length_header := match_len - MIN_MATCH; 665 //* length_footer = 255; */ /* not necessary */ 666 end 667 else begin 668 length_header := NUM_PRIMARY_LENGTHS; 669 length_footer := match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH); 670 Inc(lzud^.length_freq_table[length_footer]); 671 end; 672 len_pos_header := (position_slot shl 3) or length_header; 673 Inc(lzud^.main_freq_table[len_pos_header + NUM_CHARS]); 674 if (extra_bits[position_slot] >= 3) then begin 675 Inc(lzud^.aligned_freq_table[position_footer and 7]); 676 end; 677 678 Dec(lzud^.left_in_block, match_len); 679 680 if (lzud^.subdivide <> 0) then 681 check_entropy(lzud, len_pos_header + NUM_CHARS); 682 Exit(0); ///* accept the match */ 683end; 684 685procedure lzx_output_literal(lzi: plz_info; ch: byte); cdecl; 686var 687 lzud: plzx_data; 688begin 689 lzud := plzx_data(lzi^.user_data); 690 691 Dec(lzud^.left_in_block); 692 lzud^.block_codesp^ := ch; 693 Inc(lzud^.block_codesp); 694 Inc(lzud^.main_freq_table[ch]); 695 if (lzud^.subdivide <> 0) then 696 check_entropy(lzud, ch); 697end; 698 699procedure lzx_write_bits(lzxd: plzx_data; nbits: longint; bits: longword); cdecl; 700var 701 cur_bits, 702 shift_bits, 703 rshift_bits: longint; 704 mask_bits: word; 705begin 706 cur_bits := lzxd^.bits_in_buf; 707 while ((cur_bits + nbits) >= 16) do begin 708 shift_bits := 16 - cur_bits; 709 rshift_bits := nbits - shift_bits; 710 if (shift_bits = 16) then begin 711 lzxd^.bit_buf := (bits shr rshift_bits) and $FFFF; 712 end 713 else begin 714 mask_bits := (1 shl shift_bits) - 1; 715 lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits); 716 lzxd^.bit_buf := word(lzxd^.bit_buf or (bits shr rshift_bits) and mask_bits); 717 end; 718{$IFDEF ENDIAN_BIG} 719 lzxd^.bit_buf := word(((lzxd^.bit_buf and $FF)shl 8) or (lzxd^.bit_buf shr 8)); 720{$ENDIF} 721 lzxd^.put_bytes(lzxd^.out_arg, sizeof(lzxd^.bit_buf), @lzxd^.bit_buf); 722 Inc(lzxd^.len_compressed_output, sizeof(lzxd^.bit_buf)); 723 lzxd^.bit_buf := 0; 724 Dec(nbits, shift_bits); 725 cur_bits := 0; 726 end; 727 //* (cur_bits + nbits) < 16. If nbits := 0, we're done. 728 // otherwise move bits in */ 729 shift_bits := nbits; 730 mask_bits := (1 shl shift_bits) - 1; 731 lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits); 732 lzxd^.bit_buf := word(lzxd^.bit_buf or bits and mask_bits); 733 Inc(cur_bits, nbits); 734 735 lzxd^.bits_in_buf := cur_bits; 736end; 737 738procedure lzx_align_output(lzxd: plzx_data); 739begin 740 if (lzxd^.bits_in_buf <> 0) then begin 741 lzx_write_bits(lzxd, 16 - lzxd^.bits_in_buf, 0); 742 end; 743 if (lzxd^.mark_frame <> nil) then 744 lzxd^.mark_frame(lzxd^.mark_frame_arg, lzxd^.len_uncompressed_input, lzxd^.len_compressed_output); 745end; 746 747procedure lzx_write_compressed_literals(lzxd: plzx_data; block_type: longint); 748var 749 cursor: plongword; 750 endp: plongword; 751 position_slot: word; 752 position_footer, 753 match_len_m2, //* match length minus 2, which is MIN_MATCH */ 754 verbatim_bits, 755 block_code: longword; 756 length_header, 757 length_footer, 758 len_pos_header: word; 759 huffe: phuff_entry; 760 frame_count: longint; 761begin 762 cursor := lzxd^.block_codes; 763 endp := lzxd^.block_codesp; 764 frame_count := (lzxd^.len_uncompressed_input mod LZX_FRAME_SIZE); 765 766 Dec(lzxd^.len_uncompressed_input, frame_count); //* will be added back in later */ 767 while (cursor < endp) do begin 768 block_code := cursor^; 769 Inc(cursor); 770 if (block_code and $80000000) <> 0 then begin 771 {* 772 * 0x80000000 | bit 31 in intelligent bit ordering 773 * (position_slot shl 25) | bits 30-25 774 * (position_footer shl 8) | bits 8-24 775 * (match_len - MIN_MATCH); bits 0-7 776 * 777 *} 778 779 match_len_m2 := block_code and $FF; //* 8 bits */ 780 position_footer := (block_code shr 8)and $1FFFF; //* 17 bits */ 781 position_slot := (block_code shr 25) and $3F; //* 6 bits */ 782 783 if (match_len_m2 < NUM_PRIMARY_LENGTHS) then begin 784 length_header := match_len_m2; 785 length_footer := 255; //* personal encoding for NULL */ 786 end 787 else begin 788 length_header := NUM_PRIMARY_LENGTHS; 789 length_footer := match_len_m2 - NUM_PRIMARY_LENGTHS; 790 end; 791 len_pos_header := (position_slot shl 3) or length_header; 792 huffe := @lzxd^.main_tree[len_pos_header+NUM_CHARS]; 793 lzx_write_bits(lzxd, huffe^.codelength, huffe^.code); 794 if (length_footer <> 255) then begin 795 huffe := @lzxd^.length_tree[length_footer]; 796 lzx_write_bits(lzxd, huffe^.codelength, huffe^.code); 797 end; 798 if ((block_type = LZX_ALIGNED_OFFSET_BLOCK) and (extra_bits[position_slot] >= 3)) then begin 799 //* aligned offset block and code */ 800 verbatim_bits := position_footer shr 3; 801 lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits); 802 huffe := @lzxd^.aligned_tree[position_footer and 7]; 803 lzx_write_bits(lzxd, huffe^.codelength, huffe^.code); 804 end 805 else begin 806 verbatim_bits := position_footer; 807 lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits); 808 end; 809 Inc(frame_count, match_len_m2 + 2); 810 end 811 else begin 812 //* literal */ 813 if not(block_code < NUM_CHARS) then 814 raise Exception.Create('block_code < NUM_CHARS'); 815 816 huffe := @lzxd^.main_tree[block_code]; 817 lzx_write_bits(lzxd, huffe^.codelength, huffe^.code); 818 Inc(frame_count); 819 end; 820 if (frame_count = LZX_FRAME_SIZE) then begin 821 Inc(lzxd^.len_uncompressed_input, frame_count); 822 lzx_align_output(lzxd); 823 frame_count := 0; 824 end; 825 if not(frame_count < LZX_FRAME_SIZE) then 826 raise Exception.Create('frame_count < LZX_FRAME_SIZE'); 827 end; 828 Inc(lzxd^.len_uncompressed_input, frame_count); 829end; 830 831function lzx_write_compressed_tree(lzxd: plzx_data; tree: phuff_entry; prevlengths: pbyte; 832 treesize: longint): longint; 833var 834 codes, 835 runs: pbyte; 836 freqs: array [0..LZX_PRETREE_SIZE-1] of longint; 837 cur_run: longint; 838 last_len: longint; 839 pretree: array [0..19] of huff_entry; 840 codep, 841 codee, 842 runp: pbyte; 843 excess, 844 i, 845 cur_code: longint; 846begin 847 codes := getmem(treesize*sizeof(byte)); 848 codep := codes; 849 runs := getmem(treesize*sizeof(byte)); 850 runp := runs; 851 Fillchar(freqs[0], sizeof(freqs), 0); 852 cur_run := 1; 853 last_len := tree[0].codelength; 854 for i := 1 to treesize do begin 855 if ((i = treesize) or (tree[i].codelength <> last_len)) then begin 856 if (last_len = 0) then begin 857 while (cur_run >= 20) do begin 858 excess := cur_run - 20; 859 if (excess > 31) then excess := 31; 860 codep^ := 18; 861 Inc(codep); 862 runp^ := excess; 863 Inc(runp); 864 Dec(cur_run, excess + 20); 865 Inc(freqs[18]); 866 end; 867 while (cur_run >= 4) do begin 868 excess := cur_run - 4; 869 if (excess > 15) then excess := 15; 870 codep^ := 17; 871 Inc(codep); 872 runp^ := excess; 873 Inc(runp); 874 Dec(cur_run, excess + 4); 875 Inc(freqs[17]); 876 end; 877 while (cur_run > 0) do begin 878 codep^ := prevlengths[i - cur_run]; 879 Inc(freqs[codep^]); 880 Inc(codep); 881 runp^ := 0; //* not necessary */ 882 Inc(runp); 883 Dec(cur_run); 884 end; 885 end 886 else begin 887 while (cur_run >= 4) do begin 888 if (cur_run = 4) then excess := 0 889 else excess := 1; 890 codep^ := 19; 891 Inc(codep); 892 runp^ := excess; 893 Inc(runp); 894 Inc(freqs[19]); 895 //* right, MS lies again. Code is NOT 896 // prev_len + len (mod 17), it's prev_len - len (mod 17)*/ 897 codep^ := byte(prevlengths[i-cur_run] - last_len); 898 if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, 17); 899 Inc(freqs[codep^]); 900 Inc(codep); 901 runp^ := 0; //* not necessary */ 902 Inc(runp); 903 Dec(cur_run, excess+4); 904 end; 905 while (cur_run > 0) do begin 906 codep^ := byte(prevlengths[i-cur_run] - last_len); 907 if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, byte(17)); 908 runp^ := 0; //* not necessary */ 909 Inc(runp); 910 Dec(cur_run); 911 Inc(freqs[codep^]); 912 Inc(codep); 913 end; 914 end; 915 if (i <> treesize) then 916 last_len := tree[i].codelength; 917 cur_run := 0; 918 end; 919 Inc(cur_run); 920 end; 921 codee := codep; 922 //* now create the huffman table and write out the pretree */ 923 build_huffman_tree(LZX_PRETREE_SIZE, 16, @freqs[0], pretree); 924 for i := 0 to LZX_PRETREE_SIZE-1 do begin 925 lzx_write_bits(lzxd, 4, pretree[i].codelength); 926 end; 927 codep := codes; 928 runp := runs; 929 cur_run := 0; 930 while (codep < codee) do begin 931 cur_code := codep^; 932 Inc(codep); 933 lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code); 934 if (cur_code = 17) then begin 935 Inc(cur_run, runp^ + 4); 936 lzx_write_bits(lzxd, 4, runp^); 937 end 938 else if (cur_code = 18) then begin 939 Inc(cur_run, runp^ + 20); 940 lzx_write_bits(lzxd, 5, runp^); 941 end 942 else if (cur_code = 19) then begin 943 Inc(cur_run, runp^ + 4); 944 lzx_write_bits(lzxd, 1, runp^); 945 cur_code := codep^; 946 Inc(codep); 947 lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code); 948 Inc(runp); 949 end 950 else begin 951 Inc(cur_run); 952 end; 953 Inc(runp); 954 end; 955 freemem(codes); 956 freemem(runs); 957 Exit(0); 958end; 959 960procedure lzx_reset(lzxd:plzx_data); 961begin 962 lzxd^.need_1bit_header := 1; 963 lzxd^.R0 := 1; 964 lzxd^.R1 := 1; 965 lzxd^.R2 := 1; 966 Fillchar(lzxd^.prev_main_treelengths[0], lzxd^.main_tree_size * sizeof(byte), 0); 967 Fillchar(lzxd^.prev_length_treelengths[0], NUM_SECONDARY_LENGTHS * sizeof(byte), 0); 968 lz_reset(lzxd^.lzi); 969end; 970 971function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide:longbool):longint; 972var 973 i: longint; 974 written_sofar: longword = 0; 975 block_type: longint; 976 uncomp_bits, 977 comp_bits, 978 comp_bits_ovh, 979 uncomp_length: longword; 980begin 981 if ((lzxd^.block_size <> block_size) or (lzxd^.block_codes = nil)) then begin 982 if (lzxd^.block_codes <> nil) then freemem(lzxd^.block_codes); 983 lzxd^.block_size := block_size; 984 lzxd^.block_codes := GetMem(block_size * sizeof(longword)); 985 end; 986 987 lzxd^.subdivide := Ord(subdivide); 988 989 lzxd^.left_in_block := block_size; 990 lzxd^.left_in_frame := LZX_FRAME_SIZE; 991 lzxd^.main_entropy := 0.0; 992 lzxd^.last_ratio := 9999999.0; 993 lzxd^.block_codesp := lzxd^.block_codes; 994 995 996 Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0); 997 Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0); 998 Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0); 999 while ((lzxd^.left_in_block<>0) and ((lz_left_to_process(lzxd^.lzi)<>0) or not(lzxd^.at_eof(lzxd^.in_arg)))) do begin 1000 lz_compress(lzxd^.lzi, lzxd^.left_in_block); 1001 1002 if (lzxd^.left_in_frame = 0) then begin 1003 lzxd^.left_in_frame := LZX_FRAME_SIZE; 1004 end; 1005 1006 if ((lzxd^.subdivide<0) 1007 or (lzxd^.left_in_block = 0) 1008 or ((lz_left_to_process(lzxd^.lzi) = 0) and lzxd^.at_eof(lzxd^.in_arg))) then begin 1009 //* now one block is LZ-analyzed. */ 1010 //* time to write it out */ 1011 uncomp_length := lzxd^.block_size - lzxd^.left_in_block - written_sofar; 1012 //* uncomp_length will sometimes be 0 when input length is 1013 // an exact multiple of frame size */ 1014 if (uncomp_length = 0) then 1015 continue; 1016 if (lzxd^.subdivide < 0) then begin 1017 lzxd^.subdivide := 1; 1018 end; 1019 1020 if (lzxd^.need_1bit_header <> 0) then begin 1021 //* one bit Intel preprocessing header */ 1022 //* always 0 because this implementation doesn't do Intel preprocessing */ 1023 lzx_write_bits(lzxd, 1, 0); 1024 lzxd^.need_1bit_header := 0; 1025 end; 1026 //* handle extra bits */ 1027 uncomp_bits := 0; 1028 comp_bits := 0; 1029 1030 build_huffman_tree(LZX_ALIGNED_SIZE, 7, @lzxd^.aligned_freq_table[0], @lzxd^.aligned_tree[0]); 1031 for i := 0 to LZX_ALIGNED_SIZE-1 do begin 1032 Inc(uncomp_bits, lzxd^.aligned_freq_table[i]* 3); 1033 Inc(comp_bits, lzxd^.aligned_freq_table[i]* lzxd^.aligned_tree[i].codelength); 1034 end; 1035 comp_bits_ovh := comp_bits + LZX_ALIGNED_SIZE * 3; 1036 if (comp_bits_ovh < uncomp_bits) then 1037 block_type := LZX_ALIGNED_OFFSET_BLOCK 1038 else 1039 block_type := LZX_VERBATIM_BLOCK; 1040 1041 1042 //* block type */ 1043 lzx_write_bits(lzxd, 3, block_type); 1044 //* uncompressed length */ 1045 lzx_write_bits(lzxd, 24, uncomp_length); 1046 1047 written_sofar := lzxd^.block_size - lzxd^.left_in_block; 1048 1049 //* now write out the aligned offset trees if present */ 1050 if (block_type = LZX_ALIGNED_OFFSET_BLOCK) then begin 1051 for i := 0 to LZX_ALIGNED_SIZE-1 do begin 1052 lzx_write_bits(lzxd, 3, lzxd^.aligned_tree[i].codelength); 1053 end; 1054 end; 1055 //* end extra bits */ 1056 build_huffman_tree(lzxd^.main_tree_size, LZX_MAX_CODE_LENGTH, 1057 lzxd^.main_freq_table, lzxd^.main_tree); 1058 build_huffman_tree(NUM_SECONDARY_LENGTHS, 16, 1059 @lzxd^.length_freq_table[0], @lzxd^.length_tree[0]); 1060 1061 //* now write the pre-tree and tree for main 1 */ 1062 lzx_write_compressed_tree(lzxd, lzxd^.main_tree, lzxd^.prev_main_treelengths, NUM_CHARS); 1063 1064 //* now write the pre-tree and tree for main 2*/ 1065 lzx_write_compressed_tree(lzxd, lzxd^.main_tree + NUM_CHARS, 1066 lzxd^.prev_main_treelengths + NUM_CHARS, 1067 lzxd^.main_tree_size - NUM_CHARS); 1068 1069 //* now write the pre tree and tree for length */ 1070 lzx_write_compressed_tree(lzxd, @lzxd^.length_tree[0], @lzxd^.prev_length_treelengths[0], 1071 NUM_SECONDARY_LENGTHS); 1072 1073 //* now write literals */ 1074 lzx_write_compressed_literals(lzxd, block_type); 1075 1076 //* copy treelengths somewhere safe to do delta compression */ 1077 for i := 0 to lzxd^.main_tree_size-1 do begin 1078 lzxd^.prev_main_treelengths[i] := lzxd^.main_tree[i].codelength; 1079 end; 1080 for i := 0 to NUM_SECONDARY_LENGTHS-1 do begin 1081 lzxd^.prev_length_treelengths[i] := lzxd^.length_tree[i].codelength; 1082 end; 1083 lzxd^.main_entropy := 0.0; 1084 lzxd^.last_ratio := 9999999.0; 1085 lzxd^.block_codesp := lzxd^.block_codes; 1086 1087 Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0); 1088 Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0); 1089 Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0); 1090 end; 1091 end; 1092 Exit(0); 1093end; 1094 1095function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc; 1096 put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint;var 1097 wsize: longint; 1098 lzxd: plzx_data; 1099begin 1100 if ((wsize_code < 15) or (wsize_code > 21)) then begin 1101 Exit(-1); 1102 end; 1103 1104 //lzx_init_static(); I hardcoded this instead 1105 1106 New(lzxd); 1107 FillChar(lzxd^, Sizeof(lzxd), 0); 1108 lzxdp^ := lzxd; 1109 1110 if (lzxd = nil) then 1111 Exit(-2); 1112 1113 lzxd^.in_arg := get_bytes_arg; 1114 lzxd^.out_arg := put_bytes_arg; 1115 lzxd^.mark_frame_arg := mark_frame_arg; 1116 lzxd^.get_bytes := get_bytes; 1117 lzxd^.put_bytes := put_bytes; 1118 lzxd^.at_eof := at_eof; 1119 lzxd^.mark_frame := mark_frame; 1120 1121 wsize := 1 shl (wsize_code); 1122 1123 lzxd^.bits_in_buf := 0; 1124 lzxd^.block_codes := nil; 1125 lzxd^.num_position_slots := num_position_slots[wsize_code-15]; 1126 lzxd^.main_tree_size := (NUM_CHARS + 8 * lzxd^.num_position_slots); 1127 1128 lzxd^.main_freq_table := GetMem(sizeof(longint) * lzxd^.main_tree_size); 1129 lzxd^.main_tree := GetMem(sizeof(huff_entry)* lzxd^.main_tree_size); 1130 lzxd^.prev_main_treelengths := GetMem(sizeof(byte)*lzxd^.main_tree_size); 1131 1132 New(lzxd^.lzi); 1133 //* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */ 1134 lz_init(lzxd^.lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE, 1135 @lzx_get_chars, @lzx_output_match, @lzx_output_literal,lzxd); 1136 lzxd^.len_uncompressed_input := 0; 1137 lzxd^.len_compressed_output := 0; 1138 lzx_reset(lzxd); 1139 Exit(0); 1140end; 1141 1142function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint; 1143begin 1144 if (lzxr <> nil) then begin 1145 lzxr^.len_compressed_output := lzxd^.len_compressed_output; 1146 lzxr^.len_uncompressed_input := lzxd^.len_uncompressed_input; 1147 end; 1148 lz_release(lzxd^.lzi); 1149 Dispose(lzxd^.lzi); 1150 freemem(lzxd^.prev_main_treelengths); 1151 freemem(lzxd^.main_tree); 1152 freemem(lzxd^.main_freq_table); 1153 freemem(lzxd^.block_codes); 1154 dispose(lzxd); 1155 Exit(0); 1156end; 1157 1158initialization 1159 rloge2 := 1.0 / ln(2); 1160end.