/packages/chm/src/paslzxcomp.pas

https://github.com/slibre/freepascal · Pascal · 1160 lines · 892 code · 119 blank · 149 comment · 132 complexity · cd4e51b609092e1e6f0aec07bb0a0c8f MD5 · raw file

  1. { Copyright (C) <2005> <Andrew Haines> paslzxcomp.pas
  2. This library is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU Library General Public License as published by
  4. the Free Software Foundation; either version 2 of the License, or (at your
  5. option) any later version.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  8. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  9. for more details.
  10. You should have received a copy of the GNU Library General Public License
  11. along with this library; if not, write to the Free Software Foundation,
  12. Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  13. }
  14. {
  15. See the file COPYING.FPC, included in this distribution,
  16. for details about the copyright.
  17. }
  18. unit paslzxcomp;
  19. {$MODE OBJFPC}
  20. {$GOTO ON}
  21. interface
  22. uses paslznonslide;
  23. const
  24. MIN_MATCH = 2;
  25. MAX_MATCH = 257;
  26. NUM_CHARS = 256;
  27. NUM_PRIMARY_LENGTHS = 7;
  28. NUM_SECONDARY_LENGTHS = 249;
  29. { the names of these constants are specific to this library }
  30. LZX_MAX_CODE_LENGTH = 16;
  31. LZX_FRAME_SIZE = 32768;
  32. LZX_PRETREE_SIZE = 20;
  33. LZX_ALIGNED_BITS = 3;
  34. LZX_ALIGNED_SIZE = 8;
  35. LZX_VERBATIM_BLOCK = 1;
  36. LZX_ALIGNED_OFFSET_BLOCK = 2;
  37. {$IFDEF FPC}
  38. {$PACKRECORDS C}
  39. {$ENDIF}
  40. {
  41. File lzx_compress.h, part of lzxcomp library
  42. Copyright (C) 2002 Matthew T. Russotto
  43. This program is free software; you can redistribute it and/or modify
  44. it under the terms of the GNU Lesser General Public License as published by
  45. the Free Software Foundation; version 2.1 only
  46. This program is distributed in the hope that it will be useful,
  47. but WITHOUT ANY WARRANTY; without even the implied warranty of
  48. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  49. GNU Lesser General Public License for more details.
  50. You should have received a copy of the GNU Lesser General Public License
  51. along with this program; if not, write to the Free Software
  52. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  53. }
  54. type
  55. PPlzx_data = ^Plzx_data;
  56. Plzx_data = ^lzx_data;
  57. TGetBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl;
  58. TWriteBytesFunc = function (arg:pointer; n:longint; buf:pointer):longint; cdecl;
  59. TMarkFrameFunc = procedure (arg:pointer; uncomp:dword; comp:dword); cdecl;
  60. TIsEndOfFileFunc = function (arg:pointer): longbool; cdecl;
  61. { add more here? Error codes, # blocks, # frames, etc? }
  62. lzx_results = record
  63. len_compressed_output : longint;
  64. len_uncompressed_input : longint;
  65. end;
  66. phuff_entry = ^huff_entry;
  67. huff_entry = record
  68. codelength: smallint;
  69. code: word;
  70. end;
  71. lzx_data = record
  72. in_arg : pointer;
  73. out_arg: pointer;
  74. mark_frame_arg: pointer;
  75. get_bytes: TGetBytesFunc;
  76. at_eof: TIsEndOfFileFunc;
  77. put_bytes: TWriteBytesFunc;
  78. mark_frame: TMarkFrameFunc;
  79. lzi: plz_info;
  80. {/* a 'frame' is an 0x8000 byte thing. Called that because otherwise
  81. I'd confuse myself overloading 'block' */}
  82. left_in_frame: longint;
  83. left_in_block: longint;
  84. R0, R1, R2: longint;
  85. num_position_slots: longint;
  86. //* this is the LZX block size */
  87. block_size: longint;
  88. main_freq_table: plongint;
  89. length_freq_table: array [0..NUM_SECONDARY_LENGTHS-1] of longint;
  90. aligned_freq_table: array [0..LZX_ALIGNED_SIZE-1] of longint;
  91. block_codes: plongword;
  92. block_codesp: plongword;
  93. main_tree: phuff_entry;
  94. length_tree: array[0..NUM_SECONDARY_LENGTHS-1] of huff_entry;
  95. aligned_tree: array[0..LZX_ALIGNED_SIZE-1] of huff_entry;
  96. main_tree_size: longint;
  97. bit_buf: word;
  98. bits_in_buf: longint;
  99. main_entropy: double;
  100. last_ratio: double;
  101. prev_main_treelengths: pbyte;
  102. prev_length_treelengths: array [0..NUM_SECONDARY_LENGTHS-1] of byte;
  103. len_uncompressed_input: longword;
  104. len_compressed_output: longword;
  105. need_1bit_header: smallint;
  106. subdivide: smallint; //* 0 = don't subdivide, 1 = allowed, -1 = requested */
  107. end;
  108. Plzx_results = ^lzx_results;
  109. function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc;
  110. put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint;
  111. procedure lzx_reset(lzxd:plzx_data);
  112. function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide: LongBool):longint;
  113. function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint;
  114. implementation
  115. uses math, sysutils;
  116. var
  117. rloge2: double; // set in initialization section
  118. const
  119. num_position_slots: array [0..6] of smallint = (30, 32, 34, 36, 38, 42, 50);
  120. extra_bits: array [0..50] of Byte = (
  121. 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
  122. 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
  123. 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
  124. 17, 17, 17
  125. );
  126. position_base: array [0..50] of dword = (
  127. 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
  128. 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
  129. 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
  130. 1835008, 1966080, 2097152
  131. );
  132. type
  133. pih_elem = ^ih_elem;
  134. ih_elem = record
  135. freq: longint;
  136. sym: smallint;
  137. pathlength: smallint;
  138. parent: pih_elem;
  139. left: pih_elem;
  140. right: pih_elem;
  141. end;
  142. ph_elem = ^h_elem;
  143. h_elem = record
  144. freq: longint;
  145. sym: smallint;
  146. pathlength: smallint;
  147. parent: pih_elem;
  148. code: word;
  149. end;
  150. function cmp_leaves(const in_a: ph_elem; const in_b: ph_elem): longint;
  151. begin
  152. if (in_a^.freq = 0) and (in_b^.freq <> 0) then
  153. Exit(1);
  154. if (in_a^.freq <> 0) and (in_b^.freq = 0) then
  155. Exit(-1);
  156. if (in_a^.freq = in_b^.freq) then
  157. Exit(in_a^.sym - in_b^.sym);
  158. Exit(in_a^.freq - in_b^.freq);
  159. end;
  160. function cmp_pathlengths(const in_a: ph_elem; const in_b: ph_elem): longint;
  161. begin
  162. if (in_a^.pathlength = in_b^.pathlength) then
  163. //* see note on canonical pathlengths */
  164. Exit(in_b^.sym - in_a^.sym);
  165. Exit(in_b^.pathlength - in_a^.pathlength);
  166. end;
  167. type
  168. TQSortCompFunc = function(const in_a: ph_elem; const in_b: ph_elem): longint;
  169. procedure qsort(a_array: ph_elem; nelem: integer; cmpfunc: TQSortCompFunc);
  170. var
  171. tmp: h_elem;
  172. procedure QuickSort(L, R: Integer);
  173. var
  174. I, J, Pivot: Integer;
  175. begin
  176. repeat
  177. I := L;
  178. J := R;
  179. Pivot := (L + R) div 2;
  180. repeat
  181. while cmpfunc(@a_array[I], @a_array[Pivot]) < 0 do Inc(I);
  182. while cmpfunc(@a_array[J], @a_array[Pivot]) > 0 do Dec(J);
  183. if I <= J then
  184. begin
  185. // exchange I and J
  186. tmp := a_array[I];
  187. a_array[I] := a_array[J];
  188. a_array[J] := tmp;
  189. if Pivot = I then
  190. Pivot := J
  191. else if Pivot = J then
  192. Pivot := I;
  193. Inc(I);
  194. Dec(j);
  195. end;
  196. until I > J;
  197. if L < J then
  198. QuickSort(L,J);
  199. L := I;
  200. until I >= R;
  201. end;
  202. begin
  203. QuickSort(0, nelem - 1);
  204. end;
  205. procedure build_huffman_tree(nelem: longint; max_code_length: longint; freq: plongint; tree: phuff_entry);
  206. var
  207. leaves: ph_elem;
  208. inodes: pih_elem;
  209. next_inode: pih_elem;
  210. cur_inode: pih_elem;
  211. cur_leaf :ph_elem;
  212. leaves_left,
  213. nleaves,
  214. pathlength: longint;
  215. cur_code: word;
  216. codes_too_long: smallint = 0;
  217. f1, f2: pih_elem;
  218. i: longint;
  219. begin
  220. leaves := GetMem(nelem * sizeof(h_elem));
  221. for i := 0 to nelem-1 do begin
  222. leaves[i].freq := freq[i];
  223. leaves[i].sym := i;
  224. leaves[i].pathlength := 0;
  225. end;
  226. qsort(leaves, nelem, @cmp_leaves);
  227. leaves_left := 0;
  228. while leaves_left < nelem do begin
  229. if (leaves[leaves_left].freq) = 0 then break;
  230. Inc(leaves_left);
  231. end;
  232. nleaves := leaves_left;
  233. if (nleaves >= 2) then begin
  234. inodes := AllocMem((nelem-1) * sizeof(ih_elem));
  235. repeat
  236. if (codes_too_long <> 0) then begin
  237. leaves_left := 0;
  238. while leaves_left < nelem do begin
  239. if (leaves[leaves_left].freq = 0) then break;
  240. if (leaves[leaves_left].freq <> 1) then begin
  241. leaves[leaves_left].freq := leaves[leaves_left].freq shr 1;
  242. codes_too_long := 0;
  243. end;
  244. Inc(leaves_left);
  245. end;
  246. if codes_too_long <> 0 then
  247. raise Exception.Create('!codes_too_long');
  248. end;
  249. cur_leaf := leaves;
  250. cur_inode := inodes;
  251. next_inode := cur_inode;
  252. repeat
  253. f1 := nil;
  254. f2 := nil;
  255. if (leaves_left <> 0) and
  256. ((cur_inode = next_inode) or
  257. (cur_leaf^.freq <= cur_inode^.freq)) then begin
  258. f1 := pih_elem(cur_leaf);
  259. Inc(cur_leaf);
  260. Dec(leaves_left);
  261. end
  262. else if (cur_inode <> next_inode) then begin
  263. f1 := cur_inode;
  264. Inc(cur_inode);
  265. end;
  266. if ((leaves_left <> 0) and
  267. ((cur_inode = next_inode) or
  268. (cur_leaf^.freq <= cur_inode^.freq))) then begin
  269. f2 := pih_elem(cur_leaf);
  270. Inc(cur_leaf);
  271. Dec(leaves_left);
  272. end
  273. else if (cur_inode <> next_inode) then begin
  274. f2 := cur_inode;
  275. Inc(cur_inode);
  276. end;
  277. if (f1 <> nil) and (f2 <> nil) then begin
  278. next_inode^.freq := f1^.freq + f2^.freq;
  279. next_inode^.sym := -1;
  280. next_inode^.left := f1;
  281. next_inode^.right := f2;
  282. next_inode^.parent := nil;
  283. f1^.parent := next_inode;
  284. f2^.parent := next_inode;
  285. if (f1^.pathlength > f2^.pathlength) then
  286. next_inode^.pathlength := f1^.pathlength + 1
  287. else
  288. next_inode^.pathlength := f2^.pathlength + 1;
  289. if (next_inode^.pathlength > max_code_length) then begin
  290. codes_too_long := 1;
  291. break;
  292. end;
  293. Inc(next_inode);
  294. end;
  295. until (f1 = nil) and (f2 = nil);
  296. until codes_too_long = 0;
  297. //* now traverse tree depth-first */
  298. cur_inode := next_inode - 1;
  299. pathlength := 0;
  300. cur_inode^.pathlength := -1;
  301. repeat
  302. //* precondition: at unmarked node*/
  303. if (cur_inode^.sym = -1) then begin //*&& (cur_inode^.left)*/
  304. //* left node of unmarked node is unmarked */
  305. cur_inode := cur_inode^.left;
  306. cur_inode^.pathlength := -1;
  307. Inc(pathlength);
  308. end
  309. else begin
  310. //* mark node */
  311. cur_inode^.pathlength := pathlength;
  312. //#if 0
  313. // if (cur_inode^.right) {
  314. // /* right node of previously unmarked node is unmarked */
  315. // cur_inode = cur_inode^.right;
  316. // cur_inode^.pathlength = -1;
  317. // pathlength++;
  318. // }
  319. // else
  320. //#endif
  321. begin
  322. //* time to come up. Keep coming up until an unmarked node is reached */
  323. //* or the tree is exhausted */
  324. repeat
  325. cur_inode := cur_inode^.parent;
  326. Dec(pathlength);
  327. //while (cur_inode && (cur_inode^.pathlength != -1));
  328. until (cur_inode = nil) or (cur_inode^.pathlength = -1);
  329. if (cur_inode <> nil) then begin
  330. //* found unmarked node; mark it and go right */
  331. cur_inode^.pathlength := pathlength;
  332. cur_inode := cur_inode^.right;
  333. cur_inode^.pathlength := -1;
  334. Inc(pathlength);
  335. //* would be complex if cur_inode could be null here. It can't */
  336. end
  337. end;
  338. end;
  339. until cur_inode = nil;
  340. freemem(inodes);
  341. ///* the pathlengths are already in order, so this sorts by symbol */
  342. qsort(leaves, nelem, @cmp_pathlengths);
  343. //#if 0
  344. // pathlength = leaves[0].pathlength;
  345. // cur_code = 0;
  346. // for (i = 0; i < nleaves; i++) {
  347. // while (leaves[i].pathlength < pathlength) {
  348. // (!(cur_code & 1));
  349. // cur_code >>= 1;
  350. // pathlength--;
  351. // }
  352. // leaves[i].code = cur_code;
  353. // cur_code++;
  354. // }
  355. //#else
  356. pathlength := leaves[nleaves-1].pathlength;
  357. if leaves[0].pathlength > 16 then
  358. raise Exception.Create('leaves[0].pathlength <= 16');
  359. //* this method cannot deal with bigger codes, though
  360. // the other canonical method can in some cases
  361. // (because it starts with zeros ) */
  362. cur_code := 0;
  363. for i := nleaves-1 downto 0 do begin
  364. while (leaves[i].pathlength > pathlength) do begin
  365. cur_code := cur_code shl 1;
  366. Inc(pathlength);
  367. end;
  368. leaves[i].code := cur_code;
  369. {$PUSH}
  370. {$R-}
  371. Inc(cur_code); // range error but i = 0 so it's harmless
  372. {$POP}
  373. end;
  374. //#endif
  375. end
  376. else if (nleaves = 1) then begin
  377. //* 0 symbols is OK (not according to doc, but according to Caie) */
  378. //* but if only one symbol is present, two symbols are required */
  379. nleaves := 2;
  380. leaves[0].pathlength := 1;
  381. leaves[1].pathlength := 1;
  382. if (leaves[1].sym > leaves[0].sym) then begin
  383. leaves[1].code := 1;
  384. leaves[0].code := 0;
  385. end
  386. else begin
  387. leaves[0].code := 1;
  388. leaves[1].code := 0;
  389. end;
  390. end;
  391. Fillchar(tree^, nelem * sizeof(huff_entry), 0);
  392. for i := 0 to nleaves-1 do begin
  393. tree[leaves[i].sym].codelength := leaves[i].pathlength;
  394. tree[leaves[i].sym].code := leaves[i].code;
  395. end;
  396. freemem(leaves);
  397. end;
  398. function lzx_get_chars(lzi: plz_info; n: longint; buf: pbyte): longint; cdecl;
  399. var
  400. //* force lz compression to stop after every block */
  401. chars_read,
  402. chars_pad: longint;
  403. lzud: plzx_data;
  404. begin
  405. lzud := plzx_data(lzi^.user_data);
  406. chars_read := lzud^.get_bytes(lzud^.in_arg, n, buf);
  407. Dec(lzud^.left_in_frame, chars_read mod LZX_FRAME_SIZE);
  408. if (lzud^.left_in_frame < 0) then
  409. Inc(lzud^.left_in_frame, LZX_FRAME_SIZE);
  410. if ((chars_read < n) and (lzud^.left_in_frame <> 0)) then begin
  411. chars_pad := n - chars_read;
  412. if (chars_pad > lzud^.left_in_frame) then chars_pad := lzud^.left_in_frame;
  413. //* never emit a full frame of padding. This prevents silliness when
  414. // lzx_compress is called when at EOF but EOF not yet detected */
  415. if (chars_pad = LZX_FRAME_SIZE) then chars_pad := 0;
  416. FillChar(buf[chars_read], chars_pad, 0);
  417. Dec(lzud^.left_in_frame, chars_pad);
  418. Inc(chars_read, chars_pad);
  419. end;
  420. lzx_get_chars := chars_read;
  421. end;
  422. function find_match_at(lzi: plz_info; loc: longint; match_len: longint; match_locp: plongint): longint;
  423. var
  424. matchb,
  425. nmatchb,
  426. c1, c2: pbyte;
  427. j: longint;
  428. begin
  429. if -match_locp^ = loc then Exit(-1);
  430. if loc < match_len then Exit(-1);
  431. matchb := lzi^.block_buf + lzi^.block_loc + match_locp^;
  432. nmatchb := lzi^.block_buf + lzi^.block_loc - loc;
  433. c1 := matchb;
  434. c2 := nmatchb;
  435. j := 0;
  436. while j < match_len do begin
  437. if c1^ <> c2^ then begin
  438. break;
  439. end;
  440. Inc(c1);
  441. Inc(c2);
  442. Inc(j);
  443. end;
  444. if (j = match_len) then begin
  445. match_locp^ := -loc;
  446. Exit(0);
  447. end;
  448. Exit(-1);
  449. end;
  450. procedure check_entropy(lzud: plzx_data; main_index: longint);
  451. var
  452. freq,
  453. n_ln_n,
  454. rn_ln2,
  455. cur_ratio: double;
  456. n: longint;
  457. begin
  458. //* delete old entropy accumulation */
  459. if (lzud^.main_freq_table[main_index] <> 1) then begin
  460. freq := double(lzud^.main_freq_table[main_index])-1;
  461. lzud^.main_entropy := lzud^.main_entropy + (freq * ln(freq));
  462. end;
  463. //* add new entropy accumulation */
  464. freq := double(lzud^.main_freq_table[main_index]);
  465. lzud^.main_entropy := lzud^.main_entropy - (freq * ln(freq));
  466. n := lzud^.block_codesp - lzud^.block_codes;
  467. if (((n and $0FFF) = 0) and (lzud^.left_in_block >= $1000)) then begin
  468. n_ln_n := (double(n) * ln(double(n)));
  469. rn_ln2 := (rloge2 / double(n));
  470. cur_ratio := (n * rn_ln2 *(n_ln_n + lzud^.main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud^.main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / double(n);
  471. if (cur_ratio > lzud^.last_ratio) then begin
  472. lzud^.subdivide := -1;
  473. lz_stop_compressing(lzud^.lzi);
  474. end;
  475. lzud^.last_ratio := cur_ratio;
  476. end;
  477. end;
  478. function lzx_output_match(lzi: plz_info; match_pos, match_len: longint): longint; cdecl;
  479. var
  480. lzud: plzx_data;
  481. formatted_offset,
  482. position_footer: longword;
  483. length_footer,
  484. length_header: byte;
  485. len_pos_header: word;
  486. position_slot: longint;
  487. btdt: smallint;
  488. left, right, mid: longint;
  489. label testforr;
  490. begin
  491. lzud := plzx_data(lzi^.user_data);
  492. position_footer := 0;
  493. btdt := 0;
  494. testforr:
  495. if (match_pos = -lzud^.R0) then begin
  496. match_pos := 0;
  497. formatted_offset := 0;
  498. position_slot := 0;
  499. end
  500. else if (match_pos = -lzud^.R1) then begin
  501. lzud^.R1 := lzud^.R0;
  502. lzud^.R0 := -match_pos;
  503. match_pos := 1;
  504. formatted_offset := 1;
  505. position_slot := 1;
  506. end
  507. else if (match_pos = -lzud^.R2) then begin
  508. lzud^.R2 := lzud^.R0;
  509. lzud^.R0 := -match_pos;
  510. match_pos := 2;
  511. formatted_offset := 2;
  512. position_slot := 2;
  513. end
  514. else begin
  515. if (btdt = 0) then begin
  516. btdt := 1;
  517. if (find_match_at(lzi, lzud^.R0, match_len, @match_pos) = 0) then
  518. goto testforr;
  519. if (find_match_at(lzi, lzud^.R1, match_len, @match_pos) = 0) then
  520. goto testforr;
  521. if (find_match_at(lzi, lzud^.R2, match_len, @match_pos) = 0) then
  522. goto testforr;
  523. end;
  524. formatted_offset := -match_pos + 2;
  525. if ((match_len < 3) or
  526. ((formatted_offset >= 64) and (match_len < 4)) or
  527. ((formatted_offset >= 2048) and (match_len < 5)) or
  528. ((formatted_offset >= 65536) and (match_len < 6))) then begin
  529. //* reject matches where extra_bits will likely be bigger than just outputting
  530. // literals. The numbers are basically derived through guessing
  531. // and trial and error */
  532. Exit(-1); //* reject the match */
  533. end;
  534. lzud^.R2 := lzud^.R1;
  535. lzud^.R1 := lzud^.R0;
  536. lzud^.R0 := -match_pos;
  537. ///* calculate position base using binary search of table; if log2 can be
  538. // done in hardware, approximation might work;
  539. // trunc(log2(formatted_offset*formatted_offset)) gets either the proper
  540. // position slot or the next one, except for slots 0, 1, and 39-49
  541. // Slots 0-1 are handled by the R0-R1 procedures
  542. // Slots 36-49 (formatted_offset >= 262144) can be found by
  543. // (formatted_offset/131072) + 34 ==
  544. // (formatted_offset >> 17) + 34;
  545. //*/
  546. if (formatted_offset >= 262144) then begin
  547. position_slot := (formatted_offset shr 17) + 34;
  548. end
  549. else begin
  550. left := 3;
  551. right := lzud^.num_position_slots - 1;
  552. position_slot := -1;
  553. while (left <= right) do begin
  554. mid := (left + right) div 2;
  555. if (position_base[mid] <= formatted_offset) and
  556. (position_base[mid+1] > formatted_offset) then begin
  557. position_slot := mid;
  558. break;
  559. end;
  560. if (formatted_offset > position_base[mid]) then
  561. //* too low */
  562. left := mid + 1
  563. else //* too high */
  564. right := mid;
  565. end;
  566. if not(position_slot >= 0) then
  567. raise Exception.Create('position_slot >= 0');
  568. //* FIXME precalc extra_mask table */
  569. end;
  570. position_footer := ((LongWord(1) shl extra_bits[position_slot]) - 1) and formatted_offset;
  571. end;
  572. //* match length = 8 bits */
  573. //* position_slot = 6 bits */
  574. //* position_footer = 17 bits */
  575. //* total = 31 bits */
  576. //* plus one to say whether it's a literal or not */
  577. lzud^.block_codesp^ := $80000000 or //* bit 31 in intelligent bit ordering */
  578. (position_slot shl 25) or //* bits 30-25 */
  579. (position_footer shl 8) or //* bits 8-24 */
  580. (match_len - MIN_MATCH); //* bits 0-7 */
  581. Inc(lzud^.block_codesp);
  582. if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) then begin
  583. length_header := match_len - MIN_MATCH;
  584. //* length_footer = 255; */ /* not necessary */
  585. end
  586. else begin
  587. length_header := NUM_PRIMARY_LENGTHS;
  588. length_footer := match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH);
  589. Inc(lzud^.length_freq_table[length_footer]);
  590. end;
  591. len_pos_header := (position_slot shl 3) or length_header;
  592. Inc(lzud^.main_freq_table[len_pos_header + NUM_CHARS]);
  593. if (extra_bits[position_slot] >= 3) then begin
  594. Inc(lzud^.aligned_freq_table[position_footer and 7]);
  595. end;
  596. Dec(lzud^.left_in_block, match_len);
  597. if (lzud^.subdivide <> 0) then
  598. check_entropy(lzud, len_pos_header + NUM_CHARS);
  599. Exit(0); ///* accept the match */
  600. end;
  601. procedure lzx_output_literal(lzi: plz_info; ch: byte); cdecl;
  602. var
  603. lzud: plzx_data;
  604. begin
  605. lzud := plzx_data(lzi^.user_data);
  606. Dec(lzud^.left_in_block);
  607. lzud^.block_codesp^ := ch;
  608. Inc(lzud^.block_codesp);
  609. Inc(lzud^.main_freq_table[ch]);
  610. if (lzud^.subdivide <> 0) then
  611. check_entropy(lzud, ch);
  612. end;
  613. procedure lzx_write_bits(lzxd: plzx_data; nbits: longint; bits: longword); cdecl;
  614. var
  615. cur_bits,
  616. shift_bits,
  617. rshift_bits: longint;
  618. mask_bits: word;
  619. begin
  620. cur_bits := lzxd^.bits_in_buf;
  621. while ((cur_bits + nbits) >= 16) do begin
  622. shift_bits := 16 - cur_bits;
  623. rshift_bits := nbits - shift_bits;
  624. if (shift_bits = 16) then begin
  625. lzxd^.bit_buf := (bits shr rshift_bits) and $FFFF;
  626. end
  627. else begin
  628. mask_bits := (1 shl shift_bits) - 1;
  629. lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits);
  630. lzxd^.bit_buf := word(lzxd^.bit_buf or (bits shr rshift_bits) and mask_bits);
  631. end;
  632. {$IFDEF ENDIAN_BIG}
  633. lzxd^.bit_buf := word(((lzxd^.bit_buf and $FF)shl 8) or (lzxd^.bit_buf shr 8));
  634. {$ENDIF}
  635. lzxd^.put_bytes(lzxd^.out_arg, sizeof(lzxd^.bit_buf), @lzxd^.bit_buf);
  636. Inc(lzxd^.len_compressed_output, sizeof(lzxd^.bit_buf));
  637. lzxd^.bit_buf := 0;
  638. Dec(nbits, shift_bits);
  639. cur_bits := 0;
  640. end;
  641. //* (cur_bits + nbits) < 16. If nbits := 0, we're done.
  642. // otherwise move bits in */
  643. shift_bits := nbits;
  644. mask_bits := (1 shl shift_bits) - 1;
  645. lzxd^.bit_buf := word(lzxd^.bit_buf shl shift_bits);
  646. lzxd^.bit_buf := word(lzxd^.bit_buf or bits and mask_bits);
  647. Inc(cur_bits, nbits);
  648. lzxd^.bits_in_buf := cur_bits;
  649. end;
  650. procedure lzx_align_output(lzxd: plzx_data);
  651. begin
  652. if (lzxd^.bits_in_buf <> 0) then begin
  653. lzx_write_bits(lzxd, 16 - lzxd^.bits_in_buf, 0);
  654. end;
  655. if (lzxd^.mark_frame <> nil) then
  656. lzxd^.mark_frame(lzxd^.mark_frame_arg, lzxd^.len_uncompressed_input, lzxd^.len_compressed_output);
  657. end;
  658. procedure lzx_write_compressed_literals(lzxd: plzx_data; block_type: longint);
  659. var
  660. cursor: plongword;
  661. endp: plongword;
  662. position_slot: word;
  663. position_footer,
  664. match_len_m2, //* match length minus 2, which is MIN_MATCH */
  665. verbatim_bits,
  666. block_code: longword;
  667. length_header,
  668. length_footer,
  669. len_pos_header: word;
  670. huffe: phuff_entry;
  671. frame_count: longint;
  672. begin
  673. cursor := lzxd^.block_codes;
  674. endp := lzxd^.block_codesp;
  675. frame_count := (lzxd^.len_uncompressed_input mod LZX_FRAME_SIZE);
  676. Dec(lzxd^.len_uncompressed_input, frame_count); //* will be added back in later */
  677. while (cursor < endp) do begin
  678. block_code := cursor^;
  679. Inc(cursor);
  680. if (block_code and $80000000) <> 0 then begin
  681. {*
  682. * 0x80000000 | bit 31 in intelligent bit ordering
  683. * (position_slot shl 25) | bits 30-25
  684. * (position_footer shl 8) | bits 8-24
  685. * (match_len - MIN_MATCH); bits 0-7
  686. *
  687. *}
  688. match_len_m2 := block_code and $FF; //* 8 bits */
  689. position_footer := (block_code shr 8)and $1FFFF; //* 17 bits */
  690. position_slot := (block_code shr 25) and $3F; //* 6 bits */
  691. if (match_len_m2 < NUM_PRIMARY_LENGTHS) then begin
  692. length_header := match_len_m2;
  693. length_footer := 255; //* personal encoding for NULL */
  694. end
  695. else begin
  696. length_header := NUM_PRIMARY_LENGTHS;
  697. length_footer := match_len_m2 - NUM_PRIMARY_LENGTHS;
  698. end;
  699. len_pos_header := (position_slot shl 3) or length_header;
  700. huffe := @lzxd^.main_tree[len_pos_header+NUM_CHARS];
  701. lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
  702. if (length_footer <> 255) then begin
  703. huffe := @lzxd^.length_tree[length_footer];
  704. lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
  705. end;
  706. if ((block_type = LZX_ALIGNED_OFFSET_BLOCK) and (extra_bits[position_slot] >= 3)) then begin
  707. //* aligned offset block and code */
  708. verbatim_bits := position_footer shr 3;
  709. lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits);
  710. huffe := @lzxd^.aligned_tree[position_footer and 7];
  711. lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
  712. end
  713. else begin
  714. verbatim_bits := position_footer;
  715. lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits);
  716. end;
  717. Inc(frame_count, match_len_m2 + 2);
  718. end
  719. else begin
  720. //* literal */
  721. if not(block_code < NUM_CHARS) then
  722. raise Exception.Create('block_code < NUM_CHARS');
  723. huffe := @lzxd^.main_tree[block_code];
  724. lzx_write_bits(lzxd, huffe^.codelength, huffe^.code);
  725. Inc(frame_count);
  726. end;
  727. if (frame_count = LZX_FRAME_SIZE) then begin
  728. Inc(lzxd^.len_uncompressed_input, frame_count);
  729. lzx_align_output(lzxd);
  730. frame_count := 0;
  731. end;
  732. if not(frame_count < LZX_FRAME_SIZE) then
  733. raise Exception.Create('frame_count < LZX_FRAME_SIZE');
  734. end;
  735. Inc(lzxd^.len_uncompressed_input, frame_count);
  736. end;
  737. function lzx_write_compressed_tree(lzxd: plzx_data; tree: phuff_entry; prevlengths: pbyte;
  738. treesize: longint): longint;
  739. var
  740. codes,
  741. runs: pbyte;
  742. freqs: array [0..LZX_PRETREE_SIZE-1] of longint;
  743. cur_run: longint;
  744. last_len: longint;
  745. pretree: array [0..19] of huff_entry;
  746. codep,
  747. codee,
  748. runp: pbyte;
  749. excess,
  750. i,
  751. cur_code: longint;
  752. begin
  753. codes := getmem(treesize*sizeof(byte));
  754. codep := codes;
  755. runs := getmem(treesize*sizeof(byte));
  756. runp := runs;
  757. Fillchar(freqs[0], sizeof(freqs), 0);
  758. cur_run := 1;
  759. last_len := tree[0].codelength;
  760. for i := 1 to treesize do begin
  761. if ((i = treesize) or (tree[i].codelength <> last_len)) then begin
  762. if (last_len = 0) then begin
  763. while (cur_run >= 20) do begin
  764. excess := cur_run - 20;
  765. if (excess > 31) then excess := 31;
  766. codep^ := 18;
  767. Inc(codep);
  768. runp^ := excess;
  769. Inc(runp);
  770. Dec(cur_run, excess + 20);
  771. Inc(freqs[18]);
  772. end;
  773. while (cur_run >= 4) do begin
  774. excess := cur_run - 4;
  775. if (excess > 15) then excess := 15;
  776. codep^ := 17;
  777. Inc(codep);
  778. runp^ := excess;
  779. Inc(runp);
  780. Dec(cur_run, excess + 4);
  781. Inc(freqs[17]);
  782. end;
  783. while (cur_run > 0) do begin
  784. codep^ := prevlengths[i - cur_run];
  785. Inc(freqs[codep^]);
  786. Inc(codep);
  787. runp^ := 0; //* not necessary */
  788. Inc(runp);
  789. Dec(cur_run);
  790. end;
  791. end
  792. else begin
  793. while (cur_run >= 4) do begin
  794. if (cur_run = 4) then excess := 0
  795. else excess := 1;
  796. codep^ := 19;
  797. Inc(codep);
  798. runp^ := excess;
  799. Inc(runp);
  800. Inc(freqs[19]);
  801. //* right, MS lies again. Code is NOT
  802. // prev_len + len (mod 17), it's prev_len - len (mod 17)*/
  803. codep^ := byte(prevlengths[i-cur_run] - last_len);
  804. if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, 17);
  805. Inc(freqs[codep^]);
  806. Inc(codep);
  807. runp^ := 0; //* not necessary */
  808. Inc(runp);
  809. Dec(cur_run, excess+4);
  810. end;
  811. while (cur_run > 0) do begin
  812. codep^ := byte(prevlengths[i-cur_run] - last_len);
  813. if (codep^ > 16) then codep^ := byte(codep^ + 17); //Inc(codep^, byte(17));
  814. runp^ := 0; //* not necessary */
  815. Inc(runp);
  816. Dec(cur_run);
  817. Inc(freqs[codep^]);
  818. Inc(codep);
  819. end;
  820. end;
  821. if (i <> treesize) then
  822. last_len := tree[i].codelength;
  823. cur_run := 0;
  824. end;
  825. Inc(cur_run);
  826. end;
  827. codee := codep;
  828. //* now create the huffman table and write out the pretree */
  829. build_huffman_tree(LZX_PRETREE_SIZE, 16, @freqs[0], pretree);
  830. for i := 0 to LZX_PRETREE_SIZE-1 do begin
  831. lzx_write_bits(lzxd, 4, pretree[i].codelength);
  832. end;
  833. codep := codes;
  834. runp := runs;
  835. cur_run := 0;
  836. while (codep < codee) do begin
  837. cur_code := codep^;
  838. Inc(codep);
  839. lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
  840. if (cur_code = 17) then begin
  841. Inc(cur_run, runp^ + 4);
  842. lzx_write_bits(lzxd, 4, runp^);
  843. end
  844. else if (cur_code = 18) then begin
  845. Inc(cur_run, runp^ + 20);
  846. lzx_write_bits(lzxd, 5, runp^);
  847. end
  848. else if (cur_code = 19) then begin
  849. Inc(cur_run, runp^ + 4);
  850. lzx_write_bits(lzxd, 1, runp^);
  851. cur_code := codep^;
  852. Inc(codep);
  853. lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
  854. Inc(runp);
  855. end
  856. else begin
  857. Inc(cur_run);
  858. end;
  859. Inc(runp);
  860. end;
  861. freemem(codes);
  862. freemem(runs);
  863. Exit(0);
  864. end;
  865. procedure lzx_reset(lzxd:plzx_data);
  866. begin
  867. lzxd^.need_1bit_header := 1;
  868. lzxd^.R0 := 1;
  869. lzxd^.R1 := 1;
  870. lzxd^.R2 := 1;
  871. Fillchar(lzxd^.prev_main_treelengths[0], lzxd^.main_tree_size * sizeof(byte), 0);
  872. Fillchar(lzxd^.prev_length_treelengths[0], NUM_SECONDARY_LENGTHS * sizeof(byte), 0);
  873. lz_reset(lzxd^.lzi);
  874. end;
  875. function lzx_compress_block(lzxd:plzx_data; block_size:longint; subdivide:longbool):longint;
  876. var
  877. i: longint;
  878. written_sofar: longword = 0;
  879. block_type: longint;
  880. uncomp_bits,
  881. comp_bits,
  882. comp_bits_ovh,
  883. uncomp_length: longword;
  884. begin
  885. if ((lzxd^.block_size <> block_size) or (lzxd^.block_codes = nil)) then begin
  886. if (lzxd^.block_codes <> nil) then freemem(lzxd^.block_codes);
  887. lzxd^.block_size := block_size;
  888. lzxd^.block_codes := GetMem(block_size * sizeof(longword));
  889. end;
  890. lzxd^.subdivide := Ord(subdivide);
  891. lzxd^.left_in_block := block_size;
  892. lzxd^.left_in_frame := LZX_FRAME_SIZE;
  893. lzxd^.main_entropy := 0.0;
  894. lzxd^.last_ratio := 9999999.0;
  895. lzxd^.block_codesp := lzxd^.block_codes;
  896. Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0);
  897. Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0);
  898. Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0);
  899. while ((lzxd^.left_in_block<>0) and ((lz_left_to_process(lzxd^.lzi)<>0) or not(lzxd^.at_eof(lzxd^.in_arg)))) do begin
  900. lz_compress(lzxd^.lzi, lzxd^.left_in_block);
  901. if (lzxd^.left_in_frame = 0) then begin
  902. lzxd^.left_in_frame := LZX_FRAME_SIZE;
  903. end;
  904. if ((lzxd^.subdivide<0)
  905. or (lzxd^.left_in_block = 0)
  906. or ((lz_left_to_process(lzxd^.lzi) = 0) and lzxd^.at_eof(lzxd^.in_arg))) then begin
  907. //* now one block is LZ-analyzed. */
  908. //* time to write it out */
  909. uncomp_length := lzxd^.block_size - lzxd^.left_in_block - written_sofar;
  910. //* uncomp_length will sometimes be 0 when input length is
  911. // an exact multiple of frame size */
  912. if (uncomp_length = 0) then
  913. continue;
  914. if (lzxd^.subdivide < 0) then begin
  915. lzxd^.subdivide := 1;
  916. end;
  917. if (lzxd^.need_1bit_header <> 0) then begin
  918. //* one bit Intel preprocessing header */
  919. //* always 0 because this implementation doesn't do Intel preprocessing */
  920. lzx_write_bits(lzxd, 1, 0);
  921. lzxd^.need_1bit_header := 0;
  922. end;
  923. //* handle extra bits */
  924. uncomp_bits := 0;
  925. comp_bits := 0;
  926. build_huffman_tree(LZX_ALIGNED_SIZE, 7, @lzxd^.aligned_freq_table[0], @lzxd^.aligned_tree[0]);
  927. for i := 0 to LZX_ALIGNED_SIZE-1 do begin
  928. Inc(uncomp_bits, lzxd^.aligned_freq_table[i]* 3);
  929. Inc(comp_bits, lzxd^.aligned_freq_table[i]* lzxd^.aligned_tree[i].codelength);
  930. end;
  931. comp_bits_ovh := comp_bits + LZX_ALIGNED_SIZE * 3;
  932. if (comp_bits_ovh < uncomp_bits) then
  933. block_type := LZX_ALIGNED_OFFSET_BLOCK
  934. else
  935. block_type := LZX_VERBATIM_BLOCK;
  936. //* block type */
  937. lzx_write_bits(lzxd, 3, block_type);
  938. //* uncompressed length */
  939. lzx_write_bits(lzxd, 24, uncomp_length);
  940. written_sofar := lzxd^.block_size - lzxd^.left_in_block;
  941. //* now write out the aligned offset trees if present */
  942. if (block_type = LZX_ALIGNED_OFFSET_BLOCK) then begin
  943. for i := 0 to LZX_ALIGNED_SIZE-1 do begin
  944. lzx_write_bits(lzxd, 3, lzxd^.aligned_tree[i].codelength);
  945. end;
  946. end;
  947. //* end extra bits */
  948. build_huffman_tree(lzxd^.main_tree_size, LZX_MAX_CODE_LENGTH,
  949. lzxd^.main_freq_table, lzxd^.main_tree);
  950. build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
  951. @lzxd^.length_freq_table[0], @lzxd^.length_tree[0]);
  952. //* now write the pre-tree and tree for main 1 */
  953. lzx_write_compressed_tree(lzxd, lzxd^.main_tree, lzxd^.prev_main_treelengths, NUM_CHARS);
  954. //* now write the pre-tree and tree for main 2*/
  955. lzx_write_compressed_tree(lzxd, lzxd^.main_tree + NUM_CHARS,
  956. lzxd^.prev_main_treelengths + NUM_CHARS,
  957. lzxd^.main_tree_size - NUM_CHARS);
  958. //* now write the pre tree and tree for length */
  959. lzx_write_compressed_tree(lzxd, @lzxd^.length_tree[0], @lzxd^.prev_length_treelengths[0],
  960. NUM_SECONDARY_LENGTHS);
  961. //* now write literals */
  962. lzx_write_compressed_literals(lzxd, block_type);
  963. //* copy treelengths somewhere safe to do delta compression */
  964. for i := 0 to lzxd^.main_tree_size-1 do begin
  965. lzxd^.prev_main_treelengths[i] := lzxd^.main_tree[i].codelength;
  966. end;
  967. for i := 0 to NUM_SECONDARY_LENGTHS-1 do begin
  968. lzxd^.prev_length_treelengths[i] := lzxd^.length_tree[i].codelength;
  969. end;
  970. lzxd^.main_entropy := 0.0;
  971. lzxd^.last_ratio := 9999999.0;
  972. lzxd^.block_codesp := lzxd^.block_codes;
  973. Fillchar(lzxd^.length_freq_table[0], NUM_SECONDARY_LENGTHS * sizeof(longint), 0);
  974. Fillchar(lzxd^.main_freq_table[0], lzxd^.main_tree_size * sizeof(longint), 0);
  975. Fillchar(lzxd^.aligned_freq_table[0], LZX_ALIGNED_SIZE * sizeof(longint), 0);
  976. end;
  977. end;
  978. Exit(0);
  979. end;
  980. function lzx_init(lzxdp:Pplzx_data; wsize_code:longint; get_bytes:TGetBytesFunc; get_bytes_arg:pointer; at_eof:TIsEndOfFileFunc;
  981. put_bytes:TWriteBytesFunc; put_bytes_arg:pointer; mark_frame:TMarkFrameFunc; mark_frame_arg:pointer):longint;var
  982. wsize: longint;
  983. lzxd: plzx_data;
  984. begin
  985. if ((wsize_code < 15) or (wsize_code > 21)) then begin
  986. Exit(-1);
  987. end;
  988. //lzx_init_static(); I hardcoded this instead
  989. New(lzxd);
  990. FillChar(lzxd^, Sizeof(lzxd), 0);
  991. lzxdp^ := lzxd;
  992. if (lzxd = nil) then
  993. Exit(-2);
  994. lzxd^.in_arg := get_bytes_arg;
  995. lzxd^.out_arg := put_bytes_arg;
  996. lzxd^.mark_frame_arg := mark_frame_arg;
  997. lzxd^.get_bytes := get_bytes;
  998. lzxd^.put_bytes := put_bytes;
  999. lzxd^.at_eof := at_eof;
  1000. lzxd^.mark_frame := mark_frame;
  1001. wsize := 1 shl (wsize_code);
  1002. lzxd^.bits_in_buf := 0;
  1003. lzxd^.block_codes := nil;
  1004. lzxd^.num_position_slots := num_position_slots[wsize_code-15];
  1005. lzxd^.main_tree_size := (NUM_CHARS + 8 * lzxd^.num_position_slots);
  1006. lzxd^.main_freq_table := GetMem(sizeof(longint) * lzxd^.main_tree_size);
  1007. lzxd^.main_tree := GetMem(sizeof(huff_entry)* lzxd^.main_tree_size);
  1008. lzxd^.prev_main_treelengths := GetMem(sizeof(byte)*lzxd^.main_tree_size);
  1009. New(lzxd^.lzi);
  1010. //* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
  1011. lz_init(lzxd^.lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
  1012. @lzx_get_chars, @lzx_output_match, @lzx_output_literal,lzxd);
  1013. lzxd^.len_uncompressed_input := 0;
  1014. lzxd^.len_compressed_output := 0;
  1015. lzx_reset(lzxd);
  1016. Exit(0);
  1017. end;
  1018. function lzx_finish(lzxd:plzx_data; lzxr:plzx_results):longint;
  1019. begin
  1020. if (lzxr <> nil) then begin
  1021. lzxr^.len_compressed_output := lzxd^.len_compressed_output;
  1022. lzxr^.len_uncompressed_input := lzxd^.len_uncompressed_input;
  1023. end;
  1024. lz_release(lzxd^.lzi);
  1025. Dispose(lzxd^.lzi);
  1026. freemem(lzxd^.prev_main_treelengths);
  1027. freemem(lzxd^.main_tree);
  1028. freemem(lzxd^.main_freq_table);
  1029. freemem(lzxd^.block_codes);
  1030. dispose(lzxd);
  1031. Exit(0);
  1032. end;
  1033. initialization
  1034. rloge2 := 1.0 / ln(2);
  1035. end.