PageRenderTime 44ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/misc/win32/hitfilemaker/compress.c

http://sgfork.googlecode.com/
C | 750 lines | 549 code | 113 blank | 88 comment | 123 complexity | 6ef69d7ffa3eccb88a091363a09c03c5 MD5 | raw file
Possible License(s): AGPL-1.0, Unlicense
  1. #include "q3data.h"
  2. #if 0
  3. /*
  4. ==================
  5. MTF
  6. ==================
  7. */
  8. cblock_t MTF (cblock_t in)
  9. {
  10. int i, j, b, code;
  11. byte *out_p;
  12. int index[256];
  13. cblock_t out;
  14. out_p = out.data = malloc(in.count + 4);
  15. // write count
  16. *out_p++ = in.count&255;
  17. *out_p++ = (in.count>>8)&255;
  18. *out_p++ = (in.count>>16)&255;
  19. *out_p++ = (in.count>>24)&255;
  20. for (i=0 ; i<256 ; i++)
  21. index[i] = i;
  22. for (i=0 ; i<in.count ; i++)
  23. {
  24. b = in.data[i];
  25. code = index[b];
  26. *out_p++ = code;
  27. // shuffle b indexes to 0
  28. for (j=0 ; j<256 ; j++)
  29. if (index[j] < code)
  30. index[j]++;
  31. index[b] = 0;
  32. }
  33. out.count = out_p - out.data;
  34. return out;
  35. }
  36. //==========================================================================
  37. int bwt_size;
  38. byte *bwt_data;
  39. int bwtCompare (const void *elem1, const void *elem2)
  40. {
  41. int i;
  42. int i1, i2;
  43. int b1, b2;
  44. i1 = *(int *)elem1;
  45. i2 = *(int *)elem2;
  46. for (i=0 ; i<bwt_size ; i++)
  47. {
  48. b1 = bwt_data[i1];
  49. b2 = bwt_data[i2];
  50. if (b1 < b2)
  51. return -1;
  52. if (b1 > b2)
  53. return 1;
  54. if (++i1 == bwt_size)
  55. i1 = 0;
  56. if (++i2 == bwt_size)
  57. i2 = 0;
  58. }
  59. return 0;
  60. }
  61. /*
  62. ==================
  63. BWT
  64. ==================
  65. */
  66. cblock_t BWT (cblock_t in)
  67. {
  68. int *sorted;
  69. int i;
  70. byte *out_p;
  71. cblock_t out;
  72. bwt_size = in.count;
  73. bwt_data = in.data;
  74. sorted = malloc(in.count*sizeof(*sorted));
  75. for (i=0 ; i<in.count ; i++)
  76. sorted[i] = i;
  77. qsort (sorted, in.count, sizeof(*sorted), bwtCompare);
  78. out_p = out.data = malloc(in.count + 8);
  79. // write count
  80. *out_p++ = in.count&255;
  81. *out_p++ = (in.count>>8)&255;
  82. *out_p++ = (in.count>>16)&255;
  83. *out_p++ = (in.count>>24)&255;
  84. // write head index
  85. for (i=0 ; i<in.count ; i++)
  86. if (sorted[i] == 0)
  87. break;
  88. *out_p++ = i&255;
  89. *out_p++ = (i>>8)&255;
  90. *out_p++ = (i>>16)&255;
  91. *out_p++ = (i>>24)&255;
  92. // write the L column
  93. for (i=0 ; i<in.count ; i++)
  94. *out_p++ = in.data[(sorted[i]+in.count-1)%in.count];
  95. free (sorted);
  96. out.count = out_p - out.data;
  97. return out;
  98. }
  99. //==========================================================================
  100. typedef struct hnode_s
  101. {
  102. int count;
  103. qbool used;
  104. int children[2];
  105. } hnode_t;
  106. int numhnodes;
  107. hnode_t hnodes[512];
  108. unsigned charbits[256];
  109. int charbitscount[256];
  110. int SmallestNode (void)
  111. {
  112. int i;
  113. int best, bestnode;
  114. best = 99999999;
  115. bestnode = -1;
  116. for (i=0 ; i<numhnodes ; i++)
  117. {
  118. if (hnodes[i].used)
  119. continue;
  120. if (!hnodes[i].count)
  121. continue;
  122. if (hnodes[i].count < best)
  123. {
  124. best = hnodes[i].count;
  125. bestnode = i;
  126. }
  127. }
  128. if (bestnode == -1)
  129. return -1;
  130. hnodes[bestnode].used = true;
  131. return bestnode;
  132. }
  133. void BuildChars (int nodenum, unsigned bits, int bitcount)
  134. {
  135. hnode_t *node;
  136. if (nodenum < 256)
  137. {
  138. if (bitcount > 32)
  139. Error ("bitcount > 32");
  140. charbits[nodenum] = bits;
  141. charbitscount[nodenum] = bitcount;
  142. return;
  143. }
  144. node = &hnodes[nodenum];
  145. bits <<= 1;
  146. BuildChars (node->children[0], bits, bitcount+1);
  147. bits |= 1;
  148. BuildChars (node->children[1], bits, bitcount+1);
  149. }
  150. /*
  151. ==================
  152. Huffman
  153. ==================
  154. */
  155. cblock_t Huffman (cblock_t in)
  156. {
  157. int i;
  158. hnode_t *node;
  159. int outbits, c;
  160. unsigned bits;
  161. byte *out_p;
  162. cblock_t out;
  163. int max, maxchar;
  164. // count
  165. memset (hnodes, 0, sizeof(hnodes));
  166. for (i=0 ; i<in.count ; i++)
  167. hnodes[in.data[i]].count++;
  168. // normalize counts
  169. max = 0;
  170. maxchar = 0;
  171. for (i=0 ; i<256 ; i++)
  172. {
  173. if (hnodes[i].count > max)
  174. {
  175. max = hnodes[i].count;
  176. maxchar = i;
  177. }
  178. }
  179. if (max == 0)
  180. Error ("Huffman: max == 0");
  181. for (i=0 ; i<256 ; i++)
  182. {
  183. hnodes[i].count = (hnodes[i].count*255+max-1) / max;
  184. }
  185. // build the nodes
  186. numhnodes = 256;
  187. while (numhnodes != 511)
  188. {
  189. node = &hnodes[numhnodes];
  190. // pick two lowest counts
  191. node->children[0] = SmallestNode ();
  192. if (node->children[0] == -1)
  193. break; // no more
  194. node->children[1] = SmallestNode ();
  195. if (node->children[1] == -1)
  196. {
  197. if (node->children[0] != numhnodes-1)
  198. Error ("Bad smallestnode");
  199. break;
  200. }
  201. node->count = hnodes[node->children[0]].count +
  202. hnodes[node->children[1]].count;
  203. numhnodes++;
  204. }
  205. BuildChars (numhnodes-1, 0, 0);
  206. out_p = out.data = malloc(in.count*2 + 1024);
  207. memset (out_p, 0, in.count*2+1024);
  208. // write count
  209. *out_p++ = in.count&255;
  210. *out_p++ = (in.count>>8)&255;
  211. *out_p++ = (in.count>>16)&255;
  212. *out_p++ = (in.count>>24)&255;
  213. // save out the 256 normalized counts so the tree can be recreated
  214. for (i=0 ; i<256 ; i++)
  215. *out_p++ = hnodes[i].count;
  216. // write bits
  217. outbits = 0;
  218. for (i=0 ; i<in.count ; i++)
  219. {
  220. c = charbitscount[in.data[i]];
  221. bits = charbits[in.data[i]];
  222. while (c)
  223. {
  224. c--;
  225. if (bits & (1<<c))
  226. out_p[outbits>>3] |= 1<<(outbits&7);
  227. outbits++;
  228. }
  229. }
  230. out_p += (outbits+7)>>3;
  231. out.count = out_p - out.data;
  232. return out;
  233. }
  234. //==========================================================================
  235. /*
  236. ==================
  237. RLE
  238. ==================
  239. */
  240. #define RLE_CODE 0xe8
  241. #define RLE_TRIPPLE 0xe9
  242. int rle_counts[256];
  243. int rle_bytes[256];
  244. cblock_t RLE (cblock_t in)
  245. {
  246. int i;
  247. byte *out_p;
  248. int val;
  249. int repeat;
  250. cblock_t out;
  251. out_p = out.data = malloc (in.count*2);
  252. // write count
  253. *out_p++ = in.count&255;
  254. *out_p++ = (in.count>>8)&255;
  255. *out_p++ = (in.count>>16)&255;
  256. *out_p++ = (in.count>>24)&255;
  257. for (i=0 ; i<in.count ; )
  258. {
  259. val = in.data[i];
  260. rle_bytes[val]++;
  261. repeat = 1;
  262. i++;
  263. while (i<in.count && repeat < 255 && in.data[i] == val)
  264. {
  265. repeat++;
  266. i++;
  267. }
  268. if (repeat < 256)
  269. rle_counts[repeat]++;
  270. if (repeat > 3 || val == RLE_CODE)
  271. {
  272. *out_p++ = RLE_CODE;
  273. *out_p++ = val;
  274. *out_p++ = repeat;
  275. }
  276. else
  277. {
  278. while (repeat--)
  279. *out_p++ = val;
  280. }
  281. }
  282. out.count = out_p - out.data;
  283. return out;
  284. }
  285. //==========================================================================
  286. unsigned lzss_head[256];
  287. unsigned lzss_next[0x20000];
  288. /*
  289. ==================
  290. LZSS
  291. ==================
  292. */
  293. #define BACK_WINDOW 0x10000
  294. #define BACK_BITS 16
  295. #define FRONT_WINDOW 16
  296. #define FRONT_BITS 4
  297. cblock_t LZSS (cblock_t in)
  298. {
  299. int i;
  300. byte *out_p;
  301. cblock_t out;
  302. int val;
  303. int j, start, max;
  304. int bestlength, beststart;
  305. int outbits;
  306. if (in.count >= sizeof(lzss_next)/4)
  307. Error ("LZSS: too big");
  308. memset (lzss_head, -1, sizeof(lzss_head));
  309. out_p = out.data = malloc (in.count*2);
  310. memset (out.data, 0, in.count*2);
  311. // write count
  312. *out_p++ = in.count&255;
  313. *out_p++ = (in.count>>8)&255;
  314. *out_p++ = (in.count>>16)&255;
  315. *out_p++ = (in.count>>24)&255;
  316. outbits = 0;
  317. for (i=0 ; i<in.count ; )
  318. {
  319. val = in.data[i];
  320. #if 1
  321. // chained search
  322. bestlength = 0;
  323. beststart = 0;
  324. max = FRONT_WINDOW;
  325. if (i + max > in.count)
  326. max = in.count - i;
  327. start = lzss_head[val];
  328. while (start != -1 && start >= i-BACK_WINDOW)
  329. {
  330. // count match length
  331. for (j=0 ; j<max ; j++)
  332. if (in.data[start+j] != in.data[i+j])
  333. break;
  334. if (j > bestlength)
  335. {
  336. bestlength = j;
  337. beststart = start;
  338. }
  339. start = lzss_next[start];
  340. }
  341. #else
  342. // slow simple search
  343. // search for a match
  344. max = FRONT_WINDOW;
  345. if (i + max > in.count)
  346. max = in.count - i;
  347. start = i - BACK_WINDOW;
  348. if (start < 0)
  349. start = 0;
  350. bestlength = 0;
  351. beststart = 0;
  352. for ( ; start < i ; start++)
  353. {
  354. if (in.data[start] != val)
  355. continue;
  356. // count match length
  357. for (j=0 ; j<max ; j++)
  358. if (in.data[start+j] != in.data[i+j])
  359. break;
  360. if (j > bestlength)
  361. {
  362. bestlength = j;
  363. beststart = start;
  364. }
  365. }
  366. #endif
  367. beststart = BACK_WINDOW - (i-beststart);
  368. if (bestlength < 3)
  369. { // output a single char
  370. bestlength = 1;
  371. out_p[outbits>>3] |= 1<<(outbits&7); // set bit to mark char
  372. outbits++;
  373. for (j=0 ; j<8 ; j++, outbits++)
  374. if (val & (1<<j) )
  375. out_p[outbits>>3] |= 1<<(outbits&7);
  376. }
  377. else
  378. { // output a phrase
  379. outbits++; // leave a 0 bit to mark phrase
  380. for (j=0 ; j<BACK_BITS ; j++, outbits++)
  381. if (beststart & (1<<j) )
  382. out_p[outbits>>3] |= 1<<(outbits&7);
  383. for (j=0 ; j<FRONT_BITS ; j++, outbits++)
  384. if (bestlength & (1<<j) )
  385. out_p[outbits>>3] |= 1<<(outbits&7);
  386. }
  387. while (bestlength--)
  388. {
  389. val = in.data[i];
  390. lzss_next[i] = lzss_head[val];
  391. lzss_head[val] = i;
  392. i++;
  393. }
  394. }
  395. out_p += (outbits+7)>>3;
  396. out.count = out_p - out.data;
  397. return out;
  398. }
  399. //==========================================================================
  400. #define MIN_REPT 15
  401. #define MAX_REPT 0
  402. #define HUF_TOKENS (256+MAX_REPT)
  403. unsigned charbits1[256][HUF_TOKENS];
  404. int charbitscount1[256][HUF_TOKENS];
  405. hnode_t hnodes1[256][HUF_TOKENS*2];
  406. int numhnodes1[256];
  407. int order0counts[256];
  408. /*
  409. ==================
  410. SmallestNode1
  411. ==================
  412. */
  413. int SmallestNode1 (hnode_t *hnodes, int numhnodes)
  414. {
  415. int i;
  416. int best, bestnode;
  417. best = 99999999;
  418. bestnode = -1;
  419. for (i=0 ; i<numhnodes ; i++)
  420. {
  421. if (hnodes[i].used)
  422. continue;
  423. if (!hnodes[i].count)
  424. continue;
  425. if (hnodes[i].count < best)
  426. {
  427. best = hnodes[i].count;
  428. bestnode = i;
  429. }
  430. }
  431. if (bestnode == -1)
  432. return -1;
  433. hnodes[bestnode].used = true;
  434. return bestnode;
  435. }
  436. /*
  437. ==================
  438. BuildChars1
  439. ==================
  440. */
  441. void BuildChars1 (int prev, int nodenum, unsigned bits, int bitcount)
  442. {
  443. hnode_t *node;
  444. if (nodenum < HUF_TOKENS)
  445. {
  446. if (bitcount > 32)
  447. Error ("bitcount > 32");
  448. charbits1[prev][nodenum] = bits;
  449. charbitscount1[prev][nodenum] = bitcount;
  450. return;
  451. }
  452. node = &hnodes1[prev][nodenum];
  453. bits <<= 1;
  454. BuildChars1 (prev, node->children[0], bits, bitcount+1);
  455. bits |= 1;
  456. BuildChars1 (prev, node->children[1], bits, bitcount+1);
  457. }
  458. /*
  459. ==================
  460. BuildTree1
  461. ==================
  462. */
  463. void BuildTree1 (int prev)
  464. {
  465. hnode_t *node, *nodebase;
  466. int numhnodes;
  467. // build the nodes
  468. numhnodes = HUF_TOKENS;
  469. nodebase = hnodes1[prev];
  470. while (1)
  471. {
  472. node = &nodebase[numhnodes];
  473. // pick two lowest counts
  474. node->children[0] = SmallestNode1 (nodebase, numhnodes);
  475. if (node->children[0] == -1)
  476. break; // no more
  477. node->children[1] = SmallestNode1 (nodebase, numhnodes);
  478. if (node->children[1] == -1)
  479. break;
  480. node->count = nodebase[node->children[0]].count +
  481. nodebase[node->children[1]].count;
  482. numhnodes++;
  483. }
  484. numhnodes1[prev] = numhnodes-1;
  485. BuildChars1 (prev, numhnodes-1, 0, 0);
  486. }
  487. /*
  488. ==================
  489. Huffman1_Count
  490. ==================
  491. */
  492. void Huffman1_Count (cblock_t in)
  493. {
  494. int i;
  495. int prev;
  496. int v;
  497. int rept;
  498. prev = 0;
  499. for (i=0 ; i<in.count ; i++)
  500. {
  501. v = in.data[i];
  502. order0counts[v]++;
  503. hnodes1[prev][v].count++;
  504. prev = v;
  505. #if 1
  506. for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
  507. if (in.data[i+rept] != v)
  508. break;
  509. if (rept > MIN_REPT)
  510. {
  511. hnodes1[prev][255+rept].count++;
  512. i += rept-1;
  513. }
  514. #endif
  515. }
  516. }
  517. /*
  518. ==================
  519. Huffman1_Build
  520. ==================
  521. */
  522. byte scaled[256][HUF_TOKENS];
  523. void Huffman1_Build (FILE *f)
  524. {
  525. int i, j, v;
  526. int max;
  527. int total;
  528. for (i=0 ; i<256 ; i++)
  529. {
  530. // normalize and save the counts
  531. max = 0;
  532. for (j=0 ; j<HUF_TOKENS ; j++)
  533. {
  534. if (hnodes1[i][j].count > max)
  535. max = hnodes1[i][j].count;
  536. }
  537. if (max == 0)
  538. max = 1;
  539. total = 0;
  540. for (j=0 ; j<HUF_TOKENS ; j++)
  541. { // easy to overflow 32 bits here!
  542. v = (hnodes1[i][j].count*(double)255+max-1)/max;
  543. if (v > 255)
  544. Error ("v > 255");
  545. scaled[i][j] = hnodes1[i][j].count = v;
  546. if (v)
  547. total++;
  548. }
  549. if (total == 1)
  550. { // must have two tokens
  551. if (!scaled[i][0])
  552. scaled[i][0] = hnodes1[i][0].count = 1;
  553. else
  554. scaled[i][1] = hnodes1[i][1].count = 1;
  555. }
  556. BuildTree1 (i);
  557. }
  558. #if 0
  559. // count up the total bits
  560. total = 0;
  561. for (i=0 ; i<256 ; i++)
  562. for (j=0 ; j<256 ; j++)
  563. total += charbitscount1[i][j] * hnodes1[i][j].count;
  564. total = (total+7)/8;
  565. printf ("%i bytes huffman1 compressed\n", total);
  566. #endif
  567. fwrite (scaled, 1, sizeof(scaled), f);
  568. }
  569. /*
  570. ==================
  571. Huffman1
  572. Order 1 compression with pre-built table
  573. ==================
  574. */
  575. cblock_t Huffman1 (cblock_t in)
  576. {
  577. int i;
  578. int outbits, c;
  579. unsigned bits;
  580. byte *out_p;
  581. cblock_t out;
  582. int prev;
  583. int v;
  584. int rept;
  585. out_p = out.data = malloc(in.count*2 + 1024);
  586. memset (out_p, 0, in.count*2+1024);
  587. // write count
  588. *out_p++ = in.count&255;
  589. *out_p++ = (in.count>>8)&255;
  590. *out_p++ = (in.count>>16)&255;
  591. *out_p++ = (in.count>>24)&255;
  592. // write bits
  593. outbits = 0;
  594. prev = 0;
  595. for (i=0 ; i<in.count ; i++)
  596. {
  597. v = in.data[i];
  598. c = charbitscount1[prev][v];
  599. bits = charbits1[prev][v];
  600. if (!c)
  601. Error ("!bits");
  602. while (c)
  603. {
  604. c--;
  605. if (bits & (1<<c))
  606. out_p[outbits>>3] |= 1<<(outbits&7);
  607. outbits++;
  608. }
  609. prev = v;
  610. #if 1
  611. // check for repeat encodes
  612. for (rept=1 ; i+rept < in.count && rept < MAX_REPT ; rept++)
  613. if (in.data[i+rept] != v)
  614. break;
  615. if (rept > MIN_REPT)
  616. {
  617. c = charbitscount1[prev][255+rept];
  618. bits = charbits1[prev][255+rept];
  619. if (!c)
  620. Error ("!bits");
  621. while (c)
  622. {
  623. c--;
  624. if (bits & (1<<c))
  625. out_p[outbits>>3] |= 1<<(outbits&7);
  626. outbits++;
  627. }
  628. i += rept-1;
  629. }
  630. #endif
  631. }
  632. out_p += (outbits+7)>>3;
  633. out.count = out_p - out.data;
  634. return out;
  635. }
  636. #endif