PageRenderTime 47ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/R-2.10.1-patched/src/extra/bzip2/blocksort.c

http://r4jvm.googlecode.com/
C | 1095 lines | 729 code | 142 blank | 224 comment | 226 complexity | 5155083934dbd0154da28b3e697a8738 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.0, LGPL-3.0, BSD-3-Clause, AGPL-3.0, GPL-2.0, LGPL-2.1
  1. /*-------------------------------------------------------------*/
  2. /*--- Block sorting machinery ---*/
  3. /*--- blocksort.c ---*/
  4. /*-------------------------------------------------------------*/
  5. /* ------------------------------------------------------------------
  6. This file is part of bzip2/libbzip2, a program and library for
  7. lossless, block-sorting data compression.
  8. bzip2/libbzip2 version 1.0.5 of 10 December 2007
  9. Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
  10. Please read the WARNING, DISCLAIMER and PATENTS sections in the
  11. README file.
  12. This program is released under the terms of the license contained
  13. in the file LICENSE.
  14. ------------------------------------------------------------------ */
  15. #include "bzlib_private.h"
  16. /*---------------------------------------------*/
  17. /*--- Fallback O(N log(N)^2) sorting ---*/
  18. /*--- algorithm, for repetitive blocks ---*/
  19. /*---------------------------------------------*/
  20. /*---------------------------------------------*/
  21. static
  22. R_INLINE
  23. void fallbackSimpleSort ( UInt32* fmap,
  24. UInt32* eclass,
  25. Int32 lo,
  26. Int32 hi )
  27. {
  28. Int32 i, j, tmp;
  29. UInt32 ec_tmp;
  30. if (lo == hi) return;
  31. if (hi - lo > 3) {
  32. for ( i = hi-4; i >= lo; i-- ) {
  33. tmp = fmap[i];
  34. ec_tmp = eclass[tmp];
  35. for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
  36. fmap[j-4] = fmap[j];
  37. fmap[j-4] = tmp;
  38. }
  39. }
  40. for ( i = hi-1; i >= lo; i-- ) {
  41. tmp = fmap[i];
  42. ec_tmp = eclass[tmp];
  43. for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
  44. fmap[j-1] = fmap[j];
  45. fmap[j-1] = tmp;
  46. }
  47. }
  48. /*---------------------------------------------*/
  49. #define fswap(zz1, zz2) \
  50. { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
  51. #define fvswap(zzp1, zzp2, zzn) \
  52. { \
  53. Int32 yyp1 = (zzp1); \
  54. Int32 yyp2 = (zzp2); \
  55. Int32 yyn = (zzn); \
  56. while (yyn > 0) { \
  57. fswap(fmap[yyp1], fmap[yyp2]); \
  58. yyp1++; yyp2++; yyn--; \
  59. } \
  60. }
  61. #define fmin(a,b) ((a) < (b)) ? (a) : (b)
  62. #define fpush(lz,hz) { stackLo[sp] = lz; \
  63. stackHi[sp] = hz; \
  64. sp++; }
  65. #define fpop(lz,hz) { sp--; \
  66. lz = stackLo[sp]; \
  67. hz = stackHi[sp]; }
  68. #define FALLBACK_QSORT_SMALL_THRESH 10
  69. #define FALLBACK_QSORT_STACK_SIZE 100
  70. static
  71. void fallbackQSort3 ( UInt32* fmap,
  72. UInt32* eclass,
  73. Int32 loSt,
  74. Int32 hiSt )
  75. {
  76. Int32 unLo, unHi, ltLo, gtHi, n, m;
  77. Int32 sp, lo, hi;
  78. UInt32 med, r, r3;
  79. Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
  80. Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
  81. r = 0;
  82. sp = 0;
  83. fpush ( loSt, hiSt );
  84. while (sp > 0) {
  85. AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
  86. fpop ( lo, hi );
  87. if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
  88. fallbackSimpleSort ( fmap, eclass, lo, hi );
  89. continue;
  90. }
  91. /* Random partitioning. Median of 3 sometimes fails to
  92. avoid bad cases. Median of 9 seems to help but
  93. looks rather expensive. This too seems to work but
  94. is cheaper. Guidance for the magic constants
  95. 7621 and 32768 is taken from Sedgewick's algorithms
  96. book, chapter 35.
  97. */
  98. r = ((r * 7621) + 1) % 32768;
  99. r3 = r % 3;
  100. if (r3 == 0) med = eclass[fmap[lo]]; else
  101. if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
  102. med = eclass[fmap[hi]];
  103. unLo = ltLo = lo;
  104. unHi = gtHi = hi;
  105. while (1) {
  106. while (1) {
  107. if (unLo > unHi) break;
  108. n = (Int32)eclass[fmap[unLo]] - (Int32)med;
  109. if (n == 0) {
  110. fswap(fmap[unLo], fmap[ltLo]);
  111. ltLo++; unLo++;
  112. continue;
  113. };
  114. if (n > 0) break;
  115. unLo++;
  116. }
  117. while (1) {
  118. if (unLo > unHi) break;
  119. n = (Int32)eclass[fmap[unHi]] - (Int32)med;
  120. if (n == 0) {
  121. fswap(fmap[unHi], fmap[gtHi]);
  122. gtHi--; unHi--;
  123. continue;
  124. };
  125. if (n < 0) break;
  126. unHi--;
  127. }
  128. if (unLo > unHi) break;
  129. fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
  130. }
  131. AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
  132. if (gtHi < ltLo) continue;
  133. n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
  134. m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
  135. n = lo + unLo - ltLo - 1;
  136. m = hi - (gtHi - unHi) + 1;
  137. if (n - lo > hi - m) {
  138. fpush ( lo, n );
  139. fpush ( m, hi );
  140. } else {
  141. fpush ( m, hi );
  142. fpush ( lo, n );
  143. }
  144. }
  145. }
  146. #undef fmin
  147. #undef fpush
  148. #undef fpop
  149. #undef fswap
  150. #undef fvswap
  151. #undef FALLBACK_QSORT_SMALL_THRESH
  152. #undef FALLBACK_QSORT_STACK_SIZE
  153. /*---------------------------------------------*/
  154. /* Pre:
  155. nblock > 0
  156. eclass exists for [0 .. nblock-1]
  157. ((UChar*)eclass) [0 .. nblock-1] holds block
  158. ptr exists for [0 .. nblock-1]
  159. Post:
  160. ((UChar*)eclass) [0 .. nblock-1] holds block
  161. All other areas of eclass destroyed
  162. fmap [0 .. nblock-1] holds sorted order
  163. bhtab [ 0 .. 2+(nblock/32) ] destroyed
  164. */
  165. #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
  166. #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
  167. #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
  168. #define WORD_BH(zz) bhtab[(zz) >> 5]
  169. #define UNALIGNED_BH(zz) ((zz) & 0x01f)
  170. static
  171. void fallbackSort ( UInt32* fmap,
  172. UInt32* eclass,
  173. UInt32* bhtab,
  174. Int32 nblock,
  175. Int32 verb )
  176. {
  177. Int32 ftab[257];
  178. Int32 ftabCopy[256];
  179. Int32 H, i, j, k, l, r, cc, cc1;
  180. Int32 nNotDone;
  181. Int32 nBhtab;
  182. UChar* eclass8 = (UChar*)eclass;
  183. /*--
  184. Initial 1-char radix sort to generate
  185. initial fmap and initial BH bits.
  186. --*/
  187. if (verb >= 4)
  188. VPrintf0 ( " bucket sorting ...\n" );
  189. for (i = 0; i < 257; i++) ftab[i] = 0;
  190. for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
  191. for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
  192. for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
  193. for (i = 0; i < nblock; i++) {
  194. j = eclass8[i];
  195. k = ftab[j] - 1;
  196. ftab[j] = k;
  197. fmap[k] = i;
  198. }
  199. nBhtab = 2 + (nblock / 32);
  200. for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
  201. for (i = 0; i < 256; i++) SET_BH(ftab[i]);
  202. /*--
  203. Inductively refine the buckets. Kind-of an
  204. "exponential radix sort" (!), inspired by the
  205. Manber-Myers suffix array construction algorithm.
  206. --*/
  207. /*-- set sentinel bits for block-end detection --*/
  208. for (i = 0; i < 32; i++) {
  209. SET_BH(nblock + 2*i);
  210. CLEAR_BH(nblock + 2*i + 1);
  211. }
  212. /*-- the log(N) loop --*/
  213. H = 1;
  214. while (1) {
  215. if (verb >= 4)
  216. VPrintf1 ( " depth %6d has ", H );
  217. j = 0;
  218. for (i = 0; i < nblock; i++) {
  219. if (ISSET_BH(i)) j = i;
  220. k = fmap[i] - H; if (k < 0) k += nblock;
  221. eclass[k] = j;
  222. }
  223. nNotDone = 0;
  224. r = -1;
  225. while (1) {
  226. /*-- find the next non-singleton bucket --*/
  227. k = r + 1;
  228. while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
  229. if (ISSET_BH(k)) {
  230. while (WORD_BH(k) == 0xffffffff) k += 32;
  231. while (ISSET_BH(k)) k++;
  232. }
  233. l = k - 1;
  234. if (l >= nblock) break;
  235. while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
  236. if (!ISSET_BH(k)) {
  237. while (WORD_BH(k) == 0x00000000) k += 32;
  238. while (!ISSET_BH(k)) k++;
  239. }
  240. r = k - 1;
  241. if (r >= nblock) break;
  242. /*-- now [l, r] bracket current bucket --*/
  243. if (r > l) {
  244. nNotDone += (r - l + 1);
  245. fallbackQSort3 ( fmap, eclass, l, r );
  246. /*-- scan bucket and generate header bits-- */
  247. cc = -1;
  248. for (i = l; i <= r; i++) {
  249. cc1 = eclass[fmap[i]];
  250. if (cc != cc1) { SET_BH(i); cc = cc1; };
  251. }
  252. }
  253. }
  254. if (verb >= 4)
  255. VPrintf1 ( "%6d unresolved strings\n", nNotDone );
  256. H *= 2;
  257. if (H > nblock || nNotDone == 0) break;
  258. }
  259. /*--
  260. Reconstruct the original block in
  261. eclass8 [0 .. nblock-1], since the
  262. previous phase destroyed it.
  263. --*/
  264. if (verb >= 4)
  265. VPrintf0 ( " reconstructing block ...\n" );
  266. j = 0;
  267. for (i = 0; i < nblock; i++) {
  268. while (ftabCopy[j] == 0) j++;
  269. ftabCopy[j]--;
  270. eclass8[fmap[i]] = (UChar)j;
  271. }
  272. AssertH ( j < 256, 1005 );
  273. }
  274. #undef SET_BH
  275. #undef CLEAR_BH
  276. #undef ISSET_BH
  277. #undef WORD_BH
  278. #undef UNALIGNED_BH
  279. /*---------------------------------------------*/
  280. /*--- The main, O(N^2 log(N)) sorting ---*/
  281. /*--- algorithm. Faster for "normal" ---*/
  282. /*--- non-repetitive blocks. ---*/
  283. /*---------------------------------------------*/
  284. /*---------------------------------------------*/
  285. /* Solaris cc objects to inlining functions whose names start with `main' */
  286. static
  287. R_INLINE
  288. Bool BZmainGtU ( UInt32 i1,
  289. UInt32 i2,
  290. UChar* block,
  291. UInt16* quadrant,
  292. UInt32 nblock,
  293. Int32* budget )
  294. {
  295. Int32 k;
  296. UChar c1, c2;
  297. UInt16 s1, s2;
  298. AssertD ( i1 != i2, "BZmainGtU" );
  299. /* 1 */
  300. c1 = block[i1]; c2 = block[i2];
  301. if (c1 != c2) return (c1 > c2);
  302. i1++; i2++;
  303. /* 2 */
  304. c1 = block[i1]; c2 = block[i2];
  305. if (c1 != c2) return (c1 > c2);
  306. i1++; i2++;
  307. /* 3 */
  308. c1 = block[i1]; c2 = block[i2];
  309. if (c1 != c2) return (c1 > c2);
  310. i1++; i2++;
  311. /* 4 */
  312. c1 = block[i1]; c2 = block[i2];
  313. if (c1 != c2) return (c1 > c2);
  314. i1++; i2++;
  315. /* 5 */
  316. c1 = block[i1]; c2 = block[i2];
  317. if (c1 != c2) return (c1 > c2);
  318. i1++; i2++;
  319. /* 6 */
  320. c1 = block[i1]; c2 = block[i2];
  321. if (c1 != c2) return (c1 > c2);
  322. i1++; i2++;
  323. /* 7 */
  324. c1 = block[i1]; c2 = block[i2];
  325. if (c1 != c2) return (c1 > c2);
  326. i1++; i2++;
  327. /* 8 */
  328. c1 = block[i1]; c2 = block[i2];
  329. if (c1 != c2) return (c1 > c2);
  330. i1++; i2++;
  331. /* 9 */
  332. c1 = block[i1]; c2 = block[i2];
  333. if (c1 != c2) return (c1 > c2);
  334. i1++; i2++;
  335. /* 10 */
  336. c1 = block[i1]; c2 = block[i2];
  337. if (c1 != c2) return (c1 > c2);
  338. i1++; i2++;
  339. /* 11 */
  340. c1 = block[i1]; c2 = block[i2];
  341. if (c1 != c2) return (c1 > c2);
  342. i1++; i2++;
  343. /* 12 */
  344. c1 = block[i1]; c2 = block[i2];
  345. if (c1 != c2) return (c1 > c2);
  346. i1++; i2++;
  347. k = nblock + 8;
  348. do {
  349. /* 1 */
  350. c1 = block[i1]; c2 = block[i2];
  351. if (c1 != c2) return (c1 > c2);
  352. s1 = quadrant[i1]; s2 = quadrant[i2];
  353. if (s1 != s2) return (s1 > s2);
  354. i1++; i2++;
  355. /* 2 */
  356. c1 = block[i1]; c2 = block[i2];
  357. if (c1 != c2) return (c1 > c2);
  358. s1 = quadrant[i1]; s2 = quadrant[i2];
  359. if (s1 != s2) return (s1 > s2);
  360. i1++; i2++;
  361. /* 3 */
  362. c1 = block[i1]; c2 = block[i2];
  363. if (c1 != c2) return (c1 > c2);
  364. s1 = quadrant[i1]; s2 = quadrant[i2];
  365. if (s1 != s2) return (s1 > s2);
  366. i1++; i2++;
  367. /* 4 */
  368. c1 = block[i1]; c2 = block[i2];
  369. if (c1 != c2) return (c1 > c2);
  370. s1 = quadrant[i1]; s2 = quadrant[i2];
  371. if (s1 != s2) return (s1 > s2);
  372. i1++; i2++;
  373. /* 5 */
  374. c1 = block[i1]; c2 = block[i2];
  375. if (c1 != c2) return (c1 > c2);
  376. s1 = quadrant[i1]; s2 = quadrant[i2];
  377. if (s1 != s2) return (s1 > s2);
  378. i1++; i2++;
  379. /* 6 */
  380. c1 = block[i1]; c2 = block[i2];
  381. if (c1 != c2) return (c1 > c2);
  382. s1 = quadrant[i1]; s2 = quadrant[i2];
  383. if (s1 != s2) return (s1 > s2);
  384. i1++; i2++;
  385. /* 7 */
  386. c1 = block[i1]; c2 = block[i2];
  387. if (c1 != c2) return (c1 > c2);
  388. s1 = quadrant[i1]; s2 = quadrant[i2];
  389. if (s1 != s2) return (s1 > s2);
  390. i1++; i2++;
  391. /* 8 */
  392. c1 = block[i1]; c2 = block[i2];
  393. if (c1 != c2) return (c1 > c2);
  394. s1 = quadrant[i1]; s2 = quadrant[i2];
  395. if (s1 != s2) return (s1 > s2);
  396. i1++; i2++;
  397. if (i1 >= nblock) i1 -= nblock;
  398. if (i2 >= nblock) i2 -= nblock;
  399. k -= 8;
  400. (*budget)--;
  401. }
  402. while (k >= 0);
  403. return False;
  404. }
  405. /*---------------------------------------------*/
  406. /*--
  407. Knuth's increments seem to work better
  408. than Incerpi-Sedgewick here. Possibly
  409. because the number of elems to sort is
  410. usually small, typically <= 20.
  411. --*/
  412. static
  413. Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
  414. 9841, 29524, 88573, 265720,
  415. 797161, 2391484 };
  416. static
  417. void mainSimpleSort ( UInt32* ptr,
  418. UChar* block,
  419. UInt16* quadrant,
  420. Int32 nblock,
  421. Int32 lo,
  422. Int32 hi,
  423. Int32 d,
  424. Int32* budget )
  425. {
  426. Int32 i, j, h, bigN, hp;
  427. UInt32 v;
  428. bigN = hi - lo + 1;
  429. if (bigN < 2) return;
  430. hp = 0;
  431. while (incs[hp] < bigN) hp++;
  432. hp--;
  433. for (; hp >= 0; hp--) {
  434. h = incs[hp];
  435. i = lo + h;
  436. while (True) {
  437. /*-- copy 1 --*/
  438. if (i > hi) break;
  439. v = ptr[i];
  440. j = i;
  441. while ( BZmainGtU (
  442. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  443. ) ) {
  444. ptr[j] = ptr[j-h];
  445. j = j - h;
  446. if (j <= (lo + h - 1)) break;
  447. }
  448. ptr[j] = v;
  449. i++;
  450. /*-- copy 2 --*/
  451. if (i > hi) break;
  452. v = ptr[i];
  453. j = i;
  454. while ( BZmainGtU (
  455. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  456. ) ) {
  457. ptr[j] = ptr[j-h];
  458. j = j - h;
  459. if (j <= (lo + h - 1)) break;
  460. }
  461. ptr[j] = v;
  462. i++;
  463. /*-- copy 3 --*/
  464. if (i > hi) break;
  465. v = ptr[i];
  466. j = i;
  467. while ( BZmainGtU (
  468. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  469. ) ) {
  470. ptr[j] = ptr[j-h];
  471. j = j - h;
  472. if (j <= (lo + h - 1)) break;
  473. }
  474. ptr[j] = v;
  475. i++;
  476. if (*budget < 0) return;
  477. }
  478. }
  479. }
  480. /*---------------------------------------------*/
  481. /*--
  482. The following is an implementation of
  483. an elegant 3-way quicksort for strings,
  484. described in a paper "Fast Algorithms for
  485. Sorting and Searching Strings", by Robert
  486. Sedgewick and Jon L. Bentley.
  487. --*/
  488. #define mswap(zz1, zz2) \
  489. { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
  490. #define mvswap(zzp1, zzp2, zzn) \
  491. { \
  492. Int32 yyp1 = (zzp1); \
  493. Int32 yyp2 = (zzp2); \
  494. Int32 yyn = (zzn); \
  495. while (yyn > 0) { \
  496. mswap(ptr[yyp1], ptr[yyp2]); \
  497. yyp1++; yyp2++; yyn--; \
  498. } \
  499. }
  500. static
  501. R_INLINE
  502. UChar mmed3 ( UChar a, UChar b, UChar c )
  503. {
  504. UChar t;
  505. if (a > b) { t = a; a = b; b = t; };
  506. if (b > c) {
  507. b = c;
  508. if (a > b) b = a;
  509. }
  510. return b;
  511. }
  512. #define mmin(a,b) ((a) < (b)) ? (a) : (b)
  513. #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
  514. stackHi[sp] = hz; \
  515. stackD [sp] = dz; \
  516. sp++; }
  517. #define mpop(lz,hz,dz) { sp--; \
  518. lz = stackLo[sp]; \
  519. hz = stackHi[sp]; \
  520. dz = stackD [sp]; }
  521. #define mnextsize(az) (nextHi[az]-nextLo[az])
  522. #define mnextswap(az,bz) \
  523. { Int32 tz; \
  524. tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
  525. tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
  526. tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
  527. #define MAIN_QSORT_SMALL_THRESH 20
  528. #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
  529. #define MAIN_QSORT_STACK_SIZE 100
  530. static
  531. void mainQSort3 ( UInt32* ptr,
  532. UChar* block,
  533. UInt16* quadrant,
  534. Int32 nblock,
  535. Int32 loSt,
  536. Int32 hiSt,
  537. Int32 dSt,
  538. Int32* budget )
  539. {
  540. Int32 unLo, unHi, ltLo, gtHi, n, m, med;
  541. Int32 sp, lo, hi, d;
  542. Int32 stackLo[MAIN_QSORT_STACK_SIZE];
  543. Int32 stackHi[MAIN_QSORT_STACK_SIZE];
  544. Int32 stackD [MAIN_QSORT_STACK_SIZE];
  545. Int32 nextLo[3];
  546. Int32 nextHi[3];
  547. Int32 nextD [3];
  548. sp = 0;
  549. mpush ( loSt, hiSt, dSt );
  550. while (sp > 0) {
  551. AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
  552. mpop ( lo, hi, d );
  553. if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
  554. d > MAIN_QSORT_DEPTH_THRESH) {
  555. mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
  556. if (*budget < 0) return;
  557. continue;
  558. }
  559. med = (Int32)
  560. mmed3 ( block[ptr[ lo ]+d],
  561. block[ptr[ hi ]+d],
  562. block[ptr[ (lo+hi)>>1 ]+d] );
  563. unLo = ltLo = lo;
  564. unHi = gtHi = hi;
  565. while (True) {
  566. while (True) {
  567. if (unLo > unHi) break;
  568. n = ((Int32)block[ptr[unLo]+d]) - med;
  569. if (n == 0) {
  570. mswap(ptr[unLo], ptr[ltLo]);
  571. ltLo++; unLo++; continue;
  572. };
  573. if (n > 0) break;
  574. unLo++;
  575. }
  576. while (True) {
  577. if (unLo > unHi) break;
  578. n = ((Int32)block[ptr[unHi]+d]) - med;
  579. if (n == 0) {
  580. mswap(ptr[unHi], ptr[gtHi]);
  581. gtHi--; unHi--; continue;
  582. };
  583. if (n < 0) break;
  584. unHi--;
  585. }
  586. if (unLo > unHi) break;
  587. mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
  588. }
  589. AssertD ( unHi == unLo-1, "mainQSort3(2)" );
  590. if (gtHi < ltLo) {
  591. mpush(lo, hi, d+1 );
  592. continue;
  593. }
  594. n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
  595. m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
  596. n = lo + unLo - ltLo - 1;
  597. m = hi - (gtHi - unHi) + 1;
  598. nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
  599. nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
  600. nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
  601. if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
  602. if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
  603. if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
  604. AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
  605. AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
  606. mpush (nextLo[0], nextHi[0], nextD[0]);
  607. mpush (nextLo[1], nextHi[1], nextD[1]);
  608. mpush (nextLo[2], nextHi[2], nextD[2]);
  609. }
  610. }
  611. #undef mswap
  612. #undef mvswap
  613. #undef mpush
  614. #undef mpop
  615. #undef mmin
  616. #undef mnextsize
  617. #undef mnextswap
  618. #undef MAIN_QSORT_SMALL_THRESH
  619. #undef MAIN_QSORT_DEPTH_THRESH
  620. #undef MAIN_QSORT_STACK_SIZE
  621. /*---------------------------------------------*/
  622. /* Pre:
  623. nblock > N_OVERSHOOT
  624. block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
  625. ((UChar*)block32) [0 .. nblock-1] holds block
  626. ptr exists for [0 .. nblock-1]
  627. Post:
  628. ((UChar*)block32) [0 .. nblock-1] holds block
  629. All other areas of block32 destroyed
  630. ftab [0 .. 65536 ] destroyed
  631. ptr [0 .. nblock-1] holds sorted order
  632. if (*budget < 0), sorting was abandoned
  633. */
  634. #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
  635. #define SETMASK (1 << 21)
  636. #define CLEARMASK (~(SETMASK))
  637. static
  638. void mainSort ( UInt32* ptr,
  639. UChar* block,
  640. UInt16* quadrant,
  641. UInt32* ftab,
  642. Int32 nblock,
  643. Int32 verb,
  644. Int32* budget )
  645. {
  646. Int32 i, j, k, ss, sb;
  647. Int32 runningOrder[256];
  648. Bool bigDone[256];
  649. Int32 copyStart[256];
  650. Int32 copyEnd [256];
  651. UChar c1;
  652. Int32 numQSorted;
  653. UInt16 s;
  654. if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
  655. /*-- set up the 2-byte frequency table --*/
  656. for (i = 65536; i >= 0; i--) ftab[i] = 0;
  657. j = block[0] << 8;
  658. i = nblock-1;
  659. for (; i >= 3; i -= 4) {
  660. quadrant[i] = 0;
  661. j = (j >> 8) | ( ((UInt16)block[i]) << 8);
  662. ftab[j]++;
  663. quadrant[i-1] = 0;
  664. j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
  665. ftab[j]++;
  666. quadrant[i-2] = 0;
  667. j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
  668. ftab[j]++;
  669. quadrant[i-3] = 0;
  670. j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
  671. ftab[j]++;
  672. }
  673. for (; i >= 0; i--) {
  674. quadrant[i] = 0;
  675. j = (j >> 8) | ( ((UInt16)block[i]) << 8);
  676. ftab[j]++;
  677. }
  678. /*-- (emphasises close relationship of block & quadrant) --*/
  679. for (i = 0; i < BZ_N_OVERSHOOT; i++) {
  680. block [nblock+i] = block[i];
  681. quadrant[nblock+i] = 0;
  682. }
  683. if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
  684. /*-- Complete the initial radix sort --*/
  685. for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
  686. s = block[0] << 8;
  687. i = nblock-1;
  688. for (; i >= 3; i -= 4) {
  689. s = (s >> 8) | (block[i] << 8);
  690. j = ftab[s] -1;
  691. ftab[s] = j;
  692. ptr[j] = i;
  693. s = (s >> 8) | (block[i-1] << 8);
  694. j = ftab[s] -1;
  695. ftab[s] = j;
  696. ptr[j] = i-1;
  697. s = (s >> 8) | (block[i-2] << 8);
  698. j = ftab[s] -1;
  699. ftab[s] = j;
  700. ptr[j] = i-2;
  701. s = (s >> 8) | (block[i-3] << 8);
  702. j = ftab[s] -1;
  703. ftab[s] = j;
  704. ptr[j] = i-3;
  705. }
  706. for (; i >= 0; i--) {
  707. s = (s >> 8) | (block[i] << 8);
  708. j = ftab[s] -1;
  709. ftab[s] = j;
  710. ptr[j] = i;
  711. }
  712. /*--
  713. Now ftab contains the first loc of every small bucket.
  714. Calculate the running order, from smallest to largest
  715. big bucket.
  716. --*/
  717. for (i = 0; i <= 255; i++) {
  718. bigDone [i] = False;
  719. runningOrder[i] = i;
  720. }
  721. {
  722. Int32 vv;
  723. Int32 h = 1;
  724. do h = 3 * h + 1; while (h <= 256);
  725. do {
  726. h = h / 3;
  727. for (i = h; i <= 255; i++) {
  728. vv = runningOrder[i];
  729. j = i;
  730. while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
  731. runningOrder[j] = runningOrder[j-h];
  732. j = j - h;
  733. if (j <= (h - 1)) goto zero;
  734. }
  735. zero:
  736. runningOrder[j] = vv;
  737. }
  738. } while (h != 1);
  739. }
  740. /*--
  741. The main sorting loop.
  742. --*/
  743. numQSorted = 0;
  744. for (i = 0; i <= 255; i++) {
  745. /*--
  746. Process big buckets, starting with the least full.
  747. Basically this is a 3-step process in which we call
  748. mainQSort3 to sort the small buckets [ss, j], but
  749. also make a big effort to avoid the calls if we can.
  750. --*/
  751. ss = runningOrder[i];
  752. /*--
  753. Step 1:
  754. Complete the big bucket [ss] by quicksorting
  755. any unsorted small buckets [ss, j], for j != ss.
  756. Hopefully previous pointer-scanning phases have already
  757. completed many of the small buckets [ss, j], so
  758. we don't have to sort them at all.
  759. --*/
  760. for (j = 0; j <= 255; j++) {
  761. if (j != ss) {
  762. sb = (ss << 8) + j;
  763. if ( ! (ftab[sb] & SETMASK) ) {
  764. Int32 lo = ftab[sb] & CLEARMASK;
  765. Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
  766. if (hi > lo) {
  767. if (verb >= 4)
  768. VPrintf4 ( " qsort [0x%x, 0x%x] "
  769. "done %d this %d\n",
  770. ss, j, numQSorted, hi - lo + 1 );
  771. mainQSort3 (
  772. ptr, block, quadrant, nblock,
  773. lo, hi, BZ_N_RADIX, budget
  774. );
  775. numQSorted += (hi - lo + 1);
  776. if (*budget < 0) return;
  777. }
  778. }
  779. ftab[sb] |= SETMASK;
  780. }
  781. }
  782. AssertH ( !bigDone[ss], 1006 );
  783. /*--
  784. Step 2:
  785. Now scan this big bucket [ss] so as to synthesise the
  786. sorted order for small buckets [t, ss] for all t,
  787. including, magically, the bucket [ss,ss] too.
  788. This will avoid doing Real Work in subsequent Step 1's.
  789. --*/
  790. {
  791. for (j = 0; j <= 255; j++) {
  792. copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
  793. copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
  794. }
  795. for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
  796. k = ptr[j]-1; if (k < 0) k += nblock;
  797. c1 = block[k];
  798. if (!bigDone[c1])
  799. ptr[ copyStart[c1]++ ] = k;
  800. }
  801. for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
  802. k = ptr[j]-1; if (k < 0) k += nblock;
  803. c1 = block[k];
  804. if (!bigDone[c1])
  805. ptr[ copyEnd[c1]-- ] = k;
  806. }
  807. }
  808. AssertH ( (copyStart[ss]-1 == copyEnd[ss])
  809. ||
  810. /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
  811. Necessity for this case is demonstrated by compressing
  812. a sequence of approximately 48.5 million of character
  813. 251; 1.0.0/1.0.1 will then die here. */
  814. (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
  815. 1007 )
  816. for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
  817. /*--
  818. Step 3:
  819. The [ss] big bucket is now done. Record this fact,
  820. and update the quadrant descriptors. Remember to
  821. update quadrants in the overshoot area too, if
  822. necessary. The "if (i < 255)" test merely skips
  823. this updating for the last bucket processed, since
  824. updating for the last bucket is pointless.
  825. The quadrant array provides a way to incrementally
  826. cache sort orderings, as they appear, so as to
  827. make subsequent comparisons in fullGtU() complete
  828. faster. For repetitive blocks this makes a big
  829. difference (but not big enough to be able to avoid
  830. the fallback sorting mechanism, exponential radix sort).
  831. The precise meaning is: at all times:
  832. for 0 <= i < nblock and 0 <= j <= nblock
  833. if block[i] != block[j],
  834. then the relative values of quadrant[i] and
  835. quadrant[j] are meaningless.
  836. else {
  837. if quadrant[i] < quadrant[j]
  838. then the string starting at i lexicographically
  839. precedes the string starting at j
  840. else if quadrant[i] > quadrant[j]
  841. then the string starting at j lexicographically
  842. precedes the string starting at i
  843. else
  844. the relative ordering of the strings starting
  845. at i and j has not yet been determined.
  846. }
  847. --*/
  848. bigDone[ss] = True;
  849. if (i < 255) {
  850. Int32 bbStart = ftab[ss << 8] & CLEARMASK;
  851. Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
  852. Int32 shifts = 0;
  853. while ((bbSize >> shifts) > 65534) shifts++;
  854. for (j = bbSize-1; j >= 0; j--) {
  855. Int32 a2update = ptr[bbStart + j];
  856. UInt16 qVal = (UInt16)(j >> shifts);
  857. quadrant[a2update] = qVal;
  858. if (a2update < BZ_N_OVERSHOOT)
  859. quadrant[a2update + nblock] = qVal;
  860. }
  861. AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
  862. }
  863. }
  864. if (verb >= 4)
  865. VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
  866. nblock, numQSorted, nblock - numQSorted );
  867. }
  868. #undef BIGFREQ
  869. #undef SETMASK
  870. #undef CLEARMASK
  871. /*---------------------------------------------*/
  872. /* Pre:
  873. nblock > 0
  874. arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
  875. ((UChar*)arr2) [0 .. nblock-1] holds block
  876. arr1 exists for [0 .. nblock-1]
  877. Post:
  878. ((UChar*)arr2) [0 .. nblock-1] holds block
  879. All other areas of block destroyed
  880. ftab [ 0 .. 65536 ] destroyed
  881. arr1 [0 .. nblock-1] holds sorted order
  882. */
  883. void BZ2_blockSort ( EState* s )
  884. {
  885. UInt32* ptr = s->ptr;
  886. UChar* block = s->block;
  887. UInt32* ftab = s->ftab;
  888. Int32 nblock = s->nblock;
  889. Int32 verb = s->verbosity;
  890. Int32 wfact = s->workFactor;
  891. UInt16* quadrant;
  892. Int32 budget;
  893. Int32 budgetInit;
  894. Int32 i;
  895. if (nblock < 10000) {
  896. fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  897. } else {
  898. /* Calculate the location for quadrant, remembering to get
  899. the alignment right. Assumes that &(block[0]) is at least
  900. 2-byte aligned -- this should be ok since block is really
  901. the first section of arr2.
  902. */
  903. i = nblock+BZ_N_OVERSHOOT;
  904. if (i & 1) i++;
  905. quadrant = (UInt16*)(&(block[i]));
  906. /* (wfact-1) / 3 puts the default-factor-30
  907. transition point at very roughly the same place as
  908. with v0.1 and v0.9.0.
  909. Not that it particularly matters any more, since the
  910. resulting compressed stream is now the same regardless
  911. of whether or not we use the main sort or fallback sort.
  912. */
  913. if (wfact < 1 ) wfact = 1;
  914. if (wfact > 100) wfact = 100;
  915. budgetInit = nblock * ((wfact-1) / 3);
  916. budget = budgetInit;
  917. mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
  918. if (verb >= 3)
  919. VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
  920. budgetInit - budget,
  921. nblock,
  922. (float)(budgetInit - budget) /
  923. (float)(nblock==0 ? 1 : nblock) );
  924. if (budget < 0) {
  925. if (verb >= 2)
  926. VPrintf0 ( " too repetitive; using fallback"
  927. " sorting algorithm\n" );
  928. fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  929. }
  930. }
  931. s->origPtr = -1;
  932. for (i = 0; i < s->nblock; i++)
  933. if (ptr[i] == 0)
  934. { s->origPtr = i; break; };
  935. AssertH( s->origPtr != -1, 1003 );
  936. }
  937. /*-------------------------------------------------------------*/
  938. /*--- end blocksort.c ---*/
  939. /*-------------------------------------------------------------*/