/src/idx-opt.c

https://github.com/tnorth/PyTables · C · 1168 lines · 969 code · 110 blank · 89 comment · 274 complexity · 68314394fa40b54d227a3f84bc096278 MD5 · raw file

  1. #include "stdio.h"
  2. #include "idx-opt.h"
  3. /*-------------------------------------------------------------------------
  4. *
  5. * Binary search functions
  6. *
  7. *-------------------------------------------------------------------------
  8. */
  9. /*-------------------------------------------------------------------------
  10. * Function: bisect_{left,right}_optim_*
  11. *
  12. * Purpose: Look-up for a value in sorted arrays
  13. *
  14. * Return: The index of the value in array
  15. *
  16. * Programmer: Francesc Alted
  17. *
  18. * Date: August, 2005
  19. *
  20. * Comments:
  21. *
  22. * Modifications:
  23. *
  24. *-------------------------------------------------------------------------
  25. */
  26. /* Optimised version for left/int8 */
  27. int bisect_left_b(npy_int8 *a, long x, int hi, int offset) {
  28. int lo = 0;
  29. int mid;
  30. if (x <= a[offset]) return 0;
  31. if (a[hi-1+offset] < x) return hi;
  32. while (lo < hi) {
  33. mid = lo + (hi-lo)/2;
  34. if (a[mid+offset] < x) lo = mid+1;
  35. else hi = mid;
  36. }
  37. return lo;
  38. }
  39. /* Optimised version for left/uint8 */
  40. int bisect_left_ub(npy_uint8 *a, long x, int hi, int offset) {
  41. int lo = 0;
  42. int mid;
  43. if (x <= a[offset]) return 0;
  44. if (a[hi-1+offset] < x) return hi;
  45. while (lo < hi) {
  46. mid = lo + (hi-lo)/2;
  47. if (a[mid+offset] < x) lo = mid+1;
  48. else hi = mid;
  49. }
  50. return lo;
  51. }
  52. /* Optimised version for right/int8 */
  53. int bisect_right_b(npy_int8 *a, long x, int hi, int offset) {
  54. int lo = 0;
  55. int mid;
  56. if (x < a[offset]) return 0;
  57. if (a[hi-1+offset] <= x) return hi;
  58. while (lo < hi) {
  59. mid = lo + (hi-lo)/2;
  60. if (x < a[mid+offset]) hi = mid;
  61. else lo = mid+1;
  62. }
  63. return lo;
  64. }
  65. /* Optimised version for right/uint8 */
  66. int bisect_right_ub(npy_uint8 *a, long x, int hi, int offset) {
  67. int lo = 0;
  68. int mid;
  69. if (x < a[offset]) return 0;
  70. if (a[hi-1+offset] <= x) return hi;
  71. while (lo < hi) {
  72. mid = lo + (hi-lo)/2;
  73. if (x < a[mid+offset]) hi = mid;
  74. else lo = mid+1;
  75. }
  76. return lo;
  77. }
  78. /* Optimised version for left/int16 */
  79. int bisect_left_s(npy_int16 *a, long x, int hi, int offset) {
  80. int lo = 0;
  81. int mid;
  82. if (x <= a[offset]) return 0;
  83. if (a[hi-1+offset] < x) return hi;
  84. while (lo < hi) {
  85. mid = lo + (hi-lo)/2;
  86. if (a[mid+offset] < x) lo = mid+1;
  87. else hi = mid;
  88. }
  89. return lo;
  90. }
  91. /* Optimised version for left/uint16 */
  92. int bisect_left_us(npy_uint16 *a, long x, int hi, int offset) {
  93. int lo = 0;
  94. int mid;
  95. if (x <= a[offset]) return 0;
  96. if (a[hi-1+offset] < x) return hi;
  97. while (lo < hi) {
  98. mid = lo + (hi-lo)/2;
  99. if (a[mid+offset] < x) lo = mid+1;
  100. else hi = mid;
  101. }
  102. return lo;
  103. }
  104. /* Optimised version for right/int16 */
  105. int bisect_right_s(npy_int16 *a, long x, int hi, int offset) {
  106. int lo = 0;
  107. int mid;
  108. if (x < a[offset]) return 0;
  109. if (a[hi-1+offset] <= x) return hi;
  110. while (lo < hi) {
  111. mid = lo + (hi-lo)/2;
  112. if (x < a[mid+offset]) hi = mid;
  113. else lo = mid+1;
  114. }
  115. return lo;
  116. }
  117. /* Optimised version for right/uint16 */
  118. int bisect_right_us(npy_uint16 *a, long x, int hi, int offset) {
  119. int lo = 0;
  120. int mid;
  121. if (x < a[offset]) return 0;
  122. if (a[hi-1+offset] <= x) return hi;
  123. while (lo < hi) {
  124. mid = lo + (hi-lo)/2;
  125. if (x < a[mid+offset]) hi = mid;
  126. else lo = mid+1;
  127. }
  128. return lo;
  129. }
  130. /* Optimised version for left/int32 */
  131. int bisect_left_i(npy_int32 *a, long x, int hi, int offset) {
  132. int lo = 0;
  133. int mid;
  134. if (x <= a[offset]) return 0;
  135. if (a[hi-1+offset] < x) return hi;
  136. while (lo < hi) {
  137. mid = lo + (hi-lo)/2;
  138. if (a[mid+offset] < x) lo = mid+1;
  139. else hi = mid;
  140. }
  141. return lo;
  142. }
  143. /* Optimised version for left/uint32 */
  144. int bisect_left_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) {
  145. int lo = 0;
  146. int mid;
  147. if (x <= a[offset]) return 0;
  148. if (a[hi-1+offset] < x) return hi;
  149. while (lo < hi) {
  150. mid = lo + (hi-lo)/2;
  151. if (a[mid+offset] < x) lo = mid+1;
  152. else hi = mid;
  153. }
  154. return lo;
  155. }
  156. /* Optimised version for right/int32 */
  157. int bisect_right_i(npy_int32 *a, long x, int hi, int offset) {
  158. int lo = 0;
  159. int mid;
  160. if (x < a[offset]) return 0;
  161. if (a[hi-1+offset] <= x) return hi;
  162. while (lo < hi) {
  163. mid = lo + (hi-lo)/2;
  164. if (x < a[mid+offset]) hi = mid;
  165. else lo = mid+1;
  166. }
  167. return lo;
  168. }
  169. /* Optimised version for right/uint32 */
  170. int bisect_right_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) {
  171. int lo = 0;
  172. int mid;
  173. if (x < a[offset]) return 0;
  174. if (a[hi-1+offset] <= x) return hi;
  175. while (lo < hi) {
  176. mid = lo + (hi-lo)/2;
  177. if (x < a[mid+offset]) hi = mid;
  178. else lo = mid+1;
  179. }
  180. return lo;
  181. }
  182. /* Optimised version for left/int64 */
  183. int bisect_left_ll(npy_int64 *a, npy_int64 x, int hi, int offset) {
  184. int lo = 0;
  185. int mid;
  186. if (x <= a[offset]) return 0;
  187. if (a[hi-1+offset] < x) return hi;
  188. while (lo < hi) {
  189. mid = lo + (hi-lo)/2;
  190. if (a[mid+offset] < x) lo = mid+1;
  191. else hi = mid;
  192. }
  193. return lo;
  194. }
  195. /* Optimised version for left/uint64 */
  196. int bisect_left_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) {
  197. int lo = 0;
  198. int mid;
  199. if (x <= a[offset]) return 0;
  200. if (a[hi-1+offset] < x) return hi;
  201. while (lo < hi) {
  202. mid = lo + (hi-lo)/2;
  203. if (a[mid+offset] < x) lo = mid+1;
  204. else hi = mid;
  205. }
  206. return lo;
  207. }
  208. /* Optimised version for right/int64 */
  209. int bisect_right_ll(npy_int64 *a, npy_int64 x, int hi, int offset) {
  210. int lo = 0;
  211. int mid;
  212. if (x < a[offset]) return 0;
  213. if (a[hi-1+offset] <= x) return hi;
  214. while (lo < hi) {
  215. mid = lo + (hi-lo)/2;
  216. if (x < a[mid+offset]) hi = mid;
  217. else lo = mid+1;
  218. }
  219. return lo;
  220. }
  221. /* Optimised version for right/uint64 */
  222. int bisect_right_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) {
  223. int lo = 0;
  224. int mid;
  225. if (x < a[offset]) return 0;
  226. if (a[hi-1+offset] <= x) return hi;
  227. while (lo < hi) {
  228. mid = lo + (hi-lo)/2;
  229. if (x < a[mid+offset]) hi = mid;
  230. else lo = mid+1;
  231. }
  232. return lo;
  233. }
  234. /* Optimised version for left/float32 */
  235. int bisect_left_f(npy_float32 *a, npy_float64 x, int hi, int offset) {
  236. int lo = 0;
  237. int mid;
  238. if (x <= a[offset]) return 0;
  239. if (a[hi-1+offset] < x) return hi;
  240. while (lo < hi) {
  241. mid = lo + (hi-lo)/2;
  242. if (a[mid+offset] < x) lo = mid+1;
  243. else hi = mid;
  244. }
  245. return lo;
  246. }
  247. /* Optimised version for right/float32 */
  248. int bisect_right_f(npy_float32 *a, npy_float64 x, int hi, int offset) {
  249. int lo = 0;
  250. int mid;
  251. if (x < a[offset]) return 0;
  252. if (a[hi-1+offset] <= x) return hi;
  253. while (lo < hi) {
  254. mid = lo + (hi-lo)/2;
  255. if (x < a[mid+offset]) hi = mid;
  256. else lo = mid+1;
  257. }
  258. return lo;
  259. }
  260. /* Optimised version for left/float64 */
  261. int bisect_left_d(npy_float64 *a, npy_float64 x, int hi, int offset) {
  262. int lo = 0;
  263. int mid;
  264. if (x <= a[offset]) return 0;
  265. if (a[hi-1+offset] < x) return hi;
  266. while (lo < hi) {
  267. mid = lo + (hi-lo)/2;
  268. if (a[mid+offset] < x) lo = mid+1;
  269. else hi = mid;
  270. }
  271. return lo;
  272. }
  273. /* Optimised version for left/float64 */
  274. int bisect_right_d(npy_float64 *a, npy_float64 x, int hi, int offset) {
  275. int lo = 0;
  276. int mid;
  277. if (x < a[offset]) return 0;
  278. if (a[hi-1+offset] <= x) return hi;
  279. while (lo < hi) {
  280. mid = lo + (hi-lo)/2;
  281. if (x < a[mid+offset]) hi = mid;
  282. else lo = mid+1;
  283. }
  284. return lo;
  285. }
  286. /* Now, it follows a series of functions for doing in-place sorting.
  287. The array that starts at start1 is sorted in-place. array2 is also
  288. sorted in-place, but following the array1 order.
  289. */
  290. #define PYA_QS_STACK 100
  291. #define SMALL_QUICKSORT 15
  292. #define SMALL_STRING 16
  293. #define SWAP(a,b) {SWAP_temp = (b); (b) = (a); (a) = SWAP_temp;}
  294. /* This is for swaping strings */
  295. static void
  296. sSWAP(char *s1, char *s2, size_t len)
  297. {
  298. while(len--) {
  299. const char t = *s1;
  300. *s1++ = *s2;
  301. *s2++ = t;
  302. }
  303. }
  304. /* The iSWAP macro is safe because it will always work on aligned ints */
  305. #define iSWAP(a,b) { \
  306. switch(ts) { \
  307. case 8: \
  308. *(npy_int64 *)iSWAP_temp = *(npy_int64 *)(b); \
  309. *(npy_int64 *)(b) = *(npy_int64 *)(a); \
  310. *(npy_int64 *)(a) = *(npy_int64 *)iSWAP_temp; \
  311. break; \
  312. case 4: \
  313. *(npy_int32 *)iSWAP_temp = *(npy_int32 *)(b); \
  314. *(npy_int32 *)(b) = *(npy_int32 *)(a); \
  315. *(npy_int32 *)(a) = *(npy_int32 *)iSWAP_temp; \
  316. break; \
  317. case 1: \
  318. *(npy_int8 *)iSWAP_temp = *(npy_int8 *)(b); \
  319. *(npy_int8 *)(b) = *(npy_int8 *)(a); \
  320. *(npy_int8 *)(a) = *(npy_int8 *)iSWAP_temp; \
  321. break; \
  322. case 2: \
  323. *(npy_int16 *)iSWAP_temp = *(npy_int16 *)(b); \
  324. *(npy_int16 *)(b) = *(npy_int16 *)(a); \
  325. *(npy_int16 *)(a) = *(npy_int16 *)iSWAP_temp; \
  326. break; \
  327. default: \
  328. for (i=0; i<ts; i++) { \
  329. ((char *)iSWAP_temp)[i] = ((char *)(b))[i]; \
  330. ((char *)(b))[i] = ((char *)(a))[i]; \
  331. ((char *)(a))[i] = ((char *)(iSWAP_temp))[i]; \
  332. } \
  333. } \
  334. }
  335. /* #define opt_memcpy(a,b,n) memcpy((a),(b),(n)) */
  336. /* For small values of n, a loop seems faster for memcpy.
  337. See http://www.mail-archive.com/numpy-discussion@scipy.org/msg06639.html
  338. and other messages in the same thread
  339. F. Alted 2008-02-11
  340. */
  341. static void
  342. opt_memcpy(char *s1, char *s2, size_t len)
  343. {
  344. size_t i;
  345. if (len < SMALL_STRING) {
  346. for (i=0; i < len; i++) {
  347. s1[i] = s2[i];
  348. }
  349. }
  350. else {
  351. memcpy(s1, s2, len);
  352. }
  353. }
  354. static int
  355. opt_strncmp(char *a, char *b, size_t n)
  356. {
  357. size_t i;
  358. unsigned char c, d;
  359. for (i = 0; i < n; i++) {
  360. c = a[i]; d = b[i];
  361. if (c != d) return c - d;
  362. }
  363. return 0;
  364. }
  365. int keysort_f64(npy_float64 *start1, char *start2, npy_intp num, int ts)
  366. {
  367. npy_float64 *pl = start1;
  368. char *ipl = start2;
  369. npy_float64 *pr = start1 + num - 1;
  370. char *ipr = start2 + (num - 1) * ts;
  371. npy_float64 vp;
  372. char *ivp;
  373. npy_float64 SWAP_temp;
  374. char *iSWAP_temp;
  375. npy_float64 *stack[PYA_QS_STACK], **sptr = stack;
  376. char *istack[PYA_QS_STACK], **isptr = istack;
  377. npy_float64 *pm, *pi, *pj, *pt;
  378. char *ipm, *ipi, *ipj, *ipt;
  379. npy_intp i;
  380. ivp = malloc(ts);
  381. iSWAP_temp = malloc(ts);
  382. for(;;) {
  383. while ((pr - pl) > SMALL_QUICKSORT) {
  384. /* quicksort partition */
  385. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  386. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  387. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  388. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  389. vp = *pm;
  390. pi = pl; ipi = ipl;
  391. pj = pr - 1; ipj = ipr - ts;
  392. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  393. for(;;) {
  394. do { ++pi; ipi += ts; } while (*pi < vp);
  395. do { --pj; ipj -= ts; } while (vp < *pj);
  396. if (pi >= pj) break;
  397. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  398. }
  399. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  400. /* push largest partition on stack */
  401. if (pi - pl < pr - pi) {
  402. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  403. *sptr++ = pr; *isptr++ = ipr;
  404. pr = pi - 1; ipr = ipi - ts;
  405. }else{
  406. *sptr++ = pl; *isptr++ = ipl;
  407. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  408. pl = pi + 1; ipl = ipi + ts;
  409. }
  410. }
  411. /* insertion sort */
  412. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  413. vp = *pi; opt_memcpy(ivp, ipi, ts);
  414. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  415. pj > pl && vp < *pt;) {
  416. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  417. }
  418. *pj = vp; opt_memcpy(ipj, ivp, ts);
  419. }
  420. if (sptr == stack) break;
  421. pr = *(--sptr); ipr = *(--isptr);
  422. pl = *(--sptr); ipl = *(--isptr);
  423. }
  424. free(ivp);
  425. free(iSWAP_temp);
  426. return 0;
  427. }
  428. int keysort_f32(npy_float32 *start1, char *start2, npy_intp num, int ts)
  429. {
  430. npy_float32 *pl = start1;
  431. char *ipl = start2;
  432. npy_float32 *pr = start1 + num - 1;
  433. char *ipr = start2 + (num - 1) * ts;
  434. npy_float32 vp;
  435. char *ivp;
  436. npy_float32 SWAP_temp;
  437. char *iSWAP_temp;
  438. npy_float32 *stack[PYA_QS_STACK], **sptr = stack;
  439. char *istack[PYA_QS_STACK], **isptr = istack;
  440. npy_float32 *pm, *pi, *pj, *pt;
  441. char *ipm, *ipi, *ipj, *ipt;
  442. npy_intp i;
  443. ivp = malloc(ts);
  444. iSWAP_temp = malloc(ts);
  445. for(;;) {
  446. while ((pr - pl) > SMALL_QUICKSORT) {
  447. /* quicksort partition */
  448. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  449. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  450. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  451. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  452. vp = *pm;
  453. pi = pl; ipi = ipl;
  454. pj = pr - 1; ipj = ipr - ts;
  455. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  456. for(;;) {
  457. do { ++pi; ipi += ts; } while (*pi < vp);
  458. do { --pj; ipj -= ts; } while (vp < *pj);
  459. if (pi >= pj) break;
  460. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  461. }
  462. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  463. /* push largest partition on stack */
  464. if (pi - pl < pr - pi) {
  465. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  466. *sptr++ = pr; *isptr++ = ipr;
  467. pr = pi - 1; ipr = ipi - ts;
  468. }else{
  469. *sptr++ = pl; *isptr++ = ipl;
  470. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  471. pl = pi + 1; ipl = ipi + ts;
  472. }
  473. }
  474. /* insertion sort */
  475. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  476. vp = *pi; opt_memcpy(ivp, ipi, ts);
  477. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  478. pj > pl && vp < *pt;) {
  479. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  480. }
  481. *pj = vp; opt_memcpy(ipj, ivp, ts);
  482. }
  483. if (sptr == stack) break;
  484. pr = *(--sptr); ipr = *(--isptr);
  485. pl = *(--sptr); ipl = *(--isptr);
  486. }
  487. free(ivp);
  488. free(iSWAP_temp);
  489. return 0;
  490. }
  491. int keysort_i64(npy_int64 *start1, char *start2, npy_intp num, int ts)
  492. {
  493. npy_int64 *pl = start1;
  494. char *ipl = start2;
  495. npy_int64 *pr = start1 + num - 1;
  496. char *ipr = start2 + (num - 1) * ts;
  497. npy_int64 vp;
  498. char *ivp;
  499. npy_int64 SWAP_temp;
  500. char *iSWAP_temp;
  501. npy_int64 *stack[PYA_QS_STACK], **sptr = stack;
  502. char *istack[PYA_QS_STACK], **isptr = istack;
  503. npy_int64 *pm, *pi, *pj, *pt;
  504. char *ipm, *ipi, *ipj, *ipt;
  505. npy_intp i;
  506. ivp = malloc(ts);
  507. iSWAP_temp = malloc(ts);
  508. for(;;) {
  509. while ((pr - pl) > SMALL_QUICKSORT) {
  510. /* quicksort partition */
  511. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  512. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  513. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  514. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  515. vp = *pm;
  516. pi = pl; ipi = ipl;
  517. pj = pr - 1; ipj = ipr - ts;
  518. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  519. for(;;) {
  520. do { ++pi; ipi += ts; } while (*pi < vp);
  521. do { --pj; ipj -= ts; } while (vp < *pj);
  522. if (pi >= pj) break;
  523. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  524. }
  525. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  526. /* push largest partition on stack */
  527. if (pi - pl < pr - pi) {
  528. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  529. *sptr++ = pr; *isptr++ = ipr;
  530. pr = pi - 1; ipr = ipi - ts;
  531. }else{
  532. *sptr++ = pl; *isptr++ = ipl;
  533. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  534. pl = pi + 1; ipl = ipi + ts;
  535. }
  536. }
  537. /* insertion sort */
  538. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  539. vp = *pi; opt_memcpy(ivp, ipi, ts);
  540. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  541. pj > pl && vp < *pt;) {
  542. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  543. }
  544. *pj = vp; opt_memcpy(ipj, ivp, ts);
  545. }
  546. if (sptr == stack) break;
  547. pr = *(--sptr); ipr = *(--isptr);
  548. pl = *(--sptr); ipl = *(--isptr);
  549. }
  550. free(ivp);
  551. free(iSWAP_temp);
  552. return 0;
  553. }
  554. int keysort_u64(npy_uint64 *start1, char *start2, npy_intp num, int ts)
  555. {
  556. npy_uint64 *pl = start1;
  557. char *ipl = start2;
  558. npy_uint64 *pr = start1 + num - 1;
  559. char *ipr = start2 + (num - 1) * ts;
  560. npy_uint64 vp;
  561. char *ivp;
  562. npy_uint64 SWAP_temp;
  563. char *iSWAP_temp;
  564. npy_uint64 *stack[PYA_QS_STACK], **sptr = stack;
  565. char *istack[PYA_QS_STACK], **isptr = istack;
  566. npy_uint64 *pm, *pi, *pj, *pt;
  567. char *ipm, *ipi, *ipj, *ipt;
  568. npy_intp i;
  569. ivp = malloc(ts);
  570. iSWAP_temp = malloc(ts);
  571. for(;;) {
  572. while ((pr - pl) > SMALL_QUICKSORT) {
  573. /* quicksort partition */
  574. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  575. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  576. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  577. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  578. vp = *pm;
  579. pi = pl; ipi = ipl;
  580. pj = pr - 1; ipj = ipr - ts;
  581. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  582. for(;;) {
  583. do { ++pi; ipi += ts; } while (*pi < vp);
  584. do { --pj; ipj -= ts; } while (vp < *pj);
  585. if (pi >= pj) break;
  586. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  587. }
  588. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  589. /* push largest partition on stack */
  590. if (pi - pl < pr - pi) {
  591. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  592. *sptr++ = pr; *isptr++ = ipr;
  593. pr = pi - 1; ipr = ipi - ts;
  594. }else{
  595. *sptr++ = pl; *isptr++ = ipl;
  596. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  597. pl = pi + 1; ipl = ipi + ts;
  598. }
  599. }
  600. /* insertion sort */
  601. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  602. vp = *pi; opt_memcpy(ivp, ipi, ts);
  603. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  604. pj > pl && vp < *pt;) {
  605. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  606. }
  607. *pj = vp; opt_memcpy(ipj, ivp, ts);
  608. }
  609. if (sptr == stack) break;
  610. pr = *(--sptr); ipr = *(--isptr);
  611. pl = *(--sptr); ipl = *(--isptr);
  612. }
  613. free(ivp);
  614. free(iSWAP_temp);
  615. return 0;
  616. }
  617. int keysort_i32(npy_int32 *start1, char *start2, npy_intp num, int ts)
  618. {
  619. npy_int32 *pl = start1;
  620. char *ipl = start2;
  621. npy_int32 *pr = start1 + num - 1;
  622. char *ipr = start2 + (num - 1) * ts;
  623. npy_int32 vp;
  624. char *ivp;
  625. npy_int32 SWAP_temp;
  626. char *iSWAP_temp;
  627. npy_int32 *stack[PYA_QS_STACK], **sptr = stack;
  628. char *istack[PYA_QS_STACK], **isptr = istack;
  629. npy_int32 *pm, *pi, *pj, *pt;
  630. char *ipm, *ipi, *ipj, *ipt;
  631. npy_intp i;
  632. ivp = malloc(ts);
  633. iSWAP_temp = malloc(ts);
  634. for(;;) {
  635. while ((pr - pl) > SMALL_QUICKSORT) {
  636. /* quicksort partition */
  637. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  638. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  639. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  640. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  641. vp = *pm;
  642. pi = pl; ipi = ipl;
  643. pj = pr - 1; ipj = ipr - ts;
  644. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  645. for(;;) {
  646. do { ++pi; ipi += ts; } while (*pi < vp);
  647. do { --pj; ipj -= ts; } while (vp < *pj);
  648. if (pi >= pj) break;
  649. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  650. }
  651. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  652. /* push largest partition on stack */
  653. if (pi - pl < pr - pi) {
  654. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  655. *sptr++ = pr; *isptr++ = ipr;
  656. pr = pi - 1; ipr = ipi - ts;
  657. }else{
  658. *sptr++ = pl; *isptr++ = ipl;
  659. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  660. pl = pi + 1; ipl = ipi + ts;
  661. }
  662. }
  663. /* insertion sort */
  664. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  665. vp = *pi; opt_memcpy(ivp, ipi, ts);
  666. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  667. pj > pl && vp < *pt;) {
  668. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  669. }
  670. *pj = vp; opt_memcpy(ipj, ivp, ts);
  671. }
  672. if (sptr == stack) break;
  673. pr = *(--sptr); ipr = *(--isptr);
  674. pl = *(--sptr); ipl = *(--isptr);
  675. }
  676. free(ivp);
  677. free(iSWAP_temp);
  678. return 0;
  679. }
  680. int keysort_u32(npy_uint32 *start1, char *start2, npy_intp num, int ts)
  681. {
  682. npy_uint32 *pl = start1;
  683. char *ipl = start2;
  684. npy_uint32 *pr = start1 + num - 1;
  685. char *ipr = start2 + (num - 1) * ts;
  686. npy_uint32 vp;
  687. char *ivp;
  688. npy_uint32 SWAP_temp;
  689. char *iSWAP_temp;
  690. npy_uint32 *stack[PYA_QS_STACK], **sptr = stack;
  691. char *istack[PYA_QS_STACK], **isptr = istack;
  692. npy_uint32 *pm, *pi, *pj, *pt;
  693. char *ipm, *ipi, *ipj, *ipt;
  694. npy_intp i;
  695. ivp = malloc(ts);
  696. iSWAP_temp = malloc(ts);
  697. for(;;) {
  698. while ((pr - pl) > SMALL_QUICKSORT) {
  699. /* quicksort partition */
  700. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  701. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  702. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  703. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  704. vp = *pm;
  705. pi = pl; ipi = ipl;
  706. pj = pr - 1; ipj = ipr - ts;
  707. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  708. for(;;) {
  709. do { ++pi; ipi += ts; } while (*pi < vp);
  710. do { --pj; ipj -= ts; } while (vp < *pj);
  711. if (pi >= pj) break;
  712. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  713. }
  714. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  715. /* push largest partition on stack */
  716. if (pi - pl < pr - pi) {
  717. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  718. *sptr++ = pr; *isptr++ = ipr;
  719. pr = pi - 1; ipr = ipi - ts;
  720. }else{
  721. *sptr++ = pl; *isptr++ = ipl;
  722. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  723. pl = pi + 1; ipl = ipi + ts;
  724. }
  725. }
  726. /* insertion sort */
  727. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  728. vp = *pi; opt_memcpy(ivp, ipi, ts);
  729. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  730. pj > pl && vp < *pt;) {
  731. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  732. }
  733. *pj = vp; opt_memcpy(ipj, ivp, ts);
  734. }
  735. if (sptr == stack) break;
  736. pr = *(--sptr); ipr = *(--isptr);
  737. pl = *(--sptr); ipl = *(--isptr);
  738. }
  739. free(ivp);
  740. free(iSWAP_temp);
  741. return 0;
  742. }
  743. int keysort_i16(npy_int16 *start1, char *start2, npy_intp num, int ts)
  744. {
  745. npy_int16 *pl = start1;
  746. char *ipl = start2;
  747. npy_int16 *pr = start1 + num - 1;
  748. char *ipr = start2 + (num - 1) * ts;
  749. npy_int16 vp;
  750. char *ivp;
  751. npy_int16 SWAP_temp;
  752. char *iSWAP_temp;
  753. npy_int16 *stack[PYA_QS_STACK], **sptr = stack;
  754. char *istack[PYA_QS_STACK], **isptr = istack;
  755. npy_int16 *pm, *pi, *pj, *pt;
  756. char *ipm, *ipi, *ipj, *ipt;
  757. npy_intp i;
  758. ivp = malloc(ts);
  759. iSWAP_temp = malloc(ts);
  760. for(;;) {
  761. while ((pr - pl) > SMALL_QUICKSORT) {
  762. /* quicksort partition */
  763. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  764. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  765. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  766. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  767. vp = *pm;
  768. pi = pl; ipi = ipl;
  769. pj = pr - 1; ipj = ipr - ts;
  770. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  771. for(;;) {
  772. do { ++pi; ipi += ts; } while (*pi < vp);
  773. do { --pj; ipj -= ts; } while (vp < *pj);
  774. if (pi >= pj) break;
  775. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  776. }
  777. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  778. /* push largest partition on stack */
  779. if (pi - pl < pr - pi) {
  780. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  781. *sptr++ = pr; *isptr++ = ipr;
  782. pr = pi - 1; ipr = ipi - ts;
  783. }else{
  784. *sptr++ = pl; *isptr++ = ipl;
  785. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  786. pl = pi + 1; ipl = ipi + ts;
  787. }
  788. }
  789. /* insertion sort */
  790. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  791. vp = *pi; opt_memcpy(ivp, ipi, ts);
  792. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  793. pj > pl && vp < *pt;) {
  794. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  795. }
  796. *pj = vp; opt_memcpy(ipj, ivp, ts);
  797. }
  798. if (sptr == stack) break;
  799. pr = *(--sptr); ipr = *(--isptr);
  800. pl = *(--sptr); ipl = *(--isptr);
  801. }
  802. free(ivp);
  803. free(iSWAP_temp);
  804. return 0;
  805. }
  806. int keysort_u16(npy_uint16 *start1, char *start2, npy_intp num, int ts)
  807. {
  808. npy_uint16 *pl = start1;
  809. char *ipl = start2;
  810. npy_uint16 *pr = start1 + num - 1;
  811. char *ipr = start2 + (num - 1) * ts;
  812. npy_uint16 vp;
  813. char *ivp;
  814. npy_uint16 SWAP_temp;
  815. char *iSWAP_temp;
  816. npy_uint16 *stack[PYA_QS_STACK], **sptr = stack;
  817. char *istack[PYA_QS_STACK], **isptr = istack;
  818. npy_uint16 *pm, *pi, *pj, *pt;
  819. char *ipm, *ipi, *ipj, *ipt;
  820. npy_intp i;
  821. ivp = malloc(ts);
  822. iSWAP_temp = malloc(ts);
  823. for(;;) {
  824. while ((pr - pl) > SMALL_QUICKSORT) {
  825. /* quicksort partition */
  826. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  827. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  828. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  829. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  830. vp = *pm;
  831. pi = pl; ipi = ipl;
  832. pj = pr - 1; ipj = ipr - ts;
  833. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  834. for(;;) {
  835. do { ++pi; ipi += ts; } while (*pi < vp);
  836. do { --pj; ipj -= ts; } while (vp < *pj);
  837. if (pi >= pj) break;
  838. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  839. }
  840. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  841. /* push largest partition on stack */
  842. if (pi - pl < pr - pi) {
  843. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  844. *sptr++ = pr; *isptr++ = ipr;
  845. pr = pi - 1; ipr = ipi - ts;
  846. }else{
  847. *sptr++ = pl; *isptr++ = ipl;
  848. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  849. pl = pi + 1; ipl = ipi + ts;
  850. }
  851. }
  852. /* insertion sort */
  853. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  854. vp = *pi; opt_memcpy(ivp, ipi, ts);
  855. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  856. pj > pl && vp < *pt;) {
  857. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  858. }
  859. *pj = vp; opt_memcpy(ipj, ivp, ts);
  860. }
  861. if (sptr == stack) break;
  862. pr = *(--sptr); ipr = *(--isptr);
  863. pl = *(--sptr); ipl = *(--isptr);
  864. }
  865. free(ivp);
  866. free(iSWAP_temp);
  867. return 0;
  868. }
  869. int keysort_i8(npy_int8 *start1, char *start2, npy_intp num, int ts)
  870. {
  871. npy_int8 *pl = start1;
  872. char *ipl = start2;
  873. npy_int8 *pr = start1 + num - 1;
  874. char *ipr = start2 + (num - 1) * ts;
  875. npy_int8 vp;
  876. char *ivp;
  877. npy_int8 SWAP_temp;
  878. char *iSWAP_temp;
  879. npy_int8 *stack[PYA_QS_STACK], **sptr = stack;
  880. char *istack[PYA_QS_STACK], **isptr = istack;
  881. npy_int8 *pm, *pi, *pj, *pt;
  882. char *ipm, *ipi, *ipj, *ipt;
  883. npy_intp i;
  884. ivp = malloc(ts);
  885. iSWAP_temp = malloc(ts);
  886. for(;;) {
  887. while ((pr - pl) > SMALL_QUICKSORT) {
  888. /* quicksort partition */
  889. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  890. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  891. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  892. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  893. vp = *pm;
  894. pi = pl; ipi = ipl;
  895. pj = pr - 1; ipj = ipr - ts;
  896. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  897. for(;;) {
  898. do { ++pi; ipi += ts; } while (*pi < vp);
  899. do { --pj; ipj -= ts; } while (vp < *pj);
  900. if (pi >= pj) break;
  901. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  902. }
  903. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  904. /* push largest partition on stack */
  905. if (pi - pl < pr - pi) {
  906. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  907. *sptr++ = pr; *isptr++ = ipr;
  908. pr = pi - 1; ipr = ipi - ts;
  909. }else{
  910. *sptr++ = pl; *isptr++ = ipl;
  911. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  912. pl = pi + 1; ipl = ipi + ts;
  913. }
  914. }
  915. /* insertion sort */
  916. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  917. vp = *pi; opt_memcpy(ivp, ipi, ts);
  918. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  919. pj > pl && vp < *pt;) {
  920. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  921. }
  922. *pj = vp; opt_memcpy(ipj, ivp, ts);
  923. }
  924. if (sptr == stack) break;
  925. pr = *(--sptr); ipr = *(--isptr);
  926. pl = *(--sptr); ipl = *(--isptr);
  927. }
  928. free(ivp);
  929. free(iSWAP_temp);
  930. return 0;
  931. }
  932. int keysort_u8(npy_uint8 *start1, char *start2, npy_intp num, int ts)
  933. {
  934. npy_uint8 *pl = start1;
  935. char *ipl = start2;
  936. npy_uint8 *pr = start1 + num - 1;
  937. char *ipr = start2 + (num - 1) * ts;
  938. npy_uint8 vp;
  939. char *ivp;
  940. npy_uint8 SWAP_temp;
  941. char *iSWAP_temp;
  942. npy_uint8 *stack[PYA_QS_STACK], **sptr = stack;
  943. char *istack[PYA_QS_STACK], **isptr = istack;
  944. npy_uint8 *pm, *pi, *pj, *pt;
  945. char *ipm, *ipi, *ipj, *ipt;
  946. npy_intp i;
  947. ivp = malloc(ts);
  948. iSWAP_temp = malloc(ts);
  949. for(;;) {
  950. while ((pr - pl) > SMALL_QUICKSORT) {
  951. /* quicksort partition */
  952. pm = pl + ((pr - pl) >> 1); ipm = ipl + (((ipr - ipl)/ts) >> 1)*ts;
  953. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  954. if (*pr < *pm) { SWAP(*pr, *pm); iSWAP(ipr, ipm); }
  955. if (*pm < *pl) { SWAP(*pm, *pl); iSWAP(ipm, ipl); }
  956. vp = *pm;
  957. pi = pl; ipi = ipl;
  958. pj = pr - 1; ipj = ipr - ts;
  959. SWAP(*pm, *pj); iSWAP(ipm, ipj);
  960. for(;;) {
  961. do { ++pi; ipi += ts; } while (*pi < vp);
  962. do { --pj; ipj -= ts; } while (vp < *pj);
  963. if (pi >= pj) break;
  964. SWAP(*pi, *pj); iSWAP(ipi, ipj);
  965. }
  966. SWAP(*pi, *(pr-1)); iSWAP(ipi, ipr-ts);
  967. /* push largest partition on stack */
  968. if (pi - pl < pr - pi) {
  969. *sptr++ = pi + 1; *isptr++ = ipi + ts;
  970. *sptr++ = pr; *isptr++ = ipr;
  971. pr = pi - 1; ipr = ipi - ts;
  972. }else{
  973. *sptr++ = pl; *isptr++ = ipl;
  974. *sptr++ = pi - 1; *isptr++ = ipi - ts;
  975. pl = pi + 1; ipl = ipi + ts;
  976. }
  977. }
  978. /* insertion sort */
  979. for(pi = pl + 1, ipi = ipl + ts; pi <= pr; ++pi, ipi += ts) {
  980. vp = *pi; opt_memcpy(ivp, ipi, ts);
  981. for(pj = pi, pt = pi - 1, ipj = ipi, ipt = ipi - ts; \
  982. pj > pl && vp < *pt;) {
  983. *pj-- = *pt--; opt_memcpy(ipj, ipt, ts); ipj -= ts; ipt -= ts;
  984. }
  985. *pj = vp; opt_memcpy(ipj, ivp, ts);
  986. }
  987. if (sptr == stack) break;
  988. pr = *(--sptr); ipr = *(--isptr);
  989. pl = *(--sptr); ipl = *(--isptr);
  990. }
  991. free(ivp);
  992. free(iSWAP_temp);
  993. return 0;
  994. }
  995. int keysort_S(char *start1, int ss, char *start2, npy_intp num, int ts)
  996. {
  997. char *pl = start1;
  998. char *ipl = start2;
  999. char *pr = start1 + (num - 1) * ss;
  1000. char *ipr = start2 + (num - 1) * ts;
  1001. char *vp;
  1002. char *ivp;
  1003. char *iSWAP_temp;
  1004. char *stack[PYA_QS_STACK], **sptr = stack;
  1005. char *istack[PYA_QS_STACK], **isptr = istack;
  1006. char *pm, *pi, *pj, *pt;
  1007. char *ipm, *ipi, *ipj, *ipt;
  1008. npy_intp i;
  1009. vp = malloc(ss); ivp = malloc(ts);
  1010. iSWAP_temp = malloc(ts);
  1011. for(;;) {
  1012. while ((pr - pl) > SMALL_QUICKSORT*ss) {
  1013. /* quicksort partition */
  1014. pm = pl + (((pr-pl)/ss) >> 1)*ss; ipm = ipl + (((ipr-ipl)/ts) >> 1)*ts;
  1015. if (opt_strncmp(pm, pl, ss) < 0) { sSWAP(pm, pl, ss); iSWAP(ipm, ipl); }
  1016. if (opt_strncmp(pr, pm, ss) < 0) { sSWAP(pr, pm, ss); iSWAP(ipr, ipm); }
  1017. if (opt_strncmp(pm, pl, ss) < 0) { sSWAP(pm, pl, ss); iSWAP(ipm, ipl); }
  1018. opt_memcpy(vp, pm, ss);
  1019. pi = pl; ipi = ipl;
  1020. pj = pr - ss; ipj = ipr - ts;
  1021. sSWAP(pm, pj, ss); iSWAP(ipm, ipj);
  1022. for(;;) {
  1023. do { pi += ss; ipi += ts; } while (opt_strncmp(pi, vp, ss) < 0);
  1024. do { pj -= ss; ipj -= ts; } while (opt_strncmp(vp, pj, ss) < 0);
  1025. if (pi >= pj) break;
  1026. sSWAP(pi, pj, ss); iSWAP(ipi, ipj);
  1027. }
  1028. sSWAP(pi, pr-ss, ss); iSWAP(ipi, ipr-ts);
  1029. /* push largest partition on stack */
  1030. if (pi - pl < pr - pi) {
  1031. *sptr++ = pi + ss; *isptr++ = ipi + ts;
  1032. *sptr++ = pr; *isptr++ = ipr;
  1033. pr = pi - ss; ipr = ipi - ts;
  1034. }else{
  1035. *sptr++ = pl; *isptr++ = ipl;
  1036. *sptr++ = pi - ss; *isptr++ = ipi - ts;
  1037. pl = pi + ss; ipl = ipi + ts;
  1038. }
  1039. }
  1040. /* insertion sort */
  1041. for(pi = pl + ss, ipi = ipl + ts; pi <= pr; pi += ss, ipi += ts) {
  1042. opt_memcpy(vp, pi, ss); opt_memcpy(ivp, ipi, ts);
  1043. for(pj = pi, pt = pi - ss, ipj = ipi, ipt = ipi - ts; \
  1044. pj > pl && opt_strncmp(vp, pt, ss) < 0;) {
  1045. opt_memcpy(pj, pt, ss); opt_memcpy(ipj, ipt, ts);
  1046. pj -= ss; pt -= ss; ipj -= ts; ipt -= ts;
  1047. }
  1048. opt_memcpy(pj, vp, ss); opt_memcpy(ipj, ivp, ts);
  1049. }
  1050. if (sptr == stack) break;
  1051. pr = *(--sptr); ipr = *(--isptr);
  1052. pl = *(--sptr); ipl = *(--isptr);
  1053. }
  1054. free(vp); free(ivp);
  1055. free(iSWAP_temp);
  1056. return 0;
  1057. }