PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/Build/source/texk/makeindexk/sortid.c

https://bitbucket.org/preining/tex-live
C | 310 lines | 182 code | 43 blank | 85 comment | 86 complexity | fd2db881fae86503fa8b818e9e1fc76a MD5 | raw file
  1. /*
  2. *
  3. * This file is part of
  4. * MakeIndex - A formatter and format independent index processor
  5. *
  6. * Copyright (C) 1998-2012 by the TeX Live project.
  7. * Copyright (C) 1989 by Chen & Harrison International Systems, Inc.
  8. * Copyright (C) 1988 by Olivetti Research Center
  9. * Copyright (C) 1987 by Regents of the University of California
  10. *
  11. * Author:
  12. * Pehong Chen
  13. * Chen & Harrison International Systems, Inc.
  14. * Palo Alto, California
  15. * USA
  16. *
  17. * Contributors:
  18. * Please refer to the CONTRIB file that comes with this release
  19. * for a list of people who have contributed to this and/or previous
  20. * release(s) of MakeIndex.
  21. *
  22. * All rights reserved by the copyright holders. See the copyright
  23. * notice distributed with this software for a complete description of
  24. * the conditions under which it is made available.
  25. *
  26. */
  #include "mkind.h"
  #include "qsort.h"
  #include <stdlib.h>
  #include <string.h>
  #ifdef HAVE_LOCALE_H
  #include <locale.h>
  #endif
  /* Count of compare() calls; sort_idx() zeroes it before sorting and
     reports it once the sort is done. */
  32. static long idx_gc;
  /* Forward declarations of the file-local comparison helpers. */
  33. static int check_mixsym (const char *x, const char *y); /* two symbol-led strings */
  34. static int compare (const void *va, const void *vb); /* callback handed to qqsort() */
  35. static int compare_one (const char *x, const char *y); /* one pair of key fields */
  36. static int compare_page (const FIELD_PTR *a, const FIELD_PTR *b); /* page-number tie-break */
  37. static int compare_string (const unsigned char *a, const unsigned char *b); /* letter-led strings */
  /* Case-based tie-break between two strings; direction chosen by `option`. */
  38. static int new_strcmp (const unsigned char *a, const unsigned char *b,
  39. int option);
  40. void
  41. sort_idx(void)
  42. {
  43. #ifdef HAVE_SETLOCALE
  44. char *prev_locale;
  45. #endif
  46. MESSAGE("Sorting entries...");
  47. #ifdef HAVE_SETLOCALE
  48. prev_locale = setlocale(LC_COLLATE, NULL);
  49. setlocale(LC_COLLATE, "");
  50. #endif
  51. idx_dc = 0;
  52. idx_gc = 0L;
  53. qqsort(idx_key, idx_gt, sizeof(FIELD_PTR), compare);
  54. #ifdef HAVE_SETLOCALE
  55. setlocale(LC_COLLATE, prev_locale);
  56. #endif
  57. MESSAGE1("done (%ld comparisons).\n", idx_gc);
  58. }
  59. static int
  60. compare(const void *va, const void *vb)
  61. {
  62. const FIELD_PTR *a = va;
  63. const FIELD_PTR *b = vb;
  64. int i;
  65. int dif;
  66. idx_gc++;
  67. IDX_DOT(CMP_MAX);
  68. for (i = 0; i < FIELD_MAX; i++) {
  69. /* compare the sort fields */
  70. if ((dif = compare_one((*a)->sf[i], (*b)->sf[i])) != 0)
  71. break;
  72. /* compare the actual fields */
  73. if ((dif = compare_one((*a)->af[i], (*b)->af[i])) != 0)
  74. break;
  75. }
  76. /* both key aggregates are identical, compare page numbers */
  77. if (i == FIELD_MAX)
  78. dif = compare_page(a, b);
  79. return (dif);
  80. }
  81. static int
  82. compare_one(const char *x, const char *y)
  83. {
  84. int m;
  85. int n;
  86. if ((x[0] == NUL) && (y[0] == NUL))
  87. return (0);
  88. if (x[0] == NUL)
  89. return (-1);
  90. if (y[0] == NUL)
  91. return (1);
  92. m = group_type(x);
  93. n = group_type(y);
  94. /* both pure digits */
  95. if ((m >= 0) && (n >= 0))
  96. return (m - n);
  97. /* x digit, y non-digit */
  98. if (m >= 0) {
  99. if (german_sort)
  100. return (1);
  101. else
  102. return ((n == -1) ? 1 : -1);
  103. }
  104. /* x non-digit, y digit */
  105. if (n >= 0) {
  106. if (german_sort)
  107. return (-1);
  108. else
  109. return ((m == -1) ? -1 : 1);
  110. }
  111. /* strings started with a symbol (including digit) */
  112. if ((m == SYMBOL) && (n == SYMBOL))
  113. return (check_mixsym(x, y));
  114. /* x symbol, y non-symbol */
  115. if (m == SYMBOL)
  116. return (-1);
  117. /* x non-symbol, y symbol */
  118. if (n == SYMBOL)
  119. return (1);
  120. /* strings with a leading letter, the ALPHA type */
  121. return (compare_string((const unsigned char*)x, (const unsigned char*)y));
  122. }
  123. static int
  124. check_mixsym(const char *x, const char *y)
  125. {
  126. int m;
  127. int n;
  128. m = ISDIGIT(x[0]);
  129. n = ISDIGIT(y[0]);
  130. if (m && !n)
  131. return (1);
  132. if (!m && n)
  133. return (-1);
  134. return (locale_sort ? strcoll(x, y) : strcmp(x, y));
  135. }
  136. static int
  137. compare_string(const unsigned char *a, const unsigned char *b)
  138. {
  139. int i = 0;
  140. int j = 0;
  141. int al;
  142. int bl;
  143. if (locale_sort) return strcoll((const char *)a, (const char *)b);
  144. while ((a[i] != NUL) || (b[j] != NUL)) {
  145. if (a[i] == NUL)
  146. return (-1);
  147. if (b[j] == NUL)
  148. return (1);
  149. if (letter_ordering) {
  150. if (a[i] == SPC)
  151. i++;
  152. if (b[j] == SPC)
  153. j++;
  154. }
  155. al = TOLOWER(a[i]);
  156. bl = TOLOWER(b[j]);
  157. if (al != bl)
  158. return (al - bl);
  159. i++;
  160. j++;
  161. }
  162. if (german_sort)
  163. return (new_strcmp(a, b, GERMAN));
  164. else
  165. return (strcmp((const char*)a, (const char*)b));
  166. }
  167. static int
  168. compare_page(const FIELD_PTR *a, const FIELD_PTR *b)
  169. {
  170. int m = 0;
  171. short i = 0;
  172. while ((i < (*a)->count) && (i < (*b)->count) &&
  173. ((m = (*a)->npg[i] - (*b)->npg[i]) == 0))
  174. {
  175. i++;
  176. }
  177. if (m == 0)
  178. { /* common leading page numbers match */
  179. if ((i == (*a)->count) && (i == (*b)->count))
  180. { /* all page numbers match */
  181. /***********************************************************
  182. We have identical entries, except possibly in encap fields.
  183. The ordering is tricky here. Consider the following input
  184. sequence of index names, encaps, and page numbers:
  185. foo|( 2
  186. foo|) 6
  187. foo|( 6
  188. foo|) 10
  189. This might legimately occur when a page range ends, and
  190. subsequently, a new range starts, on the same page. If we
  191. just order by range_open and range_close (here, parens),
  192. then we will produce
  193. foo|( 2
  194. foo|( 6
  195. foo|) 6
  196. foo|) 10
  197. This will later generate the index entry
  198. foo, 2--6, \({6}, 10
  199. which is not only wrong, but has also introduced an illegal
  200. LaTeX macro, \({6}, because the merging step treated this
  201. like a \see{6} entry.
  202. The solution is to preserve the original input order, which
  203. we can do by treating range_open and range_close as equal,
  204. and then ordering by input line number. This will then
  205. generate the correct index entry
  206. foo, 2--10
  207. Ordering inconsistencies from missing range open or close
  208. entries, or mixing roman and arabic page numbers, will be
  209. detected later.
  210. ***********************************************************/
  211. #define isrange(c) ( ((c) == idx_ropen) || ((c) == idx_rclose) )
  212. /* Order two range values by input line number */
  213. if (isrange(*(*a)->encap) && isrange(*(*b)->encap))
  214. m = (*a)->lc - (*b)->lc;
  215. /* Handle identical encap fields; neither is a range delimiter */
  216. else if (STREQ((*a)->encap, (*b)->encap))
  217. {
  218. /* If neither are yet marked duplicate, mark the second
  219. of them to be ignored. */
  220. if (((*a)->type != DUPLICATE) &&
  221. ((*b)->type != DUPLICATE))
  222. (*b)->type = DUPLICATE;
  223. /* leave m == 0 to show equality */
  224. }
  225. /* Encap fields differ: only one may be a range delimiter, */
  226. /* or else neither of them is. If either of them is a range */
  227. /* delimiter, order by input line number; otherwise, order */
  228. /* by name. */
  229. else
  230. {
  231. if ( isrange(*(*a)->encap) || isrange(*(*b)->encap) )
  232. m = (*a)->lc - (*b)->lc; /* order by input line number */
  233. else /* order non-range items by */
  234. /* their encap strings */
  235. m = compare_string((const unsigned char*)((*a)->encap),
  236. (const unsigned char*)((*b)->encap));
  237. }
  238. }
  239. else if ((i == (*a)->count) && (i < (*b)->count))
  240. m = -1;
  241. else if ((i < (*a)->count) && (i == (*b)->count))
  242. m = 1;
  243. }
  244. return (m);
  245. }
  246. static int
  247. new_strcmp(const unsigned char *s1, const unsigned char *s2, int option)
  248. {
  249. int i;
  250. i = 0;
  251. while (s1[i] == s2[i])
  252. if (s1[i++] == NUL)
  253. return (0);
  254. if (option) /* ASCII */
  255. return (isupper(s1[i]) ? -1 : 1);
  256. else /* GERMAN */
  257. return (isupper(s1[i]) ? 1 : -1);
  258. }