/Build/source/texk/makeindexk/sortid.c
C | 310 lines | 182 code | 43 blank | 85 comment | 86 complexity | fd2db881fae86503fa8b818e9e1fc76a MD5 | raw file
- /*
- *
- * This file is part of
- * MakeIndex - A formatter and format independent index processor
- *
- * Copyright (C) 1998-2012 by the TeX Live project.
- * Copyright (C) 1989 by Chen & Harrison International Systems, Inc.
- * Copyright (C) 1988 by Olivetti Research Center
- * Copyright (C) 1987 by Regents of the University of California
- *
- * Author:
- * Pehong Chen
- * Chen & Harrison International Systems, Inc.
- * Palo Alto, California
- * USA
- *
- * Contributors:
- * Please refer to the CONTRIB file that comes with this release
- * for a list of people who have contributed to this and/or previous
- * release(s) of MakeIndex.
- *
- * All rights reserved by the copyright holders. See the copyright
- * notice distributed with this software for a complete description of
- * the conditions under which it is made available.
- *
- */
- #include "mkind.h"
- #include "qsort.h"
- #ifdef HAVE_LOCALE_H
- #include <locale.h>
- #endif
- static long idx_gc;
- static int check_mixsym (const char *x, const char *y);
- static int compare (const void *va, const void *vb);
- static int compare_one (const char *x, const char *y);
- static int compare_page (const FIELD_PTR *a, const FIELD_PTR *b);
- static int compare_string (const unsigned char *a, const unsigned char *b);
- static int new_strcmp (const unsigned char *a, const unsigned char *b,
- int option);
- void
- sort_idx(void)
- {
- #ifdef HAVE_SETLOCALE
- char *prev_locale;
- #endif
- MESSAGE("Sorting entries...");
- #ifdef HAVE_SETLOCALE
- prev_locale = setlocale(LC_COLLATE, NULL);
- setlocale(LC_COLLATE, "");
- #endif
- idx_dc = 0;
- idx_gc = 0L;
- qqsort(idx_key, idx_gt, sizeof(FIELD_PTR), compare);
- #ifdef HAVE_SETLOCALE
- setlocale(LC_COLLATE, prev_locale);
- #endif
- MESSAGE1("done (%ld comparisons).\n", idx_gc);
- }
- static int
- compare(const void *va, const void *vb)
- {
- const FIELD_PTR *a = va;
- const FIELD_PTR *b = vb;
- int i;
- int dif;
- idx_gc++;
- IDX_DOT(CMP_MAX);
- for (i = 0; i < FIELD_MAX; i++) {
- /* compare the sort fields */
- if ((dif = compare_one((*a)->sf[i], (*b)->sf[i])) != 0)
- break;
- /* compare the actual fields */
- if ((dif = compare_one((*a)->af[i], (*b)->af[i])) != 0)
- break;
- }
- /* both key aggregates are identical, compare page numbers */
- if (i == FIELD_MAX)
- dif = compare_page(a, b);
- return (dif);
- }
- static int
- compare_one(const char *x, const char *y)
- {
- int m;
- int n;
- if ((x[0] == NUL) && (y[0] == NUL))
- return (0);
- if (x[0] == NUL)
- return (-1);
- if (y[0] == NUL)
- return (1);
- m = group_type(x);
- n = group_type(y);
- /* both pure digits */
- if ((m >= 0) && (n >= 0))
- return (m - n);
- /* x digit, y non-digit */
- if (m >= 0) {
- if (german_sort)
- return (1);
- else
- return ((n == -1) ? 1 : -1);
- }
- /* x non-digit, y digit */
- if (n >= 0) {
- if (german_sort)
- return (-1);
- else
- return ((m == -1) ? -1 : 1);
- }
- /* strings started with a symbol (including digit) */
- if ((m == SYMBOL) && (n == SYMBOL))
- return (check_mixsym(x, y));
- /* x symbol, y non-symbol */
- if (m == SYMBOL)
- return (-1);
- /* x non-symbol, y symbol */
- if (n == SYMBOL)
- return (1);
- /* strings with a leading letter, the ALPHA type */
- return (compare_string((const unsigned char*)x, (const unsigned char*)y));
- }
- static int
- check_mixsym(const char *x, const char *y)
- {
- int m;
- int n;
- m = ISDIGIT(x[0]);
- n = ISDIGIT(y[0]);
- if (m && !n)
- return (1);
- if (!m && n)
- return (-1);
- return (locale_sort ? strcoll(x, y) : strcmp(x, y));
- }
- static int
- compare_string(const unsigned char *a, const unsigned char *b)
- {
- int i = 0;
- int j = 0;
- int al;
- int bl;
- if (locale_sort) return strcoll((const char *)a, (const char *)b);
- while ((a[i] != NUL) || (b[j] != NUL)) {
- if (a[i] == NUL)
- return (-1);
- if (b[j] == NUL)
- return (1);
- if (letter_ordering) {
- if (a[i] == SPC)
- i++;
- if (b[j] == SPC)
- j++;
- }
- al = TOLOWER(a[i]);
- bl = TOLOWER(b[j]);
- if (al != bl)
- return (al - bl);
- i++;
- j++;
- }
- if (german_sort)
- return (new_strcmp(a, b, GERMAN));
- else
- return (strcmp((const char*)a, (const char*)b));
- }
- static int
- compare_page(const FIELD_PTR *a, const FIELD_PTR *b)
- {
- int m = 0;
- short i = 0;
- while ((i < (*a)->count) && (i < (*b)->count) &&
- ((m = (*a)->npg[i] - (*b)->npg[i]) == 0))
- {
- i++;
- }
- if (m == 0)
- { /* common leading page numbers match */
- if ((i == (*a)->count) && (i == (*b)->count))
- { /* all page numbers match */
- /***********************************************************
- We have identical entries, except possibly in encap fields.
- The ordering is tricky here. Consider the following input
- sequence of index names, encaps, and page numbers:
- foo|( 2
- foo|) 6
- foo|( 6
- foo|) 10
- This might legimately occur when a page range ends, and
- subsequently, a new range starts, on the same page. If we
- just order by range_open and range_close (here, parens),
- then we will produce
- foo|( 2
- foo|( 6
- foo|) 6
- foo|) 10
- This will later generate the index entry
- foo, 2--6, \({6}, 10
- which is not only wrong, but has also introduced an illegal
- LaTeX macro, \({6}, because the merging step treated this
- like a \see{6} entry.
- The solution is to preserve the original input order, which
- we can do by treating range_open and range_close as equal,
- and then ordering by input line number. This will then
- generate the correct index entry
- foo, 2--10
- Ordering inconsistencies from missing range open or close
- entries, or mixing roman and arabic page numbers, will be
- detected later.
- ***********************************************************/
- #define isrange(c) ( ((c) == idx_ropen) || ((c) == idx_rclose) )
- /* Order two range values by input line number */
- if (isrange(*(*a)->encap) && isrange(*(*b)->encap))
- m = (*a)->lc - (*b)->lc;
- /* Handle identical encap fields; neither is a range delimiter */
- else if (STREQ((*a)->encap, (*b)->encap))
- {
- /* If neither are yet marked duplicate, mark the second
- of them to be ignored. */
- if (((*a)->type != DUPLICATE) &&
- ((*b)->type != DUPLICATE))
- (*b)->type = DUPLICATE;
- /* leave m == 0 to show equality */
- }
- /* Encap fields differ: only one may be a range delimiter, */
- /* or else neither of them is. If either of them is a range */
- /* delimiter, order by input line number; otherwise, order */
- /* by name. */
- else
- {
- if ( isrange(*(*a)->encap) || isrange(*(*b)->encap) )
- m = (*a)->lc - (*b)->lc; /* order by input line number */
- else /* order non-range items by */
- /* their encap strings */
- m = compare_string((const unsigned char*)((*a)->encap),
- (const unsigned char*)((*b)->encap));
- }
- }
- else if ((i == (*a)->count) && (i < (*b)->count))
- m = -1;
- else if ((i < (*a)->count) && (i == (*b)->count))
- m = 1;
- }
- return (m);
- }
- static int
- new_strcmp(const unsigned char *s1, const unsigned char *s2, int option)
- {
- int i;
- i = 0;
- while (s1[i] == s2[i])
- if (s1[i++] == NUL)
- return (0);
- if (option) /* ASCII */
- return (isupper(s1[i]) ? -1 : 1);
- else /* GERMAN */
- return (isupper(s1[i]) ? 1 : -1);
- }