PageRenderTime 124ms CodeModel.GetById 7ms app.highlight 108ms RepoModel.GetById 2ms app.codeStats 0ms

/contrib/groff/src/preproc/refer/refer.cpp

https://bitbucket.org/freebsd/freebsd-head/
C++ | 1242 lines | 1159 code | 49 blank | 34 comment | 402 complexity | 869a4955fe18e747a2b03997d9f74efe MD5 | raw file
   1// -*- C++ -*-
   2/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
   3   Free Software Foundation, Inc.
   4     Written by James Clark (jjc@jclark.com)
   5
   6This file is part of groff.
   7
   8groff is free software; you can redistribute it and/or modify it under
   9the terms of the GNU General Public License as published by the Free
  10Software Foundation; either version 2, or (at your option) any later
  11version.
  12
  13groff is distributed in the hope that it will be useful, but WITHOUT ANY
  14WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16for more details.
  17
  18You should have received a copy of the GNU General Public License along
  19with groff; see the file COPYING.  If not, write to the Free Software
  20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22#include "refer.h"
  23#include "refid.h"
  24#include "ref.h"
  25#include "token.h"
  26#include "search.h"
  27#include "command.h"
  28
  29extern "C" const char *Version_string;
  30
  31const char PRE_LABEL_MARKER = '\013';
  32const char POST_LABEL_MARKER = '\014';
  33const char LABEL_MARKER = '\015'; // label_type is added on
  34
  35#define FORCE_LEFT_BRACKET 04
  36#define FORCE_RIGHT_BRACKET 010
  37
  38static FILE *outfp = stdout;
  39
  40string capitalize_fields;
  41string reverse_fields;
  42string abbreviate_fields;
  43string period_before_last_name = ". ";
  44string period_before_initial = ".";
  45string period_before_hyphen = "";
  46string period_before_other = ". ";
  47string sort_fields;
  48int annotation_field = -1;
  49string annotation_macro;
  50string discard_fields = "XYZ";
  51string pre_label = "\\*([.";
  52string post_label = "\\*(.]";
  53string sep_label = ", ";
  54int accumulate = 0;
  55int move_punctuation = 0;
  56int abbreviate_label_ranges = 0;
  57string label_range_indicator;
  58int label_in_text = 1;
  59int label_in_reference = 1;
  60int date_as_label = 0;
  61int sort_adjacent_labels = 0;
  62// Join exactly two authors with this.
  63string join_authors_exactly_two = " and ";
  64// When there are more than two authors join the last two with this.
  65string join_authors_last_two = ", and ";
  66// Otherwise join authors with this.
  67string join_authors_default = ", ";
  68string separate_label_second_parts = ", ";
  69// Use this string to represent that there are other authors.
  70string et_al = " et al";
  71// Use et al only if it can replace at least this many authors.
  72int et_al_min_elide = 2;
  73// Use et al only if the total number of authors is at least this.
  74int et_al_min_total = 3;
  75
  76
  77int compatible_flag = 0;
  78
  79int short_label_flag = 0;
  80
  81static int recognize_R1_R2 = 1;
  82
  83search_list database_list;
  84int search_default = 1;
  85static int default_database_loaded = 0;
  86
  87static reference **citation = 0;
  88static int ncitations = 0;
  89static int citation_max = 0;
  90
  91static reference **reference_hash_table = 0;
  92static int hash_table_size;
  93static int nreferences = 0;
  94
  95static int need_syncing = 0;
  96string pending_line;
  97string pending_lf_lines;
  98
  99static void output_pending_line();
 100static unsigned immediately_handle_reference(const string &);
 101static void immediately_output_references();
 102static unsigned store_reference(const string &);
 103static void divert_to_temporary_file();
 104static reference *make_reference(const string &, unsigned *);
 105static void usage(FILE *stream);
 106static void do_file(const char *);
 107static void split_punct(string &line, string &punct);
 108static void output_citation_group(reference **v, int n, label_type, FILE *fp);
 109static void possibly_load_default_database();
 110
 111int main(int argc, char **argv)
 112{
 113  program_name = argv[0];
 114  static char stderr_buf[BUFSIZ];
 115  setbuf(stderr, stderr_buf);
 116  outfp = stdout;
 117  int finished_options = 0;
 118  int bib_flag = 0;
 119  int done_spec = 0;
 120
 121  for (--argc, ++argv;
 122       !finished_options && argc > 0 && argv[0][0] == '-'
 123       && argv[0][1] != '\0';
 124       argv++, argc--) {
 125    const char *opt = argv[0] + 1; 
 126    while (opt != 0 && *opt != '\0') {
 127      switch (*opt) {
 128      case 'C':
 129	compatible_flag = 1;
 130	opt++;
 131	break;
 132      case 'B':
 133	bib_flag = 1;
 134	label_in_reference = 0;
 135	label_in_text = 0;
 136	++opt;
 137	if (*opt == '\0') {
 138	  annotation_field = 'X';
 139	  annotation_macro = "AP";
 140	}
 141	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
 142	  annotation_field = opt[0];
 143	  annotation_macro = opt + 2;
 144	}
 145	opt = 0;
 146	break;
 147      case 'P':
 148	move_punctuation = 1;
 149	opt++;
 150	break;
 151      case 'R':
 152	recognize_R1_R2 = 0;
 153	opt++;
 154	break;
 155      case 'S':
 156	// Not a very useful spec.
 157	set_label_spec("(A.n|Q)', '(D.y|D)");
 158	done_spec = 1;
 159	pre_label = " (";
 160	post_label = ")";
 161	sep_label = "; ";
 162	opt++;
 163	break;
 164      case 'V':
 165	verify_flag = 1;
 166	opt++;
 167	break;
 168      case 'f':
 169	{
 170	  const char *num = 0;
 171	  if (*++opt == '\0') {
 172	    if (argc > 1) {
 173	      num = *++argv;
 174	      --argc;
 175	    }
 176	    else {
 177	      error("option `f' requires an argument");
 178	      usage(stderr);
 179	      exit(1);
 180	    }
 181	  }
 182	  else {
 183	    num = opt;
 184	    opt = 0;
 185	  }
 186	  const char *ptr;
 187	  for (ptr = num; *ptr; ptr++)
 188	    if (!csdigit(*ptr)) {
 189	      error("bad character `%1' in argument to -f option", *ptr);
 190	      break;
 191	    }
 192	  if (*ptr == '\0') {
 193	    string spec;
 194	    spec = '%';
 195	    spec += num;
 196	    spec += '\0';
 197	    set_label_spec(spec.contents());
 198	    done_spec = 1;
 199	  }
 200	  break;
 201	}
 202      case 'b':
 203	label_in_text = 0;
 204	label_in_reference = 0;
 205	opt++;
 206	break;
 207      case 'e':
 208	accumulate = 1;
 209	opt++;
 210	break;
 211      case 'c':
 212	capitalize_fields = ++opt;
 213	opt = 0;
 214	break;
 215      case 'k':
 216	{
 217	  char buf[5];
 218	  if (csalpha(*++opt))
 219	    buf[0] = *opt++;
 220	  else {
 221	    if (*opt != '\0')
 222	      error("bad field name `%1'", *opt++);
 223	    buf[0] = 'L';
 224	  }
 225	  buf[1] = '~';
 226	  buf[2] = '%';
 227	  buf[3] = 'a';
 228	  buf[4] = '\0';
 229	  set_label_spec(buf);
 230	  done_spec = 1;
 231	}
 232	break;
 233      case 'a':
 234	{
 235	  const char *ptr;
 236	  for (ptr = ++opt; *ptr; ptr++)
 237	    if (!csdigit(*ptr)) {
 238	      error("argument to `a' option not a number");
 239	      break;
 240	    }
 241	  if (*ptr == '\0') {
 242	    reverse_fields = 'A';
 243	    reverse_fields += opt;
 244	  }
 245	  opt = 0;
 246	}
 247	break;
 248      case 'i':
 249	linear_ignore_fields = ++opt;
 250	opt = 0;
 251	break;
 252      case 'l':
 253	{
 254	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
 255	  strcpy(buf, "A.n");
 256	  if (*++opt != '\0' && *opt != ',') {
 257	    char *ptr;
 258	    long n = strtol(opt, &ptr, 10);
 259	    if (n == 0 && ptr == opt) {
 260	      error("bad integer `%1' in `l' option", opt);
 261	      opt = 0;
 262	      break;
 263	    }
 264	    if (n < 0)
 265	      n = 0;
 266	    opt = ptr;
 267	    sprintf(strchr(buf, '\0'), "+%ld", n);
 268	  }
 269	  strcat(buf, "D.y");
 270	  if (*opt == ',')
 271	    opt++;
 272	  if (*opt != '\0') {
 273	    char *ptr;
 274	    long n = strtol(opt, &ptr, 10);
 275	    if (n == 0 && ptr == opt) {
 276	      error("bad integer `%1' in `l' option", opt);
 277	      opt = 0;
 278	      break;
 279	    }
 280	    if (n < 0)
 281	      n = 0;
 282	    sprintf(strchr(buf, '\0'), "-%ld", n);
 283	    opt = ptr;
 284	    if (*opt != '\0')
 285	      error("argument to `l' option not of form `m,n'");
 286	  }
 287	  strcat(buf, "%a");
 288	  if (!set_label_spec(buf))
 289	    assert(0);
 290	  done_spec = 1;
 291	}
 292	break;
 293      case 'n':
 294	search_default = 0;
 295	opt++;
 296	break;
 297      case 'p':
 298	{
 299	  const char *filename = 0;
 300	  if (*++opt == '\0') {
 301	    if (argc > 1) {
 302	      filename = *++argv;
 303	      argc--;
 304	    }
 305	    else {
 306	      error("option `p' requires an argument");
 307	      usage(stderr);
 308	      exit(1);
 309	    }
 310	  }
 311	  else {
 312	    filename = opt;
 313	    opt = 0;
 314	  }
 315	  database_list.add_file(filename);
 316	}
 317	break;
 318      case 's':
 319	if (*++opt == '\0')
 320	  sort_fields = "AD";
 321	else {
 322	  sort_fields = opt;
 323	  opt = 0;
 324	}
 325	accumulate = 1;
 326	break;
 327      case 't':
 328	{
 329	  char *ptr;
 330	  long n = strtol(opt, &ptr, 10);
 331	  if (n == 0 && ptr == opt) {
 332	    error("bad integer `%1' in `t' option", opt);
 333	    opt = 0;
 334	    break;
 335	  }
 336	  if (n < 1)
 337	    n = 1;
 338	  linear_truncate_len = int(n);
 339	  opt = ptr;
 340	  break;
 341	}
 342      case '-':
 343	if (opt[1] == '\0') {
 344	  finished_options = 1;
 345	  opt++;
 346	  break;
 347	}
 348	if (strcmp(opt,"-version")==0) {
 349      case 'v':
 350	  printf("GNU refer (groff) version %s\n", Version_string);
 351	  exit(0);
 352	  break;
 353	}
 354	if (strcmp(opt,"-help")==0) {
 355	  usage(stdout);
 356	  exit(0);
 357	  break;
 358	}
 359	// fall through
 360      default:
 361	error("unrecognized option `%1'", *opt);
 362	usage(stderr);
 363	exit(1);
 364	break;
 365      }
 366    }
 367  }
 368  if (!done_spec)
 369    set_label_spec("%1");
 370  if (argc <= 0) {
 371    if (bib_flag)
 372      do_bib("-");
 373    else
 374      do_file("-");
 375  }
 376  else {
 377    for (int i = 0; i < argc; i++) {
 378      if (bib_flag)
 379	do_bib(argv[i]);
 380      else
 381	do_file(argv[i]);
 382    }
 383  }
 384  if (accumulate)
 385    output_references();
 386  if (fflush(stdout) < 0)
 387    fatal("output error");
 388  return 0;
 389}
 390
 391static void usage(FILE *stream)
 392{
 393  fprintf(stream,
 394"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
 395"       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
 396	  program_name);
 397}
 398
 399static void possibly_load_default_database()
 400{
 401  if (search_default && !default_database_loaded) {
 402    char *filename = getenv("REFER");
 403    if (filename)
 404      database_list.add_file(filename);
 405    else
 406      database_list.add_file(DEFAULT_INDEX, 1);
 407    default_database_loaded = 1;
 408  }
 409}
 410
 411static int is_list(const string &str)
 412{
 413  const char *start = str.contents();
 414  const char *end = start + str.length();
 415  while (end > start && csspace(end[-1]))
 416    end--;
 417  while (start < end && csspace(*start))
 418    start++;
 419  return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
 420}
 421
 422static void do_file(const char *filename)
 423{
 424  FILE *fp;
 425  if (strcmp(filename, "-") == 0) {
 426    fp = stdin;
 427  }
 428  else {
 429    errno = 0;
 430    fp = fopen(filename, "r");
 431    if (fp == 0) {
 432      error("can't open `%1': %2", filename, strerror(errno));
 433      return;
 434    }
 435  }
 436  current_filename = filename;
 437  fprintf(outfp, ".lf 1 %s\n", filename);
 438  string line;
 439  current_lineno = 0;
 440  for (;;) {
 441    line.clear();
 442    for (;;) {
 443      int c = getc(fp);
 444      if (c == EOF) {
 445	if (line.length() > 0)
 446	  line += '\n';
 447	break;
 448      }
 449      if (invalid_input_char(c))
 450	error("invalid input character code %1", c);
 451      else {
 452	line += c;
 453	if (c == '\n')
 454	  break;
 455      }
 456    }
 457    int len = line.length();
 458    if (len == 0)
 459      break;
 460    current_lineno++;
 461    if (len >= 2 && line[0] == '.' && line[1] == '[') {
 462      int start_lineno = current_lineno;
 463      int start_of_line = 1;
 464      string str;
 465      string post;
 466      string pre(line.contents() + 2, line.length() - 3);
 467      for (;;) {
 468	int c = getc(fp);
 469	if (c == EOF) {
 470	  error_with_file_and_line(current_filename, start_lineno,
 471				   "missing `.]' line");
 472	  break;
 473	}
 474	if (start_of_line)
 475	  current_lineno++;
 476	if (start_of_line && c == '.') {
 477	  int d = getc(fp);
 478	  if (d == ']') {
 479	    while ((d = getc(fp)) != '\n' && d != EOF) {
 480	      if (invalid_input_char(d))
 481		error("invalid input character code %1", d);
 482	      else
 483		post += d;
 484	    }
 485	    break;
 486	  }
 487	  if (d != EOF)
 488	    ungetc(d, fp);
 489	}
 490	if (invalid_input_char(c))
 491	  error("invalid input character code %1", c);
 492	else
 493	  str += c;
 494	start_of_line = (c == '\n');
 495      }
 496      if (is_list(str)) {
 497	output_pending_line();
 498	if (accumulate)
 499	  output_references();
 500	else
 501	  error("found `$LIST$' but not accumulating references");
 502      }
 503      else {
 504	unsigned flags = (accumulate
 505			  ? store_reference(str)
 506			  : immediately_handle_reference(str));
 507	if (label_in_text) {
 508	  if (accumulate && outfp == stdout)
 509	    divert_to_temporary_file();
 510	  if (pending_line.length() == 0) {
 511	    warning("can't attach citation to previous line");
 512	  }
 513	  else
 514	    pending_line.set_length(pending_line.length() - 1);
 515	  string punct;
 516	  if (move_punctuation)
 517	    split_punct(pending_line, punct);
 518	  int have_text = pre.length() > 0 || post.length() > 0;
 519	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
 520					       |FORCE_RIGHT_BRACKET));
 521	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
 522	    pending_line += PRE_LABEL_MARKER;
 523	  pending_line += pre;
 524	  char lm = LABEL_MARKER + (int)lt;
 525	  pending_line += lm;
 526	  pending_line += post;
 527	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
 528	    pending_line += POST_LABEL_MARKER;
 529	  pending_line += punct;
 530	  pending_line += '\n';
 531	}
 532      }
 533      need_syncing = 1;
 534    }
 535    else if (len >= 4
 536	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
 537	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
 538      pending_lf_lines += line;
 539      line += '\0';
 540      if (interpret_lf_args(line.contents() + 3))
 541	current_lineno--;
 542    }
 543    else if (recognize_R1_R2
 544	     && len >= 4
 545	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
 546	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
 547      line.clear();
 548      int start_of_line = 1;
 549      int start_lineno = current_lineno;
 550      for (;;) {
 551	int c = getc(fp);
 552	if (c != EOF && start_of_line)
 553	  current_lineno++;
 554	if (start_of_line && c == '.') {
 555	  c = getc(fp);
 556	  if (c == 'R') {
 557	    c = getc(fp);
 558	    if (c == '2') {
 559	      c = getc(fp);
 560	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
 561		while (c != EOF && c != '\n')
 562		  c = getc(fp);
 563		break;
 564	      }
 565	      else {
 566		line += '.';
 567		line += 'R';
 568		line += '2';
 569	      }
 570	    }
 571	    else {
 572	      line += '.';
 573	      line += 'R';
 574	    }
 575	  }
 576	  else
 577	    line += '.';
 578	}
 579	if (c == EOF) {
 580	  error_with_file_and_line(current_filename, start_lineno,
 581				   "missing `.R2' line");
 582	  break;
 583	}
 584	if (invalid_input_char(c))
 585	  error("invalid input character code %1", int(c));
 586	else {
 587	  line += c;
 588	  start_of_line = c == '\n';
 589	}
 590      }
 591      output_pending_line();
 592      if (accumulate)
 593	output_references();
 594      else
 595	nreferences = 0;
 596      process_commands(line, current_filename, start_lineno + 1);
 597      need_syncing = 1;
 598    }
 599    else {
 600      output_pending_line();
 601      pending_line = line;
 602    }
 603  }
 604  need_syncing = 0;
 605  output_pending_line();
 606  if (fp != stdin)
 607    fclose(fp);
 608}
 609
 610class label_processing_state {
 611  enum {
 612    NORMAL,
 613    PENDING_LABEL,
 614    PENDING_LABEL_POST,
 615    PENDING_LABEL_POST_PRE,
 616    PENDING_POST
 617    } state;
 618  label_type type;		// type of pending labels
 619  int count;			// number of pending labels
 620  reference **rptr;		// pointer to next reference
 621  int rcount;			// number of references left
 622  FILE *fp;
 623  int handle_pending(int c);
 624public:
 625  label_processing_state(reference **, int, FILE *);
 626  ~label_processing_state();
 627  void process(int c);
 628};
 629
 630static void output_pending_line()
 631{
 632  if (label_in_text && !accumulate && ncitations > 0) {
 633    label_processing_state state(citation, ncitations, outfp);
 634    int len = pending_line.length();
 635    for (int i = 0; i < len; i++)
 636      state.process((unsigned char)(pending_line[i]));
 637  }
 638  else
 639    put_string(pending_line, outfp);
 640  pending_line.clear();
 641  if (pending_lf_lines.length() > 0) {
 642    put_string(pending_lf_lines, outfp);
 643    pending_lf_lines.clear();
 644  }
 645  if (!accumulate)
 646    immediately_output_references();
 647  if (need_syncing) {
 648    fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
 649    need_syncing = 0;
 650  }
 651}
 652
 653static void split_punct(string &line, string &punct)
 654{
 655  const char *start = line.contents();
 656  const char *end = start + line.length();
 657  const char *ptr = start;
 658  const char *last_token_start = 0;
 659  for (;;) {
 660    if (ptr >= end)
 661      break;
 662    last_token_start = ptr;
 663    if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
 664	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
 665      ptr++;
 666    else if (!get_token(&ptr, end))
 667      break;
 668  }
 669  if (last_token_start) {
 670    const token_info *ti = lookup_token(last_token_start, end);
 671    if (ti->is_punct()) {
 672      punct.append(last_token_start, end - last_token_start);
 673      line.set_length(last_token_start - start);
 674    }
 675  }
 676}
 677
 678static void divert_to_temporary_file()
 679{
 680  outfp = xtmpfile();
 681}
 682
 683static void store_citation(reference *ref)
 684{
 685  if (ncitations >= citation_max) {
 686    if (citation == 0)
 687      citation = new reference*[citation_max = 100];
 688    else {
 689      reference **old_citation = citation;
 690      citation_max *= 2;
 691      citation = new reference *[citation_max];
 692      memcpy(citation, old_citation, ncitations*sizeof(reference *));
 693      a_delete old_citation;
 694    }
 695  }
 696  citation[ncitations++] = ref;
 697}
 698
 699static unsigned store_reference(const string &str)
 700{
 701  if (reference_hash_table == 0) {
 702    reference_hash_table = new reference *[17];
 703    hash_table_size = 17;
 704    for (int i = 0; i < hash_table_size; i++)
 705      reference_hash_table[i] = 0;
 706  }
 707  unsigned flags;
 708  reference *ref = make_reference(str, &flags);
 709  ref->compute_hash_code();
 710  unsigned h = ref->hash();
 711  reference **ptr;
 712  for (ptr = reference_hash_table + (h % hash_table_size);
 713       *ptr != 0;
 714       ((ptr == reference_hash_table)
 715	? (ptr = reference_hash_table + hash_table_size - 1)
 716	: --ptr))
 717    if (same_reference(**ptr, *ref))
 718      break;
 719  if (*ptr != 0) {
 720    if (ref->is_merged())
 721      warning("fields ignored because reference already used");
 722    delete ref;
 723    ref = *ptr;
 724  }
 725  else {
 726    *ptr = ref;
 727    ref->set_number(nreferences);
 728    nreferences++;
 729    ref->pre_compute_label();
 730    ref->compute_sort_key();
 731    if (nreferences*2 >= hash_table_size) {
 732      // Rehash it.
 733      reference **old_table = reference_hash_table;
 734      int old_size = hash_table_size;
 735      hash_table_size = next_size(hash_table_size);
 736      reference_hash_table = new reference*[hash_table_size];
 737      int i;
 738      for (i = 0; i < hash_table_size; i++)
 739	reference_hash_table[i] = 0;
 740      for (i = 0; i < old_size; i++)
 741	if (old_table[i]) {
 742	  reference **p;
 743	  for (p = (reference_hash_table
 744				+ (old_table[i]->hash() % hash_table_size));
 745	       *p;
 746	       ((p == reference_hash_table)
 747		? (p = reference_hash_table + hash_table_size - 1)
 748		: --p))
 749	    ;
 750	  *p = old_table[i];
 751	}
 752      a_delete old_table;
 753    }
 754  }
 755  if (label_in_text)
 756    store_citation(ref);
 757  return flags;
 758}
 759
 760unsigned immediately_handle_reference(const string &str)
 761{
 762  unsigned flags;
 763  reference *ref = make_reference(str, &flags);
 764  ref->set_number(nreferences);
 765  if (label_in_text || label_in_reference) {
 766    ref->pre_compute_label();
 767    ref->immediate_compute_label();
 768  }
 769  nreferences++;
 770  store_citation(ref);
 771  return flags;
 772}
 773
 774static void immediately_output_references()
 775{
 776  for (int i = 0; i < ncitations; i++) {
 777    reference *ref = citation[i];
 778    if (label_in_reference) {
 779      fputs(".ds [F ", outfp);
 780      const string &label = ref->get_label(NORMAL_LABEL);
 781      if (label.length() > 0
 782	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
 783	putc('"', outfp);
 784      put_string(label, outfp);
 785      putc('\n', outfp);
 786    }
 787    ref->output(outfp);
 788    delete ref;
 789  }
 790  ncitations = 0;
 791}
 792
 793static void output_citation_group(reference **v, int n, label_type type,
 794				  FILE *fp)
 795{
 796  if (sort_adjacent_labels) {
 797    // Do an insertion sort.  Usually n will be very small.
 798    for (int i = 1; i < n; i++) {
 799      int num = v[i]->get_number();
 800      reference *temp = v[i];
 801      int j;
 802      for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
 803	v[j + 1] = v[j];
 804      v[j + 1] = temp;
 805    }
 806  }
 807  // This messes up if !accumulate.
 808  if (accumulate && n > 1) {
 809    // remove duplicates
 810    int j = 1;
 811    for (int i = 1; i < n; i++)
 812      if (v[i]->get_label(type) != v[i - 1]->get_label(type))
 813	v[j++] = v[i];
 814    n = j;
 815  }
 816  string merged_label;
 817  for (int i = 0; i < n; i++) {
 818    int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
 819    if (nmerged > 0) {
 820      put_string(merged_label, fp);
 821      i += nmerged;
 822    }
 823    else
 824      put_string(v[i]->get_label(type), fp);
 825    if (i < n - 1)
 826      put_string(sep_label, fp);
 827  }
 828}
 829
 830
 831label_processing_state::label_processing_state(reference **p, int n, FILE *f)
 832: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
 833{
 834}
 835
 836label_processing_state::~label_processing_state()
 837{
 838  int handled = handle_pending(EOF);
 839  assert(!handled);
 840  assert(rcount == 0);
 841}
 842
 843int label_processing_state::handle_pending(int c)
 844{
 845  switch (state) {
 846  case NORMAL:
 847    break;
 848  case PENDING_LABEL:
 849    if (c == POST_LABEL_MARKER) {
 850      state = PENDING_LABEL_POST;
 851      return 1;
 852    }
 853    else {
 854      output_citation_group(rptr, count, type, fp);
 855      rptr += count ;
 856      rcount -= count;
 857      state = NORMAL;
 858    }
 859    break;
 860  case PENDING_LABEL_POST:
 861    if (c == PRE_LABEL_MARKER) {
 862      state = PENDING_LABEL_POST_PRE;
 863      return 1;
 864    }
 865    else {
 866      output_citation_group(rptr, count, type, fp);
 867      rptr += count;
 868      rcount -= count;
 869      put_string(post_label, fp);
 870      state = NORMAL;
 871    }
 872    break;
 873  case PENDING_LABEL_POST_PRE:
 874    if (c >= LABEL_MARKER
 875	&& c < LABEL_MARKER + N_LABEL_TYPES
 876	&& c - LABEL_MARKER == type) {
 877      count += 1;
 878      state = PENDING_LABEL;
 879      return 1;
 880    }
 881    else {
 882      output_citation_group(rptr, count, type, fp);
 883      rptr += count;
 884      rcount -= count;
 885      put_string(sep_label, fp);
 886      state = NORMAL;
 887    }
 888    break;
 889  case PENDING_POST:
 890    if (c == PRE_LABEL_MARKER) {
 891      put_string(sep_label, fp);
 892      state = NORMAL;
 893      return 1;
 894    }
 895    else {
 896      put_string(post_label, fp);
 897      state = NORMAL;
 898    }
 899    break;
 900  }
 901  return 0;
 902}
 903
 904void label_processing_state::process(int c)
 905{
 906  if (handle_pending(c))
 907    return;
 908  assert(state == NORMAL);
 909  switch (c) {
 910  case PRE_LABEL_MARKER:
 911    put_string(pre_label, fp);
 912    state = NORMAL;
 913    break;
 914  case POST_LABEL_MARKER:
 915    state = PENDING_POST;
 916    break;
 917  case LABEL_MARKER:
 918  case LABEL_MARKER + 1:
 919    count = 1;
 920    state = PENDING_LABEL;
 921    type = label_type(c - LABEL_MARKER);
 922    break;
 923  default:
 924    state = NORMAL;
 925    putc(c, fp);
 926    break;
 927  }
 928}
 929
 930extern "C" {
 931
 932int rcompare(const void *p1, const void *p2)
 933{
 934  return compare_reference(**(reference **)p1, **(reference **)p2);
 935}
 936
 937}
 938
 939void output_references()
 940{
 941  assert(accumulate);
 942  if (!hash_table_size) {
 943    error("nothing to reference (probably `bibliography' before `sort')");
 944    accumulate = 0;
 945    nreferences = 0;
 946    return;
 947  }
 948  if (nreferences > 0) {
 949    int j = 0;
 950    int i;
 951    for (i = 0; i < hash_table_size; i++)
 952      if (reference_hash_table[i] != 0)
 953	reference_hash_table[j++] = reference_hash_table[i];
 954    assert(j == nreferences);
 955    for (; j < hash_table_size; j++)
 956      reference_hash_table[j] = 0;
 957    qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
 958    for (i = 0; i < nreferences; i++)
 959      reference_hash_table[i]->set_number(i);
 960    compute_labels(reference_hash_table, nreferences);
 961  }
 962  if (outfp != stdout) {
 963    rewind(outfp);
 964    {
 965      label_processing_state state(citation, ncitations, stdout);
 966      int c;
 967      while ((c = getc(outfp)) != EOF)
 968	state.process(c);
 969    }
 970    ncitations = 0;
 971    fclose(outfp);
 972    outfp = stdout;
 973  }
 974  if (nreferences > 0) {
 975    fputs(".]<\n", outfp);
 976    for (int i = 0; i < nreferences; i++) {
 977      if (sort_fields.length() > 0)
 978	reference_hash_table[i]->print_sort_key_comment(outfp);
 979      if (label_in_reference) {
 980	fputs(".ds [F ", outfp);
 981	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
 982	if (label.length() > 0
 983	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
 984	  putc('"', outfp);
 985	put_string(label, outfp);
 986	putc('\n', outfp);
 987      }
 988      reference_hash_table[i]->output(outfp);
 989      delete reference_hash_table[i];
 990      reference_hash_table[i] = 0;
 991    }
 992    fputs(".]>\n", outfp);
 993    nreferences = 0;
 994  }
 995  clear_labels();
 996}
 997
 998static reference *find_reference(const char *query, int query_len)
 999{
1000  // This is so that error messages look better.
1001  while (query_len > 0 && csspace(query[query_len - 1]))
1002    query_len--;
1003  string str;
1004  for (int i = 0; i < query_len; i++)
1005    str += query[i] == '\n' ? ' ' : query[i];
1006  str += '\0';
1007  possibly_load_default_database();
1008  search_list_iterator iter(&database_list, str.contents());
1009  reference_id rid;
1010  const char *start;
1011  int len;
1012  if (!iter.next(&start, &len, &rid)) {
1013    error("no matches for `%1'", str.contents());
1014    return 0;
1015  }
1016  const char *end = start + len;
1017  while (start < end) {
1018    if (*start == '%')
1019      break;
1020    while (start < end && *start++ != '\n')
1021      ;
1022  }
1023  if (start >= end) {
1024    error("found a reference for `%1' but it didn't contain any fields",
1025	  str.contents());
1026    return 0;
1027  }
1028  reference *result = new reference(start, end - start, &rid);
1029  if (iter.next(&start, &len, &rid))
1030    warning("multiple matches for `%1'", str.contents());
1031  return result;
1032}
1033
1034static reference *make_reference(const string &str, unsigned *flagsp)
1035{
1036  const char *start = str.contents();
1037  const char *end = start + str.length();
1038  const char *ptr = start;
1039  while (ptr < end) {
1040    if (*ptr == '%')
1041      break;
1042    while (ptr < end && *ptr++ != '\n')
1043      ;
1044  }
1045  *flagsp = 0;
1046  for (; start < ptr; start++) {
1047    if (*start == '#')
1048      *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1049					   | FORCE_LEFT_BRACKET)));
1050    else if (*start == '[')
1051      *flagsp |= FORCE_LEFT_BRACKET;
1052    else if (*start == ']')
1053      *flagsp |= FORCE_RIGHT_BRACKET;
1054    else if (!csspace(*start))
1055      break;
1056  }
1057  if (start >= end) {
1058    error("empty reference");
1059    return new reference;
1060  }
1061  reference *database_ref = 0;
1062  if (start < ptr)
1063    database_ref = find_reference(start, ptr - start);
1064  reference *inline_ref = 0;
1065  if (ptr < end)
1066    inline_ref = new reference(ptr, end - ptr);
1067  if (inline_ref) {
1068    if (database_ref) {
1069      database_ref->merge(*inline_ref);
1070      delete inline_ref;
1071      return database_ref;
1072    }
1073    else
1074      return inline_ref;
1075  }
1076  else if (database_ref)
1077    return database_ref;
1078  else
1079    return new reference;
1080}
1081
1082static void do_ref(const string &str)
1083{
1084  if (accumulate)
1085    (void)store_reference(str);
1086  else {
1087    (void)immediately_handle_reference(str);
1088    immediately_output_references();
1089  }
1090}
1091
1092static void trim_blanks(string &str)
1093{
1094  const char *start = str.contents();
1095  const char *end = start + str.length();
1096  while (end > start && end[-1] != '\n' && csspace(end[-1]))
1097    --end;
1098  str.set_length(end - start);
1099}
1100
1101void do_bib(const char *filename)
1102{
1103  FILE *fp;
1104  if (strcmp(filename, "-") == 0)
1105    fp = stdin;
1106  else {
1107    errno = 0;
1108    fp = fopen(filename, "r");
1109    if (fp == 0) {
1110      error("can't open `%1': %2", filename, strerror(errno));
1111      return;
1112    }
1113    current_filename = filename;
1114  }
1115  enum {
1116    START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1117    } state = START;
1118  string body;
1119  for (;;) {
1120    int c = getc(fp);
1121    if (c == EOF)
1122      break;
1123    if (invalid_input_char(c)) {
1124      error("invalid input character code %1", c);
1125      continue;
1126    }
1127    switch (state) {
1128    case START:
1129      if (c == '%') {
1130	body = c;
1131	state = BODY;
1132      }
1133      else if (c != '\n')
1134	state = MIDDLE;
1135      break;
1136    case MIDDLE:
1137      if (c == '\n')
1138	state = START;
1139      break;
1140    case BODY:
1141      body += c;
1142      if (c == '\n')
1143	state = BODY_START;
1144      break;
1145    case BODY_START:
1146      if (c == '\n') {
1147	do_ref(body);
1148	state = START;
1149      }
1150      else if (c == '.')
1151	state = BODY_DOT;
1152      else if (csspace(c)) {
1153	state = BODY_BLANK;
1154	body += c;
1155      }
1156      else {
1157	body += c;
1158	state = BODY;
1159      }
1160      break;
1161    case BODY_BLANK:
1162      if (c == '\n') {
1163	trim_blanks(body);
1164	do_ref(body);
1165	state = START;
1166      }
1167      else if (csspace(c))
1168	body += c;
1169      else {
1170	body += c;
1171	state = BODY;
1172      }
1173      break;
1174    case BODY_DOT:
1175      if (c == ']') {
1176	do_ref(body);
1177	state = MIDDLE;
1178      }
1179      else {
1180	body += '.';
1181	body += c;
1182	state = c == '\n' ? BODY_START : BODY;
1183      }
1184      break;
1185    default:
1186      assert(0);
1187    }
1188    if (c == '\n')
1189      current_lineno++;
1190  }
1191  switch (state) {
1192  case START:
1193  case MIDDLE:
1194    break;
1195  case BODY:
1196    body += '\n';
1197    do_ref(body);
1198    break;
1199  case BODY_DOT:
1200  case BODY_START:
1201    do_ref(body);
1202    break;
1203  case BODY_BLANK:
1204    trim_blanks(body);
1205    do_ref(body);
1206    break;
1207  }
1208  fclose(fp);
1209}
1210
// from the Dragon Book
//
// Hash the `len' characters starting at `s'.  Each character is added
// after shifting the running value left by four bits; whenever the top
// four bits become nonzero they are folded back into the low part and
// cleared.

unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  for (int i = 0; i < len; i++) {
    h = (h << 4) + s[i];
    const unsigned top = h & 0xf0000000;
    if (top != 0) {
      h ^= top >> 24;
      h ^= top;
    }
  }
  return h;
}
1227
// Return the first entry of a fixed table of hash table sizes that is
// strictly greater than `n'.  Asserts if `n' is not smaller than the
// largest entry.
int next_size(int n)
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  // The trailing 0 marks the end of the table.
  int i = 0;
  while (table_sizes[i] != 0 && table_sizes[i] <= n)
    i++;
  assert(table_sizes[i] != 0);
  return table_sizes[i];
}
1242