/contrib/groff/src/preproc/refer/refer.cpp

https://bitbucket.org/freebsd/freebsd-head/ · C++ · 1242 lines · 1159 code · 49 blank · 34 comment · 402 complexity · 869a4955fe18e747a2b03997d9f74efe MD5 · raw file

  1. // -*- C++ -*-
  2. /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
  3. Free Software Foundation, Inc.
  4. Written by James Clark (jjc@jclark.com)
  5. This file is part of groff.
  6. groff is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 2, or (at your option) any later
  9. version.
  10. groff is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License along
  15. with groff; see the file COPYING. If not, write to the Free Software
  16. Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  17. #include "refer.h"
  18. #include "refid.h"
  19. #include "ref.h"
  20. #include "token.h"
  21. #include "search.h"
  22. #include "command.h"
  // Version string supplied by another translation unit; printed by -v.
  23. extern "C" const char *Version_string;
  // Control characters that do_file() embeds in pending_line to mark where
  // a citation goes; label_processing_state::process() replaces them with
  // pre_label, post_label and the actual labels when the line is output.
  24. const char PRE_LABEL_MARKER = '\013';
  25. const char POST_LABEL_MARKER = '\014';
  26. const char LABEL_MARKER = '\015'; // label_type is added on
  // Flag bits set by make_reference() when a citation starts with `[' or
  // `]'; do_file() tests them to decide whether to force a bracket marker.
  27. #define FORCE_LEFT_BRACKET 04
  28. #define FORCE_RIGHT_BRACKET 010
  // Stream all output goes to; divert_to_temporary_file() redirects it.
  29. static FILE *outfp = stdout;
  // Settings below are modified by command-line options in main().
  30. string capitalize_fields;
  31. string reverse_fields;
  32. string abbreviate_fields;
  33. string period_before_last_name = ". ";
  34. string period_before_initial = ".";
  35. string period_before_hyphen = "";
  36. string period_before_other = ". ";
  37. string sort_fields;
  38. int annotation_field = -1;
  39. string annotation_macro;
  40. string discard_fields = "XYZ";
  // Strings written around and between labels in the text.
  41. string pre_label = "\\*([.";
  42. string post_label = "\\*(.]";
  43. string sep_label = ", ";
  // Non-zero: references are stored and emitted together by
  // output_references() instead of immediately after each citation.
  44. int accumulate = 0;
  45. int move_punctuation = 0;
  46. int abbreviate_label_ranges = 0;
  47. string label_range_indicator;
  48. int label_in_text = 1;
  49. int label_in_reference = 1;
  50. int date_as_label = 0;
  51. int sort_adjacent_labels = 0;
  52. // Join exactly two authors with this.
  53. string join_authors_exactly_two = " and ";
  54. // When there are more than two authors join the last two with this.
  55. string join_authors_last_two = ", and ";
  56. // Otherwise join authors with this.
  57. string join_authors_default = ", ";
  58. string separate_label_second_parts = ", ";
  59. // Use this string to represent that there are other authors.
  60. string et_al = " et al";
  61. // Use et al only if it can replace at least this many authors.
  62. int et_al_min_elide = 2;
  63. // Use et al only if the total number of authors is at least this.
  64. int et_al_min_total = 3;
  65. int compatible_flag = 0;
  66. int short_label_flag = 0;
  // Cleared by -R; when set, do_file() interprets .R1/.R2 command blocks.
  67. static int recognize_R1_R2 = 1;
  68. search_list database_list;
  69. int search_default = 1;
  70. static int default_database_loaded = 0;
  // Growable array of cited references, managed by store_citation().
  71. static reference **citation = 0;
  72. static int ncitations = 0;
  73. static int citation_max = 0;
  // Open-addressing hash table of accumulated references; see
  // store_reference().  output_references() compacts and sorts it in place.
  74. static reference **reference_hash_table = 0;
  75. static int hash_table_size;
  76. static int nreferences = 0;
  // When set, output_pending_line() emits a `.lf' line to resynchronise
  // line numbers.
  77. static int need_syncing = 0;
  // Most recent ordinary input line, held back so that a following
  // citation can be attached to it.
  78. string pending_line;
  // `.lf' lines seen since pending_line; written right after it.
  79. string pending_lf_lines;
  // Forward declarations of functions defined later in this file.
  80. static void output_pending_line();
  81. static unsigned immediately_handle_reference(const string &);
  82. static void immediately_output_references();
  83. static unsigned store_reference(const string &);
  84. static void divert_to_temporary_file();
  85. static reference *make_reference(const string &, unsigned *);
  86. static void usage(FILE *stream);
  87. static void do_file(const char *);
  88. static void split_punct(string &line, string &punct);
  89. static void output_citation_group(reference **v, int n, label_type, FILE *fp);
  90. static void possibly_load_default_database();
  91. int main(int argc, char **argv)
  92. {
  93. program_name = argv[0];
  94. static char stderr_buf[BUFSIZ];
  95. setbuf(stderr, stderr_buf);
  96. outfp = stdout;
  97. int finished_options = 0;
  98. int bib_flag = 0;
  99. int done_spec = 0;
  100. for (--argc, ++argv;
  101. !finished_options && argc > 0 && argv[0][0] == '-'
  102. && argv[0][1] != '\0';
  103. argv++, argc--) {
  104. const char *opt = argv[0] + 1;
  105. while (opt != 0 && *opt != '\0') {
  106. switch (*opt) {
  107. case 'C':
  108. compatible_flag = 1;
  109. opt++;
  110. break;
  111. case 'B':
  112. bib_flag = 1;
  113. label_in_reference = 0;
  114. label_in_text = 0;
  115. ++opt;
  116. if (*opt == '\0') {
  117. annotation_field = 'X';
  118. annotation_macro = "AP";
  119. }
  120. else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
  121. annotation_field = opt[0];
  122. annotation_macro = opt + 2;
  123. }
  124. opt = 0;
  125. break;
  126. case 'P':
  127. move_punctuation = 1;
  128. opt++;
  129. break;
  130. case 'R':
  131. recognize_R1_R2 = 0;
  132. opt++;
  133. break;
  134. case 'S':
  135. // Not a very useful spec.
  136. set_label_spec("(A.n|Q)', '(D.y|D)");
  137. done_spec = 1;
  138. pre_label = " (";
  139. post_label = ")";
  140. sep_label = "; ";
  141. opt++;
  142. break;
  143. case 'V':
  144. verify_flag = 1;
  145. opt++;
  146. break;
  147. case 'f':
  148. {
  149. const char *num = 0;
  150. if (*++opt == '\0') {
  151. if (argc > 1) {
  152. num = *++argv;
  153. --argc;
  154. }
  155. else {
  156. error("option `f' requires an argument");
  157. usage(stderr);
  158. exit(1);
  159. }
  160. }
  161. else {
  162. num = opt;
  163. opt = 0;
  164. }
  165. const char *ptr;
  166. for (ptr = num; *ptr; ptr++)
  167. if (!csdigit(*ptr)) {
  168. error("bad character `%1' in argument to -f option", *ptr);
  169. break;
  170. }
  171. if (*ptr == '\0') {
  172. string spec;
  173. spec = '%';
  174. spec += num;
  175. spec += '\0';
  176. set_label_spec(spec.contents());
  177. done_spec = 1;
  178. }
  179. break;
  180. }
  181. case 'b':
  182. label_in_text = 0;
  183. label_in_reference = 0;
  184. opt++;
  185. break;
  186. case 'e':
  187. accumulate = 1;
  188. opt++;
  189. break;
  190. case 'c':
  191. capitalize_fields = ++opt;
  192. opt = 0;
  193. break;
  194. case 'k':
  195. {
  196. char buf[5];
  197. if (csalpha(*++opt))
  198. buf[0] = *opt++;
  199. else {
  200. if (*opt != '\0')
  201. error("bad field name `%1'", *opt++);
  202. buf[0] = 'L';
  203. }
  204. buf[1] = '~';
  205. buf[2] = '%';
  206. buf[3] = 'a';
  207. buf[4] = '\0';
  208. set_label_spec(buf);
  209. done_spec = 1;
  210. }
  211. break;
  212. case 'a':
  213. {
  214. const char *ptr;
  215. for (ptr = ++opt; *ptr; ptr++)
  216. if (!csdigit(*ptr)) {
  217. error("argument to `a' option not a number");
  218. break;
  219. }
  220. if (*ptr == '\0') {
  221. reverse_fields = 'A';
  222. reverse_fields += opt;
  223. }
  224. opt = 0;
  225. }
  226. break;
  227. case 'i':
  228. linear_ignore_fields = ++opt;
  229. opt = 0;
  230. break;
  231. case 'l':
  232. {
  233. char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
  234. strcpy(buf, "A.n");
  235. if (*++opt != '\0' && *opt != ',') {
  236. char *ptr;
  237. long n = strtol(opt, &ptr, 10);
  238. if (n == 0 && ptr == opt) {
  239. error("bad integer `%1' in `l' option", opt);
  240. opt = 0;
  241. break;
  242. }
  243. if (n < 0)
  244. n = 0;
  245. opt = ptr;
  246. sprintf(strchr(buf, '\0'), "+%ld", n);
  247. }
  248. strcat(buf, "D.y");
  249. if (*opt == ',')
  250. opt++;
  251. if (*opt != '\0') {
  252. char *ptr;
  253. long n = strtol(opt, &ptr, 10);
  254. if (n == 0 && ptr == opt) {
  255. error("bad integer `%1' in `l' option", opt);
  256. opt = 0;
  257. break;
  258. }
  259. if (n < 0)
  260. n = 0;
  261. sprintf(strchr(buf, '\0'), "-%ld", n);
  262. opt = ptr;
  263. if (*opt != '\0')
  264. error("argument to `l' option not of form `m,n'");
  265. }
  266. strcat(buf, "%a");
  267. if (!set_label_spec(buf))
  268. assert(0);
  269. done_spec = 1;
  270. }
  271. break;
  272. case 'n':
  273. search_default = 0;
  274. opt++;
  275. break;
  276. case 'p':
  277. {
  278. const char *filename = 0;
  279. if (*++opt == '\0') {
  280. if (argc > 1) {
  281. filename = *++argv;
  282. argc--;
  283. }
  284. else {
  285. error("option `p' requires an argument");
  286. usage(stderr);
  287. exit(1);
  288. }
  289. }
  290. else {
  291. filename = opt;
  292. opt = 0;
  293. }
  294. database_list.add_file(filename);
  295. }
  296. break;
  297. case 's':
  298. if (*++opt == '\0')
  299. sort_fields = "AD";
  300. else {
  301. sort_fields = opt;
  302. opt = 0;
  303. }
  304. accumulate = 1;
  305. break;
  306. case 't':
  307. {
  308. char *ptr;
  309. long n = strtol(opt, &ptr, 10);
  310. if (n == 0 && ptr == opt) {
  311. error("bad integer `%1' in `t' option", opt);
  312. opt = 0;
  313. break;
  314. }
  315. if (n < 1)
  316. n = 1;
  317. linear_truncate_len = int(n);
  318. opt = ptr;
  319. break;
  320. }
  321. case '-':
  322. if (opt[1] == '\0') {
  323. finished_options = 1;
  324. opt++;
  325. break;
  326. }
  327. if (strcmp(opt,"-version")==0) {
  328. case 'v':
  329. printf("GNU refer (groff) version %s\n", Version_string);
  330. exit(0);
  331. break;
  332. }
  333. if (strcmp(opt,"-help")==0) {
  334. usage(stdout);
  335. exit(0);
  336. break;
  337. }
  338. // fall through
  339. default:
  340. error("unrecognized option `%1'", *opt);
  341. usage(stderr);
  342. exit(1);
  343. break;
  344. }
  345. }
  346. }
  347. if (!done_spec)
  348. set_label_spec("%1");
  349. if (argc <= 0) {
  350. if (bib_flag)
  351. do_bib("-");
  352. else
  353. do_file("-");
  354. }
  355. else {
  356. for (int i = 0; i < argc; i++) {
  357. if (bib_flag)
  358. do_bib(argv[i]);
  359. else
  360. do_file(argv[i]);
  361. }
  362. }
  363. if (accumulate)
  364. output_references();
  365. if (fflush(stdout) < 0)
  366. fatal("output error");
  367. return 0;
  368. }
  369. static void usage(FILE *stream)
  370. {
  371. fprintf(stream,
  372. "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
  373. " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
  374. program_name);
  375. }
  376. static void possibly_load_default_database()
  377. {
  378. if (search_default && !default_database_loaded) {
  379. char *filename = getenv("REFER");
  380. if (filename)
  381. database_list.add_file(filename);
  382. else
  383. database_list.add_file(DEFAULT_INDEX, 1);
  384. default_database_loaded = 1;
  385. }
  386. }
  387. static int is_list(const string &str)
  388. {
  389. const char *start = str.contents();
  390. const char *end = start + str.length();
  391. while (end > start && csspace(end[-1]))
  392. end--;
  393. while (start < end && csspace(*start))
  394. start++;
  395. return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
  396. }
  // Process one input file ("-" means standard input), copying it to outfp
  // while handling three kinds of request lines:
  //   .[ ... .]   a citation; the text between is turned into a reference
  //               and a label marker is appended to the previous line;
  //   .lf         line-number information, passed to interpret_lf_args();
  //   .R1 ... .R2 a block of commands handed to process_commands()
  //               (only when recognize_R1_R2 is set).
  // Any other line becomes the new pending_line after the previous pending
  // line has been written out.  A file that cannot be opened is reported
  // with error() and skipped.
  397. static void do_file(const char *filename)
  398. {
  399. FILE *fp;
  400. if (strcmp(filename, "-") == 0) {
  401. fp = stdin;
  402. }
  403. else {
  404. errno = 0;
  405. fp = fopen(filename, "r");
  406. if (fp == 0) {
  407. error("can't open `%1': %2", filename, strerror(errno));
  408. return;
  409. }
  410. }
  411. current_filename = filename;
  412. fprintf(outfp, ".lf 1 %s\n", filename);
  413. string line;
  414. current_lineno = 0;
  415. for (;;) {
  // Read one line, dropping invalid characters.  A final line without a
  // newline gets one appended, so `line' is empty only at end of file.
  416. line.clear();
  417. for (;;) {
  418. int c = getc(fp);
  419. if (c == EOF) {
  420. if (line.length() > 0)
  421. line += '\n';
  422. break;
  423. }
  424. if (invalid_input_char(c))
  425. error("invalid input character code %1", c);
  426. else {
  427. line += c;
  428. if (c == '\n')
  429. break;
  430. }
  431. }
  432. int len = line.length();
  433. if (len == 0)
  434. break;
  435. current_lineno++;
  436. if (len >= 2 && line[0] == '.' && line[1] == '[') {
  // Citation.  `pre' is the text after ".[" on this line (without the
  // newline), `str' collects the lines up to ".]", and `post' is the
  // text following ".]" on its line.
  437. int start_lineno = current_lineno;
  438. int start_of_line = 1;
  439. string str;
  440. string post;
  441. string pre(line.contents() + 2, line.length() - 3);
  442. for (;;) {
  443. int c = getc(fp);
  444. if (c == EOF) {
  445. error_with_file_and_line(current_filename, start_lineno,
  446. "missing `.]' line");
  447. break;
  448. }
  449. if (start_of_line)
  450. current_lineno++;
  451. if (start_of_line && c == '.') {
  // Look one character ahead for the closing ".]"; anything else is
  // pushed back and the '.' is treated as ordinary text.
  452. int d = getc(fp);
  453. if (d == ']') {
  454. while ((d = getc(fp)) != '\n' && d != EOF) {
  455. if (invalid_input_char(d))
  456. error("invalid input character code %1", d);
  457. else
  458. post += d;
  459. }
  460. break;
  461. }
  462. if (d != EOF)
  463. ungetc(d, fp);
  464. }
  465. if (invalid_input_char(c))
  466. error("invalid input character code %1", c);
  467. else
  468. str += c;
  469. start_of_line = (c == '\n');
  470. }
  471. if (is_list(str)) {
  // "$LIST$" asks for the accumulated reference list at this point.
  472. output_pending_line();
  473. if (accumulate)
  474. output_references();
  475. else
  476. error("found `$LIST$' but not accumulating references");
  477. }
  478. else {
  479. unsigned flags = (accumulate
  480. ? store_reference(str)
  481. : immediately_handle_reference(str));
  482. if (label_in_text) {
  // When accumulating, labels are not known yet, so output is diverted
  // to a temporary file that output_references() replays later.
  483. if (accumulate && outfp == stdout)
  484. divert_to_temporary_file();
  485. if (pending_line.length() == 0) {
  486. warning("can't attach citation to previous line");
  487. }
  488. else
  // Drop the trailing newline so the label joins the previous line.
  489. pending_line.set_length(pending_line.length() - 1);
  490. string punct;
  491. if (move_punctuation)
  492. split_punct(pending_line, punct);
  493. int have_text = pre.length() > 0 || post.length() > 0;
  // The low bits of flags carry the label type; the FORCE_* bits are
  // masked off before the conversion.
  494. label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
  495. |FORCE_RIGHT_BRACKET));
  // Append: [PRE marker] pre LABEL-marker post [POST marker] punct.
  // Bracket markers are omitted when pre/post text is present unless
  // explicitly forced.
  496. if ((flags & FORCE_LEFT_BRACKET) || !have_text)
  497. pending_line += PRE_LABEL_MARKER;
  498. pending_line += pre;
  499. char lm = LABEL_MARKER + (int)lt;
  500. pending_line += lm;
  501. pending_line += post;
  502. if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
  503. pending_line += POST_LABEL_MARKER;
  504. pending_line += punct;
  505. pending_line += '\n';
  506. }
  507. }
  508. need_syncing = 1;
  509. }
  510. else if (len >= 4
  511. && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
  512. && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
  // .lf request: queue it for output after the pending line and let
  // interpret_lf_args() process its arguments.
  513. pending_lf_lines += line;
  514. line += '\0';
  515. if (interpret_lf_args(line.contents() + 3))
  516. current_lineno--;
  517. }
  518. else if (recognize_R1_R2
  519. && len >= 4
  520. && line[0] == '.' && line[1] == 'R' && line[2] == '1'
  521. && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
  // .R1 block: collect everything up to a line starting with ".R2" into
  // `line' and pass it to process_commands().
  522. line.clear();
  523. int start_of_line = 1;
  524. int start_lineno = current_lineno;
  525. for (;;) {
  526. int c = getc(fp);
  527. if (c != EOF && start_of_line)
  528. current_lineno++;
  529. if (start_of_line && c == '.') {
  // Match ".R2" one character at a time; on a mismatch the characters
  // already consumed are put back into `line'.
  530. c = getc(fp);
  531. if (c == 'R') {
  532. c = getc(fp);
  533. if (c == '2') {
  534. c = getc(fp);
  535. if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
  536. while (c != EOF && c != '\n')
  537. c = getc(fp);
  538. break;
  539. }
  540. else {
  541. line += '.';
  542. line += 'R';
  543. line += '2';
  544. }
  545. }
  546. else {
  547. line += '.';
  548. line += 'R';
  549. }
  550. }
  551. else
  552. line += '.';
  553. }
  554. if (c == EOF) {
  555. error_with_file_and_line(current_filename, start_lineno,
  556. "missing `.R2' line");
  557. break;
  558. }
  559. if (invalid_input_char(c))
  560. error("invalid input character code %1", int(c));
  561. else {
  562. line += c;
  563. start_of_line = c == '\n';
  564. }
  565. }
  566. output_pending_line();
  567. if (accumulate)
  568. output_references();
  569. else
  570. nreferences = 0;
  571. process_commands(line, current_filename, start_lineno + 1);
  572. need_syncing = 1;
  573. }
  574. else {
  // Ordinary text: flush the previous line and hold this one back.
  575. output_pending_line();
  576. pending_line = line;
  577. }
  578. }
  579. need_syncing = 0;
  580. output_pending_line();
  581. if (fp != stdin)
  582. fclose(fp);
  583. }
  // Character-at-a-time filter that copies text to a stream, replacing the
  // PRE_LABEL_MARKER / LABEL_MARKER / POST_LABEL_MARKER control characters
  // with pre_label, the labels of successive references, and post_label.
  // Adjacent citations of the same label type are collected into a single
  // group before being written by output_citation_group().
  584. class label_processing_state {
  // What has been seen but not yet written out; see handle_pending().
  585. enum {
  586. NORMAL,
  587. PENDING_LABEL,
  588. PENDING_LABEL_POST,
  589. PENDING_LABEL_POST_PRE,
  590. PENDING_POST
  591. } state;
  592. label_type type; // type of pending labels
  593. int count; // number of pending labels
  594. reference **rptr; // pointer to next reference
  595. int rcount; // number of references left
  // Destination stream.
  596. FILE *fp;
  // Resolves the pending state against the next character; returns
  // non-zero when that character has been consumed.
  597. int handle_pending(int c);
  598. public:
  599. label_processing_state(reference **, int, FILE *);
  // Flushes anything pending and asserts that every reference was used.
  600. ~label_processing_state();
  // Feed one input character.
  601. void process(int c);
  602. };
  603. static void output_pending_line()
  604. {
  605. if (label_in_text && !accumulate && ncitations > 0) {
  606. label_processing_state state(citation, ncitations, outfp);
  607. int len = pending_line.length();
  608. for (int i = 0; i < len; i++)
  609. state.process((unsigned char)(pending_line[i]));
  610. }
  611. else
  612. put_string(pending_line, outfp);
  613. pending_line.clear();
  614. if (pending_lf_lines.length() > 0) {
  615. put_string(pending_lf_lines, outfp);
  616. pending_lf_lines.clear();
  617. }
  618. if (!accumulate)
  619. immediately_output_references();
  620. if (need_syncing) {
  621. fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
  622. need_syncing = 0;
  623. }
  624. }
  625. static void split_punct(string &line, string &punct)
  626. {
  627. const char *start = line.contents();
  628. const char *end = start + line.length();
  629. const char *ptr = start;
  630. const char *last_token_start = 0;
  631. for (;;) {
  632. if (ptr >= end)
  633. break;
  634. last_token_start = ptr;
  635. if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
  636. || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
  637. ptr++;
  638. else if (!get_token(&ptr, end))
  639. break;
  640. }
  641. if (last_token_start) {
  642. const token_info *ti = lookup_token(last_token_start, end);
  643. if (ti->is_punct()) {
  644. punct.append(last_token_start, end - last_token_start);
  645. line.set_length(last_token_start - start);
  646. }
  647. }
  648. }
  // Redirect all further output to the stream returned by xtmpfile().
  // output_references() later replays that stream to stdout, expanding
  // label markers, and restores outfp.
  649. static void divert_to_temporary_file()
  650. {
  651. outfp = xtmpfile();
  652. }
  653. static void store_citation(reference *ref)
  654. {
  655. if (ncitations >= citation_max) {
  656. if (citation == 0)
  657. citation = new reference*[citation_max = 100];
  658. else {
  659. reference **old_citation = citation;
  660. citation_max *= 2;
  661. citation = new reference *[citation_max];
  662. memcpy(citation, old_citation, ncitations*sizeof(reference *));
  663. a_delete old_citation;
  664. }
  665. }
  666. citation[ncitations++] = ref;
  667. }
  668. static unsigned store_reference(const string &str)
  669. {
  670. if (reference_hash_table == 0) {
  671. reference_hash_table = new reference *[17];
  672. hash_table_size = 17;
  673. for (int i = 0; i < hash_table_size; i++)
  674. reference_hash_table[i] = 0;
  675. }
  676. unsigned flags;
  677. reference *ref = make_reference(str, &flags);
  678. ref->compute_hash_code();
  679. unsigned h = ref->hash();
  680. reference **ptr;
  681. for (ptr = reference_hash_table + (h % hash_table_size);
  682. *ptr != 0;
  683. ((ptr == reference_hash_table)
  684. ? (ptr = reference_hash_table + hash_table_size - 1)
  685. : --ptr))
  686. if (same_reference(**ptr, *ref))
  687. break;
  688. if (*ptr != 0) {
  689. if (ref->is_merged())
  690. warning("fields ignored because reference already used");
  691. delete ref;
  692. ref = *ptr;
  693. }
  694. else {
  695. *ptr = ref;
  696. ref->set_number(nreferences);
  697. nreferences++;
  698. ref->pre_compute_label();
  699. ref->compute_sort_key();
  700. if (nreferences*2 >= hash_table_size) {
  701. // Rehash it.
  702. reference **old_table = reference_hash_table;
  703. int old_size = hash_table_size;
  704. hash_table_size = next_size(hash_table_size);
  705. reference_hash_table = new reference*[hash_table_size];
  706. int i;
  707. for (i = 0; i < hash_table_size; i++)
  708. reference_hash_table[i] = 0;
  709. for (i = 0; i < old_size; i++)
  710. if (old_table[i]) {
  711. reference **p;
  712. for (p = (reference_hash_table
  713. + (old_table[i]->hash() % hash_table_size));
  714. *p;
  715. ((p == reference_hash_table)
  716. ? (p = reference_hash_table + hash_table_size - 1)
  717. : --p))
  718. ;
  719. *p = old_table[i];
  720. }
  721. a_delete old_table;
  722. }
  723. }
  724. if (label_in_text)
  725. store_citation(ref);
  726. return flags;
  727. }
  728. unsigned immediately_handle_reference(const string &str)
  729. {
  730. unsigned flags;
  731. reference *ref = make_reference(str, &flags);
  732. ref->set_number(nreferences);
  733. if (label_in_text || label_in_reference) {
  734. ref->pre_compute_label();
  735. ref->immediate_compute_label();
  736. }
  737. nreferences++;
  738. store_citation(ref);
  739. return flags;
  740. }
  741. static void immediately_output_references()
  742. {
  743. for (int i = 0; i < ncitations; i++) {
  744. reference *ref = citation[i];
  745. if (label_in_reference) {
  746. fputs(".ds [F ", outfp);
  747. const string &label = ref->get_label(NORMAL_LABEL);
  748. if (label.length() > 0
  749. && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
  750. putc('"', outfp);
  751. put_string(label, outfp);
  752. putc('\n', outfp);
  753. }
  754. ref->output(outfp);
  755. delete ref;
  756. }
  757. ncitations = 0;
  758. }
  759. static void output_citation_group(reference **v, int n, label_type type,
  760. FILE *fp)
  761. {
  762. if (sort_adjacent_labels) {
  763. // Do an insertion sort. Usually n will be very small.
  764. for (int i = 1; i < n; i++) {
  765. int num = v[i]->get_number();
  766. reference *temp = v[i];
  767. int j;
  768. for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
  769. v[j + 1] = v[j];
  770. v[j + 1] = temp;
  771. }
  772. }
  773. // This messes up if !accumulate.
  774. if (accumulate && n > 1) {
  775. // remove duplicates
  776. int j = 1;
  777. for (int i = 1; i < n; i++)
  778. if (v[i]->get_label(type) != v[i - 1]->get_label(type))
  779. v[j++] = v[i];
  780. n = j;
  781. }
  782. string merged_label;
  783. for (int i = 0; i < n; i++) {
  784. int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
  785. if (nmerged > 0) {
  786. put_string(merged_label, fp);
  787. i += nmerged;
  788. }
  789. else
  790. put_string(v[i]->get_label(type), fp);
  791. if (i < n - 1)
  792. put_string(sep_label, fp);
  793. }
  794. }
  // Start in the NORMAL state with no labels pending.  `p' points at the
  // first of `n' references whose labels will replace successive label
  // markers; output goes to `f'.  The `type' member is left unset until
  // the first label marker is processed.
  795. label_processing_state::label_processing_state(reference **p, int n, FILE *f)
  796. : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
  797. {
  798. }
  799. label_processing_state::~label_processing_state()
  800. {
  801. int handled = handle_pending(EOF);
  802. assert(!handled);
  803. assert(rcount == 0);
  804. }
  805. int label_processing_state::handle_pending(int c)
  806. {
  807. switch (state) {
  808. case NORMAL:
  809. break;
  810. case PENDING_LABEL:
  811. if (c == POST_LABEL_MARKER) {
  812. state = PENDING_LABEL_POST;
  813. return 1;
  814. }
  815. else {
  816. output_citation_group(rptr, count, type, fp);
  817. rptr += count ;
  818. rcount -= count;
  819. state = NORMAL;
  820. }
  821. break;
  822. case PENDING_LABEL_POST:
  823. if (c == PRE_LABEL_MARKER) {
  824. state = PENDING_LABEL_POST_PRE;
  825. return 1;
  826. }
  827. else {
  828. output_citation_group(rptr, count, type, fp);
  829. rptr += count;
  830. rcount -= count;
  831. put_string(post_label, fp);
  832. state = NORMAL;
  833. }
  834. break;
  835. case PENDING_LABEL_POST_PRE:
  836. if (c >= LABEL_MARKER
  837. && c < LABEL_MARKER + N_LABEL_TYPES
  838. && c - LABEL_MARKER == type) {
  839. count += 1;
  840. state = PENDING_LABEL;
  841. return 1;
  842. }
  843. else {
  844. output_citation_group(rptr, count, type, fp);
  845. rptr += count;
  846. rcount -= count;
  847. put_string(sep_label, fp);
  848. state = NORMAL;
  849. }
  850. break;
  851. case PENDING_POST:
  852. if (c == PRE_LABEL_MARKER) {
  853. put_string(sep_label, fp);
  854. state = NORMAL;
  855. return 1;
  856. }
  857. else {
  858. put_string(post_label, fp);
  859. state = NORMAL;
  860. }
  861. break;
  862. }
  863. return 0;
  864. }
  865. void label_processing_state::process(int c)
  866. {
  867. if (handle_pending(c))
  868. return;
  869. assert(state == NORMAL);
  870. switch (c) {
  871. case PRE_LABEL_MARKER:
  872. put_string(pre_label, fp);
  873. state = NORMAL;
  874. break;
  875. case POST_LABEL_MARKER:
  876. state = PENDING_POST;
  877. break;
  878. case LABEL_MARKER:
  879. case LABEL_MARKER + 1:
  880. count = 1;
  881. state = PENDING_LABEL;
  882. type = label_type(c - LABEL_MARKER);
  883. break;
  884. default:
  885. state = NORMAL;
  886. putc(c, fp);
  887. break;
  888. }
  889. }
  890. extern "C" {
  891. int rcompare(const void *p1, const void *p2)
  892. {
  893. return compare_reference(**(reference **)p1, **(reference **)p2);
  894. }
  895. }
  896. void output_references()
  897. {
  898. assert(accumulate);
  899. if (!hash_table_size) {
  900. error("nothing to reference (probably `bibliography' before `sort')");
  901. accumulate = 0;
  902. nreferences = 0;
  903. return;
  904. }
  905. if (nreferences > 0) {
  906. int j = 0;
  907. int i;
  908. for (i = 0; i < hash_table_size; i++)
  909. if (reference_hash_table[i] != 0)
  910. reference_hash_table[j++] = reference_hash_table[i];
  911. assert(j == nreferences);
  912. for (; j < hash_table_size; j++)
  913. reference_hash_table[j] = 0;
  914. qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
  915. for (i = 0; i < nreferences; i++)
  916. reference_hash_table[i]->set_number(i);
  917. compute_labels(reference_hash_table, nreferences);
  918. }
  919. if (outfp != stdout) {
  920. rewind(outfp);
  921. {
  922. label_processing_state state(citation, ncitations, stdout);
  923. int c;
  924. while ((c = getc(outfp)) != EOF)
  925. state.process(c);
  926. }
  927. ncitations = 0;
  928. fclose(outfp);
  929. outfp = stdout;
  930. }
  931. if (nreferences > 0) {
  932. fputs(".]<\n", outfp);
  933. for (int i = 0; i < nreferences; i++) {
  934. if (sort_fields.length() > 0)
  935. reference_hash_table[i]->print_sort_key_comment(outfp);
  936. if (label_in_reference) {
  937. fputs(".ds [F ", outfp);
  938. const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
  939. if (label.length() > 0
  940. && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
  941. putc('"', outfp);
  942. put_string(label, outfp);
  943. putc('\n', outfp);
  944. }
  945. reference_hash_table[i]->output(outfp);
  946. delete reference_hash_table[i];
  947. reference_hash_table[i] = 0;
  948. }
  949. fputs(".]>\n", outfp);
  950. nreferences = 0;
  951. }
  952. clear_labels();
  953. }
  954. static reference *find_reference(const char *query, int query_len)
  955. {
  956. // This is so that error messages look better.
  957. while (query_len > 0 && csspace(query[query_len - 1]))
  958. query_len--;
  959. string str;
  960. for (int i = 0; i < query_len; i++)
  961. str += query[i] == '\n' ? ' ' : query[i];
  962. str += '\0';
  963. possibly_load_default_database();
  964. search_list_iterator iter(&database_list, str.contents());
  965. reference_id rid;
  966. const char *start;
  967. int len;
  968. if (!iter.next(&start, &len, &rid)) {
  969. error("no matches for `%1'", str.contents());
  970. return 0;
  971. }
  972. const char *end = start + len;
  973. while (start < end) {
  974. if (*start == '%')
  975. break;
  976. while (start < end && *start++ != '\n')
  977. ;
  978. }
  979. if (start >= end) {
  980. error("found a reference for `%1' but it didn't contain any fields",
  981. str.contents());
  982. return 0;
  983. }
  984. reference *result = new reference(start, end - start, &rid);
  985. if (iter.next(&start, &len, &rid))
  986. warning("multiple matches for `%1'", str.contents());
  987. return result;
  988. }
  989. static reference *make_reference(const string &str, unsigned *flagsp)
  990. {
  991. const char *start = str.contents();
  992. const char *end = start + str.length();
  993. const char *ptr = start;
  994. while (ptr < end) {
  995. if (*ptr == '%')
  996. break;
  997. while (ptr < end && *ptr++ != '\n')
  998. ;
  999. }
  1000. *flagsp = 0;
  1001. for (; start < ptr; start++) {
  1002. if (*start == '#')
  1003. *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
  1004. | FORCE_LEFT_BRACKET)));
  1005. else if (*start == '[')
  1006. *flagsp |= FORCE_LEFT_BRACKET;
  1007. else if (*start == ']')
  1008. *flagsp |= FORCE_RIGHT_BRACKET;
  1009. else if (!csspace(*start))
  1010. break;
  1011. }
  1012. if (start >= end) {
  1013. error("empty reference");
  1014. return new reference;
  1015. }
  1016. reference *database_ref = 0;
  1017. if (start < ptr)
  1018. database_ref = find_reference(start, ptr - start);
  1019. reference *inline_ref = 0;
  1020. if (ptr < end)
  1021. inline_ref = new reference(ptr, end - ptr);
  1022. if (inline_ref) {
  1023. if (database_ref) {
  1024. database_ref->merge(*inline_ref);
  1025. delete inline_ref;
  1026. return database_ref;
  1027. }
  1028. else
  1029. return inline_ref;
  1030. }
  1031. else if (database_ref)
  1032. return database_ref;
  1033. else
  1034. return new reference;
  1035. }
  1036. static void do_ref(const string &str)
  1037. {
  1038. if (accumulate)
  1039. (void)store_reference(str);
  1040. else {
  1041. (void)immediately_handle_reference(str);
  1042. immediately_output_references();
  1043. }
  1044. }
  1045. static void trim_blanks(string &str)
  1046. {
  1047. const char *start = str.contents();
  1048. const char *end = start + str.length();
  1049. while (end > start && end[-1] != '\n' && csspace(end[-1]))
  1050. --end;
  1051. str.set_length(end - start);
  1052. }
  1053. void do_bib(const char *filename)
  1054. {
  1055. FILE *fp;
  1056. if (strcmp(filename, "-") == 0)
  1057. fp = stdin;
  1058. else {
  1059. errno = 0;
  1060. fp = fopen(filename, "r");
  1061. if (fp == 0) {
  1062. error("can't open `%1': %2", filename, strerror(errno));
  1063. return;
  1064. }
  1065. current_filename = filename;
  1066. }
  1067. enum {
  1068. START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
  1069. } state = START;
  1070. string body;
  1071. for (;;) {
  1072. int c = getc(fp);
  1073. if (c == EOF)
  1074. break;
  1075. if (invalid_input_char(c)) {
  1076. error("invalid input character code %1", c);
  1077. continue;
  1078. }
  1079. switch (state) {
  1080. case START:
  1081. if (c == '%') {
  1082. body = c;
  1083. state = BODY;
  1084. }
  1085. else if (c != '\n')
  1086. state = MIDDLE;
  1087. break;
  1088. case MIDDLE:
  1089. if (c == '\n')
  1090. state = START;
  1091. break;
  1092. case BODY:
  1093. body += c;
  1094. if (c == '\n')
  1095. state = BODY_START;
  1096. break;
  1097. case BODY_START:
  1098. if (c == '\n') {
  1099. do_ref(body);
  1100. state = START;
  1101. }
  1102. else if (c == '.')
  1103. state = BODY_DOT;
  1104. else if (csspace(c)) {
  1105. state = BODY_BLANK;
  1106. body += c;
  1107. }
  1108. else {
  1109. body += c;
  1110. state = BODY;
  1111. }
  1112. break;
  1113. case BODY_BLANK:
  1114. if (c == '\n') {
  1115. trim_blanks(body);
  1116. do_ref(body);
  1117. state = START;
  1118. }
  1119. else if (csspace(c))
  1120. body += c;
  1121. else {
  1122. body += c;
  1123. state = BODY;
  1124. }
  1125. break;
  1126. case BODY_DOT:
  1127. if (c == ']') {
  1128. do_ref(body);
  1129. state = MIDDLE;
  1130. }
  1131. else {
  1132. body += '.';
  1133. body += c;
  1134. state = c == '\n' ? BODY_START : BODY;
  1135. }
  1136. break;
  1137. default:
  1138. assert(0);
  1139. }
  1140. if (c == '\n')
  1141. current_lineno++;
  1142. }
  1143. switch (state) {
  1144. case START:
  1145. case MIDDLE:
  1146. break;
  1147. case BODY:
  1148. body += '\n';
  1149. do_ref(body);
  1150. break;
  1151. case BODY_DOT:
  1152. case BODY_START:
  1153. do_ref(body);
  1154. break;
  1155. case BODY_BLANK:
  1156. trim_blanks(body);
  1157. do_ref(body);
  1158. break;
  1159. }
  1160. fclose(fp);
  1161. }
  // Hash the `len' characters starting at `s' (hashpjw, from the Dragon
  // Book): shift each character in four bits at a time and fold the top
  // four bits back into the low end whenever they become non-zero.
  unsigned hash_string(const char *s, int len)
  {
    unsigned h = 0;
    for (int i = 0; i < len; i++) {
      h = (h << 4) + s[i];
      unsigned top = h & 0xf0000000;
      if (top != 0) {
        h ^= top >> 24;
        h ^= top;
      }
    }
    return h;
  }
  // Return the first entry of a fixed table of increasing sizes that is
  // strictly greater than `n'.  Asserts if `n' is not below the largest
  // entry.
  int next_size(int n)
  {
    static const int table_sizes[] = {
      101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
      80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
      16000057, 32000011, 64000031, 128000003, 0
    };
    int idx = 0;
    while (table_sizes[idx] != 0 && table_sizes[idx] <= n)
      idx++;
    assert(table_sizes[idx] != 0);
    return table_sizes[idx];
  }