/contrib/groff/src/preproc/refer/command.cpp

https://bitbucket.org/freebsd/freebsd-head/ · C++ · 809 lines · 757 code · 21 blank · 31 comment · 85 complexity · 7c2b1970ca058f50163fbe35dda22c23 MD5 · raw file

  1. // -*- C++ -*-
  2. /* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2004
  3. Free Software Foundation, Inc.
  4. Written by James Clark (jjc@jclark.com)
  5. This file is part of groff.
  6. groff is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 2, or (at your option) any later
  9. version.
  10. groff is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License along
  15. with groff; see the file COPYING. If not, write to the Free Software
  16. Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  17. #include "refer.h"
  18. #include "refid.h"
  19. #include "search.h"
  20. #include "command.h"
  21. cset cs_field_name = csalpha;
  22. class input_item {
  23. input_item *next;
  24. char *filename;
  25. int first_lineno;
  26. string buffer;
  27. const char *ptr;
  28. const char *end;
  29. public:
  30. input_item(string &, const char *, int = 1);
  31. ~input_item();
  32. int get_char();
  33. int peek_char();
  34. void skip_char();
  35. int get_location(const char **, int *);
  36. friend class input_stack;
  37. };
  38. input_item::input_item(string &s, const char *fn, int ln)
  39. : filename(strsave(fn)), first_lineno(ln)
  40. {
  41. buffer.move(s);
  42. ptr = buffer.contents();
  43. end = ptr + buffer.length();
  44. }
  45. input_item::~input_item()
  46. {
  47. a_delete filename;
  48. }
  49. inline int input_item::peek_char()
  50. {
  51. if (ptr >= end)
  52. return EOF;
  53. else
  54. return (unsigned char)*ptr;
  55. }
  56. inline int input_item::get_char()
  57. {
  58. if (ptr >= end)
  59. return EOF;
  60. else
  61. return (unsigned char)*ptr++;
  62. }
  63. inline void input_item::skip_char()
  64. {
  65. ptr++;
  66. }
  67. int input_item::get_location(const char **filenamep, int *linenop)
  68. {
  69. *filenamep = filename;
  70. if (ptr == buffer.contents())
  71. *linenop = first_lineno;
  72. else {
  73. int ln = first_lineno;
  74. const char *e = ptr - 1;
  75. for (const char *p = buffer.contents(); p < e; p++)
  76. if (*p == '\n')
  77. ln++;
  78. *linenop = ln;
  79. }
  80. return 1;
  81. }
  82. class input_stack {
  83. static input_item *top;
  84. public:
  85. static void init();
  86. static int get_char();
  87. static int peek_char();
  88. static void skip_char() { top->skip_char(); }
  89. static void push_file(const char *);
  90. static void push_string(string &, const char *, int);
  91. static void error(const char *format,
  92. const errarg &arg1 = empty_errarg,
  93. const errarg &arg2 = empty_errarg,
  94. const errarg &arg3 = empty_errarg);
  95. };
  96. input_item *input_stack::top = 0;
  97. void input_stack::init()
  98. {
  99. while (top) {
  100. input_item *tem = top;
  101. top = top->next;
  102. delete tem;
  103. }
  104. }
  105. int input_stack::get_char()
  106. {
  107. while (top) {
  108. int c = top->get_char();
  109. if (c >= 0)
  110. return c;
  111. input_item *tem = top;
  112. top = top->next;
  113. delete tem;
  114. }
  115. return -1;
  116. }
  117. int input_stack::peek_char()
  118. {
  119. while (top) {
  120. int c = top->peek_char();
  121. if (c >= 0)
  122. return c;
  123. input_item *tem = top;
  124. top = top->next;
  125. delete tem;
  126. }
  127. return -1;
  128. }
  129. void input_stack::push_file(const char *fn)
  130. {
  131. FILE *fp;
  132. if (strcmp(fn, "-") == 0) {
  133. fp = stdin;
  134. fn = "<standard input>";
  135. }
  136. else {
  137. errno = 0;
  138. fp = fopen(fn, "r");
  139. if (fp == 0) {
  140. error("can't open `%1': %2", fn, strerror(errno));
  141. return;
  142. }
  143. }
  144. string buf;
  145. int bol = 1;
  146. int lineno = 1;
  147. for (;;) {
  148. int c = getc(fp);
  149. if (bol && c == '.') {
  150. // replace lines beginning with .R1 or .R2 with a blank line
  151. c = getc(fp);
  152. if (c == 'R') {
  153. c = getc(fp);
  154. if (c == '1' || c == '2') {
  155. int cc = c;
  156. c = getc(fp);
  157. if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
  158. while (c != '\n' && c != EOF)
  159. c = getc(fp);
  160. }
  161. else {
  162. buf += '.';
  163. buf += 'R';
  164. buf += cc;
  165. }
  166. }
  167. else {
  168. buf += '.';
  169. buf += 'R';
  170. }
  171. }
  172. else
  173. buf += '.';
  174. }
  175. if (c == EOF)
  176. break;
  177. if (invalid_input_char(c))
  178. error_with_file_and_line(fn, lineno,
  179. "invalid input character code %1", int(c));
  180. else {
  181. buf += c;
  182. if (c == '\n') {
  183. bol = 1;
  184. lineno++;
  185. }
  186. else
  187. bol = 0;
  188. }
  189. }
  190. if (fp != stdin)
  191. fclose(fp);
  192. if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
  193. buf += '\n';
  194. input_item *it = new input_item(buf, fn);
  195. it->next = top;
  196. top = it;
  197. }
  198. void input_stack::push_string(string &s, const char *filename, int lineno)
  199. {
  200. input_item *it = new input_item(s, filename, lineno);
  201. it->next = top;
  202. top = it;
  203. }
  204. void input_stack::error(const char *format, const errarg &arg1,
  205. const errarg &arg2, const errarg &arg3)
  206. {
  207. const char *filename;
  208. int lineno;
  209. for (input_item *it = top; it; it = it->next)
  210. if (it->get_location(&filename, &lineno)) {
  211. error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
  212. return;
  213. }
  214. ::error(format, arg1, arg2, arg3);
  215. }
  216. void command_error(const char *format, const errarg &arg1,
  217. const errarg &arg2, const errarg &arg3)
  218. {
  219. input_stack::error(format, arg1, arg2, arg3);
  220. }
  221. // # not recognized in ""
  222. // \<newline> is recognized in ""
  223. // # does not conceal newline
  224. // if missing closing quote, word extends to end of line
  225. // no special treatment of \ other than before newline
  226. // \<newline> not recognized after #
  227. // ; allowed as alternative to newline
  228. // ; not recognized in ""
  229. // don't clear word_buffer; just append on
  230. // return -1 for EOF, 0 for newline, 1 for word
  231. int get_word(string &word_buffer)
  232. {
  233. int c = input_stack::get_char();
  234. for (;;) {
  235. if (c == '#') {
  236. do {
  237. c = input_stack::get_char();
  238. } while (c != '\n' && c != EOF);
  239. break;
  240. }
  241. if (c == '\\' && input_stack::peek_char() == '\n')
  242. input_stack::skip_char();
  243. else if (c != ' ' && c != '\t')
  244. break;
  245. c = input_stack::get_char();
  246. }
  247. if (c == EOF)
  248. return -1;
  249. if (c == '\n' || c == ';')
  250. return 0;
  251. if (c == '"') {
  252. for (;;) {
  253. c = input_stack::peek_char();
  254. if (c == EOF || c == '\n')
  255. break;
  256. input_stack::skip_char();
  257. if (c == '"') {
  258. int d = input_stack::peek_char();
  259. if (d == '"')
  260. input_stack::skip_char();
  261. else
  262. break;
  263. }
  264. else if (c == '\\') {
  265. int d = input_stack::peek_char();
  266. if (d == '\n')
  267. input_stack::skip_char();
  268. else
  269. word_buffer += '\\';
  270. }
  271. else
  272. word_buffer += c;
  273. }
  274. return 1;
  275. }
  276. word_buffer += c;
  277. for (;;) {
  278. c = input_stack::peek_char();
  279. if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';')
  280. break;
  281. input_stack::skip_char();
  282. if (c == '\\') {
  283. int d = input_stack::peek_char();
  284. if (d == '\n')
  285. input_stack::skip_char();
  286. else
  287. word_buffer += '\\';
  288. }
  289. else
  290. word_buffer += c;
  291. }
  292. return 1;
  293. }
  // A single command argument.  Every argument starts out as a string;
  // check_args() replaces the value with an integer for arguments whose
  // type letter is `i'.
  union argument {
    const char *s;      // string form
    int n;              // integer form
  };
  298. // This is for debugging.
  299. static void echo_command(int argc, argument *argv)
  300. {
  301. for (int i = 0; i < argc; i++)
  302. fprintf(stderr, "%s\n", argv[i].s);
  303. }
  304. static void include_command(int argc, argument *argv)
  305. {
  306. assert(argc == 1);
  307. input_stack::push_file(argv[0].s);
  308. }
  309. static void capitalize_command(int argc, argument *argv)
  310. {
  311. if (argc > 0)
  312. capitalize_fields = argv[0].s;
  313. else
  314. capitalize_fields.clear();
  315. }
  316. static void accumulate_command(int, argument *)
  317. {
  318. accumulate = 1;
  319. }
  320. static void no_accumulate_command(int, argument *)
  321. {
  322. accumulate = 0;
  323. }
  324. static void move_punctuation_command(int, argument *)
  325. {
  326. move_punctuation = 1;
  327. }
  328. static void no_move_punctuation_command(int, argument *)
  329. {
  330. move_punctuation = 0;
  331. }
  332. static void sort_command(int argc, argument *argv)
  333. {
  334. if (argc == 0)
  335. sort_fields = "AD";
  336. else
  337. sort_fields = argv[0].s;
  338. accumulate = 1;
  339. }
  340. static void no_sort_command(int, argument *)
  341. {
  342. sort_fields.clear();
  343. }
  344. static void articles_command(int argc, argument *argv)
  345. {
  346. articles.clear();
  347. int i;
  348. for (i = 0; i < argc; i++) {
  349. articles += argv[i].s;
  350. articles += '\0';
  351. }
  352. int len = articles.length();
  353. for (i = 0; i < len; i++)
  354. articles[i] = cmlower(articles[i]);
  355. }
  356. static void database_command(int argc, argument *argv)
  357. {
  358. for (int i = 0; i < argc; i++)
  359. database_list.add_file(argv[i].s);
  360. }
  361. static void default_database_command(int, argument *)
  362. {
  363. search_default = 1;
  364. }
  365. static void no_default_database_command(int, argument *)
  366. {
  367. search_default = 0;
  368. }
  369. static void bibliography_command(int argc, argument *argv)
  370. {
  371. const char *saved_filename = current_filename;
  372. int saved_lineno = current_lineno;
  373. int saved_label_in_text = label_in_text;
  374. label_in_text = 0;
  375. if (!accumulate)
  376. fputs(".]<\n", stdout);
  377. for (int i = 0; i < argc; i++)
  378. do_bib(argv[i].s);
  379. if (accumulate)
  380. output_references();
  381. else
  382. fputs(".]>\n", stdout);
  383. current_filename = saved_filename;
  384. current_lineno = saved_lineno;
  385. label_in_text = saved_label_in_text;
  386. }
  387. static void annotate_command(int argc, argument *argv)
  388. {
  389. if (argc > 0)
  390. annotation_field = argv[0].s[0];
  391. else
  392. annotation_field = 'X';
  393. if (argc == 2)
  394. annotation_macro = argv[1].s;
  395. else
  396. annotation_macro = "AP";
  397. }
  398. static void no_annotate_command(int, argument *)
  399. {
  400. annotation_macro.clear();
  401. annotation_field = -1;
  402. }
  403. static void reverse_command(int, argument *argv)
  404. {
  405. reverse_fields = argv[0].s;
  406. }
  407. static void no_reverse_command(int, argument *)
  408. {
  409. reverse_fields.clear();
  410. }
  411. static void abbreviate_command(int argc, argument *argv)
  412. {
  413. abbreviate_fields = argv[0].s;
  414. period_before_initial = argc > 1 ? argv[1].s : ". ";
  415. period_before_last_name = argc > 2 ? argv[2].s : ". ";
  416. period_before_other = argc > 3 ? argv[3].s : ". ";
  417. period_before_hyphen = argc > 4 ? argv[4].s : ".";
  418. }
  419. static void no_abbreviate_command(int, argument *)
  420. {
  421. abbreviate_fields.clear();
  422. }
  423. string search_ignore_fields;
  424. static void search_ignore_command(int argc, argument *argv)
  425. {
  426. if (argc > 0)
  427. search_ignore_fields = argv[0].s;
  428. else
  429. search_ignore_fields = "XYZ";
  430. search_ignore_fields += '\0';
  431. linear_ignore_fields = search_ignore_fields.contents();
  432. }
  433. static void no_search_ignore_command(int, argument *)
  434. {
  435. linear_ignore_fields = "";
  436. }
  437. static void search_truncate_command(int argc, argument *argv)
  438. {
  439. if (argc > 0)
  440. linear_truncate_len = argv[0].n;
  441. else
  442. linear_truncate_len = 6;
  443. }
  444. static void no_search_truncate_command(int, argument *)
  445. {
  446. linear_truncate_len = -1;
  447. }
  448. static void discard_command(int argc, argument *argv)
  449. {
  450. if (argc == 0)
  451. discard_fields = "XYZ";
  452. else
  453. discard_fields = argv[0].s;
  454. accumulate = 1;
  455. }
  456. static void no_discard_command(int, argument *)
  457. {
  458. discard_fields.clear();
  459. }
  460. static void label_command(int, argument *argv)
  461. {
  462. set_label_spec(argv[0].s);
  463. }
  464. static void abbreviate_label_ranges_command(int argc, argument *argv)
  465. {
  466. abbreviate_label_ranges = 1;
  467. label_range_indicator = argc > 0 ? argv[0].s : "-";
  468. }
  469. static void no_abbreviate_label_ranges_command(int, argument *)
  470. {
  471. abbreviate_label_ranges = 0;
  472. }
  473. static void label_in_reference_command(int, argument *)
  474. {
  475. label_in_reference = 1;
  476. }
  477. static void no_label_in_reference_command(int, argument *)
  478. {
  479. label_in_reference = 0;
  480. }
  481. static void label_in_text_command(int, argument *)
  482. {
  483. label_in_text = 1;
  484. }
  485. static void no_label_in_text_command(int, argument *)
  486. {
  487. label_in_text = 0;
  488. }
  489. static void sort_adjacent_labels_command(int, argument *)
  490. {
  491. sort_adjacent_labels = 1;
  492. }
  493. static void no_sort_adjacent_labels_command(int, argument *)
  494. {
  495. sort_adjacent_labels = 0;
  496. }
  497. static void date_as_label_command(int argc, argument *argv)
  498. {
  499. if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
  500. date_as_label = 1;
  501. }
  502. static void no_date_as_label_command(int, argument *)
  503. {
  504. date_as_label = 0;
  505. }
  506. static void short_label_command(int, argument *argv)
  507. {
  508. if (set_short_label_spec(argv[0].s))
  509. short_label_flag = 1;
  510. }
  511. static void no_short_label_command(int, argument *)
  512. {
  513. short_label_flag = 0;
  514. }
  515. static void compatible_command(int, argument *)
  516. {
  517. compatible_flag = 1;
  518. }
  519. static void no_compatible_command(int, argument *)
  520. {
  521. compatible_flag = 0;
  522. }
  523. static void join_authors_command(int argc, argument *argv)
  524. {
  525. join_authors_exactly_two = argv[0].s;
  526. join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
  527. join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
  528. }
  529. static void bracket_label_command(int, argument *argv)
  530. {
  531. pre_label = argv[0].s;
  532. post_label = argv[1].s;
  533. sep_label = argv[2].s;
  534. }
  535. static void separate_label_second_parts_command(int, argument *argv)
  536. {
  537. separate_label_second_parts = argv[0].s;
  538. }
  539. static void et_al_command(int argc, argument *argv)
  540. {
  541. et_al = argv[0].s;
  542. et_al_min_elide = argv[1].n;
  543. if (et_al_min_elide < 1)
  544. et_al_min_elide = 1;
  545. et_al_min_total = argc >= 3 ? argv[2].n : 0;
  546. }
  547. static void no_et_al_command(int, argument *)
  548. {
  549. et_al.clear();
  550. et_al_min_elide = 0;
  551. }
  552. typedef void (*command_t)(int, argument *);
  553. /* arg_types is a string describing the numbers and types of arguments.
  554. s means a string, i means an integer, f is a list of fields, F is
  555. a single field,
  556. ? means that the previous argument is optional, * means that the
  557. previous argument can occur any number of times. */
  558. struct S {
  559. const char *name;
  560. command_t func;
  561. const char *arg_types;
  562. } command_table[] = {
  563. { "include", include_command, "s" },
  564. { "echo", echo_command, "s*" },
  565. { "capitalize", capitalize_command, "f?" },
  566. { "accumulate", accumulate_command, "" },
  567. { "no-accumulate", no_accumulate_command, "" },
  568. { "move-punctuation", move_punctuation_command, "" },
  569. { "no-move-punctuation", no_move_punctuation_command, "" },
  570. { "sort", sort_command, "s?" },
  571. { "no-sort", no_sort_command, "" },
  572. { "articles", articles_command, "s*" },
  573. { "database", database_command, "ss*" },
  574. { "default-database", default_database_command, "" },
  575. { "no-default-database", no_default_database_command, "" },
  576. { "bibliography", bibliography_command, "ss*" },
  577. { "annotate", annotate_command, "F?s?" },
  578. { "no-annotate", no_annotate_command, "" },
  579. { "reverse", reverse_command, "s" },
  580. { "no-reverse", no_reverse_command, "" },
  581. { "abbreviate", abbreviate_command, "ss?s?s?s?" },
  582. { "no-abbreviate", no_abbreviate_command, "" },
  583. { "search-ignore", search_ignore_command, "f?" },
  584. { "no-search-ignore", no_search_ignore_command, "" },
  585. { "search-truncate", search_truncate_command, "i?" },
  586. { "no-search-truncate", no_search_truncate_command, "" },
  587. { "discard", discard_command, "f?" },
  588. { "no-discard", no_discard_command, "" },
  589. { "label", label_command, "s" },
  590. { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
  591. { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
  592. { "label-in-reference", label_in_reference_command, "" },
  593. { "no-label-in-reference", no_label_in_reference_command, "" },
  594. { "label-in-text", label_in_text_command, "" },
  595. { "no-label-in-text", no_label_in_text_command, "" },
  596. { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
  597. { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
  598. { "date-as-label", date_as_label_command, "s?" },
  599. { "no-date-as-label", no_date_as_label_command, "" },
  600. { "short-label", short_label_command, "s" },
  601. { "no-short-label", no_short_label_command, "" },
  602. { "compatible", compatible_command, "" },
  603. { "no-compatible", no_compatible_command, "" },
  604. { "join-authors", join_authors_command, "sss?" },
  605. { "bracket-label", bracket_label_command, "sss" },
  606. { "separate-label-second-parts", separate_label_second_parts_command, "s" },
  607. { "et-al", et_al_command, "sii?" },
  608. { "no-et-al", no_et_al_command, "" },
  609. };
  610. static int check_args(const char *types, const char *name,
  611. int argc, argument *argv)
  612. {
  613. int argno = 0;
  614. while (*types) {
  615. if (argc == 0) {
  616. if (types[1] == '?')
  617. break;
  618. else if (types[1] == '*') {
  619. assert(types[2] == '\0');
  620. break;
  621. }
  622. else {
  623. input_stack::error("missing argument for command `%1'", name);
  624. return 0;
  625. }
  626. }
  627. switch (*types) {
  628. case 's':
  629. break;
  630. case 'i':
  631. {
  632. char *ptr;
  633. long n = strtol(argv->s, &ptr, 10);
  634. if ((n == 0 && ptr == argv->s)
  635. || *ptr != '\0') {
  636. input_stack::error("argument %1 for command `%2' must be an integer",
  637. argno + 1, name);
  638. return 0;
  639. }
  640. argv->n = (int)n;
  641. break;
  642. }
  643. case 'f':
  644. {
  645. for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
  646. if (!cs_field_name(*ptr)) {
  647. input_stack::error("argument %1 for command `%2' must be a list of fields",
  648. argno + 1, name);
  649. return 0;
  650. }
  651. break;
  652. }
  653. case 'F':
  654. if (argv->s[0] == '\0' || argv->s[1] != '\0'
  655. || !cs_field_name(argv->s[0])) {
  656. input_stack::error("argument %1 for command `%2' must be a field name",
  657. argno + 1, name);
  658. return 0;
  659. }
  660. break;
  661. default:
  662. assert(0);
  663. }
  664. if (types[1] == '?')
  665. types += 2;
  666. else if (types[1] != '*')
  667. types += 1;
  668. --argc;
  669. ++argv;
  670. ++argno;
  671. }
  672. if (argc > 0) {
  673. input_stack::error("too many arguments for command `%1'", name);
  674. return 0;
  675. }
  676. return 1;
  677. }
  678. static void execute_command(const char *name, int argc, argument *argv)
  679. {
  680. for (unsigned int i = 0;
  681. i < sizeof(command_table)/sizeof(command_table[0]); i++)
  682. if (strcmp(name, command_table[i].name) == 0) {
  683. if (check_args(command_table[i].arg_types, name, argc, argv))
  684. (*command_table[i].func)(argc, argv);
  685. return;
  686. }
  687. input_stack::error("unknown command `%1'", name);
  688. }
  689. static void command_loop()
  690. {
  691. string command;
  692. for (;;) {
  693. command.clear();
  694. int res = get_word(command);
  695. if (res != 1) {
  696. if (res == 0)
  697. continue;
  698. break;
  699. }
  700. int argc = 0;
  701. command += '\0';
  702. while ((res = get_word(command)) == 1) {
  703. argc++;
  704. command += '\0';
  705. }
  706. argument *argv = new argument[argc];
  707. const char *ptr = command.contents();
  708. for (int i = 0; i < argc; i++)
  709. argv[i].s = ptr = strchr(ptr, '\0') + 1;
  710. execute_command(command.contents(), argc, argv);
  711. a_delete argv;
  712. if (res == -1)
  713. break;
  714. }
  715. }
  716. void process_commands(const char *file)
  717. {
  718. input_stack::init();
  719. input_stack::push_file(file);
  720. command_loop();
  721. }
  722. void process_commands(string &s, const char *file, int lineno)
  723. {
  724. input_stack::init();
  725. input_stack::push_string(s, file, lineno);
  726. command_loop();
  727. }