/contrib/groff/src/preproc/tbl/main.cpp

https://bitbucket.org/freebsd/freebsd-head/ · C++ · 1590 lines · 1517 code · 36 blank · 37 comment · 539 complexity · 942a81191c6abe366596efe8cb3455a5 MD5 · raw file

  1. // -*- C++ -*-
  2. /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005
  3. Free Software Foundation, Inc.
  4. Written by James Clark (jjc@jclark.com)
  5. This file is part of groff.
  6. groff is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 2, or (at your option) any later
  9. version.
  10. groff is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License along
  15. with groff; see the file COPYING. If not, write to the Free Software
  16. Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
  17. #include "table.h"
  18. #define MAX_POINT_SIZE 99
  19. #define MAX_VERTICAL_SPACING 72
  20. extern "C" const char *Version_string;
  21. int compatible_flag = 0;
  22. class table_input {
  23. FILE *fp;
  24. enum { START, MIDDLE,
  25. REREAD_T, REREAD_TE, REREAD_E,
  26. LEADER_1, LEADER_2, LEADER_3, LEADER_4,
  27. END, ERROR } state;
  28. string unget_stack;
  29. public:
  30. table_input(FILE *);
  31. int get();
  32. int ended() { return unget_stack.empty() && state == END; }
  33. void unget(char);
  34. };
  35. table_input::table_input(FILE *p)
  36. : fp(p), state(START)
  37. {
  38. }
  39. void table_input::unget(char c)
  40. {
  41. assert(c != '\0');
  42. unget_stack += c;
  43. if (c == '\n')
  44. current_lineno--;
  45. }
  46. int table_input::get()
  47. {
  48. int len = unget_stack.length();
  49. if (len != 0) {
  50. unsigned char c = unget_stack[len - 1];
  51. unget_stack.set_length(len - 1);
  52. if (c == '\n')
  53. current_lineno++;
  54. return c;
  55. }
  56. int c;
  57. for (;;) {
  58. switch (state) {
  59. case START:
  60. if ((c = getc(fp)) == '.') {
  61. if ((c = getc(fp)) == 'T') {
  62. if ((c = getc(fp)) == 'E') {
  63. if (compatible_flag) {
  64. state = END;
  65. return EOF;
  66. }
  67. else {
  68. c = getc(fp);
  69. if (c != EOF)
  70. ungetc(c, fp);
  71. if (c == EOF || c == ' ' || c == '\n') {
  72. state = END;
  73. return EOF;
  74. }
  75. state = REREAD_TE;
  76. return '.';
  77. }
  78. }
  79. else {
  80. if (c != EOF)
  81. ungetc(c, fp);
  82. state = REREAD_T;
  83. return '.';
  84. }
  85. }
  86. else {
  87. if (c != EOF)
  88. ungetc(c, fp);
  89. state = MIDDLE;
  90. return '.';
  91. }
  92. }
  93. else if (c == EOF) {
  94. state = ERROR;
  95. return EOF;
  96. }
  97. else {
  98. if (c == '\n')
  99. current_lineno++;
  100. else {
  101. state = MIDDLE;
  102. if (c == '\0') {
  103. error("invalid input character code 0");
  104. break;
  105. }
  106. }
  107. return c;
  108. }
  109. break;
  110. case MIDDLE:
  111. // handle line continuation and uninterpreted leader character
  112. if ((c = getc(fp)) == '\\') {
  113. c = getc(fp);
  114. if (c == '\n')
  115. c = getc(fp); // perhaps state ought to be START now
  116. else if (c == 'a' && compatible_flag) {
  117. state = LEADER_1;
  118. return '\\';
  119. }
  120. else {
  121. if (c != EOF)
  122. ungetc(c, fp);
  123. c = '\\';
  124. }
  125. }
  126. if (c == EOF) {
  127. state = ERROR;
  128. return EOF;
  129. }
  130. else {
  131. if (c == '\n') {
  132. state = START;
  133. current_lineno++;
  134. }
  135. else if (c == '\0') {
  136. error("invalid input character code 0");
  137. break;
  138. }
  139. return c;
  140. }
  141. case REREAD_T:
  142. state = MIDDLE;
  143. return 'T';
  144. case REREAD_TE:
  145. state = REREAD_E;
  146. return 'T';
  147. case REREAD_E:
  148. state = MIDDLE;
  149. return 'E';
  150. case LEADER_1:
  151. state = LEADER_2;
  152. return '*';
  153. case LEADER_2:
  154. state = LEADER_3;
  155. return '(';
  156. case LEADER_3:
  157. state = LEADER_4;
  158. return PREFIX_CHAR;
  159. case LEADER_4:
  160. state = MIDDLE;
  161. return LEADER_CHAR;
  162. case END:
  163. case ERROR:
  164. return EOF;
  165. }
  166. }
  167. }
  168. void process_input_file(FILE *);
  169. void process_table(table_input &in);
  170. void process_input_file(FILE *fp)
  171. {
  172. enum { START, MIDDLE, HAD_DOT, HAD_T, HAD_TS, HAD_l, HAD_lf } state;
  173. state = START;
  174. int c;
  175. while ((c = getc(fp)) != EOF)
  176. switch (state) {
  177. case START:
  178. if (c == '.')
  179. state = HAD_DOT;
  180. else {
  181. if (c == '\n')
  182. current_lineno++;
  183. else
  184. state = MIDDLE;
  185. putchar(c);
  186. }
  187. break;
  188. case MIDDLE:
  189. if (c == '\n') {
  190. current_lineno++;
  191. state = START;
  192. }
  193. putchar(c);
  194. break;
  195. case HAD_DOT:
  196. if (c == 'T')
  197. state = HAD_T;
  198. else if (c == 'l')
  199. state = HAD_l;
  200. else {
  201. putchar('.');
  202. putchar(c);
  203. if (c == '\n') {
  204. current_lineno++;
  205. state = START;
  206. }
  207. else
  208. state = MIDDLE;
  209. }
  210. break;
  211. case HAD_T:
  212. if (c == 'S')
  213. state = HAD_TS;
  214. else {
  215. putchar('.');
  216. putchar('T');
  217. putchar(c);
  218. if (c == '\n') {
  219. current_lineno++;
  220. state = START;
  221. }
  222. else
  223. state = MIDDLE;
  224. }
  225. break;
  226. case HAD_TS:
  227. if (c == ' ' || c == '\n' || compatible_flag) {
  228. putchar('.');
  229. putchar('T');
  230. putchar('S');
  231. while (c != '\n') {
  232. if (c == EOF) {
  233. error("end of file at beginning of table");
  234. return;
  235. }
  236. putchar(c);
  237. c = getc(fp);
  238. }
  239. putchar('\n');
  240. current_lineno++;
  241. {
  242. table_input input(fp);
  243. process_table(input);
  244. set_troff_location(current_filename, current_lineno);
  245. if (input.ended()) {
  246. fputs(".TE", stdout);
  247. while ((c = getc(fp)) != '\n') {
  248. if (c == EOF) {
  249. putchar('\n');
  250. return;
  251. }
  252. putchar(c);
  253. }
  254. putchar('\n');
  255. current_lineno++;
  256. }
  257. }
  258. state = START;
  259. }
  260. else {
  261. fputs(".TS", stdout);
  262. putchar(c);
  263. state = MIDDLE;
  264. }
  265. break;
  266. case HAD_l:
  267. if (c == 'f')
  268. state = HAD_lf;
  269. else {
  270. putchar('.');
  271. putchar('l');
  272. putchar(c);
  273. if (c == '\n') {
  274. current_lineno++;
  275. state = START;
  276. }
  277. else
  278. state = MIDDLE;
  279. }
  280. break;
  281. case HAD_lf:
  282. if (c == ' ' || c == '\n' || compatible_flag) {
  283. string line;
  284. while (c != EOF) {
  285. line += c;
  286. if (c == '\n') {
  287. current_lineno++;
  288. break;
  289. }
  290. c = getc(fp);
  291. }
  292. line += '\0';
  293. interpret_lf_args(line.contents());
  294. printf(".lf%s", line.contents());
  295. state = START;
  296. }
  297. else {
  298. fputs(".lf", stdout);
  299. putchar(c);
  300. state = MIDDLE;
  301. }
  302. break;
  303. default:
  304. assert(0);
  305. }
  306. switch(state) {
  307. case START:
  308. break;
  309. case MIDDLE:
  310. putchar('\n');
  311. break;
  312. case HAD_DOT:
  313. fputs(".\n", stdout);
  314. break;
  315. case HAD_l:
  316. fputs(".l\n", stdout);
  317. break;
  318. case HAD_T:
  319. fputs(".T\n", stdout);
  320. break;
  321. case HAD_lf:
  322. fputs(".lf\n", stdout);
  323. break;
  324. case HAD_TS:
  325. fputs(".TS\n", stdout);
  326. break;
  327. }
  328. if (fp != stdin)
  329. fclose(fp);
  330. }
  331. struct options {
  332. unsigned flags;
  333. int linesize;
  334. char delim[2];
  335. char tab_char;
  336. char decimal_point_char;
  337. options();
  338. };
  339. options::options()
  340. : flags(0), linesize(0), tab_char('\t'), decimal_point_char('.')
  341. {
  342. delim[0] = delim[1] = '\0';
  343. }
  344. // Return non-zero if p and q are the same ignoring case.
  345. int strieq(const char *p, const char *q)
  346. {
  347. for (; cmlower(*p) == cmlower(*q); p++, q++)
  348. if (*p == '\0')
  349. return 1;
  350. return 0;
  351. }
  352. // return 0 if we should give up in this table
  353. options *process_options(table_input &in)
  354. {
  355. options *opt = new options;
  356. string line;
  357. int level = 0;
  358. for (;;) {
  359. int c = in.get();
  360. if (c == EOF) {
  361. int i = line.length();
  362. while (--i >= 0)
  363. in.unget(line[i]);
  364. return opt;
  365. }
  366. if (c == '\n') {
  367. in.unget(c);
  368. int i = line.length();
  369. while (--i >= 0)
  370. in.unget(line[i]);
  371. return opt;
  372. }
  373. else if (c == '(')
  374. level++;
  375. else if (c == ')')
  376. level--;
  377. else if (c == ';' && level == 0) {
  378. line += '\0';
  379. break;
  380. }
  381. line += c;
  382. }
  383. if (line.empty())
  384. return opt;
  385. char *p = &line[0];
  386. for (;;) {
  387. while (!csalpha(*p) && *p != '\0')
  388. p++;
  389. if (*p == '\0')
  390. break;
  391. char *q = p;
  392. while (csalpha(*q))
  393. q++;
  394. char *arg = 0;
  395. if (*q != '(' && *q != '\0')
  396. *q++ = '\0';
  397. while (csspace(*q))
  398. q++;
  399. if (*q == '(') {
  400. *q++ = '\0';
  401. arg = q;
  402. while (*q != ')' && *q != '\0')
  403. q++;
  404. if (*q == '\0')
  405. error("missing `)'");
  406. else
  407. *q++ = '\0';
  408. }
  409. if (*p == '\0') {
  410. if (arg)
  411. error("argument without option");
  412. }
  413. else if (strieq(p, "tab")) {
  414. if (!arg)
  415. error("`tab' option requires argument in parentheses");
  416. else {
  417. if (arg[0] == '\0' || arg[1] != '\0')
  418. error("argument to `tab' option must be a single character");
  419. else
  420. opt->tab_char = arg[0];
  421. }
  422. }
  423. else if (strieq(p, "linesize")) {
  424. if (!arg)
  425. error("`linesize' option requires argument in parentheses");
  426. else {
  427. if (sscanf(arg, "%d", &opt->linesize) != 1)
  428. error("bad linesize `%s'", arg);
  429. else if (opt->linesize <= 0) {
  430. error("linesize must be positive");
  431. opt->linesize = 0;
  432. }
  433. }
  434. }
  435. else if (strieq(p, "delim")) {
  436. if (!arg)
  437. error("`delim' option requires argument in parentheses");
  438. else if (arg[0] == '\0' || arg[1] == '\0' || arg[2] != '\0')
  439. error("argument to `delim' option must be two characters");
  440. else {
  441. opt->delim[0] = arg[0];
  442. opt->delim[1] = arg[1];
  443. }
  444. }
  445. else if (strieq(p, "center") || strieq(p, "centre")) {
  446. if (arg)
  447. error("`center' option does not take an argument");
  448. opt->flags |= table::CENTER;
  449. }
  450. else if (strieq(p, "expand")) {
  451. if (arg)
  452. error("`expand' option does not take an argument");
  453. opt->flags |= table::EXPAND;
  454. }
  455. else if (strieq(p, "box") || strieq(p, "frame")) {
  456. if (arg)
  457. error("`box' option does not take an argument");
  458. opt->flags |= table::BOX;
  459. }
  460. else if (strieq(p, "doublebox") || strieq(p, "doubleframe")) {
  461. if (arg)
  462. error("`doublebox' option does not take an argument");
  463. opt->flags |= table::DOUBLEBOX;
  464. }
  465. else if (strieq(p, "allbox")) {
  466. if (arg)
  467. error("`allbox' option does not take an argument");
  468. opt->flags |= table::ALLBOX;
  469. }
  470. else if (strieq(p, "nokeep")) {
  471. if (arg)
  472. error("`nokeep' option does not take an argument");
  473. opt->flags |= table::NOKEEP;
  474. }
  475. else if (strieq(p, "nospaces")) {
  476. if (arg)
  477. error("`nospaces' option does not take an argument");
  478. opt->flags |= table::NOSPACES;
  479. }
  480. else if (strieq(p, "decimalpoint")) {
  481. if (!arg)
  482. error("`decimalpoint' option requires argument in parentheses");
  483. else {
  484. if (arg[0] == '\0' || arg[1] != '\0')
  485. error("argument to `decimalpoint' option must be a single character");
  486. else
  487. opt->decimal_point_char = arg[0];
  488. }
  489. }
  490. else {
  491. error("unrecognised global option `%1'", p);
  492. // delete opt;
  493. // return 0;
  494. }
  495. p = q;
  496. }
  497. return opt;
  498. }
  499. entry_modifier::entry_modifier()
  500. : vertical_alignment(CENTER), zero_width(0), stagger(0)
  501. {
  502. vertical_spacing.inc = vertical_spacing.val = 0;
  503. point_size.inc = point_size.val = 0;
  504. }
  505. entry_modifier::~entry_modifier()
  506. {
  507. }
  508. entry_format::entry_format() : type(FORMAT_LEFT)
  509. {
  510. }
  511. entry_format::entry_format(format_type t) : type(t)
  512. {
  513. }
  514. void entry_format::debug_print() const
  515. {
  516. switch (type) {
  517. case FORMAT_LEFT:
  518. putc('l', stderr);
  519. break;
  520. case FORMAT_CENTER:
  521. putc('c', stderr);
  522. break;
  523. case FORMAT_RIGHT:
  524. putc('r', stderr);
  525. break;
  526. case FORMAT_NUMERIC:
  527. putc('n', stderr);
  528. break;
  529. case FORMAT_ALPHABETIC:
  530. putc('a', stderr);
  531. break;
  532. case FORMAT_SPAN:
  533. putc('s', stderr);
  534. break;
  535. case FORMAT_VSPAN:
  536. putc('^', stderr);
  537. break;
  538. case FORMAT_HLINE:
  539. putc('_', stderr);
  540. break;
  541. case FORMAT_DOUBLE_HLINE:
  542. putc('=', stderr);
  543. break;
  544. default:
  545. assert(0);
  546. break;
  547. }
  548. if (point_size.val != 0) {
  549. putc('p', stderr);
  550. if (point_size.inc > 0)
  551. putc('+', stderr);
  552. else if (point_size.inc < 0)
  553. putc('-', stderr);
  554. fprintf(stderr, "%d ", point_size.val);
  555. }
  556. if (vertical_spacing.val != 0) {
  557. putc('v', stderr);
  558. if (vertical_spacing.inc > 0)
  559. putc('+', stderr);
  560. else if (vertical_spacing.inc < 0)
  561. putc('-', stderr);
  562. fprintf(stderr, "%d ", vertical_spacing.val);
  563. }
  564. if (!font.empty()) {
  565. putc('f', stderr);
  566. put_string(font, stderr);
  567. putc(' ', stderr);
  568. }
  569. if (!macro.empty()) {
  570. putc('m', stderr);
  571. put_string(macro, stderr);
  572. putc(' ', stderr);
  573. }
  574. switch (vertical_alignment) {
  575. case entry_modifier::CENTER:
  576. break;
  577. case entry_modifier::TOP:
  578. putc('t', stderr);
  579. break;
  580. case entry_modifier::BOTTOM:
  581. putc('d', stderr);
  582. break;
  583. }
  584. if (zero_width)
  585. putc('z', stderr);
  586. if (stagger)
  587. putc('u', stderr);
  588. }
  589. struct format {
  590. int nrows;
  591. int ncolumns;
  592. int *separation;
  593. string *width;
  594. char *equal;
  595. entry_format **entry;
  596. char **vline;
  597. format(int nr, int nc);
  598. ~format();
  599. void add_rows(int n);
  600. };
  601. format::format(int nr, int nc) : nrows(nr), ncolumns(nc)
  602. {
  603. int i;
  604. separation = ncolumns > 1 ? new int[ncolumns - 1] : 0;
  605. for (i = 0; i < ncolumns-1; i++)
  606. separation[i] = -1;
  607. width = new string[ncolumns];
  608. equal = new char[ncolumns];
  609. for (i = 0; i < ncolumns; i++)
  610. equal[i] = 0;
  611. entry = new entry_format *[nrows];
  612. for (i = 0; i < nrows; i++)
  613. entry[i] = new entry_format[ncolumns];
  614. vline = new char*[nrows];
  615. for (i = 0; i < nrows; i++) {
  616. vline[i] = new char[ncolumns+1];
  617. for (int j = 0; j < ncolumns+1; j++)
  618. vline[i][j] = 0;
  619. }
  620. }
  621. void format::add_rows(int n)
  622. {
  623. int i;
  624. char **old_vline = vline;
  625. vline = new char*[nrows + n];
  626. for (i = 0; i < nrows; i++)
  627. vline[i] = old_vline[i];
  628. a_delete old_vline;
  629. for (i = 0; i < n; i++) {
  630. vline[nrows + i] = new char[ncolumns + 1];
  631. for (int j = 0; j < ncolumns + 1; j++)
  632. vline[nrows + i][j] = 0;
  633. }
  634. entry_format **old_entry = entry;
  635. entry = new entry_format *[nrows + n];
  636. for (i = 0; i < nrows; i++)
  637. entry[i] = old_entry[i];
  638. a_delete old_entry;
  639. for (i = 0; i < n; i++)
  640. entry[nrows + i] = new entry_format[ncolumns];
  641. nrows += n;
  642. }
  643. format::~format()
  644. {
  645. a_delete separation;
  646. ad_delete(ncolumns) width;
  647. a_delete equal;
  648. for (int i = 0; i < nrows; i++) {
  649. a_delete vline[i];
  650. ad_delete(ncolumns) entry[i];
  651. }
  652. a_delete vline;
  653. a_delete entry;
  654. }
  655. struct input_entry_format : public entry_format {
  656. input_entry_format *next;
  657. string width;
  658. int separation;
  659. int vline;
  660. int pre_vline;
  661. int last_column;
  662. int equal;
  663. input_entry_format(format_type, input_entry_format * = 0);
  664. ~input_entry_format();
  665. void debug_print();
  666. };
  667. input_entry_format::input_entry_format(format_type t, input_entry_format *p)
  668. : entry_format(t), next(p)
  669. {
  670. separation = -1;
  671. last_column = 0;
  672. vline = 0;
  673. pre_vline = 0;
  674. equal = 0;
  675. }
  676. input_entry_format::~input_entry_format()
  677. {
  678. }
  679. void free_input_entry_format_list(input_entry_format *list)
  680. {
  681. while (list) {
  682. input_entry_format *tem = list;
  683. list = list->next;
  684. delete tem;
  685. }
  686. }
  687. void input_entry_format::debug_print()
  688. {
  689. int i;
  690. for (i = 0; i < pre_vline; i++)
  691. putc('|', stderr);
  692. entry_format::debug_print();
  693. if (!width.empty()) {
  694. putc('w', stderr);
  695. putc('(', stderr);
  696. put_string(width, stderr);
  697. putc(')', stderr);
  698. }
  699. if (equal)
  700. putc('e', stderr);
  701. if (separation >= 0)
  702. fprintf(stderr, "%d", separation);
  703. for (i = 0; i < vline; i++)
  704. putc('|', stderr);
  705. if (last_column)
  706. putc(',', stderr);
  707. }
  708. // Return zero if we should give up on this table.
  709. // If this is a continuation format line, current_format will be the current
  710. // format line.
  711. format *process_format(table_input &in, options *opt,
  712. format *current_format = 0)
  713. {
  714. input_entry_format *list = 0;
  715. int c = in.get();
  716. for (;;) {
  717. int pre_vline = 0;
  718. int got_format = 0;
  719. int got_period = 0;
  720. format_type t = FORMAT_LEFT;
  721. for (;;) {
  722. if (c == EOF) {
  723. error("end of input while processing format");
  724. free_input_entry_format_list(list);
  725. return 0;
  726. }
  727. switch (c) {
  728. case 'n':
  729. case 'N':
  730. t = FORMAT_NUMERIC;
  731. got_format = 1;
  732. break;
  733. case 'a':
  734. case 'A':
  735. got_format = 1;
  736. t = FORMAT_ALPHABETIC;
  737. break;
  738. case 'c':
  739. case 'C':
  740. got_format = 1;
  741. t = FORMAT_CENTER;
  742. break;
  743. case 'l':
  744. case 'L':
  745. got_format = 1;
  746. t = FORMAT_LEFT;
  747. break;
  748. case 'r':
  749. case 'R':
  750. got_format = 1;
  751. t = FORMAT_RIGHT;
  752. break;
  753. case 's':
  754. case 'S':
  755. got_format = 1;
  756. t = FORMAT_SPAN;
  757. break;
  758. case '^':
  759. got_format = 1;
  760. t = FORMAT_VSPAN;
  761. break;
  762. case '_':
  763. case '-': // tbl also accepts this
  764. got_format = 1;
  765. t = FORMAT_HLINE;
  766. break;
  767. case '=':
  768. got_format = 1;
  769. t = FORMAT_DOUBLE_HLINE;
  770. break;
  771. case '.':
  772. got_period = 1;
  773. break;
  774. case '|':
  775. pre_vline++;
  776. break;
  777. case ' ':
  778. case '\t':
  779. case '\n':
  780. break;
  781. default:
  782. if (c == opt->tab_char)
  783. break;
  784. error("unrecognised format `%1'", char(c));
  785. free_input_entry_format_list(list);
  786. return 0;
  787. }
  788. if (got_period)
  789. break;
  790. c = in.get();
  791. if (got_format)
  792. break;
  793. }
  794. if (got_period)
  795. break;
  796. list = new input_entry_format(t, list);
  797. if (pre_vline)
  798. list->pre_vline = pre_vline;
  799. int success = 1;
  800. do {
  801. switch (c) {
  802. case 't':
  803. case 'T':
  804. c = in.get();
  805. list->vertical_alignment = entry_modifier::TOP;
  806. break;
  807. case 'd':
  808. case 'D':
  809. c = in.get();
  810. list->vertical_alignment = entry_modifier::BOTTOM;
  811. break;
  812. case 'u':
  813. case 'U':
  814. c = in.get();
  815. list->stagger = 1;
  816. break;
  817. case 'z':
  818. case 'Z':
  819. c = in.get();
  820. list->zero_width = 1;
  821. break;
  822. case '0':
  823. case '1':
  824. case '2':
  825. case '3':
  826. case '4':
  827. case '5':
  828. case '6':
  829. case '7':
  830. case '8':
  831. case '9':
  832. {
  833. int w = 0;
  834. do {
  835. w = w*10 + (c - '0');
  836. c = in.get();
  837. } while (c != EOF && csdigit(c));
  838. list->separation = w;
  839. }
  840. break;
  841. case 'f':
  842. case 'F':
  843. do {
  844. c = in.get();
  845. } while (c == ' ' || c == '\t');
  846. if (c == EOF) {
  847. error("missing font name");
  848. break;
  849. }
  850. if (c == '(') {
  851. for (;;) {
  852. c = in.get();
  853. if (c == EOF || c == ' ' || c == '\t') {
  854. error("missing `)'");
  855. break;
  856. }
  857. if (c == ')') {
  858. c = in.get();
  859. break;
  860. }
  861. list->font += char(c);
  862. }
  863. }
  864. else {
  865. list->font = c;
  866. char cc = c;
  867. c = in.get();
  868. if (!csdigit(cc)
  869. && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
  870. list->font += char(c);
  871. c = in.get();
  872. }
  873. }
  874. break;
  875. case 'x':
  876. case 'X':
  877. do {
  878. c = in.get();
  879. } while (c == ' ' || c == '\t');
  880. if (c == EOF) {
  881. error("missing macro name");
  882. break;
  883. }
  884. if (c == '(') {
  885. for (;;) {
  886. c = in.get();
  887. if (c == EOF || c == ' ' || c == '\t') {
  888. error("missing `)'");
  889. break;
  890. }
  891. if (c == ')') {
  892. c = in.get();
  893. break;
  894. }
  895. list->macro += char(c);
  896. }
  897. }
  898. else {
  899. list->macro = c;
  900. char cc = c;
  901. c = in.get();
  902. if (!csdigit(cc)
  903. && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
  904. list->macro += char(c);
  905. c = in.get();
  906. }
  907. }
  908. break;
  909. case 'v':
  910. case 'V':
  911. c = in.get();
  912. list->vertical_spacing.val = 0;
  913. list->vertical_spacing.inc = 0;
  914. if (c == '+' || c == '-') {
  915. list->vertical_spacing.inc = (c == '+' ? 1 : -1);
  916. c = in.get();
  917. }
  918. if (c == EOF || !csdigit(c)) {
  919. error("`v' modifier must be followed by number");
  920. list->vertical_spacing.inc = 0;
  921. }
  922. else {
  923. do {
  924. list->vertical_spacing.val *= 10;
  925. list->vertical_spacing.val += c - '0';
  926. c = in.get();
  927. } while (c != EOF && csdigit(c));
  928. }
  929. if (list->vertical_spacing.val > MAX_VERTICAL_SPACING
  930. || list->vertical_spacing.val < -MAX_VERTICAL_SPACING) {
  931. error("unreasonable vertical spacing");
  932. list->vertical_spacing.val = 0;
  933. list->vertical_spacing.inc = 0;
  934. }
  935. break;
  936. case 'p':
  937. case 'P':
  938. c = in.get();
  939. list->point_size.val = 0;
  940. list->point_size.inc = 0;
  941. if (c == '+' || c == '-') {
  942. list->point_size.inc = (c == '+' ? 1 : -1);
  943. c = in.get();
  944. }
  945. if (c == EOF || !csdigit(c)) {
  946. error("`p' modifier must be followed by number");
  947. list->point_size.inc = 0;
  948. }
  949. else {
  950. do {
  951. list->point_size.val *= 10;
  952. list->point_size.val += c - '0';
  953. c = in.get();
  954. } while (c != EOF && csdigit(c));
  955. }
  956. if (list->point_size.val > MAX_POINT_SIZE
  957. || list->point_size.val < -MAX_POINT_SIZE) {
  958. error("unreasonable point size");
  959. list->point_size.val = 0;
  960. list->point_size.inc = 0;
  961. }
  962. break;
  963. case 'w':
  964. case 'W':
  965. c = in.get();
  966. while (c == ' ' || c == '\t')
  967. c = in.get();
  968. if (c == '(') {
  969. list->width = "";
  970. c = in.get();
  971. while (c != ')') {
  972. if (c == EOF || c == '\n') {
  973. error("missing `)'");
  974. free_input_entry_format_list(list);
  975. return 0;
  976. }
  977. list->width += c;
  978. c = in.get();
  979. }
  980. c = in.get();
  981. }
  982. else {
  983. if (c == '+' || c == '-') {
  984. list->width = char(c);
  985. c = in.get();
  986. }
  987. else
  988. list->width = "";
  989. if (c == EOF || !csdigit(c))
  990. error("bad argument for `w' modifier");
  991. else {
  992. do {
  993. list->width += char(c);
  994. c = in.get();
  995. } while (c != EOF && csdigit(c));
  996. }
  997. }
  998. break;
  999. case 'e':
  1000. case 'E':
  1001. c = in.get();
  1002. list->equal++;
  1003. break;
  1004. case '|':
  1005. c = in.get();
  1006. list->vline++;
  1007. break;
  1008. case 'B':
  1009. case 'b':
  1010. c = in.get();
  1011. list->font = "B";
  1012. break;
  1013. case 'I':
  1014. case 'i':
  1015. c = in.get();
  1016. list->font = "I";
  1017. break;
  1018. case ' ':
  1019. case '\t':
  1020. c = in.get();
  1021. break;
  1022. default:
  1023. if (c == opt->tab_char)
  1024. c = in.get();
  1025. else
  1026. success = 0;
  1027. break;
  1028. }
  1029. } while (success);
  1030. if (list->vline > 2) {
  1031. list->vline = 2;
  1032. error("more than 2 vertical bars between key letters");
  1033. }
  1034. if (c == '\n' || c == ',') {
  1035. c = in.get();
  1036. list->last_column = 1;
  1037. }
  1038. }
  1039. if (c == '.') {
  1040. do {
  1041. c = in.get();
  1042. } while (c == ' ' || c == '\t');
  1043. if (c != '\n') {
  1044. error("`.' not last character on line");
  1045. free_input_entry_format_list(list);
  1046. return 0;
  1047. }
  1048. }
  1049. if (!list) {
  1050. error("no format");
  1051. free_input_entry_format_list(list);
  1052. return 0;
  1053. }
  1054. list->last_column = 1;
  1055. // now reverse the list so that the first row is at the beginning
  1056. input_entry_format *rev = 0;
  1057. while (list != 0) {
  1058. input_entry_format *tem = list->next;
  1059. list->next = rev;
  1060. rev = list;
  1061. list = tem;
  1062. }
  1063. list = rev;
  1064. input_entry_format *tem;
  1065. #if 0
  1066. for (tem = list; tem; tem = tem->next)
  1067. tem->debug_print();
  1068. putc('\n', stderr);
  1069. #endif
  1070. // compute number of columns and rows
  1071. int ncolumns = 0;
  1072. int nrows = 0;
  1073. int col = 0;
  1074. for (tem = list; tem; tem = tem->next) {
  1075. if (tem->last_column) {
  1076. if (col >= ncolumns)
  1077. ncolumns = col + 1;
  1078. col = 0;
  1079. nrows++;
  1080. }
  1081. else
  1082. col++;
  1083. }
  1084. int row;
  1085. format *f;
  1086. if (current_format) {
  1087. if (ncolumns > current_format->ncolumns) {
  1088. error("cannot increase the number of columns in a continued format");
  1089. free_input_entry_format_list(list);
  1090. return 0;
  1091. }
  1092. f = current_format;
  1093. row = f->nrows;
  1094. f->add_rows(nrows);
  1095. }
  1096. else {
  1097. f = new format(nrows, ncolumns);
  1098. row = 0;
  1099. }
  1100. col = 0;
  1101. for (tem = list; tem; tem = tem->next) {
  1102. f->entry[row][col] = *tem;
  1103. if (col < ncolumns-1) {
  1104. // use the greatest separation
  1105. if (tem->separation > f->separation[col]) {
  1106. if (current_format)
  1107. error("cannot change column separation in continued format");
  1108. else
  1109. f->separation[col] = tem->separation;
  1110. }
  1111. }
  1112. else if (tem->separation >= 0)
  1113. error("column separation specified for last column");
  1114. if (tem->equal && !f->equal[col]) {
  1115. if (current_format)
  1116. error("cannot change which columns are equal in continued format");
  1117. else
  1118. f->equal[col] = 1;
  1119. }
  1120. if (!tem->width.empty()) {
  1121. // use the last width
  1122. if (!f->width[col].empty() && f->width[col] != tem->width)
  1123. error("multiple widths for column %1", col+1);
  1124. f->width[col] = tem->width;
  1125. }
  1126. if (tem->pre_vline) {
  1127. assert(col == 0);
  1128. f->vline[row][col] = tem->pre_vline;
  1129. }
  1130. f->vline[row][col+1] = tem->vline;
  1131. if (tem->last_column) {
  1132. row++;
  1133. col = 0;
  1134. }
  1135. else
  1136. col++;
  1137. }
  1138. free_input_entry_format_list(list);
  1139. for (col = 0; col < ncolumns; col++) {
  1140. entry_format *e = f->entry[f->nrows-1] + col;
  1141. if (e->type != FORMAT_HLINE
  1142. && e->type != FORMAT_DOUBLE_HLINE
  1143. && e->type != FORMAT_SPAN)
  1144. break;
  1145. }
  1146. if (col >= ncolumns) {
  1147. error("last row of format is all lines");
  1148. delete f;
  1149. return 0;
  1150. }
  1151. return f;
  1152. }
  1153. table *process_data(table_input &in, format *f, options *opt)
  1154. {
  1155. char tab_char = opt->tab_char;
  1156. int ncolumns = f->ncolumns;
  1157. int current_row = 0;
  1158. int format_index = 0;
  1159. int give_up = 0;
  1160. enum { DATA_INPUT_LINE, TROFF_INPUT_LINE, SINGLE_HLINE, DOUBLE_HLINE } type;
  1161. table *tbl = new table(ncolumns, opt->flags, opt->linesize,
  1162. opt->decimal_point_char);
  1163. if (opt->delim[0] != '\0')
  1164. tbl->set_delim(opt->delim[0], opt->delim[1]);
  1165. for (;;) {
  1166. // first determine what type of line this is
  1167. int c = in.get();
  1168. if (c == EOF)
  1169. break;
  1170. if (c == '.') {
  1171. int d = in.get();
  1172. if (d != EOF && csdigit(d)) {
  1173. in.unget(d);
  1174. type = DATA_INPUT_LINE;
  1175. }
  1176. else {
  1177. in.unget(d);
  1178. type = TROFF_INPUT_LINE;
  1179. }
  1180. }
  1181. else if (c == '_' || c == '=') {
  1182. int d = in.get();
  1183. if (d == '\n') {
  1184. if (c == '_')
  1185. type = SINGLE_HLINE;
  1186. else
  1187. type = DOUBLE_HLINE;
  1188. }
  1189. else {
  1190. in.unget(d);
  1191. type = DATA_INPUT_LINE;
  1192. }
  1193. }
  1194. else {
  1195. type = DATA_INPUT_LINE;
  1196. }
  1197. switch (type) {
  1198. case DATA_INPUT_LINE:
  1199. {
  1200. string input_entry;
  1201. if (format_index >= f->nrows)
  1202. format_index = f->nrows - 1;
  1203. // A format row that is all lines doesn't use up a data line.
  1204. while (format_index < f->nrows - 1) {
  1205. int cnt;
  1206. for (cnt = 0; cnt < ncolumns; cnt++) {
  1207. entry_format *e = f->entry[format_index] + cnt;
  1208. if (e->type != FORMAT_HLINE
  1209. && e->type != FORMAT_DOUBLE_HLINE
  1210. // Unfortunately tbl treats a span as needing data.
  1211. // && e->type != FORMAT_SPAN
  1212. )
  1213. break;
  1214. }
  1215. if (cnt < ncolumns)
  1216. break;
  1217. for (cnt = 0; cnt < ncolumns; cnt++)
  1218. tbl->add_entry(current_row, cnt, input_entry,
  1219. f->entry[format_index] + cnt, current_filename,
  1220. current_lineno);
  1221. tbl->add_vlines(current_row, f->vline[format_index]);
  1222. format_index++;
  1223. current_row++;
  1224. }
  1225. entry_format *line_format = f->entry[format_index];
  1226. int col = 0;
  1227. int row_comment = 0;
  1228. for (;;) {
  1229. if (c == tab_char || c == '\n') {
  1230. int ln = current_lineno;
  1231. if (c == '\n')
  1232. --ln;
  1233. if ((opt->flags & table::NOSPACES))
  1234. input_entry.remove_spaces();
  1235. while (col < ncolumns
  1236. && line_format[col].type == FORMAT_SPAN) {
  1237. tbl->add_entry(current_row, col, "", &line_format[col],
  1238. current_filename, ln);
  1239. col++;
  1240. }
  1241. if (c == '\n' && input_entry.length() == 2
  1242. && input_entry[0] == 'T' && input_entry[1] == '{') {
  1243. input_entry = "";
  1244. ln++;
  1245. enum {
  1246. START, MIDDLE, GOT_T, GOT_RIGHT_BRACE, GOT_DOT,
  1247. GOT_l, GOT_lf, END
  1248. } state = START;
  1249. while (state != END) {
  1250. c = in.get();
  1251. if (c == EOF)
  1252. break;
  1253. switch (state) {
  1254. case START:
  1255. if (c == 'T')
  1256. state = GOT_T;
  1257. else if (c == '.')
  1258. state = GOT_DOT;
  1259. else {
  1260. input_entry += c;
  1261. if (c != '\n')
  1262. state = MIDDLE;
  1263. }
  1264. break;
  1265. case GOT_T:
  1266. if (c == '}')
  1267. state = GOT_RIGHT_BRACE;
  1268. else {
  1269. input_entry += 'T';
  1270. input_entry += c;
  1271. state = c == '\n' ? START : MIDDLE;
  1272. }
  1273. break;
  1274. case GOT_DOT:
  1275. if (c == 'l')
  1276. state = GOT_l;
  1277. else {
  1278. input_entry += '.';
  1279. input_entry += c;
  1280. state = c == '\n' ? START : MIDDLE;
  1281. }
  1282. break;
  1283. case GOT_l:
  1284. if (c == 'f')
  1285. state = GOT_lf;
  1286. else {
  1287. input_entry += ".l";
  1288. input_entry += c;
  1289. state = c == '\n' ? START : MIDDLE;
  1290. }
  1291. break;
  1292. case GOT_lf:
  1293. if (c == ' ' || c == '\n' || compatible_flag) {
  1294. string args;
  1295. input_entry += ".lf";
  1296. while (c != EOF) {
  1297. args += c;
  1298. if (c == '\n')
  1299. break;
  1300. c = in.get();
  1301. }
  1302. args += '\0';
  1303. interpret_lf_args(args.contents());
  1304. // remove the '\0'
  1305. args.set_length(args.length() - 1);
  1306. input_entry += args;
  1307. state = START;
  1308. }
  1309. else {
  1310. input_entry += ".lf";
  1311. input_entry += c;
  1312. state = MIDDLE;
  1313. }
  1314. break;
  1315. case GOT_RIGHT_BRACE:
  1316. if ((opt->flags & table::NOSPACES)) {
  1317. while (c == ' ')
  1318. c = in.get();
  1319. if (c == EOF)
  1320. break;
  1321. }
  1322. if (c == '\n' || c == tab_char)
  1323. state = END;
  1324. else {
  1325. input_entry += 'T';
  1326. input_entry += '}';
  1327. input_entry += c;
  1328. state = MIDDLE;
  1329. }
  1330. break;
  1331. case MIDDLE:
  1332. if (c == '\n')
  1333. state = START;
  1334. input_entry += c;
  1335. break;
  1336. case END:
  1337. default:
  1338. assert(0);
  1339. }
  1340. }
  1341. if (c == EOF) {
  1342. error("end of data in middle of text block");
  1343. give_up = 1;
  1344. break;
  1345. }
  1346. }
  1347. if (col >= ncolumns) {
  1348. if (!input_entry.empty()) {
  1349. if (input_entry.length() >= 2
  1350. && input_entry[0] == '\\'
  1351. && input_entry[1] == '"')
  1352. row_comment = 1;
  1353. else if (!row_comment) {
  1354. if (c == '\n')
  1355. in.unget(c);
  1356. input_entry += '\0';
  1357. error("excess data entry `%1' discarded",
  1358. input_entry.contents());
  1359. if (c == '\n')
  1360. (void)in.get();
  1361. }
  1362. }
  1363. }
  1364. else
  1365. tbl->add_entry(current_row, col, input_entry,
  1366. &line_format[col], current_filename, ln);
  1367. col++;
  1368. if (c == '\n')
  1369. break;
  1370. input_entry = "";
  1371. }
  1372. else
  1373. input_entry += c;
  1374. c = in.get();
  1375. if (c == EOF)
  1376. break;
  1377. }
  1378. if (give_up)
  1379. break;
  1380. input_entry = "";
  1381. for (; col < ncolumns; col++)
  1382. tbl->add_entry(current_row, col, input_entry, &line_format[col],
  1383. current_filename, current_lineno - 1);
  1384. tbl->add_vlines(current_row, f->vline[format_index]);
  1385. current_row++;
  1386. format_index++;
  1387. }
  1388. break;
  1389. case TROFF_INPUT_LINE:
  1390. {
  1391. string line;
  1392. int ln = current_lineno;
  1393. for (;;) {
  1394. line += c;
  1395. if (c == '\n')
  1396. break;
  1397. c = in.get();
  1398. if (c == EOF) {
  1399. break;
  1400. }
  1401. }
  1402. tbl->add_text_line(current_row, line, current_filename, ln);
  1403. if (line.length() >= 4
  1404. && line[0] == '.' && line[1] == 'T' && line[2] == '&') {
  1405. format *newf = process_format(in, opt, f);
  1406. if (newf == 0)
  1407. give_up = 1;
  1408. else
  1409. f = newf;
  1410. }
  1411. if (line.length() >= 3
  1412. && line[0] == '.' && line[1] == 'l' && line[2] == 'f') {
  1413. line += '\0';
  1414. interpret_lf_args(line.contents() + 3);
  1415. }
  1416. }
  1417. break;
  1418. case SINGLE_HLINE:
  1419. tbl->add_single_hline(current_row);
  1420. break;
  1421. case DOUBLE_HLINE:
  1422. tbl->add_double_hline(current_row);
  1423. break;
  1424. default:
  1425. assert(0);
  1426. }
  1427. if (give_up)
  1428. break;
  1429. }
  1430. if (!give_up && current_row == 0) {
  1431. error("no real data");
  1432. give_up = 1;
  1433. }
  1434. if (give_up) {
  1435. delete tbl;
  1436. return 0;
  1437. }
  1438. // Do this here rather than at the beginning in case continued formats
  1439. // change it.
  1440. int i;
  1441. for (i = 0; i < ncolumns - 1; i++)
  1442. if (f->separation[i] >= 0)
  1443. tbl->set_column_separation(i, f->separation[i]);
  1444. for (i = 0; i < ncolumns; i++)
  1445. if (!f->width[i].empty())
  1446. tbl->set_minimum_width(i, f->width[i]);
  1447. for (i = 0; i < ncolumns; i++)
  1448. if (f->equal[i])
  1449. tbl->set_equal_column(i);
  1450. return tbl;
  1451. }
  1452. void process_table(table_input &in)
  1453. {
  1454. options *opt = 0;
  1455. format *form = 0;
  1456. table *tbl = 0;
  1457. if ((opt = process_options(in)) != 0
  1458. && (form = process_format(in, opt)) != 0
  1459. && (tbl = process_data(in, form, opt)) != 0) {
  1460. tbl->print();
  1461. delete tbl;
  1462. }
  1463. else {
  1464. error("giving up on this table");
  1465. while (in.get() != EOF)
  1466. ;
  1467. }
  1468. delete opt;
  1469. delete form;
  1470. if (!in.ended())
  1471. error("premature end of file");
  1472. }
  1473. static void usage(FILE *stream)
  1474. {
  1475. fprintf(stream, "usage: %s [ -vC ] [ files... ]\n", program_name);
  1476. }
  1477. int main(int argc, char **argv)
  1478. {
  1479. program_name = argv[0];
  1480. static char stderr_buf[BUFSIZ];
  1481. setbuf(stderr, stderr_buf);
  1482. int opt;
  1483. static const struct option long_options[] = {
  1484. { "help", no_argument, 0, CHAR_MAX + 1 },
  1485. { "version", no_argument, 0, 'v' },
  1486. { NULL, 0, 0, 0 }
  1487. };
  1488. while ((opt = getopt_long(argc, argv, "vCT:", long_options, NULL)) != EOF)
  1489. switch (opt) {
  1490. case 'C':
  1491. compatible_flag = 1;
  1492. break;
  1493. case 'v':
  1494. {
  1495. printf("GNU tbl (groff) version %s\n", Version_string);
  1496. exit(0);
  1497. break;
  1498. }
  1499. case 'T':
  1500. // I'm sick of getting bug reports from IRIX users
  1501. break;
  1502. case CHAR_MAX + 1: // --help
  1503. usage(stdout);
  1504. exit(0);
  1505. break;
  1506. case '?':
  1507. usage(stderr);
  1508. exit(1);
  1509. break;
  1510. default:
  1511. assert(0);
  1512. }
  1513. printf(".if !\\n(.g .ab GNU tbl requires GNU troff.\n"
  1514. ".if !dTS .ds TS\n"
  1515. ".if !dTE .ds TE\n");
  1516. if (argc > optind) {
  1517. for (int i = optind; i < argc; i++)
  1518. if (argv[i][0] == '-' && argv[i][1] == '\0') {
  1519. current_filename = "-";
  1520. current_lineno = 1;
  1521. printf(".lf 1 -\n");
  1522. process_input_file(stdin);
  1523. }
  1524. else {
  1525. errno = 0;
  1526. FILE *fp = fopen(argv[i], "r");
  1527. if (fp == 0)
  1528. fatal("can't open `%1': %2", argv[i], strerror(errno));
  1529. else {
  1530. current_lineno = 1;
  1531. current_filename = argv[i];
  1532. printf(".lf 1 %s\n", current_filename);
  1533. process_input_file(fp);
  1534. }
  1535. }
  1536. }
  1537. else {
  1538. current_filename = "-";
  1539. current_lineno = 1;
  1540. printf(".lf 1 -\n");
  1541. process_input_file(stdin);
  1542. }
  1543. if (ferror(stdout) || fflush(stdout) < 0)
  1544. fatal("output error");
  1545. return 0;
  1546. }