PageRenderTime 61ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/commands/sed/compile.c

http://github.com/vivekp/minix-nbsd
C | 859 lines | 649 code | 50 blank | 160 comment | 228 complexity | a84283939ab51a156321b30c9cd28f50 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause
  1. /* $NetBSD: compile.c,v 1.35 2007/04/17 20:30:29 christos Exp $ */
  2. /*-
  3. * Copyright (c) 1992, 1993
  4. * The Regents of the University of California. All rights reserved.
  5. *
  6. * This code is derived from software contributed to Berkeley by
  7. * Diomidis Spinellis of Imperial College, University of London.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. * 3. Neither the name of the University nor the names of its contributors
  18. * may be used to endorse or promote products derived from this software
  19. * without specific prior written permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31. * SUCH DAMAGE.
  32. */
  33. /*-
  34. * Copyright (c) 1992 Diomidis Spinellis.
  35. *
  36. * This code is derived from software contributed to Berkeley by
  37. * Diomidis Spinellis of Imperial College, University of London.
  38. *
  39. * Redistribution and use in source and binary forms, with or without
  40. * modification, are permitted provided that the following conditions
  41. * are met:
  42. * 1. Redistributions of source code must retain the above copyright
  43. * notice, this list of conditions and the following disclaimer.
  44. * 2. Redistributions in binary form must reproduce the above copyright
  45. * notice, this list of conditions and the following disclaimer in the
  46. * documentation and/or other materials provided with the distribution.
  47. * 3. All advertising materials mentioning features or use of this software
  48. * must display the following acknowledgement:
  49. * This product includes software developed by the University of
  50. * California, Berkeley and its contributors.
  51. * 4. Neither the name of the University nor the names of its contributors
  52. * may be used to endorse or promote products derived from this software
  53. * without specific prior written permission.
  54. *
  55. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  56. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  57. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  58. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  59. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  60. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  61. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  62. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  63. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  64. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  65. * SUCH DAMAGE.
  66. */
  67. #if HAVE_NBTOOL_CONFIG_H
  68. #include "nbtool_config.h"
  69. #endif
  70. #include <sys/cdefs.h>
  71. #include <sys/types.h>
  72. #include <sys/stat.h>
  73. #include <ctype.h>
  74. #include <errno.h>
  75. #include <fcntl.h>
  76. #include <limits.h>
  77. #include <regex.h>
  78. #include <stdio.h>
  79. #include <stdlib.h>
  80. #include <string.h>
  81. #include "defs.h"
  82. #include "extern.h"
  83. #ifndef _POSIX2_LINE_MAX
  84. #define _POSIX2_LINE_MAX (2 * BUFSIZ)
  85. #endif
  /*
   * Label hash table.
   *
   * Labels defined by ':' commands are kept in a chained hash table so that
   * branch targets can be looked up by name.  LHSZ is the number of buckets
   * and LHMASK reduces a hash value to a bucket index.
   */
  #define LHSZ 128
  #define LHMASK (LHSZ - 1)

  struct labhash {
      struct labhash *lh_next;    /* next entry chained in the same bucket */
      unsigned int lh_hash;       /* unreduced hash of the label name */
      struct s_command *lh_cmd;   /* command that defines the label */
      int lh_ref;                 /* non-zero once the label was looked up */
  };

  static struct labhash *labels[LHSZ];
  /* Forward declarations of the file-local helpers defined below. */
  static char *compile_addr(char *p, struct s_addr *a);
  static char *compile_ccl(char **sp, char *t);
  static char *compile_delimited(char *p, char *d);
  static char *compile_flags(char *p, struct s_subst *s);
  static char *compile_re(char *p, regex_t **repp);
  static char *compile_subst(char *p, struct s_subst *s);
  static char *compile_text(void);
  static char *compile_tr(char *p, char **transtab);
  static struct s_command **compile_stream(struct s_command **link);
  static char *duptoeol(char *s, char *ctype);
  static void enterlabel(struct s_command *cp);
  static struct s_command *findlabel(char *name);
  static void fixuplabel(struct s_command *cp, struct s_command *end);
  static void uselabel(void);
  110. /*
  111. * Command specification. This is used to drive the command parser.
  112. */
  113. struct s_format {
  114. char code; /* Command code */
  115. int naddr; /* Number of address args */
  116. enum e_args args; /* Argument type */
  117. };
  118. static struct s_format cmd_fmts[] = {
  119. {'{', 2, GROUP},
  120. {'}', 0, ENDGROUP},
  121. {'a', 1, TEXT},
  122. {'b', 2, BRANCH},
  123. {'c', 2, TEXT},
  124. {'d', 2, EMPTY},
  125. {'D', 2, EMPTY},
  126. {'g', 2, EMPTY},
  127. {'G', 2, EMPTY},
  128. {'h', 2, EMPTY},
  129. {'H', 2, EMPTY},
  130. {'i', 1, TEXT},
  131. {'l', 2, EMPTY},
  132. {'n', 2, EMPTY},
  133. {'N', 2, EMPTY},
  134. {'p', 2, EMPTY},
  135. {'P', 2, EMPTY},
  136. {'q', 1, EMPTY},
  137. {'r', 1, RFILE},
  138. {'s', 2, SUBST},
  139. {'t', 2, BRANCH},
  140. {'w', 2, WFILE},
  141. {'x', 2, EMPTY},
  142. {'y', 2, TR},
  143. {'!', 2, NONSEL},
  144. {':', 0, LABEL},
  145. {'#', 0, COMMENT},
  146. {'=', 1, EMPTY},
  147. {'\0', 0, COMMENT},
  148. };
  149. /* The compiled program. */
  150. struct s_command *prog;
  151. /*
  152. * Compile the program into prog.
  153. * Initialise appends.
  154. */
  155. void
  156. compile(void)
  157. {
  158. *compile_stream(&prog) = NULL;
  159. fixuplabel(prog, NULL);
  160. uselabel();
  161. if (appendnum > 0)
  162. appends = xmalloc(sizeof(struct s_appends) * appendnum);
  163. match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
  164. }
  /*
   * EATSPACE --
   *	Advance the pointer named `p' in the calling scope past any run of
   *	ASCII whitespace; it stops at the first other character or at the
   *	terminating NUL.
   *
   *	The definition must not end in a line continuation: a trailing
   *	backslash would splice whatever source line follows into the macro.
   *	The do/while wrapper makes each use a single statement, so the macro
   *	is safe as the body of an unbraced if/else.
   */
  #define EATSPACE()                                              \
      do {                                                        \
          while (*p && isascii((unsigned char)*p) &&              \
              isspace((unsigned char)*p))                         \
              p++;                                                \
      } while (0)
  169. static struct s_command **
  170. compile_stream(struct s_command **link)
  171. {
  172. char *p;
  173. static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
  174. struct s_command *cmd, *cmd2, *stack;
  175. struct s_format *fp;
  176. int naddr; /* Number of addresses */
  177. stack = 0;
  178. for (;;) {
  179. if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
  180. if (stack != 0)
  181. err(COMPILE, "unexpected EOF (pending }'s)");
  182. return (link);
  183. }
  184. semicolon: EATSPACE();
  185. if (*p == '#' || *p == '\0')
  186. continue;
  187. else if (*p == ';') {
  188. p++;
  189. goto semicolon;
  190. }
  191. *link = cmd = xmalloc(sizeof(struct s_command));
  192. link = &cmd->next;
  193. cmd->nonsel = cmd->inrange = 0;
  194. /* First parse the addresses */
  195. naddr = 0;
  196. /* Valid characters to start an address */
  197. #define addrchar(c) (strchr("0123456789/\\$", (c)))
  198. if (addrchar(*p)) {
  199. naddr++;
  200. cmd->a1 = xmalloc(sizeof(struct s_addr));
  201. p = compile_addr(p, cmd->a1);
  202. EATSPACE(); /* EXTENSION */
  203. if (*p == ',') {
  204. p++;
  205. EATSPACE(); /* EXTENSION */
  206. naddr++;
  207. cmd->a2 = xmalloc(sizeof(struct s_addr));
  208. p = compile_addr(p, cmd->a2);
  209. EATSPACE();
  210. } else
  211. cmd->a2 = 0;
  212. } else
  213. cmd->a1 = cmd->a2 = 0;
  214. nonsel: /* Now parse the command */
  215. if (!*p)
  216. err(COMPILE, "command expected");
  217. cmd->code = *p;
  218. for (fp = cmd_fmts; fp->code; fp++)
  219. if (fp->code == *p)
  220. break;
  221. if (!fp->code)
  222. err(COMPILE, "invalid command code %c", *p);
  223. if (naddr > fp->naddr)
  224. err(COMPILE,
  225. "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
  226. switch (fp->args) {
  227. case NONSEL: /* ! */
  228. p++;
  229. EATSPACE();
  230. cmd->nonsel = ! cmd->nonsel;
  231. goto nonsel;
  232. case GROUP: /* { */
  233. p++;
  234. EATSPACE();
  235. cmd->next = stack;
  236. stack = cmd;
  237. link = &cmd->u.c;
  238. if (*p)
  239. goto semicolon;
  240. break;
  241. case ENDGROUP:
  242. /*
  243. * Short-circuit command processing, since end of
  244. * group is really just a noop.
  245. */
  246. cmd->nonsel = 1;
  247. if (stack == 0)
  248. err(COMPILE, "unexpected }");
  249. cmd2 = stack;
  250. stack = cmd2->next;
  251. cmd2->next = cmd;
  252. /*FALLTHROUGH*/
  253. case EMPTY: /* d D g G h H l n N p P q x = \0 */
  254. p++;
  255. EATSPACE();
  256. if (*p == ';') {
  257. p++;
  258. link = &cmd->next;
  259. goto semicolon;
  260. }
  261. if (*p)
  262. err(COMPILE,
  263. "extra characters at the end of %c command", cmd->code);
  264. break;
  265. case TEXT: /* a c i */
  266. p++;
  267. EATSPACE();
  268. if (*p != '\\')
  269. err(COMPILE,
  270. "command %c expects \\ followed by text", cmd->code);
  271. p++;
  272. EATSPACE();
  273. if (*p)
  274. err(COMPILE,
  275. "extra characters after \\ at the end of %c command", cmd->code);
  276. cmd->t = compile_text();
  277. break;
  278. case COMMENT: /* \0 # */
  279. break;
  280. case WFILE: /* w */
  281. p++;
  282. EATSPACE();
  283. if (*p == '\0')
  284. err(COMPILE, "filename expected");
  285. cmd->t = duptoeol(p, "w command");
  286. if (aflag)
  287. cmd->u.fd = -1;
  288. else if ((cmd->u.fd = open(p,
  289. O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
  290. DEFFILEMODE)) == -1)
  291. err(FATAL, "%s: %s", p, strerror(errno));
  292. break;
  293. case RFILE: /* r */
  294. p++;
  295. EATSPACE();
  296. if (*p == '\0')
  297. err(COMPILE, "filename expected");
  298. else
  299. cmd->t = duptoeol(p, "read command");
  300. break;
  301. case BRANCH: /* b t */
  302. p++;
  303. EATSPACE();
  304. if (*p == '\0')
  305. cmd->t = NULL;
  306. else
  307. cmd->t = duptoeol(p, "branch");
  308. break;
  309. case LABEL: /* : */
  310. p++;
  311. EATSPACE();
  312. cmd->t = duptoeol(p, "label");
  313. if (strlen(p) == 0)
  314. err(COMPILE, "empty label");
  315. enterlabel(cmd);
  316. break;
  317. case SUBST: /* s */
  318. p++;
  319. if (*p == '\0' || *p == '\\')
  320. err(COMPILE,
  321. "substitute pattern can not be delimited by newline or backslash");
  322. cmd->u.s = xmalloc(sizeof(struct s_subst));
  323. p = compile_re(p, &cmd->u.s->re);
  324. if (p == NULL)
  325. err(COMPILE, "unterminated substitute pattern");
  326. --p;
  327. p = compile_subst(p, cmd->u.s);
  328. p = compile_flags(p, cmd->u.s);
  329. EATSPACE();
  330. if (*p == ';') {
  331. p++;
  332. link = &cmd->next;
  333. goto semicolon;
  334. }
  335. break;
  336. case TR: /* y */
  337. p++;
  338. p = compile_tr(p, (char **)(void *)&cmd->u.y);
  339. EATSPACE();
  340. if (*p == ';') {
  341. p++;
  342. link = &cmd->next;
  343. goto semicolon;
  344. }
  345. if (*p)
  346. err(COMPILE,
  347. "extra text at the end of a transform command");
  348. break;
  349. }
  350. }
  351. }
  352. /*
  353. * Get a delimited string. P points to the delimiter of the string; d points
  354. * to a buffer area. Newline and delimiter escapes are processed; other
  355. * escapes are ignored.
  356. *
  357. * Returns a pointer to the first character after the final delimiter or NULL
  358. * in the case of a non-terminated string. The character array d is filled
  359. * with the processed string.
  360. */
  361. static char *
  362. compile_delimited(char *p, char *d)
  363. {
  364. char c;
  365. c = *p++;
  366. if (c == '\0')
  367. return (NULL);
  368. else if (c == '\\')
  369. err(COMPILE, "\\ can not be used as a string delimiter");
  370. else if (c == '\n')
  371. err(COMPILE, "newline can not be used as a string delimiter");
  372. while (*p) {
  373. if (*p == '[') {
  374. if ((d = compile_ccl(&p, d)) == NULL)
  375. err(COMPILE, "unbalanced brackets ([])");
  376. continue;
  377. } else if (*p == '\\' && p[1] == '[') {
  378. *d++ = *p++;
  379. } else if (*p == '\\' && p[1] == c)
  380. p++;
  381. else if (*p == '\\' && p[1] == 'n') {
  382. *d++ = '\n';
  383. p += 2;
  384. continue;
  385. } else if (*p == '\\' && p[1] == 't') {
  386. *d++ = '\t';
  387. p += 2;
  388. continue;
  389. } else if (*p == '\\' && p[1] == '\\')
  390. *d++ = *p++;
  391. else if (*p == c) {
  392. *d = '\0';
  393. return (p + 1);
  394. }
  395. *d++ = *p++;
  396. }
  397. return (NULL);
  398. }
  /*
   * compile_ccl --
   *	Copy one bracket expression from *sp to t.  *sp must point at the
   *	opening '['.  A leading '^' and a ']' in first position are copied
   *	literally.  "[.", "[:" and "[=" sub-expressions are copied through
   *	to their matching ".]", ":]" or "=]"; elsewhere "\n" is replaced by
   *	a newline character.
   *
   *	On success *sp is advanced past the closing ']' and the position in
   *	t just after the copied ']' is returned (t is not NUL-terminated).
   *	Returns NULL, leaving *sp untouched, if the expression never closes.
   */
  static char *
  compile_ccl(char **sp, char *t)
  {
      char *src = *sp;
      int prev, kind;

      *t++ = *src++;                      /* the opening '[' */
      if (*src == '^')
          *t++ = *src++;
      if (*src == ']')
          *t++ = *src++;

      while (*src != '\0') {
          *t = *src;
          if (*src == ']')
              break;

          if (*src == '[' &&
              (src[1] == '.' || src[1] == ':' || src[1] == '=')) {
              /* Copy the opener, then scan for <kind> followed by ']'. */
              kind = src[1];
              t[1] = src[1];
              src += 2;
              t += 2;
              prev = *src;
              for (;;) {
                  *t = *src;
                  if (*src == ']' && prev == kind)
                      break;
                  prev = *src;
                  if (prev == '\0')
                      return (NULL);
                  src++;
                  t++;
              }
          } else if (*src == '\\' && src[1] == 'n') {
              *t = '\n';
              src++;
          }
          src++;
          t++;
      }

      if (*src != ']')
          return (NULL);
      *sp = src + 1;
      return (t + 1);
  }
  420. /*
  421. * Get a regular expression. P points to the delimiter of the regular
  422. * expression; repp points to the address of a regexp pointer. Newline
  423. * and delimiter escapes are processed; other escapes are ignored.
  424. * Returns a pointer to the first character after the final delimiter
  425. * or NULL in the case of a non terminated regular expression. The regexp
  426. * pointer is set to the compiled regular expression.
  427. * Cflags are passed to regcomp.
  428. */
  429. static char *
  430. compile_re(char *p, regex_t **repp)
  431. {
  432. int eval;
  433. char re[_POSIX2_LINE_MAX + 1];
  434. p = compile_delimited(p, re);
  435. if (p && strlen(re) == 0) {
  436. *repp = NULL;
  437. return (p);
  438. }
  439. *repp = xmalloc(sizeof(regex_t));
  440. if (p && (eval = regcomp(*repp, re, ere)) != 0)
  441. err(COMPILE, "RE error: %s", strregerror(eval, *repp));
  442. if (maxnsub < (*repp)->re_nsub)
  443. maxnsub = (*repp)->re_nsub;
  444. return (p);
  445. }
  446. /*
  447. * Compile the substitution string of a regular expression and set res to
  448. * point to a saved copy of it. Nsub is the number of parenthesized regular
  449. * expressions.
  450. */
  451. static char *
  452. compile_subst(char *p, struct s_subst *s)
  453. {
  454. static char lbuf[_POSIX2_LINE_MAX + 1];
  455. int asize, ref, size;
  456. char c, *text, *op, *sp;
  457. int sawesc = 0;
  458. c = *p++; /* Terminator character */
  459. if (c == '\0')
  460. return (NULL);
  461. s->maxbref = 0;
  462. s->linenum = linenum;
  463. asize = 2 * _POSIX2_LINE_MAX + 1;
  464. text = xmalloc(asize);
  465. size = 0;
  466. do {
  467. op = sp = text + size;
  468. for (; *p; p++) {
  469. if (*p == '\\' || sawesc) {
  470. /*
  471. * If this is a continuation from the last
  472. * buffer, we won't have a character to
  473. * skip over.
  474. */
  475. if (sawesc)
  476. sawesc = 0;
  477. else
  478. p++;
  479. if (*p == '\0') {
  480. /*
  481. * This escaped character is continued
  482. * in the next part of the line. Note
  483. * this fact, then cause the loop to
  484. * exit w/ normal EOL case and reenter
  485. * above with the new buffer.
  486. */
  487. sawesc = 1;
  488. p--;
  489. continue;
  490. } else if (strchr("123456789", *p) != NULL) {
  491. *sp++ = '\\';
  492. ref = *p - '0';
  493. if (s->re != NULL &&
  494. ref > s->re->re_nsub)
  495. err(COMPILE,
  496. "\\%c not defined in the RE", *p);
  497. if (s->maxbref < ref)
  498. s->maxbref = ref;
  499. } else if (*p == '&' || *p == '\\')
  500. *sp++ = '\\';
  501. } else if (*p == c) {
  502. p++;
  503. *sp++ = '\0';
  504. size += sp - op;
  505. s->new = xrealloc(text, size);
  506. return (p);
  507. } else if (*p == '\n') {
  508. err(COMPILE,
  509. "unescaped newline inside substitute pattern");
  510. /* NOTREACHED */
  511. }
  512. *sp++ = *p;
  513. }
  514. size += sp - op;
  515. if (asize - size < _POSIX2_LINE_MAX + 1) {
  516. asize *= 2;
  517. text = xrealloc(text, asize);
  518. }
  519. } while (cu_fgets(p = lbuf, sizeof(lbuf)));
  520. err(COMPILE, "unterminated substitute in regular expression");
  521. /* NOTREACHED */
  522. return (NULL);
  523. }
  524. /*
  525. * Compile the flags of the s command
  526. */
  527. static char *
  528. compile_flags(char *p, struct s_subst *s)
  529. {
  530. int gn; /* True if we have seen g or n */
  531. char wfile[_POSIX2_LINE_MAX + 1], *q;
  532. s->n = 1; /* Default */
  533. s->p = 0;
  534. s->wfile = NULL;
  535. s->wfd = -1;
  536. for (gn = 0;;) {
  537. EATSPACE(); /* EXTENSION */
  538. switch (*p) {
  539. case 'g':
  540. if (gn)
  541. err(COMPILE,
  542. "more than one number or 'g' in substitute flags");
  543. gn = 1;
  544. s->n = 0;
  545. break;
  546. case '\0':
  547. case '\n':
  548. case ';':
  549. return (p);
  550. case 'p':
  551. s->p = 1;
  552. break;
  553. case '1': case '2': case '3':
  554. case '4': case '5': case '6':
  555. case '7': case '8': case '9':
  556. if (gn)
  557. err(COMPILE,
  558. "more than one number or 'g' in substitute flags");
  559. gn = 1;
  560. /* XXX Check for overflow */
  561. s->n = (int)strtol(p, &p, 10);
  562. p--;
  563. break;
  564. case 'w':
  565. p++;
  566. #ifdef HISTORIC_PRACTICE
  567. if (*p != ' ') {
  568. err(WARNING, "space missing before w wfile");
  569. return (p);
  570. }
  571. #endif
  572. EATSPACE();
  573. q = wfile;
  574. while (*p) {
  575. if (*p == '\n')
  576. break;
  577. *q++ = *p++;
  578. }
  579. *q = '\0';
  580. if (q == wfile)
  581. err(COMPILE, "no wfile specified");
  582. s->wfile = strdup(wfile);
  583. if (!aflag && (s->wfd = open(wfile,
  584. O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
  585. DEFFILEMODE)) == -1)
  586. err(FATAL, "%s: %s", wfile, strerror(errno));
  587. return (p);
  588. default:
  589. err(COMPILE,
  590. "bad flag in substitute command: '%c'", *p);
  591. break;
  592. }
  593. p++;
  594. }
  595. }
  596. /*
  597. * Compile a translation set of strings into a lookup table.
  598. */
  599. static char *
  600. compile_tr(char *p, char **transtab)
  601. {
  602. int i;
  603. char *lt, *op, *np;
  604. char old[_POSIX2_LINE_MAX + 1];
  605. char new[_POSIX2_LINE_MAX + 1];
  606. if (*p == '\0' || *p == '\\')
  607. err(COMPILE,
  608. "transform pattern can not be delimited by newline or backslash");
  609. p = compile_delimited(p, old);
  610. if (p == NULL) {
  611. err(COMPILE, "unterminated transform source string");
  612. return (NULL);
  613. }
  614. p = compile_delimited(--p, new);
  615. if (p == NULL) {
  616. err(COMPILE, "unterminated transform target string");
  617. return (NULL);
  618. }
  619. EATSPACE();
  620. if (strlen(new) != strlen(old)) {
  621. err(COMPILE, "transform strings are not the same length");
  622. return (NULL);
  623. }
  624. /* We assume characters are 8 bits */
  625. lt = xmalloc(UCHAR_MAX+1);
  626. for (i = 0; i <= UCHAR_MAX; i++)
  627. lt[i] = (char)i;
  628. for (op = old, np = new; *op; op++, np++)
  629. lt[(u_char)*op] = *np;
  630. *transtab = lt;
  631. return (p);
  632. }
  633. /*
  634. * Compile the text following an a, c, or i command.
  635. */
  636. static char *
  637. compile_text(void)
  638. {
  639. int asize, size;
  640. char *text, *p, *op, *s;
  641. char lbuf[_POSIX2_LINE_MAX + 1];
  642. asize = 2 * _POSIX2_LINE_MAX + 1;
  643. text = xmalloc(asize);
  644. size = 0;
  645. while (cu_fgets(lbuf, sizeof(lbuf))) {
  646. op = s = text + size;
  647. p = lbuf;
  648. for (; *p; p++) {
  649. if (*p == '\\')
  650. p++;
  651. *s++ = *p;
  652. }
  653. size += s - op;
  654. if (p[-2] != '\\') {
  655. *s = '\0';
  656. break;
  657. }
  658. if (asize - size < _POSIX2_LINE_MAX + 1) {
  659. asize *= 2;
  660. text = xrealloc(text, asize);
  661. }
  662. }
  663. return (xrealloc(text, size + 1));
  664. }
  665. /*
  666. * Get an address and return a pointer to the first character after
  667. * it. Fill the structure pointed to according to the address.
  668. */
  669. static char *
  670. compile_addr(char *p, struct s_addr *a)
  671. {
  672. char *end;
  673. switch (*p) {
  674. case '\\': /* Context address */
  675. ++p;
  676. /* FALLTHROUGH */
  677. case '/': /* Context address */
  678. p = compile_re(p, &a->u.r);
  679. if (p == NULL)
  680. err(COMPILE, "unterminated regular expression");
  681. a->type = AT_RE;
  682. return (p);
  683. case '$': /* Last line */
  684. a->type = AT_LAST;
  685. return (p + 1);
  686. /* Line number */
  687. case '0': case '1': case '2': case '3': case '4':
  688. case '5': case '6': case '7': case '8': case '9':
  689. a->type = AT_LINE;
  690. a->u.l = strtol(p, &end, 10);
  691. return (end);
  692. default:
  693. err(COMPILE, "expected context address");
  694. return (NULL);
  695. }
  696. }
  697. /*
  698. * duptoeol --
  699. * Return a copy of all the characters up to \n or \0.
  700. */
  701. static char *
  702. duptoeol(char *s, char *ctype)
  703. {
  704. size_t len;
  705. int ws;
  706. char *start;
  707. ws = 0;
  708. for (start = s; *s != '\0' && *s != '\n'; ++s)
  709. ws = isspace((unsigned char)*s);
  710. *s = '\0';
  711. if (ws)
  712. err(WARNING, "whitespace after %s", ctype);
  713. len = s - start + 1;
  714. return (memmove(xmalloc(len), start, len));
  715. }
  716. /*
  717. * Convert goto label names to addresses, and count a and r commands, in
  718. * the given subset of the script. Free the memory used by labels in b
  719. * and t commands (but not by :).
  720. *
  721. * TODO: Remove } nodes
  722. */
  723. static void
  724. fixuplabel(struct s_command *cp, struct s_command *end)
  725. {
  726. for (; cp != end; cp = cp->next)
  727. switch (cp->code) {
  728. case 'a':
  729. case 'r':
  730. appendnum++;
  731. break;
  732. case 'b':
  733. case 't':
  734. /* Resolve branch target. */
  735. if (cp->t == NULL) {
  736. cp->u.c = NULL;
  737. break;
  738. }
  739. if ((cp->u.c = findlabel(cp->t)) == NULL)
  740. err(COMPILE2, "undefined label '%s'", cp->t);
  741. free(cp->t);
  742. break;
  743. case '{':
  744. /* Do interior commands. */
  745. fixuplabel(cp->u.c, cp->next);
  746. break;
  747. }
  748. }
  749. /*
  750. * Associate the given command label for later lookup.
  751. */
  752. static void
  753. enterlabel(struct s_command *cp)
  754. {
  755. struct labhash **lhp, *lh;
  756. u_char *p;
  757. u_int h, c;
  758. for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
  759. h = (h << 5) + h + c;
  760. lhp = &labels[h & LHMASK];
  761. for (lh = *lhp; lh != NULL; lh = lh->lh_next)
  762. if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
  763. err(COMPILE2, "duplicate label '%s'", cp->t);
  764. lh = xmalloc(sizeof *lh);
  765. lh->lh_next = *lhp;
  766. lh->lh_hash = h;
  767. lh->lh_cmd = cp;
  768. lh->lh_ref = 0;
  769. *lhp = lh;
  770. }
  771. /*
  772. * Find the label contained in the command l in the command linked
  773. * list cp. L is excluded from the search. Return NULL if not found.
  774. */
  775. static struct s_command *
  776. findlabel(char *name)
  777. {
  778. struct labhash *lh;
  779. u_char *p;
  780. u_int h, c;
  781. for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
  782. h = (h << 5) + h + c;
  783. for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
  784. if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
  785. lh->lh_ref = 1;
  786. return (lh->lh_cmd);
  787. }
  788. }
  789. return (NULL);
  790. }
  791. /*
  792. * Warn about any unused labels. As a side effect, release the label hash
  793. * table space.
  794. */
  795. static void
  796. uselabel(void)
  797. {
  798. struct labhash *lh, *next;
  799. int i;
  800. for (i = 0; i < LHSZ; i++) {
  801. for (lh = labels[i]; lh != NULL; lh = next) {
  802. next = lh->lh_next;
  803. if (!lh->lh_ref)
  804. err(WARNING, "unused label '%s'",
  805. lh->lh_cmd->t);
  806. free(lh);
  807. }
  808. }
  809. }