PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/freebsd/contrib/one-true-awk/b.c

https://bitbucket.org/killerpenguinassassins/open_distrib_devel
C | 976 lines | 826 code | 76 blank | 74 comment | 309 complexity | 1393f0be131943bea71aaff9e875ea85 MD5 | raw file
Possible License(s): CC0-1.0, MIT, LGPL-2.0, LGPL-3.0, WTFPL, GPL-2.0, BSD-2-Clause, AGPL-3.0, CC-BY-SA-3.0, MPL-2.0, JSON, BSD-3-Clause-No-Nuclear-License-2014, LGPL-2.1, CPL-1.0, AGPL-1.0, 0BSD, ISC, Apache-2.0, GPL-3.0, IPL-1.0, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /****************************************************************
  2. Copyright (C) Lucent Technologies 1997
  3. All Rights Reserved
  4. Permission to use, copy, modify, and distribute this software and
  5. its documentation for any purpose and without fee is hereby
  6. granted, provided that the above copyright notice appear in all
  7. copies and that both that the copyright notice and this
  8. permission notice and warranty disclaimer appear in supporting
  9. documentation, and that the name Lucent Technologies or any of
  10. its entities not be used in advertising or publicity pertaining
  11. to distribution of the software without specific, written prior
  12. permission.
  13. LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  14. INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  15. IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  16. SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  17. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  18. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  19. ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  20. THIS SOFTWARE.
  21. ****************************************************************/
  22. /* lasciate ogne speranza, voi ch'intrate. */
  23. #include <sys/cdefs.h>
  24. __FBSDID("$FreeBSD$");
  25. #define DEBUG
  26. #include <ctype.h>
  27. #include <stdio.h>
  28. #include <string.h>
  29. #include <stdlib.h>
  30. #include "awk.h"
  31. #include "ytab.h"
  32. #define HAT (NCHARS+2) /* matches ^ in regular expr */
  33. /* NCHARS is 2**n */
  34. #define MAXLIN 22
  35. #define type(v) (v)->nobj /* badly overloaded here */
  36. #define info(v) (v)->ntype /* badly overloaded here */
  37. #define left(v) (v)->narg[0]
  38. #define right(v) (v)->narg[1]
  39. #define parent(v) (v)->nnext
  40. #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
  41. #define ELEAF case EMPTYRE: /* empty string in regexp */
  42. #define UNARY case STAR: case PLUS: case QUEST:
  43. /* encoding in tree Nodes:
  44. leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
  45. left is index, right contains value or pointer to value
  46. unary (STAR, PLUS, QUEST): left is child, right is null
  47. binary (CAT, OR): left and right are children
  48. parent contains pointer to parent
  49. */
  50. int *setvec;
  51. int *tmpset;
  52. int maxsetvec = 0;
  53. int rtok; /* next token in current re */
  54. int rlxval;
  55. static uschar *rlxstr;
  56. static uschar *prestr; /* current position in current re */
  57. static uschar *lastre; /* origin of last re */
  58. static int setcnt;
  59. static int poscnt;
  60. char *patbeg;
  61. int patlen;
  62. #define NFA 20 /* cache this many dynamic fa's */
  63. fa *fatab[NFA];
  64. int nfatab = 0; /* entries in fatab */
  65. fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
  66. {
  67. int i, use, nuse;
  68. fa *pfa;
  69. static int now = 1;
  70. if (setvec == 0) { /* first time through any RE */
  71. maxsetvec = MAXLIN;
  72. setvec = (int *) malloc(maxsetvec * sizeof(int));
  73. tmpset = (int *) malloc(maxsetvec * sizeof(int));
  74. if (setvec == 0 || tmpset == 0)
  75. overflo("out of space initializing makedfa");
  76. }
  77. if (compile_time) /* a constant for sure */
  78. return mkdfa(s, anchor);
  79. for (i = 0; i < nfatab; i++) /* is it there already? */
  80. if (fatab[i]->anchor == anchor
  81. && strcmp((const char *) fatab[i]->restr, s) == 0) {
  82. fatab[i]->use = now++;
  83. return fatab[i];
  84. }
  85. pfa = mkdfa(s, anchor);
  86. if (nfatab < NFA) { /* room for another */
  87. fatab[nfatab] = pfa;
  88. fatab[nfatab]->use = now++;
  89. nfatab++;
  90. return pfa;
  91. }
  92. use = fatab[0]->use; /* replace least-recently used */
  93. nuse = 0;
  94. for (i = 1; i < nfatab; i++)
  95. if (fatab[i]->use < use) {
  96. use = fatab[i]->use;
  97. nuse = i;
  98. }
  99. freefa(fatab[nuse]);
  100. fatab[nuse] = pfa;
  101. pfa->use = now++;
  102. return pfa;
  103. }
  104. fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
  105. /* anchor = 1 for anchored matches, else 0 */
  106. {
  107. Node *p, *p1;
  108. fa *f;
  109. p = reparse(s);
  110. p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
  111. /* put ALL STAR in front of reg. exp. */
  112. p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
  113. /* put FINAL after reg. exp. */
  114. poscnt = 0;
  115. penter(p1); /* enter parent pointers and leaf indices */
  116. if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
  117. overflo("out of space for fa");
  118. f->accept = poscnt-1; /* penter has computed number of positions in re */
  119. cfoll(f, p1); /* set up follow sets */
  120. freetr(p1);
  121. if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
  122. overflo("out of space in makedfa");
  123. if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
  124. overflo("out of space in makedfa");
  125. *f->posns[1] = 0;
  126. f->initstat = makeinit(f, anchor);
  127. f->anchor = anchor;
  128. f->restr = (uschar *) tostring(s);
  129. return f;
  130. }
  131. int makeinit(fa *f, int anchor)
  132. {
  133. int i, k;
  134. f->curstat = 2;
  135. f->out[2] = 0;
  136. f->reset = 0;
  137. k = *(f->re[0].lfollow);
  138. xfree(f->posns[2]);
  139. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  140. overflo("out of space in makeinit");
  141. for (i=0; i <= k; i++) {
  142. (f->posns[2])[i] = (f->re[0].lfollow)[i];
  143. }
  144. if ((f->posns[2])[1] == f->accept)
  145. f->out[2] = 1;
  146. for (i=0; i < NCHARS; i++)
  147. f->gototab[2][i] = 0;
  148. f->curstat = cgoto(f, 2, HAT);
  149. if (anchor) {
  150. *f->posns[2] = k-1; /* leave out position 0 */
  151. for (i=0; i < k; i++) {
  152. (f->posns[0])[i] = (f->posns[2])[i];
  153. }
  154. f->out[0] = f->out[2];
  155. if (f->curstat != 2)
  156. --(*f->posns[f->curstat]);
  157. }
  158. return f->curstat;
  159. }
  160. void penter(Node *p) /* set up parent pointers and leaf indices */
  161. {
  162. switch (type(p)) {
  163. ELEAF
  164. LEAF
  165. info(p) = poscnt;
  166. poscnt++;
  167. break;
  168. UNARY
  169. penter(left(p));
  170. parent(left(p)) = p;
  171. break;
  172. case CAT:
  173. case OR:
  174. penter(left(p));
  175. penter(right(p));
  176. parent(left(p)) = p;
  177. parent(right(p)) = p;
  178. break;
  179. default: /* can't happen */
  180. FATAL("can't happen: unknown type %d in penter", type(p));
  181. break;
  182. }
  183. }
  184. void freetr(Node *p) /* free parse tree */
  185. {
  186. switch (type(p)) {
  187. ELEAF
  188. LEAF
  189. xfree(p);
  190. break;
  191. UNARY
  192. freetr(left(p));
  193. xfree(p);
  194. break;
  195. case CAT:
  196. case OR:
  197. freetr(left(p));
  198. freetr(right(p));
  199. xfree(p);
  200. break;
  201. default: /* can't happen */
  202. FATAL("can't happen: unknown type %d in freetr", type(p));
  203. break;
  204. }
  205. }
  206. /* in the parsing of regular expressions, metacharacters like . have */
  207. /* to be seen literally; \056 is not a metacharacter. */
  208. int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
  209. { /* only pick up one 8-bit byte (2 chars) */
  210. uschar *p;
  211. int n = 0;
  212. int i;
  213. for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
  214. if (isdigit(*p))
  215. n = 16 * n + *p - '0';
  216. else if (*p >= 'a' && *p <= 'f')
  217. n = 16 * n + *p - 'a' + 10;
  218. else if (*p >= 'A' && *p <= 'F')
  219. n = 16 * n + *p - 'A' + 10;
  220. }
  221. *pp = (uschar *) p;
  222. return n;
  223. }
  224. #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
  225. int quoted(uschar **pp) /* pick up next thing after a \\ */
  226. /* and increment *pp */
  227. {
  228. uschar *p = *pp;
  229. int c;
  230. if ((c = *p++) == 't')
  231. c = '\t';
  232. else if (c == 'n')
  233. c = '\n';
  234. else if (c == 'f')
  235. c = '\f';
  236. else if (c == 'r')
  237. c = '\r';
  238. else if (c == 'b')
  239. c = '\b';
  240. else if (c == '\\')
  241. c = '\\';
  242. else if (c == 'x') { /* hexadecimal goo follows */
  243. c = hexstr(&p); /* this adds a null if number is invalid */
  244. } else if (isoctdigit(c)) { /* \d \dd \ddd */
  245. int n = c - '0';
  246. if (isoctdigit(*p)) {
  247. n = 8 * n + *p++ - '0';
  248. if (isoctdigit(*p))
  249. n = 8 * n + *p++ - '0';
  250. }
  251. c = n;
  252. } /* else */
  253. /* c = c; */
  254. *pp = p;
  255. return c;
  256. }
  /*
   * collate_range_cmp: order two character codes for range expansion.
   *
   * Codes that are equal as unsigned bytes compare equal; otherwise the
   * result is that of strcoll() on the two one-character strings.
   */
  static int collate_range_cmp(int a, int b)
  {
      /* static, so the second byte of each string is always '\0' */
      static char pair[2][2];

      if ((unsigned char)a != (unsigned char)b) {
          pair[0][0] = a;
          pair[1][0] = b;
          return (strcoll(pair[0], pair[1]));
      }
      return 0;
  }
  266. char *cclenter(const char *argp) /* add a character class */
  267. {
  268. int i, c, c2;
  269. int j;
  270. uschar *p = (uschar *) argp;
  271. uschar *op, *bp;
  272. static uschar *buf = 0;
  273. static int bufsz = 100;
  274. op = p;
  275. if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  276. FATAL("out of space for character class [%.10s...] 1", p);
  277. bp = buf;
  278. for (i = 0; (c = *p++) != 0; ) {
  279. if (c == '\\') {
  280. c = quoted(&p);
  281. } else if (c == '-' && i > 0 && bp[-1] != 0) {
  282. if (*p != 0) {
  283. c = bp[-1];
  284. c2 = *p++;
  285. if (c2 == '\\')
  286. c2 = quoted(&p);
  287. if (collate_range_cmp(c, c2) > 0) {
  288. bp--;
  289. i--;
  290. continue;
  291. }
  292. for (j = 0; j < NCHARS; j++) {
  293. if ((collate_range_cmp(c, j) > 0) ||
  294. collate_range_cmp(j, c2) > 0)
  295. continue;
  296. if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
  297. FATAL("out of space for character class [%.10s...] 2", p);
  298. *bp++ = j;
  299. i++;
  300. }
  301. continue;
  302. }
  303. }
  304. if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
  305. FATAL("out of space for character class [%.10s...] 3", p);
  306. *bp++ = c;
  307. i++;
  308. }
  309. *bp = 0;
  310. dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
  311. xfree(op);
  312. return (char *) tostring((char *) buf);
  313. }
  /*
   * overflo: report that a regular expression needed more space than
   * could be obtained.  s names the place that ran out; at most its first
   * 30 characters appear in the message passed to FATAL().
   */
  void overflo(const char *s)
  {
      FATAL("regular expression too big: %.30s...", s);
  }
  318. void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
  319. {
  320. int i;
  321. int *p;
  322. switch (type(v)) {
  323. ELEAF
  324. LEAF
  325. f->re[info(v)].ltype = type(v);
  326. f->re[info(v)].lval.np = right(v);
  327. while (f->accept >= maxsetvec) { /* guessing here! */
  328. maxsetvec *= 4;
  329. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  330. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  331. if (setvec == 0 || tmpset == 0)
  332. overflo("out of space in cfoll()");
  333. }
  334. for (i = 0; i <= f->accept; i++)
  335. setvec[i] = 0;
  336. setcnt = 0;
  337. follow(v); /* computes setvec and setcnt */
  338. if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  339. overflo("out of space building follow set");
  340. f->re[info(v)].lfollow = p;
  341. *p = setcnt;
  342. for (i = f->accept; i >= 0; i--)
  343. if (setvec[i] == 1)
  344. *++p = i;
  345. break;
  346. UNARY
  347. cfoll(f,left(v));
  348. break;
  349. case CAT:
  350. case OR:
  351. cfoll(f,left(v));
  352. cfoll(f,right(v));
  353. break;
  354. default: /* can't happen */
  355. FATAL("can't happen: unknown type %d in cfoll", type(v));
  356. }
  357. }
  358. int first(Node *p) /* collects initially active leaves of p into setvec */
  359. /* returns 0 if p matches empty string */
  360. {
  361. int b, lp;
  362. switch (type(p)) {
  363. ELEAF
  364. LEAF
  365. lp = info(p); /* look for high-water mark of subscripts */
  366. while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
  367. maxsetvec *= 4;
  368. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  369. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  370. if (setvec == 0 || tmpset == 0)
  371. overflo("out of space in first()");
  372. }
  373. if (type(p) == EMPTYRE) {
  374. setvec[lp] = 0;
  375. return(0);
  376. }
  377. if (setvec[lp] != 1) {
  378. setvec[lp] = 1;
  379. setcnt++;
  380. }
  381. if (type(p) == CCL && (*(char *) right(p)) == '\0')
  382. return(0); /* empty CCL */
  383. else return(1);
  384. case PLUS:
  385. if (first(left(p)) == 0) return(0);
  386. return(1);
  387. case STAR:
  388. case QUEST:
  389. first(left(p));
  390. return(0);
  391. case CAT:
  392. if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
  393. return(1);
  394. case OR:
  395. b = first(right(p));
  396. if (first(left(p)) == 0 || b == 0) return(0);
  397. return(1);
  398. }
  399. FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
  400. return(-1);
  401. }
  402. void follow(Node *v) /* collects leaves that can follow v into setvec */
  403. {
  404. Node *p;
  405. if (type(v) == FINAL)
  406. return;
  407. p = parent(v);
  408. switch (type(p)) {
  409. case STAR:
  410. case PLUS:
  411. first(v);
  412. follow(p);
  413. return;
  414. case OR:
  415. case QUEST:
  416. follow(p);
  417. return;
  418. case CAT:
  419. if (v == left(p)) { /* v is left child of p */
  420. if (first(right(p)) == 0) {
  421. follow(p);
  422. return;
  423. }
  424. } else /* v is right child */
  425. follow(p);
  426. return;
  427. }
  428. }
  /*
   * member: is c in s?
   * The bytes of sarg are compared as unsigned values; returns 1 when c
   * equals one of them and 0 otherwise.  The terminating NUL never matches.
   */
  int member(int c, const char *sarg)
  {
      const unsigned char *s;

      for (s = (const unsigned char *) sarg; *s != 0; s++) {
          if (c == *s)
              return(1);
      }
      return(0);
  }
  437. int match(fa *f, const char *p0) /* shortest match ? */
  438. {
  439. int s, ns;
  440. uschar *p = (uschar *) p0;
  441. s = f->reset ? makeinit(f,0) : f->initstat;
  442. if (f->out[s])
  443. return(1);
  444. do {
  445. /* assert(*p < NCHARS); */
  446. if ((ns = f->gototab[s][*p]) != 0)
  447. s = ns;
  448. else
  449. s = cgoto(f, s, *p);
  450. if (f->out[s])
  451. return(1);
  452. } while (*p++ != 0);
  453. return(0);
  454. }
  455. int pmatch(fa *f, const char *p0) /* longest match, for sub */
  456. {
  457. int s, ns;
  458. uschar *p = (uschar *) p0;
  459. uschar *q;
  460. int i, k;
  461. /* s = f->reset ? makeinit(f,1) : f->initstat; */
  462. if (f->reset) {
  463. f->initstat = s = makeinit(f,1);
  464. } else {
  465. s = f->initstat;
  466. }
  467. patbeg = (char *) p;
  468. patlen = -1;
  469. do {
  470. q = p;
  471. do {
  472. if (f->out[s]) /* final state */
  473. patlen = q-p;
  474. /* assert(*q < NCHARS); */
  475. if ((ns = f->gototab[s][*q]) != 0)
  476. s = ns;
  477. else
  478. s = cgoto(f, s, *q);
  479. if (s == 1) { /* no transition */
  480. if (patlen >= 0) {
  481. patbeg = (char *) p;
  482. return(1);
  483. }
  484. else
  485. goto nextin; /* no match */
  486. }
  487. } while (*q++ != 0);
  488. if (f->out[s])
  489. patlen = q-p-1; /* don't count $ */
  490. if (patlen >= 0) {
  491. patbeg = (char *) p;
  492. return(1);
  493. }
  494. nextin:
  495. s = 2;
  496. if (f->reset) {
  497. for (i = 2; i <= f->curstat; i++)
  498. xfree(f->posns[i]);
  499. k = *f->posns[0];
  500. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  501. overflo("out of space in pmatch");
  502. for (i = 0; i <= k; i++)
  503. (f->posns[2])[i] = (f->posns[0])[i];
  504. f->initstat = f->curstat = 2;
  505. f->out[2] = f->out[0];
  506. for (i = 0; i < NCHARS; i++)
  507. f->gototab[2][i] = 0;
  508. }
  509. } while (*p++ != 0);
  510. return (0);
  511. }
  512. int nematch(fa *f, const char *p0) /* non-empty match, for sub */
  513. {
  514. int s, ns;
  515. uschar *p = (uschar *) p0;
  516. uschar *q;
  517. int i, k;
  518. /* s = f->reset ? makeinit(f,1) : f->initstat; */
  519. if (f->reset) {
  520. f->initstat = s = makeinit(f,1);
  521. } else {
  522. s = f->initstat;
  523. }
  524. patlen = -1;
  525. while (*p) {
  526. q = p;
  527. do {
  528. if (f->out[s]) /* final state */
  529. patlen = q-p;
  530. /* assert(*q < NCHARS); */
  531. if ((ns = f->gototab[s][*q]) != 0)
  532. s = ns;
  533. else
  534. s = cgoto(f, s, *q);
  535. if (s == 1) { /* no transition */
  536. if (patlen > 0) {
  537. patbeg = (char *) p;
  538. return(1);
  539. } else
  540. goto nnextin; /* no nonempty match */
  541. }
  542. } while (*q++ != 0);
  543. if (f->out[s])
  544. patlen = q-p-1; /* don't count $ */
  545. if (patlen > 0 ) {
  546. patbeg = (char *) p;
  547. return(1);
  548. }
  549. nnextin:
  550. s = 2;
  551. if (f->reset) {
  552. for (i = 2; i <= f->curstat; i++)
  553. xfree(f->posns[i]);
  554. k = *f->posns[0];
  555. if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  556. overflo("out of state space");
  557. for (i = 0; i <= k; i++)
  558. (f->posns[2])[i] = (f->posns[0])[i];
  559. f->initstat = f->curstat = 2;
  560. f->out[2] = f->out[0];
  561. for (i = 0; i < NCHARS; i++)
  562. f->gototab[2][i] = 0;
  563. }
  564. p++;
  565. }
  566. return (0);
  567. }
  568. Node *reparse(const char *p) /* parses regular expression pointed to by p */
  569. { /* uses relex() to scan regular expression */
  570. Node *np;
  571. dprintf( ("reparse <%s>\n", p) );
  572. lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
  573. rtok = relex();
  574. /* GNU compatibility: an empty regexp matches anything */
  575. if (rtok == '\0') {
  576. /* FATAL("empty regular expression"); previous */
  577. return(op2(EMPTYRE, NIL, NIL));
  578. }
  579. np = regexp();
  580. if (rtok != '\0')
  581. FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  582. return(np);
  583. }
  584. Node *regexp(void) /* top-level parse of reg expr */
  585. {
  586. return (alt(concat(primary())));
  587. }
  588. Node *primary(void)
  589. {
  590. Node *np;
  591. switch (rtok) {
  592. case CHAR:
  593. np = op2(CHAR, NIL, itonp(rlxval));
  594. rtok = relex();
  595. return (unary(np));
  596. case ALL:
  597. rtok = relex();
  598. return (unary(op2(ALL, NIL, NIL)));
  599. case EMPTYRE:
  600. rtok = relex();
  601. return (unary(op2(ALL, NIL, NIL)));
  602. case DOT:
  603. rtok = relex();
  604. return (unary(op2(DOT, NIL, NIL)));
  605. case CCL:
  606. np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
  607. rtok = relex();
  608. return (unary(np));
  609. case NCCL:
  610. np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
  611. rtok = relex();
  612. return (unary(np));
  613. case '^':
  614. rtok = relex();
  615. return (unary(op2(CHAR, NIL, itonp(HAT))));
  616. case '$':
  617. rtok = relex();
  618. return (unary(op2(CHAR, NIL, NIL)));
  619. case '(':
  620. rtok = relex();
  621. if (rtok == ')') { /* special pleading for () */
  622. rtok = relex();
  623. return unary(op2(CCL, NIL, (Node *) tostring("")));
  624. }
  625. np = regexp();
  626. if (rtok == ')') {
  627. rtok = relex();
  628. return (unary(np));
  629. }
  630. else
  631. FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  632. default:
  633. FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
  634. }
  635. return 0; /*NOTREACHED*/
  636. }
  637. Node *concat(Node *np)
  638. {
  639. switch (rtok) {
  640. case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
  641. return (concat(op2(CAT, np, primary())));
  642. }
  643. return (np);
  644. }
  645. Node *alt(Node *np)
  646. {
  647. if (rtok == OR) {
  648. rtok = relex();
  649. return (alt(op2(OR, np, concat(primary()))));
  650. }
  651. return (np);
  652. }
  653. Node *unary(Node *np)
  654. {
  655. switch (rtok) {
  656. case STAR:
  657. rtok = relex();
  658. return (unary(op2(STAR, np, NIL)));
  659. case PLUS:
  660. rtok = relex();
  661. return (unary(op2(PLUS, np, NIL)));
  662. case QUEST:
  663. rtok = relex();
  664. return (unary(op2(QUEST, np, NIL)));
  665. default:
  666. return (np);
  667. }
  668. }
  669. /*
  670. * Character class definitions conformant to the POSIX locale as
  671. * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
  672. * and operating character sets are both ASCII (ISO646) or supersets
  673. * thereof.
  674. *
  675. * Note that to avoid overflowing the temporary buffer used in
  676. * relex(), the expanded character class (prior to range expansion)
  677. * must be less than twice the size of their full name.
  678. */
  679. /* Because isblank doesn't show up in any of the header files on any
  680. * system i use, it's defined here. if some other locale has a richer
  681. * definition of "blank", define HAS_ISBLANK and provide your own
  682. * version.
  683. * the parentheses here are an attempt to find a path through the maze
  684. * of macro definition and/or function and/or version provided. thanks
  685. * to nelson beebe for the suggestion; let's see if it works everywhere.
  686. */
  687. /* #define HAS_ISBLANK */
  #ifndef HAS_ISBLANK
  /*
   * xisblank: fallback "blank" predicate used when HAS_ISBLANK is not
   * defined.  True (1) for space and horizontal tab only.
   */
  int (xisblank)(int c)
  {
      return c==' ' || c=='\t';
  }
  #endif

  /*
   * Table of the POSIX character class names accepted inside brackets as
   * [:name:].  cc_namelen is the length of cc_name; cc_func is the
   * predicate that decides which character codes belong to the class.
   * The table ends with an entry whose cc_name is NULL.
   *
   * "blank" uses xisblank (or isblank when HAS_ISBLANK is defined).  It
   * was previously bound to isspace, which made [:blank:] also match
   * newline, vertical tab, form feed and carriage return.
   */
  struct charclass {
      const char *cc_name;
      int cc_namelen;
      int (*cc_func)(int);
  } charclasses[] = {
      { "alnum", 5, isalnum },
      { "alpha", 5, isalpha },
  #ifndef HAS_ISBLANK
      { "blank", 5, xisblank },
  #else
      { "blank", 5, isblank },
  #endif
      { "cntrl", 5, iscntrl },
      { "digit", 5, isdigit },
      { "graph", 5, isgraph },
      { "lower", 5, islower },
      { "print", 5, isprint },
      { "punct", 5, ispunct },
      { "space", 5, isspace },
      { "upper", 5, isupper },
      { "xdigit", 6, isxdigit },
      { NULL, 0, NULL },
  };
  717. int relex(void) /* lexical analyzer for reparse */
  718. {
  719. int c, n;
  720. int cflag;
  721. static uschar *buf = 0;
  722. static int bufsz = 100;
  723. uschar *bp;
  724. struct charclass *cc;
  725. int i;
  726. switch (c = *prestr++) {
  727. case '|': return OR;
  728. case '*': return STAR;
  729. case '+': return PLUS;
  730. case '?': return QUEST;
  731. case '.': return DOT;
  732. case '\0': prestr--; return '\0';
  733. case '^':
  734. case '$':
  735. case '(':
  736. case ')':
  737. return c;
  738. case '\\':
  739. rlxval = quoted(&prestr);
  740. return CHAR;
  741. default:
  742. rlxval = c;
  743. return CHAR;
  744. case '[':
  745. if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  746. FATAL("out of space in reg expr %.10s..", lastre);
  747. bp = buf;
  748. if (*prestr == '^') {
  749. cflag = 1;
  750. prestr++;
  751. }
  752. else
  753. cflag = 0;
  754. n = 2 * strlen((const char *) prestr)+1;
  755. if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
  756. FATAL("out of space for reg expr %.10s...", lastre);
  757. for (; ; ) {
  758. if ((c = *prestr++) == '\\') {
  759. *bp++ = '\\';
  760. if ((c = *prestr++) == '\0')
  761. FATAL("nonterminated character class %.20s...", lastre);
  762. *bp++ = c;
  763. /* } else if (c == '\n') { */
  764. /* FATAL("newline in character class %.20s...", lastre); */
  765. } else if (c == '[' && *prestr == ':') {
  766. /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
  767. for (cc = charclasses; cc->cc_name; cc++)
  768. if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
  769. break;
  770. if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
  771. prestr[2 + cc->cc_namelen] == ']') {
  772. prestr += cc->cc_namelen + 3;
  773. for (i = 0; i < NCHARS; i++) {
  774. if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
  775. FATAL("out of space for reg expr %.10s...", lastre);
  776. if (cc->cc_func(i)) {
  777. *bp++ = i;
  778. n++;
  779. }
  780. }
  781. } else
  782. *bp++ = c;
  783. } else if (c == '\0') {
  784. FATAL("nonterminated character class %.20s", lastre);
  785. } else if (bp == buf) { /* 1st char is special */
  786. *bp++ = c;
  787. } else if (c == ']') {
  788. *bp++ = 0;
  789. rlxstr = (uschar *) tostring((char *) buf);
  790. if (cflag == 0)
  791. return CCL;
  792. else
  793. return NCCL;
  794. } else
  795. *bp++ = c;
  796. }
  797. }
  798. }
  799. int cgoto(fa *f, int s, int c)
  800. {
  801. int i, j, k;
  802. int *p, *q;
  803. assert(c == HAT || c < NCHARS);
  804. while (f->accept >= maxsetvec) { /* guessing here! */
  805. maxsetvec *= 4;
  806. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  807. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  808. if (setvec == 0 || tmpset == 0)
  809. overflo("out of space in cgoto()");
  810. }
  811. for (i = 0; i <= f->accept; i++)
  812. setvec[i] = 0;
  813. setcnt = 0;
  814. /* compute positions of gototab[s,c] into setvec */
  815. p = f->posns[s];
  816. for (i = 1; i <= *p; i++) {
  817. if ((k = f->re[p[i]].ltype) != FINAL) {
  818. if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
  819. || (k == DOT && c != 0 && c != HAT)
  820. || (k == ALL && c != 0)
  821. || (k == EMPTYRE && c != 0)
  822. || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
  823. || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
  824. q = f->re[p[i]].lfollow;
  825. for (j = 1; j <= *q; j++) {
  826. if (q[j] >= maxsetvec) {
  827. maxsetvec *= 4;
  828. setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  829. tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  830. if (setvec == 0 || tmpset == 0)
  831. overflo("cgoto overflow");
  832. }
  833. if (setvec[q[j]] == 0) {
  834. setcnt++;
  835. setvec[q[j]] = 1;
  836. }
  837. }
  838. }
  839. }
  840. }
  841. /* determine if setvec is a previous state */
  842. tmpset[0] = setcnt;
  843. j = 1;
  844. for (i = f->accept; i >= 0; i--)
  845. if (setvec[i]) {
  846. tmpset[j++] = i;
  847. }
  848. /* tmpset == previous state? */
  849. for (i = 1; i <= f->curstat; i++) {
  850. p = f->posns[i];
  851. if ((k = tmpset[0]) != p[0])
  852. goto different;
  853. for (j = 1; j <= k; j++)
  854. if (tmpset[j] != p[j])
  855. goto different;
  856. /* setvec is state i */
  857. f->gototab[s][c] = i;
  858. return i;
  859. different:;
  860. }
  861. /* add tmpset to current set of states */
  862. if (f->curstat >= NSTATES-1) {
  863. f->curstat = 2;
  864. f->reset = 1;
  865. for (i = 2; i < NSTATES; i++)
  866. xfree(f->posns[i]);
  867. } else
  868. ++(f->curstat);
  869. for (i = 0; i < NCHARS; i++)
  870. f->gototab[f->curstat][i] = 0;
  871. xfree(f->posns[f->curstat]);
  872. if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  873. overflo("out of space in cgoto");
  874. f->posns[f->curstat] = p;
  875. f->gototab[s][c] = f->curstat;
  876. for (i = 0; i <= setcnt; i++)
  877. p[i] = tmpset[i];
  878. if (setvec[f->accept])
  879. f->out[f->curstat] = 1;
  880. else
  881. f->out[f->curstat] = 0;
  882. return f->curstat;
  883. }
  884. void freefa(fa *f) /* free a finite automaton */
  885. {
  886. int i;
  887. if (f == NULL)
  888. return;
  889. for (i = 0; i <= f->curstat; i++)
  890. xfree(f->posns[i]);
  891. for (i = 0; i <= f->accept; i++) {
  892. xfree(f->re[i].lfollow);
  893. if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
  894. xfree((f->re[i].lval.np));
  895. }
  896. xfree(f->restr);
  897. xfree(f);
  898. }