PageRenderTime 56ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/postfix-2.9.3/src/util/dict_regexp.c

#
C | 830 lines | 562 code | 88 blank | 180 comment | 148 complexity | 2517e093d437f797517f46c5bae1e524 MD5 | raw file
Possible License(s): IPL-1.0, AGPL-3.0
  1. /*++
  2. /* NAME
  3. /* dict_regexp 3
  4. /* SUMMARY
  5. /* dictionary manager interface to REGEXP regular expression library
  6. /* SYNOPSIS
  7. /* #include <dict_regexp.h>
  8. /*
  9. /* DICT *dict_regexp_open(name, dummy, dict_flags)
  10. /* const char *name;
  11. /* int dummy;
  12. /* int dict_flags;
  13. /* DESCRIPTION
  14. /* dict_regexp_open() opens the named file and compiles the contained
  15. /* regular expressions. The result object can be used to match strings
  16. /* against the table.
  17. /* SEE ALSO
  18. /* dict(3) generic dictionary manager
  19. /* regexp_table(5) format of Postfix regular expression tables
  20. /* AUTHOR(S)
  21. /* LaMont Jones
  22. /* lamont@hp.com
  23. /*
  24. /* Based on PCRE dictionary contributed by Andrew McNamara
  25. /* andrewm@connect.com.au
  26. /* connect.com.au Pty. Ltd.
  27. /* Level 3, 213 Miller St
  28. /* North Sydney, NSW, Australia
  29. /*
  30. /* Heavily rewritten by Wietse Venema
  31. /* IBM T.J. Watson Research
  32. /* P.O. Box 704
  33. /* Yorktown Heights, NY 10598, USA
  34. /*--*/
  35. /* System library. */
  36. #include "sys_defs.h"
  37. #ifdef HAS_POSIX_REGEXP
  38. #include <sys/stat.h>
  39. #include <stdlib.h>
  40. #include <unistd.h>
  41. #include <string.h>
  42. #include <ctype.h>
  43. #include <regex.h>
  44. #ifdef STRCASECMP_IN_STRINGS_H
  45. #include <strings.h>
  46. #endif
  47. /* Utility library. */
  48. #include "mymalloc.h"
  49. #include "msg.h"
  50. #include "safe.h"
  51. #include "vstream.h"
  52. #include "vstring.h"
  53. #include "stringops.h"
  54. #include "readlline.h"
  55. #include "dict.h"
  56. #include "dict_regexp.h"
  57. #include "mac_parse.h"
  58. #include "warn_stat.h"
  /*
   * Rule opcodes. The IF/ENDIF support is based on an idea by Bert Driehuis.
   */
  enum {
      DICT_REGEXP_OP_MATCH = 1,		/* Match this regexp */
      DICT_REGEXP_OP_IF = 2,		/* Increase if/endif nesting on match */
      DICT_REGEXP_OP_ENDIF = 3		/* Decrease if/endif nesting on match */
  };
  /*
   * One pattern as it appears in the table, before it is handed to
   * regcomp(): the pattern text, the compile flags, and the polarity.
   */
  typedef struct DICT_REGEXP_PATTERN {
      char   *regexp;			/* pattern text */
      int     options;			/* flags for regcomp() */
      int     match;			/* non-zero: positive, zero: negated */
  } DICT_REGEXP_PATTERN;
  /*
   * Compiled rules. DICT_REGEXP_RULE is the generic part; the MATCH and IF
   * rule types embed it as their first member, so that a pointer to either
   * can be cast to and from a pointer to the generic rule.
   */
  typedef struct DICT_REGEXP_RULE {
      int     op;				/* DICT_REGEXP_OP_MATCH/IF/ENDIF */
      int     nesting;			/* IF/ENDIF depth of this rule */
      int     lineno;			/* line number in the source file */
      struct DICT_REGEXP_RULE *next;	/* next rule in the table */
  } DICT_REGEXP_RULE;

  typedef struct DICT_REGEXP_MATCH_RULE {
      DICT_REGEXP_RULE rule;		/* generic part, must be first */
      regex_t *first_exp;			/* compiled primary pattern */
      int     first_match;		/* primary: positive or negative */
      regex_t *second_exp;		/* compiled secondary pattern, or 0 */
      int     second_match;		/* secondary: positive or negative */
      char   *replacement;		/* result text */
      size_t  max_sub;			/* largest $number in result text */
  } DICT_REGEXP_MATCH_RULE;

  typedef struct DICT_REGEXP_IF_RULE {
      DICT_REGEXP_RULE rule;		/* generic part, must be first */
      regex_t *expr;			/* compiled condition */
      int     match;			/* condition: positive or negative */
  } DICT_REGEXP_IF_RULE;
  96. /*
  97. * Regexp map.
  98. */
  99. typedef struct {
  100. DICT dict; /* generic members */
  101. regmatch_t *pmatch; /* matched substring info */
  102. DICT_REGEXP_RULE *head; /* first rule */
  103. VSTRING *expansion_buf; /* lookup result */
  104. } DICT_REGEXP;
  /*
   * Named "nothing" arguments for regexec() calls that do not need
   * substring information.
   */
  #define NULL_SUBSTITUTIONS	0
  #define NULL_MATCH_RESULT	((regmatch_t *) 0)
  110. /*
  111. * Context for $number expansion callback.
  112. */
  113. typedef struct {
  114. DICT_REGEXP *dict_regexp; /* the dictionary handle */
  115. DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */
  116. const char *lookup_string; /* matched text */
  117. } DICT_REGEXP_EXPAND_CONTEXT;
  /*
   * State for the compile-time scan of the replacement text.
   */
  typedef struct DICT_REGEXP_PRESCAN_CONTEXT {
      const char *mapname;		/* map name, for diagnostics */
      int     lineno;			/* line number, for diagnostics */
      size_t  max_sub;			/* largest $number seen so far */
      char   *literal;			/* constant result, $$ -> $ */
  } DICT_REGEXP_PRESCAN_CONTEXT;
  /*
   * Compatibility: supply MAC_PARSE_OK when the included headers do not
   * define it.
   */
  #if !defined(MAC_PARSE_OK)
  #define MAC_PARSE_OK	0
  #endif
  133. /* dict_regexp_expand - replace $number with substring from matched text */
  134. static int dict_regexp_expand(int type, VSTRING *buf, char *ptr)
  135. {
  136. DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
  137. DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
  138. DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
  139. regmatch_t *pmatch;
  140. size_t n;
  141. /*
  142. * Replace $number by the corresponding substring from the matched text.
  143. * We pre-scanned the replacement text at compile time, so any out of
  144. * range $number means that something impossible has happened.
  145. */
  146. if (type == MAC_PARSE_VARNAME) {
  147. n = atoi(vstring_str(buf));
  148. if (n < 1 || n > match_rule->max_sub)
  149. msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
  150. dict_regexp->dict.name, match_rule->rule.lineno,
  151. vstring_str(buf));
  152. pmatch = dict_regexp->pmatch + n;
  153. if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
  154. return (MAC_PARSE_UNDEF); /* empty or not matched */
  155. vstring_strncat(dict_regexp->expansion_buf,
  156. ctxt->lookup_string + pmatch->rm_so,
  157. pmatch->rm_eo - pmatch->rm_so);
  158. return (MAC_PARSE_OK);
  159. }
  160. /*
  161. * Straight text - duplicate with no substitution.
  162. */
  163. else {
  164. vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
  165. return (MAC_PARSE_OK);
  166. }
  167. }
  /* dict_regexp_regerror - log a regcomp()/regexec() error as a warning */

  static void dict_regexp_regerror(const char *mapname, int lineno, int error,
                                   const regex_t *expr)
  {
      char    text[256];

      /* regerror() turns the numeric error code into readable text. */
      (void) regerror(error, expr, text, sizeof(text));
      msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
  }
  /*
   * Run one compiled expression against a string and fold in the rule's
   * polarity. The value is (match) when regexec() reports a match,
   * !(match) when it reports REG_NOMATCH, and 0 after logging a warning
   * for any other regexec() result. The regexec() result is stored in
   * (err). This is a macro, not a function, to keep call overhead out of
   * the time-critical lookup loop; note that (err) and (expr) appear more
   * than once in the expansion.
   */
  #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
      ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
       ((err) == 0 ? (match) : \
        (err) == REG_NOMATCH ? !(match) : \
        (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
  184. /* dict_regexp_lookup - match string and perform optional substitution */
  185. static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
  186. {
  187. DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
  188. DICT_REGEXP_RULE *rule;
  189. DICT_REGEXP_IF_RULE *if_rule;
  190. DICT_REGEXP_MATCH_RULE *match_rule;
  191. DICT_REGEXP_EXPAND_CONTEXT expand_context;
  192. int error;
  193. int nesting = 0;
  194. dict->error = 0;
  195. if (msg_verbose)
  196. msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
  197. /*
  198. * Optionally fold the key.
  199. */
  200. if (dict->flags & DICT_FLAG_FOLD_MUL) {
  201. if (dict->fold_buf == 0)
  202. dict->fold_buf = vstring_alloc(10);
  203. vstring_strcpy(dict->fold_buf, lookup_string);
  204. lookup_string = lowercase(vstring_str(dict->fold_buf));
  205. }
  206. for (rule = dict_regexp->head; rule; rule = rule->next) {
  207. /*
  208. * Skip rules inside failed IF/ENDIF.
  209. */
  210. if (nesting < rule->nesting)
  211. continue;
  212. switch (rule->op) {
  213. /*
  214. * Search for the first matching primary expression. Limit the
  215. * overhead for substring substitution to the bare minimum.
  216. */
  217. case DICT_REGEXP_OP_MATCH:
  218. match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
  219. if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
  220. match_rule->first_exp,
  221. match_rule->first_match,
  222. lookup_string,
  223. match_rule->max_sub > 0 ?
  224. match_rule->max_sub + 1 : 0,
  225. dict_regexp->pmatch))
  226. continue;
  227. if (match_rule->second_exp
  228. && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
  229. match_rule->second_exp,
  230. match_rule->second_match,
  231. lookup_string,
  232. NULL_SUBSTITUTIONS,
  233. NULL_MATCH_RESULT))
  234. continue;
  235. /*
  236. * Skip $number substitutions when the replacement text contains
  237. * no $number strings, as learned during the compile time
  238. * pre-scan. The pre-scan already replaced $$ by $.
  239. */
  240. if (match_rule->max_sub == 0)
  241. return (match_rule->replacement);
  242. /*
  243. * Perform $number substitutions on the replacement text. We
  244. * pre-scanned the replacement text at compile time. Any macro
  245. * expansion errors at this point mean something impossible has
  246. * happened.
  247. */
  248. if (!dict_regexp->expansion_buf)
  249. dict_regexp->expansion_buf = vstring_alloc(10);
  250. VSTRING_RESET(dict_regexp->expansion_buf);
  251. expand_context.lookup_string = lookup_string;
  252. expand_context.match_rule = match_rule;
  253. expand_context.dict_regexp = dict_regexp;
  254. if (mac_parse(match_rule->replacement, dict_regexp_expand,
  255. (char *) &expand_context) & MAC_PARSE_ERROR)
  256. msg_panic("regexp map %s, line %d: bad replacement syntax",
  257. dict->name, rule->lineno);
  258. VSTRING_TERMINATE(dict_regexp->expansion_buf);
  259. return (vstring_str(dict_regexp->expansion_buf));
  260. /*
  261. * Conditional.
  262. */
  263. case DICT_REGEXP_OP_IF:
  264. if_rule = (DICT_REGEXP_IF_RULE *) rule;
  265. if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
  266. if_rule->expr, if_rule->match, lookup_string,
  267. NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
  268. nesting++;
  269. continue;
  270. /*
  271. * ENDIF after successful IF.
  272. */
  273. case DICT_REGEXP_OP_ENDIF:
  274. nesting--;
  275. continue;
  276. default:
  277. msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
  278. }
  279. }
  280. return (0);
  281. }
  282. /* dict_regexp_close - close regexp dictionary */
  283. static void dict_regexp_close(DICT *dict)
  284. {
  285. DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
  286. DICT_REGEXP_RULE *rule;
  287. DICT_REGEXP_RULE *next;
  288. DICT_REGEXP_MATCH_RULE *match_rule;
  289. DICT_REGEXP_IF_RULE *if_rule;
  290. for (rule = dict_regexp->head; rule; rule = next) {
  291. next = rule->next;
  292. switch (rule->op) {
  293. case DICT_REGEXP_OP_MATCH:
  294. match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
  295. if (match_rule->first_exp) {
  296. regfree(match_rule->first_exp);
  297. myfree((char *) match_rule->first_exp);
  298. }
  299. if (match_rule->second_exp) {
  300. regfree(match_rule->second_exp);
  301. myfree((char *) match_rule->second_exp);
  302. }
  303. if (match_rule->replacement)
  304. myfree((char *) match_rule->replacement);
  305. break;
  306. case DICT_REGEXP_OP_IF:
  307. if_rule = (DICT_REGEXP_IF_RULE *) rule;
  308. if (if_rule->expr) {
  309. regfree(if_rule->expr);
  310. myfree((char *) if_rule->expr);
  311. }
  312. break;
  313. case DICT_REGEXP_OP_ENDIF:
  314. break;
  315. default:
  316. msg_panic("dict_regexp_close: unknown operation %d", rule->op);
  317. }
  318. myfree((char *) rule);
  319. }
  320. if (dict_regexp->pmatch)
  321. myfree((char *) dict_regexp->pmatch);
  322. if (dict_regexp->expansion_buf)
  323. vstring_free(dict_regexp->expansion_buf);
  324. if (dict->fold_buf)
  325. vstring_free(dict->fold_buf);
  326. dict_free(dict);
  327. }
  328. /* dict_regexp_get_pat - extract one pattern with options from rule */
  329. static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
  330. DICT_REGEXP_PATTERN *pat)
  331. {
  332. char *p = *bufp;
  333. char re_delim;
  334. /*
  335. * Process negation operators.
  336. */
  337. pat->match = 1;
  338. while (*p == '!') {
  339. pat->match = !pat->match;
  340. p++;
  341. }
  342. /*
  343. * Grr...aceful handling of whitespace after '!'.
  344. */
  345. while (*p && ISSPACE(*p))
  346. p++;
  347. if (*p == 0) {
  348. msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
  349. mapname, lineno);
  350. return (0);
  351. }
  352. /*
  353. * Search for the closing delimiter, handling backslash escape.
  354. */
  355. re_delim = *p++;
  356. pat->regexp = p;
  357. while (*p) {
  358. if (*p == '\\') {
  359. if (p[1])
  360. p++;
  361. else
  362. break;
  363. } else if (*p == re_delim) {
  364. break;
  365. }
  366. ++p;
  367. }
  368. if (!*p) {
  369. msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
  370. "skipping this rule", mapname, lineno, re_delim);
  371. return (0);
  372. }
  373. *p++ = 0; /* null terminate */
  374. /*
  375. * Search for options.
  376. */
  377. pat->options = REG_EXTENDED | REG_ICASE;
  378. while (*p && !ISSPACE(*p) && *p != '!') {
  379. switch (*p) {
  380. case 'i':
  381. pat->options ^= REG_ICASE;
  382. break;
  383. case 'm':
  384. pat->options ^= REG_NEWLINE;
  385. break;
  386. case 'x':
  387. pat->options ^= REG_EXTENDED;
  388. break;
  389. default:
  390. msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
  391. "skipping this rule", mapname, lineno, *p);
  392. return (0);
  393. }
  394. ++p;
  395. }
  396. *bufp = p;
  397. return (1);
  398. }
  399. /* dict_regexp_get_pats - get the primary and second patterns and flags */
  400. static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
  401. DICT_REGEXP_PATTERN *first_pat,
  402. DICT_REGEXP_PATTERN *second_pat)
  403. {
  404. /*
  405. * Get the primary and optional secondary patterns and their flags.
  406. */
  407. if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
  408. return (0);
  409. if (**p == '!') {
  410. #if 0
  411. static int bitrot_warned = 0;
  412. if (bitrot_warned == 0) {
  413. msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
  414. " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
  415. mapname, lineno);
  416. bitrot_warned = 1;
  417. }
  418. #endif
  419. if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
  420. return (0);
  421. } else {
  422. second_pat->regexp = 0;
  423. }
  424. return (1);
  425. }
  426. /* dict_regexp_prescan - find largest $number in replacement text */
  427. static int dict_regexp_prescan(int type, VSTRING *buf, char *context)
  428. {
  429. DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
  430. size_t n;
  431. /*
  432. * Keep a copy of literal text (with $$ already replaced by $) if and
  433. * only if the replacement text contains no $number expression. This way
  434. * we can avoid having to scan the replacement text at lookup time.
  435. */
  436. if (type == MAC_PARSE_VARNAME) {
  437. if (ctxt->literal) {
  438. myfree(ctxt->literal);
  439. ctxt->literal = 0;
  440. }
  441. if (!alldig(vstring_str(buf))) {
  442. msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
  443. ctxt->mapname, ctxt->lineno, vstring_str(buf));
  444. return (MAC_PARSE_ERROR);
  445. }
  446. n = atoi(vstring_str(buf));
  447. if (n < 1) {
  448. msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
  449. ctxt->mapname, ctxt->lineno, vstring_str(buf));
  450. return (MAC_PARSE_ERROR);
  451. }
  452. if (n > ctxt->max_sub)
  453. ctxt->max_sub = n;
  454. } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
  455. if (ctxt->literal)
  456. msg_panic("regexp map %s, line %d: multiple literals but no $number",
  457. ctxt->mapname, ctxt->lineno);
  458. ctxt->literal = mystrdup(vstring_str(buf));
  459. }
  460. return (MAC_PARSE_OK);
  461. }
  462. /* dict_regexp_compile_pat - compile one pattern */
  463. static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
  464. DICT_REGEXP_PATTERN *pat)
  465. {
  466. int error;
  467. regex_t *expr;
  468. expr = (regex_t *) mymalloc(sizeof(*expr));
  469. error = regcomp(expr, pat->regexp, pat->options);
  470. if (error != 0) {
  471. dict_regexp_regerror(mapname, lineno, error, expr);
  472. myfree((char *) expr);
  473. return (0);
  474. }
  475. return (expr);
  476. }
  477. /* dict_regexp_rule_alloc - fill in a generic rule structure */
  478. static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting,
  479. int lineno,
  480. size_t size)
  481. {
  482. DICT_REGEXP_RULE *rule;
  483. rule = (DICT_REGEXP_RULE *) mymalloc(size);
  484. rule->op = op;
  485. rule->nesting = nesting;
  486. rule->lineno = lineno;
  487. rule->next = 0;
  488. return (rule);
  489. }
  490. /* dict_regexp_parseline - parse one rule */
  491. static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
  492. char *line, int nesting,
  493. int dict_flags)
  494. {
  495. char *p;
  496. p = line;
  497. /*
  498. * An ordinary rule takes one or two patterns and replacement text.
  499. */
  500. if (!ISALNUM(*p)) {
  501. DICT_REGEXP_PATTERN first_pat;
  502. DICT_REGEXP_PATTERN second_pat;
  503. DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
  504. regex_t *first_exp = 0;
  505. regex_t *second_exp;
  506. DICT_REGEXP_MATCH_RULE *match_rule;
  507. /*
  508. * Get the primary and the optional secondary patterns.
  509. */
  510. if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
  511. return (0);
  512. /*
  513. * Get the replacement text.
  514. */
  515. while (*p && ISSPACE(*p))
  516. ++p;
  517. if (!*p) {
  518. msg_warn("regexp map %s, line %d: using empty replacement string",
  519. mapname, lineno);
  520. }
  521. /*
  522. * Find the highest-numbered $number in the replacement text. We can
  523. * speed up pattern matching 1) by passing hints to the regexp
  524. * compiler, setting the REG_NOSUB flag when the replacement text
  525. * contains no $number string; 2) by passing hints to the regexp
  526. * execution code, limiting the amount of text that is made available
  527. * for substitution.
  528. */
  529. prescan_context.mapname = mapname;
  530. prescan_context.lineno = lineno;
  531. prescan_context.max_sub = 0;
  532. prescan_context.literal = 0;
  533. /*
  534. * The optimizer will eliminate code duplication and/or dead code.
  535. */
  536. #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
  537. if (first_exp) { \
  538. regfree(first_exp); \
  539. myfree((char *) first_exp); \
  540. } \
  541. if (prescan_context.literal) \
  542. myfree(prescan_context.literal); \
  543. return (rval); \
  544. } while (0)
  545. if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context)
  546. & MAC_PARSE_ERROR) {
  547. msg_warn("regexp map %s, line %d: bad replacement syntax: "
  548. "skipping this rule", mapname, lineno);
  549. CREATE_MATCHOP_ERROR_RETURN(0);
  550. }
  551. /*
  552. * Compile the primary and the optional secondary pattern. Speed up
  553. * execution when no matched text needs to be substituted into the
  554. * result string, or when the highest numbered substring is less than
  555. * the total number of () subpatterns.
  556. */
  557. if (prescan_context.max_sub == 0)
  558. first_pat.options |= REG_NOSUB;
  559. if (prescan_context.max_sub > 0 && first_pat.match == 0) {
  560. msg_warn("regexp map %s, line %d: $number found in negative match "
  561. "replacement text: skipping this rule", mapname, lineno);
  562. CREATE_MATCHOP_ERROR_RETURN(0);
  563. }
  564. if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
  565. msg_warn("regexp map %s, line %d: "
  566. "regular expression substitution is not allowed: "
  567. "skipping this rule", mapname, lineno);
  568. CREATE_MATCHOP_ERROR_RETURN(0);
  569. }
  570. if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
  571. &first_pat)) == 0)
  572. CREATE_MATCHOP_ERROR_RETURN(0);
  573. if (prescan_context.max_sub > first_exp->re_nsub) {
  574. msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
  575. "skipping this rule", mapname, lineno,
  576. (int) prescan_context.max_sub);
  577. CREATE_MATCHOP_ERROR_RETURN(0);
  578. }
  579. if (second_pat.regexp != 0) {
  580. second_pat.options |= REG_NOSUB;
  581. if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
  582. &second_pat)) == 0)
  583. CREATE_MATCHOP_ERROR_RETURN(0);
  584. } else {
  585. second_exp = 0;
  586. }
  587. match_rule = (DICT_REGEXP_MATCH_RULE *)
  588. dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno,
  589. sizeof(DICT_REGEXP_MATCH_RULE));
  590. match_rule->first_exp = first_exp;
  591. match_rule->first_match = first_pat.match;
  592. match_rule->max_sub = prescan_context.max_sub;
  593. match_rule->second_exp = second_exp;
  594. match_rule->second_match = second_pat.match;
  595. if (prescan_context.literal)
  596. match_rule->replacement = prescan_context.literal;
  597. else
  598. match_rule->replacement = mystrdup(p);
  599. return ((DICT_REGEXP_RULE *) match_rule);
  600. }
  601. /*
  602. * The IF operator takes one pattern but no replacement text.
  603. */
  604. else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
  605. DICT_REGEXP_PATTERN pattern;
  606. regex_t *expr;
  607. DICT_REGEXP_IF_RULE *if_rule;
  608. p += 2;
  609. while (*p && ISSPACE(*p))
  610. p++;
  611. if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
  612. return (0);
  613. while (*p && ISSPACE(*p))
  614. ++p;
  615. if (*p) {
  616. msg_warn("regexp map %s, line %d: ignoring extra text after"
  617. " IF statement: \"%s\"", mapname, lineno, p);
  618. msg_warn("regexp map %s, line %d: do not prepend whitespace"
  619. " to statements between IF and ENDIF", mapname, lineno);
  620. }
  621. if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
  622. return (0);
  623. if_rule = (DICT_REGEXP_IF_RULE *)
  624. dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno,
  625. sizeof(DICT_REGEXP_IF_RULE));
  626. if_rule->expr = expr;
  627. if_rule->match = pattern.match;
  628. return ((DICT_REGEXP_RULE *) if_rule);
  629. }
  630. /*
  631. * The ENDIF operator takes no patterns and no replacement text.
  632. */
  633. else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
  634. DICT_REGEXP_RULE *rule;
  635. p += 5;
  636. if (nesting == 0) {
  637. msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
  638. mapname, lineno);
  639. return (0);
  640. }
  641. while (*p && ISSPACE(*p))
  642. ++p;
  643. if (*p)
  644. msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
  645. mapname, lineno);
  646. rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno,
  647. sizeof(DICT_REGEXP_RULE));
  648. return (rule);
  649. }
  650. /*
  651. * Unrecognized input.
  652. */
  653. else {
  654. msg_warn("regexp map %s, line %d: ignoring unrecognized request",
  655. mapname, lineno);
  656. return (0);
  657. }
  658. }
  659. /* dict_regexp_open - load and compile a file containing regular expressions */
  660. DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
  661. {
  662. DICT_REGEXP *dict_regexp;
  663. VSTREAM *map_fp;
  664. struct stat st;
  665. VSTRING *line_buffer;
  666. DICT_REGEXP_RULE *rule;
  667. DICT_REGEXP_RULE *last_rule = 0;
  668. int lineno = 0;
  669. size_t max_sub = 0;
  670. int nesting = 0;
  671. char *p;
  672. /*
  673. * Sanity checks.
  674. */
  675. if (open_flags != O_RDONLY)
  676. return (dict_surrogate(DICT_TYPE_REGEXP, mapname, open_flags, dict_flags,
  677. "%s:%s map requires O_RDONLY access mode",
  678. DICT_TYPE_REGEXP, mapname));
  679. /*
  680. * Open the configuration file.
  681. */
  682. if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
  683. return (dict_surrogate(DICT_TYPE_REGEXP, mapname, open_flags, dict_flags,
  684. "open %s: %m", mapname));
  685. if (fstat(vstream_fileno(map_fp), &st) < 0)
  686. msg_fatal("fstat %s: %m", mapname);
  687. line_buffer = vstring_alloc(100);
  688. dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
  689. sizeof(*dict_regexp));
  690. dict_regexp->dict.lookup = dict_regexp_lookup;
  691. dict_regexp->dict.close = dict_regexp_close;
  692. dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
  693. if (dict_flags & DICT_FLAG_FOLD_MUL)
  694. dict_regexp->dict.fold_buf = vstring_alloc(10);
  695. dict_regexp->head = 0;
  696. dict_regexp->pmatch = 0;
  697. dict_regexp->expansion_buf = 0;
  698. dict_regexp->dict.owner.uid = st.st_uid;
  699. dict_regexp->dict.owner.status = (st.st_uid != 0);
  700. /*
  701. * Parse the regexp table.
  702. */
  703. while (readlline(line_buffer, map_fp, &lineno)) {
  704. p = vstring_str(line_buffer);
  705. trimblanks(p, 0)[0] = 0;
  706. if (*p == 0)
  707. continue;
  708. rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags);
  709. if (rule == 0)
  710. continue;
  711. if (rule->op == DICT_REGEXP_OP_MATCH) {
  712. if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
  713. max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
  714. } else if (rule->op == DICT_REGEXP_OP_IF) {
  715. nesting++;
  716. } else if (rule->op == DICT_REGEXP_OP_ENDIF) {
  717. nesting--;
  718. }
  719. if (last_rule == 0)
  720. dict_regexp->head = rule;
  721. else
  722. last_rule->next = rule;
  723. last_rule = rule;
  724. }
  725. if (nesting)
  726. msg_warn("regexp map %s, line %d: more IFs than ENDIFs",
  727. mapname, lineno);
  728. /*
  729. * Allocate space for only as many matched substrings as used in the
  730. * replacement text.
  731. */
  732. if (max_sub > 0)
  733. dict_regexp->pmatch =
  734. (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
  735. /*
  736. * Clean up.
  737. */
  738. vstring_free(line_buffer);
  739. vstream_fclose(map_fp);
  740. return (DICT_DEBUG (&dict_regexp->dict));
  741. }
  742. #endif