PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/netbsd/src/tests/lib/libc/regex/main.c

https://bitbucket.org/killerpenguinassassins/open_distrib_devel
C | 523 lines | 409 code | 45 blank | 69 comment | 154 complexity | 93a609f52de4c64c61fac71776ac1b8a MD5 | raw file
Possible License(s): CC0-1.0, MIT, LGPL-2.0, LGPL-3.0, WTFPL, GPL-2.0, BSD-2-Clause, AGPL-3.0, CC-BY-SA-3.0, MPL-2.0, JSON, BSD-3-Clause-No-Nuclear-License-2014, LGPL-2.1, CPL-1.0, AGPL-1.0, 0BSD, ISC, Apache-2.0, GPL-3.0, IPL-1.0, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /* $NetBSD: main.c,v 1.2 2011/09/16 16:13:18 plunky Exp $ */
  2. /*-
  3. * Copyright (c) 1993 The NetBSD Foundation, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  16. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  17. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  18. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  19. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. * POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <assert.h>
  28. #include <regex.h>
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #include <unistd.h>
  33. #include <sys/types.h>
  34. #include "test_regex.h"
  /* Program identity and run state. */
  char *progname;             /* argv[0], recorded by main() */
  int debug = 0;              /* raised once for every -x on the command line */
  int line = 0;               /* regression input lines read so far */
  int status = 0;             /* exit status; set to 1 when a check fails */

  /* Flag words that options() starts from for compile and execute time. */
  int copts = REG_EXTENDED;
  int eopts = 0;

  /* Offsets taken from -S / -E; main() applies them under REG_STARTEND. */
  regoff_t startoff = 0;
  regoff_t endoff = 0;

  /* Shared empty string standing in for a `""' field or an expected null match. */
  static char empty = '\0';

  /* File-local helpers that are defined after their first use. */
  static int efind(char *);
  static char *eprint(int);
  46. /*
  47. * main - do the simple case, hand off to regress() for regression
  48. */
  49. int
  50. main(int argc, char *argv[])
  51. {
  52. regex_t re;
  53. # define NS 10
  54. regmatch_t subs[NS];
  55. char erbuf[100];
  56. int err;
  57. size_t len;
  58. int c;
  59. int errflg = 0;
  60. int i;
  61. extern int optind;
  62. extern char *optarg;
  63. progname = argv[0];
  64. while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
  65. switch (c) {
  66. case 'c': /* compile options */
  67. copts = options('c', optarg);
  68. break;
  69. case 'e': /* execute options */
  70. eopts = options('e', optarg);
  71. break;
  72. case 'S': /* start offset */
  73. startoff = (regoff_t)atoi(optarg);
  74. break;
  75. case 'E': /* end offset */
  76. endoff = (regoff_t)atoi(optarg);
  77. break;
  78. case 'x': /* Debugging. */
  79. debug++;
  80. break;
  81. case '?':
  82. default:
  83. errflg++;
  84. break;
  85. }
  86. if (errflg) {
  87. fprintf(stderr, "usage: %s ", progname);
  88. fprintf(stderr, "[-c copt][-C][-d] [re]\n");
  89. exit(2);
  90. }
  91. if (optind >= argc) {
  92. regress(stdin);
  93. exit(status);
  94. }
  95. err = regcomp(&re, argv[optind++], copts);
  96. if (err) {
  97. len = regerror(err, &re, erbuf, sizeof(erbuf));
  98. fprintf(stderr, "error %s, %zd/%zd `%s'\n",
  99. eprint(err), len, (size_t)sizeof(erbuf), erbuf);
  100. exit(status);
  101. }
  102. regprint(&re, stdout);
  103. if (optind >= argc) {
  104. regfree(&re);
  105. exit(status);
  106. }
  107. if (eopts&REG_STARTEND) {
  108. subs[0].rm_so = startoff;
  109. subs[0].rm_eo = strlen(argv[optind]) - endoff;
  110. }
  111. err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
  112. if (err) {
  113. len = regerror(err, &re, erbuf, sizeof(erbuf));
  114. fprintf(stderr, "error %s, %zd/%zd `%s'\n",
  115. eprint(err), len, (size_t)sizeof(erbuf), erbuf);
  116. exit(status);
  117. }
  118. if (!(copts&REG_NOSUB)) {
  119. len = (int)(subs[0].rm_eo - subs[0].rm_so);
  120. if (subs[0].rm_so != -1) {
  121. if (len != 0)
  122. printf("match `%.*s'\n", (int)len,
  123. argv[optind] + subs[0].rm_so);
  124. else
  125. printf("match `'@%.1s\n",
  126. argv[optind] + subs[0].rm_so);
  127. }
  128. for (i = 1; i < NS; i++)
  129. if (subs[i].rm_so != -1)
  130. printf("(%d) `%.*s'\n", i,
  131. (int)(subs[i].rm_eo - subs[i].rm_so),
  132. argv[optind] + subs[i].rm_so);
  133. }
  134. exit(status);
  135. }
  136. /*
  137. * regress - main loop of regression test
  138. */
  139. void
  140. regress(FILE *in)
  141. {
  142. char inbuf[1000];
  143. # define MAXF 10
  144. char *f[MAXF];
  145. int nf;
  146. int i;
  147. char erbuf[100];
  148. size_t ne;
  149. const char *badpat = "invalid regular expression";
  150. # define SHORT 10
  151. const char *bpname = "REG_BADPAT";
  152. regex_t re;
  153. while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
  154. line++;
  155. if (inbuf[0] == '#' || inbuf[0] == '\n')
  156. continue; /* NOTE CONTINUE */
  157. inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
  158. if (debug)
  159. fprintf(stdout, "%d:\n", line);
  160. nf = split(inbuf, f, MAXF, "\t\t");
  161. if (nf < 3) {
  162. fprintf(stderr, "bad input, line %d\n", line);
  163. exit(1);
  164. }
  165. for (i = 0; i < nf; i++)
  166. if (strcmp(f[i], "\"\"") == 0)
  167. f[i] = &empty;
  168. if (nf <= 3)
  169. f[3] = NULL;
  170. if (nf <= 4)
  171. f[4] = NULL;
  172. try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
  173. if (opt('&', f[1])) /* try with either type of RE */
  174. try(f[0], f[1], f[2], f[3], f[4],
  175. options('c', f[1]) &~ REG_EXTENDED);
  176. }
  177. ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
  178. if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
  179. fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
  180. erbuf, badpat);
  181. status = 1;
  182. }
  183. ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
  184. if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
  185. ne != strlen(badpat)+1) {
  186. fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
  187. erbuf, SHORT-1, badpat);
  188. status = 1;
  189. }
  190. ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
  191. if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
  192. fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
  193. erbuf, bpname);
  194. status = 1;
  195. }
  196. re.re_endp = bpname;
  197. ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
  198. if (atoi(erbuf) != (int)REG_BADPAT) {
  199. fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
  200. erbuf, (long)REG_BADPAT);
  201. status = 1;
  202. } else if (ne != strlen(erbuf)+1) {
  203. fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
  204. erbuf, (long)REG_BADPAT);
  205. status = 1;
  206. }
  207. }
  208. /*
  209. - try - try it, and report on problems
  210. == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
  211. */
  212. void
  213. try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
  214. {
  215. regex_t re;
  216. # define NSUBS 10
  217. regmatch_t subs[NSUBS];
  218. # define NSHOULD 15
  219. char *should[NSHOULD];
  220. int nshould;
  221. char erbuf[100];
  222. int err;
  223. int len;
  224. const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
  225. int i;
  226. char *grump;
  227. char f0copy[1000];
  228. char f2copy[1000];
  229. strcpy(f0copy, f0);
  230. re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
  231. fixstr(f0copy);
  232. err = regcomp(&re, f0copy, opts);
  233. if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
  234. /* unexpected error or wrong error */
  235. len = regerror(err, &re, erbuf, sizeof(erbuf));
  236. fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
  237. line, type, eprint(err), len,
  238. (int)sizeof(erbuf), erbuf);
  239. status = 1;
  240. } else if (err == 0 && opt('C', f1)) {
  241. /* unexpected success */
  242. fprintf(stderr, "%d: %s should have given REG_%s\n",
  243. line, type, f2);
  244. status = 1;
  245. err = 1; /* so we won't try regexec */
  246. }
  247. if (err != 0) {
  248. regfree(&re);
  249. return;
  250. }
  251. strcpy(f2copy, f2);
  252. fixstr(f2copy);
  253. if (options('e', f1)&REG_STARTEND) {
  254. if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
  255. fprintf(stderr, "%d: bad STARTEND syntax\n", line);
  256. subs[0].rm_so = strchr(f2, '(') - f2 + 1;
  257. subs[0].rm_eo = strchr(f2, ')') - f2;
  258. }
  259. err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
  260. if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
  261. /* unexpected error or wrong error */
  262. len = regerror(err, &re, erbuf, sizeof(erbuf));
  263. fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
  264. line, type, eprint(err), len,
  265. (int)sizeof(erbuf), erbuf);
  266. status = 1;
  267. } else if (err != 0) {
  268. /* nothing more to check */
  269. } else if (f3 == NULL) {
  270. /* unexpected success */
  271. fprintf(stderr, "%d: %s exec should have failed\n",
  272. line, type);
  273. status = 1;
  274. err = 1; /* just on principle */
  275. } else if (opts&REG_NOSUB) {
  276. /* nothing more to check */
  277. } else if ((grump = check(f2, subs[0], f3)) != NULL) {
  278. fprintf(stderr, "%d: %s %s\n", line, type, grump);
  279. status = 1;
  280. err = 1;
  281. }
  282. if (err != 0 || f4 == NULL) {
  283. regfree(&re);
  284. return;
  285. }
  286. for (i = 1; i < NSHOULD; i++)
  287. should[i] = NULL;
  288. nshould = split(f4, &should[1], NSHOULD-1, ",");
  289. if (nshould == 0) {
  290. nshould = 1;
  291. should[1] = &empty;
  292. }
  293. for (i = 1; i < NSUBS; i++) {
  294. grump = check(f2, subs[i], should[i]);
  295. if (grump != NULL) {
  296. fprintf(stderr, "%d: %s $%d %s\n", line,
  297. type, i, grump);
  298. status = 1;
  299. err = 1;
  300. }
  301. }
  302. regfree(&re);
  303. }
  304. /*
  305. - options - pick options out of a regression-test string
  306. == int options(int type, char *s);
  307. */
  308. int
  309. options(int type, char *s)
  310. {
  311. char *p;
  312. int o = (type == 'c') ? copts : eopts;
  313. const char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
  314. for (p = s; *p != '\0'; p++)
  315. if (strchr(legal, *p) != NULL)
  316. switch (*p) {
  317. case 'b':
  318. o &= ~REG_EXTENDED;
  319. break;
  320. case 'i':
  321. o |= REG_ICASE;
  322. break;
  323. case 's':
  324. o |= REG_NOSUB;
  325. break;
  326. case 'n':
  327. o |= REG_NEWLINE;
  328. break;
  329. case 'm':
  330. o &= ~REG_EXTENDED;
  331. o |= REG_NOSPEC;
  332. break;
  333. case 'p':
  334. o |= REG_PEND;
  335. break;
  336. case '^':
  337. o |= REG_NOTBOL;
  338. break;
  339. case '$':
  340. o |= REG_NOTEOL;
  341. break;
  342. case '#':
  343. o |= REG_STARTEND;
  344. break;
  345. case 't': /* trace */
  346. o |= REG_TRACE;
  347. break;
  348. case 'l': /* force long representation */
  349. o |= REG_LARGE;
  350. break;
  351. case 'r': /* force backref use */
  352. o |= REG_BACKR;
  353. break;
  354. }
  355. return(o);
  356. }
  /*
   - opt - is a particular option in a regression string?
   == int opt(int c, char *s);
   *
   * Returns 1 when character c occurs in s and 0 when it does not.
   */
  int                             /* predicate */
  opt(int c, char *s)
  {
      const char *hit = strchr(s, c);

      return hit != NULL;
  }
  /*
   - fixstr - transform magic characters in strings
   == void fixstr(char *p);
   *
   * Rewrites the string in place: N becomes newline, T becomes tab,
   * S becomes space and Z becomes NUL.  A NULL pointer is ignored.
   * The scan stops at the first NUL it arrives at, but a NUL produced
   * from Z is stepped over, so characters after a Z are translated too.
   */
  void
  fixstr(char *p)
  {
      char *cur;

      if (p == NULL)
          return;

      for (cur = p; *cur != '\0'; cur++) {
          switch (*cur) {
          case 'N':
              *cur = '\n';
              break;
          case 'T':
              *cur = '\t';
              break;
          case 'S':
              *cur = ' ';
              break;
          case 'Z':
              *cur = '\0';
              break;
          default:
              break;
          }
      }
  }
  /*
   * check - check a substring match
   *
   * str     the subject string the offsets in `sub' refer to
   * sub     one match slot as filled in by regexec()
   * should  the expected matched text; NULL or "-" means no match is
   *         expected; "@text" means an empty match is expected at a
   *         position where `text' begins (a bare "@" means at the end
   *         of the string)
   *
   * Returns NULL when the match is as expected, otherwise a complaint.
   * The complaint lives in a static buffer that the next call overwrites.
   * All formatting is bounded by that buffer's size, so a long match is
   * truncated in the message instead of overrunning it.
   */
  char *                          /* NULL or complaint */
  check(char *str, regmatch_t sub, char *should)
  {
      static char grump[500];
      static char nothing[] = "";     /* expected text for a null match */
      char *at = NULL;
      char *p;
      int len;
      int shlen;

      if (should != NULL && strcmp(should, "-") == 0)
          should = NULL;
      if (should != NULL && should[0] == '@') {
          at = should + 1;
          should = nothing;
      }

      /* check rm_so and rm_eo for consistency */
      if (sub.rm_so > sub.rm_eo ||
          (sub.rm_so == -1 && sub.rm_eo != -1) ||
          (sub.rm_so != -1 && sub.rm_eo == -1) ||
          (sub.rm_so != -1 && sub.rm_so < 0) ||
          (sub.rm_eo != -1 && sub.rm_eo < 0)) {
          snprintf(grump, sizeof(grump), "start %ld end %ld",
              (long)sub.rm_so, (long)sub.rm_eo);
          return grump;
      }

      /* check for no match */
      if (sub.rm_so == -1) {
          if (should == NULL)
              return NULL;
          snprintf(grump, sizeof(grump), "did not match");
          return grump;
      }

      /* check for in range */
      if (sub.rm_eo > (ssize_t)strlen(str)) {
          snprintf(grump, sizeof(grump),
              "start %ld end %ld, past end of string",
              (long)sub.rm_so, (long)sub.rm_eo);
          return grump;
      }

      len = (int)(sub.rm_eo - sub.rm_so);
      p = str + sub.rm_so;

      /* check for not supposed to match */
      if (should == NULL) {
          snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
          return grump;
      }

      /* check for wrong match */
      shlen = (int)strlen(should);
      if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
          snprintf(grump, sizeof(grump), "matched `%.*s' instead", len, p);
          return grump;
      }
      if (shlen > 0)
          return NULL;

      /* check null match in right place */
      if (at == NULL)
          return NULL;
      shlen = (int)strlen(at);
      if (shlen == 0)
          shlen = 1;      /* force check for end-of-string */
      if (strncmp(p, at, (size_t)shlen) != 0) {
          snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
          return grump;
      }
      return NULL;
  }
  453. /*
  454. * eprint - convert error number to name
  455. */
  456. static char *
  457. eprint(int err)
  458. {
  459. static char epbuf[100];
  460. size_t len;
  461. len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
  462. assert(len <= sizeof(epbuf));
  463. return(epbuf);
  464. }
  465. /*
  466. * efind - convert error name to number
  467. */
  468. static int
  469. efind(char *name)
  470. {
  471. static char efbuf[100];
  472. regex_t re;
  473. sprintf(efbuf, "REG_%s", name);
  474. assert(strlen(efbuf) < sizeof(efbuf));
  475. re.re_endp = efbuf;
  476. (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
  477. return(atoi(efbuf));
  478. }