PageRenderTime 46ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/libc/regex/grot/main.c

https://bitbucket.org/freebsd/freebsd-base
C | 494 lines | 403 code | 42 blank | 49 comment | 157 complexity | c7962b2775fbafa0626ccb9e2733ef93 MD5 | raw file
  1. #include <sys/cdefs.h>
  2. __FBSDID("$FreeBSD$");
  3. #include <sys/types.h>
  4. #include <assert.h>
  5. #include <regex.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <unistd.h>
  10. #include "debug.ih"
  11. #include "main.ih"
  12. #include "split.ih"
  13. char *progname;              /* set to argv[0] in main(); printed in the usage message */
  14. int debug = 0;               /* incremented once per -x; when nonzero regress() echoes each line number */
  15. int line = 0;                /* current input line number, counted by regress() and used in diagnostics */
  16. int status = 0;              /* process exit status; set to 1 when a regression check fails */
  17. int copts = REG_EXTENDED;    /* base regcomp() flags; replaced by -c and used as the starting value in options('c', ...) */
  18. int eopts = 0;               /* base regexec() flags; replaced by -e and used as the starting value in options('e', ...) */
  19. regoff_t startoff = 0;       /* -S value: becomes subs[0].rm_so when REG_STARTEND is in effect */
  20. regoff_t endoff = 0;         /* -E value: subtracted from the string length to form subs[0].rm_eo under REG_STARTEND */
  21. /*
  22. - main - do the simple case, hand off to regress() for regression
  23. */
  24. int
  25. main(int argc, char **argv)
  26. {
  27. regex_t re;
  28. # define NS 10
  29. regmatch_t subs[NS];
  30. char erbuf[100];
  31. int err;
  32. size_t len;
  33. int c;
  34. int errflg = 0;
  35. int i;
  36. extern int optind;
  37. extern char *optarg;
  38. progname = argv[0];
  39. while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
  40. switch (c) {
  41. case 'c': /* compile options */
  42. copts = options('c', optarg);
  43. break;
  44. case 'e': /* execute options */
  45. eopts = options('e', optarg);
  46. break;
  47. case 'S': /* start offset */
  48. startoff = (regoff_t)atoi(optarg);
  49. break;
  50. case 'E': /* end offset */
  51. endoff = (regoff_t)atoi(optarg);
  52. break;
  53. case 'x': /* Debugging. */
  54. debug++;
  55. break;
  56. case '?':
  57. default:
  58. errflg++;
  59. break;
  60. }
  61. if (errflg) {
  62. fprintf(stderr, "usage: %s ", progname);
  63. fprintf(stderr, "[-c copt][-C][-d] [re]\n");
  64. exit(2);
  65. }
  66. if (optind >= argc) {
  67. regress(stdin);
  68. exit(status);
  69. }
  70. err = regcomp(&re, argv[optind++], copts);
  71. if (err) {
  72. len = regerror(err, &re, erbuf, sizeof(erbuf));
  73. fprintf(stderr, "error %s, %zu/%zu `%s'\n",
  74. eprint(err), len, sizeof(erbuf), erbuf);
  75. exit(status);
  76. }
  77. regprint(&re, stdout);
  78. if (optind >= argc) {
  79. regfree(&re);
  80. exit(status);
  81. }
  82. if ((eopts & REG_STARTEND) != 0) {
  83. subs[0].rm_so = startoff;
  84. subs[0].rm_eo = strlen(argv[optind]) - endoff;
  85. }
  86. err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
  87. if (err) {
  88. len = regerror(err, &re, erbuf, sizeof(erbuf));
  89. fprintf(stderr, "error %s, %zu/%zu `%s'\n",
  90. eprint(err), len, sizeof(erbuf), erbuf);
  91. exit(status);
  92. }
  93. if ((copts & REG_NOSUB) == 0) {
  94. len = (int)(subs[0].rm_eo - subs[0].rm_so);
  95. if (subs[0].rm_so != -1) {
  96. if (len != 0)
  97. printf("match `%.*s'\n", (int)len,
  98. argv[optind] + subs[0].rm_so);
  99. else
  100. printf("match `'@%.1s\n",
  101. argv[optind] + subs[0].rm_so);
  102. }
  103. for (i = 1; i < NS; i++)
  104. if (subs[i].rm_so != -1)
  105. printf("(%d) `%.*s'\n", i,
  106. (int)(subs[i].rm_eo - subs[i].rm_so),
  107. argv[optind] + subs[i].rm_so);
  108. }
  109. exit(status);
  110. }
  111. /*
  112. - regress - main loop of regression test
  113. == void regress(FILE *in);
  114. */
  115. void
  116. regress(FILE *in)
  117. {
  118. char inbuf[1000];
  119. # define MAXF 10
  120. char *f[MAXF];
  121. int nf;
  122. int i;
  123. char erbuf[100];
  124. size_t ne;
  125. char *badpat = "invalid regular expression";
  126. # define SHORT 10
  127. char *bpname = "REG_BADPAT";
  128. regex_t re;
  129. while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
  130. line++;
  131. if (inbuf[0] == '#' || inbuf[0] == '\n')
  132. continue; /* NOTE CONTINUE */
  133. inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
  134. if (debug)
  135. fprintf(stdout, "%d:\n", line);
  136. nf = split(inbuf, f, MAXF, "\t\t");
  137. if (nf < 3) {
  138. fprintf(stderr, "bad input, line %d\n", line);
  139. exit(1);
  140. }
  141. for (i = 0; i < nf; i++)
  142. if (strcmp(f[i], "\"\"") == 0)
  143. f[i] = "";
  144. if (nf <= 3)
  145. f[3] = NULL;
  146. if (nf <= 4)
  147. f[4] = NULL;
  148. try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
  149. if (opt('&', f[1])) /* try with either type of RE */
  150. try(f[0], f[1], f[2], f[3], f[4],
  151. options('c', f[1]) &~ REG_EXTENDED);
  152. }
  153. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  154. if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
  155. fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
  156. erbuf, badpat);
  157. status = 1;
  158. }
  159. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
  160. if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
  161. ne != strlen(badpat)+1) {
  162. fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
  163. erbuf, SHORT-1, badpat);
  164. status = 1;
  165. }
  166. ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  167. if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
  168. fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
  169. erbuf, bpname);
  170. status = 1;
  171. }
  172. re.re_endp = bpname;
  173. ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
  174. if (atoi(erbuf) != (int)REG_BADPAT) {
  175. fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
  176. erbuf, (long)REG_BADPAT);
  177. status = 1;
  178. } else if (ne != strlen(erbuf) + 1) {
  179. fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
  180. erbuf, (long)REG_BADPAT);
  181. status = 1;
  182. }
  183. }
  184. /*
  185. - try - try it, and report on problems
  186. == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
  187. - opts: may not match f1
  188. */
  189. void
  190. try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
  191. {
  192. regex_t re;
  193. # define NSUBS 10
  194. regmatch_t subs[NSUBS];
  195. # define NSHOULD 15
  196. char *should[NSHOULD];
  197. char erbuf[100];
  198. size_t len;
  199. int err, i, nshould;
  200. char *grump;
  201. char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
  202. char f0copy[1000];
  203. char f2copy[1000];
  204. strcpy(f0copy, f0);
  205. re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
  206. fixstr(f0copy);
  207. err = regcomp(&re, f0copy, opts);
  208. if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
  209. /* unexpected error or wrong error */
  210. len = regerror(err, &re, erbuf, sizeof(erbuf));
  211. fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
  212. line, type, eprint(err), len, sizeof(erbuf), erbuf);
  213. status = 1;
  214. } else if (err == 0 && opt('C', f1)) {
  215. /* unexpected success */
  216. fprintf(stderr, "%d: %s should have given REG_%s\n",
  217. line, type, f2);
  218. status = 1;
  219. err = 1; /* so we won't try regexec */
  220. }
  221. if (err != 0) {
  222. regfree(&re);
  223. return;
  224. }
  225. strcpy(f2copy, f2);
  226. fixstr(f2copy);
  227. if (options('e', f1)&REG_STARTEND) {
  228. if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
  229. fprintf(stderr, "%d: bad STARTEND syntax\n", line);
  230. subs[0].rm_so = strchr(f2, '(') - f2 + 1;
  231. subs[0].rm_eo = strchr(f2, ')') - f2;
  232. }
  233. err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
  234. if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
  235. /* unexpected error or wrong error */
  236. len = regerror(err, &re, erbuf, sizeof(erbuf));
  237. fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
  238. line, type, eprint(err), len, sizeof(erbuf), erbuf);
  239. status = 1;
  240. } else if (err != 0) {
  241. /* nothing more to check */
  242. } else if (f3 == NULL) {
  243. /* unexpected success */
  244. fprintf(stderr, "%d: %s exec should have failed\n",
  245. line, type);
  246. status = 1;
  247. err = 1; /* just on principle */
  248. } else if (opts&REG_NOSUB) {
  249. /* nothing more to check */
  250. } else if ((grump = check(f2, subs[0], f3)) != NULL) {
  251. fprintf(stderr, "%d: %s %s\n", line, type, grump);
  252. status = 1;
  253. err = 1;
  254. }
  255. if (err != 0 || f4 == NULL) {
  256. regfree(&re);
  257. return;
  258. }
  259. for (i = 1; i < NSHOULD; i++)
  260. should[i] = NULL;
  261. nshould = split(f4, should+1, NSHOULD-1, ",");
  262. if (nshould == 0) {
  263. nshould = 1;
  264. should[1] = "";
  265. }
  266. for (i = 1; i < NSUBS; i++) {
  267. grump = check(f2, subs[i], should[i]);
  268. if (grump != NULL) {
  269. fprintf(stderr, "%d: %s $%d %s\n", line,
  270. type, i, grump);
  271. status = 1;
  272. err = 1;
  273. }
  274. }
  275. regfree(&re);
  276. }
  277. /*
  278. - options - pick options out of a regression-test string
  279. - type: 'c' - compile, 'e' - exec
  280. == int options(int type, char *s);
  281. */
  282. int
  283. options(int type, char *s)
  284. {
  285. char *p;
  286. int o = (type == 'c') ? copts : eopts;
  287. char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
  288. for (p = s; *p != '\0'; p++)
  289. if (strchr(legal, *p) != NULL)
  290. switch (*p) {
  291. case 'b':
  292. o &= ~REG_EXTENDED;
  293. break;
  294. case 'i':
  295. o |= REG_ICASE;
  296. break;
  297. case 's':
  298. o |= REG_NOSUB;
  299. break;
  300. case 'n':
  301. o |= REG_NEWLINE;
  302. break;
  303. case 'm':
  304. o &= ~REG_EXTENDED;
  305. o |= REG_NOSPEC;
  306. break;
  307. case 'p':
  308. o |= REG_PEND;
  309. break;
  310. case '^':
  311. o |= REG_NOTBOL;
  312. break;
  313. case '$':
  314. o |= REG_NOTEOL;
  315. break;
  316. case '#':
  317. o |= REG_STARTEND;
  318. break;
  319. case 't': /* trace */
  320. o |= REG_TRACE;
  321. break;
  322. case 'l': /* force long representation */
  323. o |= REG_LARGE;
  324. break;
  325. case 'r': /* force backref use */
  326. o |= REG_BACKR;
  327. break;
  328. }
  329. return(o);
  330. }
  /*
   - opt - is a particular option in a regression string?
   == int opt(int c, char *s);
   *
   * Returns 1 when the character c occurs in the string s, 0 otherwise.
   */
  int             /* predicate */
  opt(int c, char *s)
  {
      const char *hit = strchr(s, c);

      if (hit == NULL)
          return 0;
      return 1;
  }
  /*
   - fixstr - transform magic characters in strings
   == void fixstr(char *p);
   *
   * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
   * 'S' becomes space and 'Z' becomes a NUL byte.  A NULL pointer is
   * accepted and ignored.
   *
   * The end-of-string test is applied to the character *after* the one just
   * rewritten, so a NUL produced from 'Z' does not end the scan; only the
   * string's own terminator does.
   */
  void
  fixstr(char *p)
  {
      char *cur;

      if (p == NULL)
          return;

      cur = p;
      while (*cur != '\0') {
          switch (*cur) {
          case 'N':
              *cur = '\n';
              break;
          case 'T':
              *cur = '\t';
              break;
          case 'S':
              *cur = ' ';
              break;
          case 'Z':
              *cur = '\0';
              break;
          default:
              break;
          }
          cur++;
      }
  }
  /*
   - check - check a substring match
   == char *check(char *str, regmatch_t sub, char *should);
   *
   * str is the subject string, sub the offsets reported for one
   * (sub)expression, and should the expected matched text:
   *   NULL or "-"   no match is expected
   *   "@text"       an empty match is expected, located just before text
   *                 ("@" alone means at the end of the string)
   *   anything else the exact text that should have matched
   *
   * Returns NULL when the result is as expected, otherwise a complaint.
   * The complaint is either a string literal or a static buffer that the
   * next call overwrites.
   */
  char *          /* NULL or complaint */
  check(char *str, regmatch_t sub, char *should)
  {
      int len;
      int shlen;
      char *p;
      static char grump[500];
      char *at = NULL;

      if (should != NULL && strcmp(should, "-") == 0)
          should = NULL;
      if (should != NULL && should[0] == '@') {
          at = should + 1;
          should = "";
      }

      /* check rm_so and rm_eo for consistency */
      if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
          (sub.rm_so != -1 && sub.rm_eo == -1) ||
          (sub.rm_so != -1 && sub.rm_so < 0) ||
          (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
          snprintf(grump, sizeof(grump), "start %ld end %ld",
              (long)sub.rm_so, (long)sub.rm_eo);
          return(grump);
      }

      /* check for no match */
      if (sub.rm_so == -1 && should == NULL)
          return(NULL);
      if (sub.rm_so == -1)
          return("did not match");

      /* check for in range; rm_eo is known to be non-negative here */
      if ((size_t)sub.rm_eo > strlen(str)) {
          snprintf(grump, sizeof(grump),
              "start %ld end %ld, past end of string",
              (long)sub.rm_so, (long)sub.rm_eo);
          return(grump);
      }

      len = (int)(sub.rm_eo - sub.rm_so);
      p = str + sub.rm_so;

      /*
       * check for not supposed to match -- this must come before should
       * is measured, since should is NULL on this path
       */
      if (should == NULL) {
          snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
          return(grump);
      }
      shlen = (int)strlen(should);

      /* check for wrong match */
      if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
          snprintf(grump, sizeof(grump), "matched `%.*s' instead", len, p);
          return(grump);
      }
      if (shlen > 0)
          return(NULL);

      /* check null match in right place */
      if (at == NULL)
          return(NULL);
      shlen = (int)strlen(at);
      if (shlen == 0)
          shlen = 1;      /* force check for end-of-string */
      if (strncmp(p, at, (size_t)shlen) != 0) {
          snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
          return(grump);
      }
      return(NULL);
  }
  424. /*
  425. - eprint - convert error number to name
  426. == static char *eprint(int err);
  427. */
  428. static char *
  429. eprint(int err)
  430. {
  431. static char epbuf[100];
  432. size_t len;
  433. len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
  434. assert(len <= sizeof(epbuf));
  435. return(epbuf);
  436. }
  437. /*
  438. - efind - convert error name to number
  439. == static int efind(char *name);
  440. */
  441. static int
  442. efind(char *name)
  443. {
  444. static char efbuf[100];
  445. size_t n;
  446. regex_t re;
  447. sprintf(efbuf, "REG_%s", name);
  448. assert(strlen(efbuf) < sizeof(efbuf));
  449. re.re_endp = efbuf;
  450. (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
  451. return(atoi(efbuf));
  452. }