PageRenderTime 27ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 1ms

/uClinux-dist/user/mysql/regex/main.c

https://bitbucket.org/__wp__/mb-linux-msli
C | 509 lines | 420 code | 42 blank | 47 comment | 155 complexity | fb44e8c6f7c3c611a44a581f8ca7b211 MD5 | raw file
Possible License(s): AGPL-3.0, GPL-2.0, LGPL-2.0, MPL-2.0, ISC, BSD-3-Clause, LGPL-2.1, MPL-2.0-no-copyleft-exception, 0BSD, CC-BY-SA-3.0, GPL-3.0, LGPL-3.0, AGPL-1.0, Unlicense
  1. #include <my_global.h>
  2. #include <m_string.h>
  3. #include <sys/types.h>
  4. #include <regex.h>
  5. #include <assert.h>
  6. #include "main.ih"
  7. char *progname; /* program name; set from argv[0] in main() and used in the usage message */
  8. int debug = 0; /* incremented once per -x option; nonzero makes regress() echo line numbers */
  9. int line = 0; /* number of the input line being processed; counted by regress() */
  10. int status = 0; /* value passed to exit(); set to 1 when a regression check fails */
  11. int copts = REG_EXTENDED; /* regcomp() flags; also the starting value for options('c', ...) */
  12. int eopts = 0; /* regexec() flags; also the starting value for options('e', ...) */
  13. regoff_t startoff = 0; /* -S value: start of the REG_STARTEND window in main() */
  14. regoff_t endoff = 0; /* -E value: subtracted from the string length to get the window end */
  15. extern int split(); /* defined elsewhere; called here as split(string, fields, maxfields, separators) */
  16. extern void regprint(); /* defined elsewhere; called here as regprint(&re, stdout) */
  17. /*
  18. - main - do the simple case, hand off to regress() for regression
  19. */
  20. int main(argc, argv)
  21. int argc;
  22. char *argv[];
  23. {
  24. regex_t re;
  25. # define NS 10
  26. regmatch_t subs[NS];
  27. char erbuf[100];
  28. int err;
  29. size_t len;
  30. int c;
  31. int errflg = 0;
  32. register int i;
  33. extern int optind;
  34. extern char *optarg;
  35. progname = argv[0];
  36. while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
  37. switch (c) {
  38. case 'c': /* compile options */
  39. copts = options('c', optarg);
  40. break;
  41. case 'e': /* execute options */
  42. eopts = options('e', optarg);
  43. break;
  44. case 'S': /* start offset */
  45. startoff = (regoff_t)atoi(optarg);
  46. break;
  47. case 'E': /* end offset */
  48. endoff = (regoff_t)atoi(optarg);
  49. break;
  50. case 'x': /* Debugging. */
  51. debug++;
  52. break;
  53. case '?':
  54. default:
  55. errflg++;
  56. break;
  57. }
  58. if (errflg) {
  59. fprintf(stderr, "usage: %s ", progname);
  60. fprintf(stderr, "[-c copt][-C][-d] [re]\n");
  61. exit(2);
  62. }
  63. if (optind >= argc) {
  64. regress(stdin);
  65. exit(status);
  66. }
  67. err = regcomp(&re, argv[optind++], copts);
  68. if (err) {
  69. len = regerror(err, &re, erbuf, sizeof(erbuf));
  70. fprintf(stderr, "error %s, %d/%d `%s'\n",
  71. eprint(err), len, sizeof(erbuf), erbuf);
  72. exit(status);
  73. }
  74. regprint(&re, stdout);
  75. if (optind >= argc) {
  76. regfree(&re);
  77. exit(status);
  78. }
  79. if (eopts&REG_STARTEND) {
  80. subs[0].rm_so = startoff;
  81. subs[0].rm_eo = strlen(argv[optind]) - endoff;
  82. }
  83. err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
  84. if (err) {
  85. len = regerror(err, &re, erbuf, sizeof(erbuf));
  86. fprintf(stderr, "error %s, %d/%d `%s'\n",
  87. eprint(err), len, sizeof(erbuf), erbuf);
  88. exit(status);
  89. }
  90. if (!(copts&REG_NOSUB)) {
  91. len = (int)(subs[0].rm_eo - subs[0].rm_so);
  92. if (subs[0].rm_so != -1) {
  93. if (len != 0)
  94. printf("match `%.*s'\n", (int)len,
  95. argv[optind] + subs[0].rm_so);
  96. else
  97. printf("match `'@%.1s\n",
  98. argv[optind] + subs[0].rm_so);
  99. }
  100. for (i = 1; i < NS; i++)
  101. if (subs[i].rm_so != -1)
  102. printf("(%d) `%.*s'\n", i,
  103. (int)(subs[i].rm_eo - subs[i].rm_so),
  104. argv[optind] + subs[i].rm_so);
  105. }
  106. exit(status);
  107. }
  108. /*
  109. - regress - main loop of regression test
  110. == void regress(FILE *in);
  111. */
  112. void
  113. regress(in)
  114. FILE *in;
  115. {
  116. char inbuf[1000];
  117. # define MAXF 10
  118. char *f[MAXF];
  119. int nf;
  120. int i;
  121. char erbuf[100];
  122. size_t ne;
  123. const char *badpat = "invalid regular expression";
  124. # define SHORT 10
  125. const char *bpname = "REG_BADPAT";
  126. regex_t re;
  127. while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
  128. line++;
  129. if (inbuf[0] == '#' || inbuf[0] == '\n')
  130. continue; /* NOTE CONTINUE */
  131. inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
  132. if (debug)
  133. fprintf(stdout, "%d:\n", line);
  134. nf = split(inbuf, f, MAXF, "\t\t");
  135. if (nf < 3) {
  136. fprintf(stderr, "bad input, line %d\n", line);
  137. exit(1);
  138. }
  139. for (i = 0; i < nf; i++)
  140. if (strcmp(f[i], "\"\"") == 0)
  141. f[i] = (char*) "";
  142. if (nf <= 3)
  143. f[3] = NULL;
  144. if (nf <= 4)
  145. f[4] = NULL;
  146. rx_try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
  147. if (opt('&', f[1])) /* try with either type of RE */
  148. rx_try(f[0], f[1], f[2], f[3], f[4],
  149. options('c', f[1]) &~ REG_EXTENDED);
  150. }
  151. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  152. if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
  153. fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
  154. erbuf, badpat);
  155. status = 1;
  156. }
  157. ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
  158. if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
  159. ne != strlen(badpat)+1) {
  160. fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
  161. erbuf, SHORT-1, badpat);
  162. status = 1;
  163. }
  164. ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
  165. if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
  166. fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
  167. erbuf, bpname);
  168. status = 1;
  169. }
  170. re.re_endp = bpname;
  171. ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
  172. if (atoi(erbuf) != (int)REG_BADPAT) {
  173. fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
  174. erbuf, (long)REG_BADPAT);
  175. status = 1;
  176. } else if (ne != strlen(erbuf)+1) {
  177. fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
  178. erbuf, (long)REG_BADPAT);
  179. status = 1;
  180. }
  181. }
  182. /*
  183. - rx_try - try it, and report on problems
  184. == void rx_try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
  185. */
  186. void
  187. rx_try(f0, f1, f2, f3, f4, opts)
  188. char *f0;
  189. char *f1;
  190. char *f2;
  191. char *f3;
  192. char *f4;
  193. int opts; /* may not match f1 */
  194. {
  195. regex_t re;
  196. # define NSUBS 10
  197. regmatch_t subs[NSUBS];
  198. # define NSHOULD 15
  199. char *should[NSHOULD];
  200. int nshould;
  201. char erbuf[100];
  202. int err;
  203. int len;
  204. const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
  205. register int i;
  206. char *grump;
  207. char f0copy[1000];
  208. char f2copy[1000];
  209. strcpy(f0copy, f0);
  210. re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
  211. fixstr(f0copy);
  212. err = regcomp(&re, f0copy, opts);
  213. if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
  214. /* unexpected error or wrong error */
  215. len = regerror(err, &re, erbuf, sizeof(erbuf));
  216. fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
  217. line, type, eprint(err), len,
  218. sizeof(erbuf), erbuf);
  219. status = 1;
  220. } else if (err == 0 && opt('C', f1)) {
  221. /* unexpected success */
  222. fprintf(stderr, "%d: %s should have given REG_%s\n",
  223. line, type, f2);
  224. status = 1;
  225. err = 1; /* so we won't try regexec */
  226. }
  227. if (err != 0) {
  228. regfree(&re);
  229. return;
  230. }
  231. strcpy(f2copy, f2);
  232. fixstr(f2copy);
  233. if (options('e', f1)&REG_STARTEND) {
  234. if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
  235. fprintf(stderr, "%d: bad STARTEND syntax\n", line);
  236. subs[0].rm_so = strchr(f2, '(') - f2 + 1;
  237. subs[0].rm_eo = strchr(f2, ')') - f2;
  238. }
  239. err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
  240. if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
  241. /* unexpected error or wrong error */
  242. len = regerror(err, &re, erbuf, sizeof(erbuf));
  243. fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
  244. line, type, eprint(err), len,
  245. sizeof(erbuf), erbuf);
  246. status = 1;
  247. } else if (err != 0) {
  248. /* nothing more to check */
  249. } else if (f3 == NULL) {
  250. /* unexpected success */
  251. fprintf(stderr, "%d: %s exec should have failed\n",
  252. line, type);
  253. status = 1;
  254. err = 1; /* just on principle */
  255. } else if (opts&REG_NOSUB) {
  256. /* nothing more to check */
  257. } else if ((grump = check(f2, subs[0], f3)) != NULL) {
  258. fprintf(stderr, "%d: %s %s\n", line, type, grump);
  259. status = 1;
  260. err = 1;
  261. }
  262. if (err != 0 || f4 == NULL) {
  263. regfree(&re);
  264. return;
  265. }
  266. for (i = 1; i < NSHOULD; i++)
  267. should[i] = NULL;
  268. nshould = split(f4, should+1, NSHOULD-1, ",");
  269. if (nshould == 0) {
  270. nshould = 1;
  271. should[1] = (char*) "";
  272. }
  273. for (i = 1; i < NSUBS; i++) {
  274. grump = check(f2, subs[i], should[i]);
  275. if (grump != NULL) {
  276. fprintf(stderr, "%d: %s $%d %s\n", line,
  277. type, i, grump);
  278. status = 1;
  279. err = 1;
  280. }
  281. }
  282. regfree(&re);
  283. }
  284. /*
  285. - options - pick options out of a regression-test string
  286. == int options(int type, char *s);
  287. */
  288. int
  289. options(type, s)
  290. int type; /* 'c' compile, 'e' exec */
  291. char *s;
  292. {
  293. register char *p;
  294. register int o = (type == 'c') ? copts : eopts;
  295. register const char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
  296. for (p = s; *p != '\0'; p++)
  297. if (strchr(legal, *p) != NULL)
  298. switch (*p) {
  299. case 'b':
  300. o &= ~REG_EXTENDED;
  301. break;
  302. case 'i':
  303. o |= REG_ICASE;
  304. break;
  305. case 's':
  306. o |= REG_NOSUB;
  307. break;
  308. case 'n':
  309. o |= REG_NEWLINE;
  310. break;
  311. case 'm':
  312. o &= ~REG_EXTENDED;
  313. o |= REG_NOSPEC;
  314. break;
  315. case 'p':
  316. o |= REG_PEND;
  317. break;
  318. case '^':
  319. o |= REG_NOTBOL;
  320. break;
  321. case '$':
  322. o |= REG_NOTEOL;
  323. break;
  324. case '#':
  325. o |= REG_STARTEND;
  326. break;
  327. case 't': /* trace */
  328. o |= REG_TRACE;
  329. break;
  330. case 'l': /* force long representation */
  331. o |= REG_LARGE;
  332. break;
  333. case 'r': /* force backref use */
  334. o |= REG_BACKR;
  335. break;
  336. }
  337. return(o);
  338. }
  /*
   - opt - is a particular option in a regression string?
   == int opt(int c, char *s);
   *
   * Returns 1 when strchr() finds character c in s, 0 otherwise.
   */
  int             /* predicate */
  opt(int c, char *s)
  {
      const char *hit = strchr(s, c);

      return (hit == NULL) ? 0 : 1;
  }
  /*
   - fixstr - transform magic characters in strings
   == void fixstr(register char *p);
   *
   * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
   * and 'Z' a NUL byte.  A NULL pointer is ignored.  The scan stops at the
   * first NUL that was already in the string; a NUL produced from 'Z' is
   * stepped over, so the characters after it are translated too.
   */
  void
  fixstr(char *p)
  {
      char *cur;

      if (p == NULL)
          return;

      for (cur = p; *cur != '\0'; cur++) {
          switch (*cur) {
          case 'N':
              *cur = '\n';
              break;
          case 'T':
              *cur = '\t';
              break;
          case 'S':
              *cur = ' ';
              break;
          case 'Z':
              *cur = '\0';
              break;
          default:
              break;
          }
      }
  }
  /*
   - check - check a substring match
   == char *check(char *str, regmatch_t sub, char *should);
   *
   * str     the string that was matched against
   * sub     the offsets reported for one (sub)match
   * should  the expected text; NULL or "-" means no match is expected,
   *         and "@text" means an empty match located where `text' begins
   *         (a bare "@" means at the end of the string)
   *
   * Returns NULL when `sub' agrees with `should', otherwise a complaint.
   * The complaint is either a string literal or a static buffer that the
   * next call overwrites.
   */
  char *              /* NULL or complaint */
  check(char *str, regmatch_t sub, char *should)
  {
      int len;
      int shlen;
      int show;
      char *p;
      static char grump[500];
  # define SHOWMAX 400    /* most matched text copied into grump[] */
      char *at = NULL;

      if (should != NULL && strcmp(should, "-") == 0)
          should = NULL;
      if (should != NULL && should[0] == '@') {
          at = should + 1;
          should = (char*) "";
      }

      /* check rm_so and rm_eo for consistency */
      if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
          (sub.rm_so != -1 && sub.rm_eo == -1) ||
          (sub.rm_so != -1 && sub.rm_so < 0) ||
          (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
          sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
              (long)sub.rm_eo);
          return(grump);
      }

      /* check for no match */
      if (sub.rm_so == -1 && should == NULL)
          return(NULL);
      if (sub.rm_so == -1)
          return((char*) "did not match");

      /* check for in range */
      if ((int) sub.rm_eo > (int) strlen(str)) {
          sprintf(grump, "start %ld end %ld, past end of string",
              (long)sub.rm_so, (long)sub.rm_eo);
          return(grump);
      }

      len = (int)(sub.rm_eo - sub.rm_so);
      p = str + sub.rm_so;
      /* cap what is copied so a long match cannot overrun grump[] */
      show = (len > SHOWMAX) ? SHOWMAX : len;

      /*
       * check for not supposed to match; this has to come before
       * strlen(should), which must never see a NULL
       */
      if (should == NULL) {
          sprintf(grump, "matched `%.*s'", show, p);
          return(grump);
      }
      shlen = (int)strlen(should);

      /* check for wrong match */
      if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
          sprintf(grump, "matched `%.*s' instead", show, p);
          return(grump);
      }
      if (shlen > 0)
          return(NULL);

      /* check null match in right place */
      if (at == NULL)
          return(NULL);
      shlen = (int)strlen(at);
      if (shlen == 0)
          shlen = 1;  /* force check for end-of-string */
      if (strncmp(p, at, (size_t)shlen) != 0) {
          sprintf(grump, "matched null at `%.20s'", p);
          return(grump);
      }
      return(NULL);
  }
  438. /*
  439. - eprint - convert error number to name
  440. == static char *eprint(int err);
  441. */
  442. static char *
  443. eprint(err)
  444. int err;
  445. {
  446. static char epbuf[100];
  447. size_t len;
  448. len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
  449. assert(len <= sizeof(epbuf));
  450. return(epbuf);
  451. }
  452. /*
  453. - efind - convert error name to number
  454. == static int efind(char *name);
  455. */
  456. static int
  457. efind(name)
  458. char *name;
  459. {
  460. static char efbuf[100];
  461. regex_t re;
  462. sprintf(efbuf, "REG_%s", name);
  463. assert(strlen(efbuf) < sizeof(efbuf));
  464. re.re_endp = efbuf;
  465. (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
  466. return(atoi(efbuf));
  467. }