PageRenderTime 72ms CodeModel.GetById 37ms RepoModel.GetById 0ms app.codeStats 0ms

/mks-svn4458/ctags/ctags/lregex.c

#
C | 704 lines | 577 code | 60 blank | 67 comment | 159 complexity | 26526ff0cc3523db4a33f92e44fdfaa6 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, LGPL-3.0, Zlib, AGPL-1.0
  1. /*
  2. * $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $
  3. *
  4. * Copyright (c) 2000-2003, Darren Hiebert
  5. *
  6. * This source code is released for free distribution under the terms of the
  7. * GNU General Public License.
  8. *
  9. * This module contains functions for applying regular expression matching.
  10. *
  11. * The code for utilizing the Gnu regex package with regards to processing the
  12. * regex option and checking for regex matches was adapted from routines in
  13. * Gnu etags.
  14. */
  15. /*
  16. * INCLUDE FILES
  17. */
  18. #include "general.h" /* must always come first */
  19. #include <string.h>
  20. #ifdef HAVE_REGCOMP
  21. # include <ctype.h>
  22. # include <stddef.h>
  23. # ifdef HAVE_SYS_TYPES_H
  24. # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
  25. # endif
  26. # include <regex.h>
  27. #endif
  28. #include "debug.h"
  29. #include "entry.h"
  30. #include "parse.h"
  31. #include "read.h"
  32. #include "routines.h"
  33. #ifdef HAVE_REGEX
  34. /*
  35. * MACROS
  36. */
  37. /* Back-references \0 through \9 */
  38. #define BACK_REFERENCE_COUNT 10
  39. #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
  40. # define POSIX_REGEX
  41. #endif
  42. #define REGEX_NAME "Regex"
  43. /*
  44. * DATA DECLARATIONS
  45. */
  46. #if defined (POSIX_REGEX)
  47. struct sKind {
  48. boolean enabled;
  49. char letter;
  50. char* name;
  51. char* description;
  52. };
  53. enum pType { PTRN_TAG, PTRN_CALLBACK };
  54. typedef struct {
  55. regex_t *pattern;
  56. enum pType type;
  57. union {
  58. struct {
  59. char *name_pattern;
  60. struct sKind kind;
  61. } tag;
  62. struct {
  63. regexCallback function;
  64. } callback;
  65. } u;
  66. } regexPattern;
  67. #endif
  68. typedef struct {
  69. regexPattern *patterns;
  70. unsigned int count;
  71. } patternSet;
  72. /*
  73. * DATA DEFINITIONS
  74. */
  75. static boolean regexBroken = FALSE;
  76. /* Array of pattern sets, indexed by language */
  77. static patternSet* Sets = NULL;
  78. static int SetUpper = -1; /* upper language index in list */
  79. /*
  80. * FUNCTION DEFINITIONS
  81. */
  82. static void clearPatternSet (const langType language)
  83. {
  84. if (language <= SetUpper)
  85. {
  86. patternSet* const set = Sets + language;
  87. unsigned int i;
  88. for (i = 0 ; i < set->count ; ++i)
  89. {
  90. regexPattern *p = &set->patterns [i];
  91. #if defined (POSIX_REGEX)
  92. regfree (p->pattern);
  93. #endif
  94. eFree (p->pattern);
  95. p->pattern = NULL;
  96. if (p->type == PTRN_TAG)
  97. {
  98. eFree (p->u.tag.name_pattern);
  99. p->u.tag.name_pattern = NULL;
  100. eFree (p->u.tag.kind.name);
  101. p->u.tag.kind.name = NULL;
  102. if (p->u.tag.kind.description != NULL)
  103. {
  104. eFree (p->u.tag.kind.description);
  105. p->u.tag.kind.description = NULL;
  106. }
  107. }
  108. }
  109. if (set->patterns != NULL)
  110. eFree (set->patterns);
  111. set->patterns = NULL;
  112. set->count = 0;
  113. }
  114. }
  115. /*
  116. * Regex pseudo-parser
  117. */
  118. static void makeRegexTag (
  119. const vString* const name, const struct sKind* const kind)
  120. {
  121. if (kind->enabled)
  122. {
  123. tagEntryInfo e;
  124. Assert (name != NULL && vStringLength (name) > 0);
  125. Assert (kind != NULL);
  126. initTagEntry (&e, vStringValue (name));
  127. e.kind = kind->letter;
  128. e.kindName = kind->name;
  129. makeTagEntry (&e);
  130. }
  131. }
  132. /*
  133. * Regex pattern definition
  134. */
  /* Take a string like "/blah/" and turn it into "blah", treating the first
   * character as the separator and stopping at the next unquoted occurrence
   * of it. A backslash quotes the following character: a quoted separator
   * becomes the separator itself, "\t" becomes a Tab character, and any other
   * quoted character is kept together with its backslash. A backslash that is
   * the last character of the string is dropped.
   *
   * Works in place and null terminates the extracted string, which starts at
   * the original `name'. Returns a pointer to the terminating separator, or
   * to the end of the string if no closing separator was found. An empty
   * string is returned unchanged.
   */
  static char* scanSeparators (char* name)
  {
      const char sep = name [0];
      char *copyto = name;

      /* With no separator character there is nothing to scan; advancing past
       * the terminator would read beyond the buffer.
       */
      if (sep == '\0')
          return name;

      for (++name ; *name != '\0' ; ++name)
      {
          if (*name == '\\')
          {
              /* Look at the quoted character. */
              ++name;
              if (*name == '\0')
                  break;
              if (*name == sep)
                  *copyto++ = sep;
              else if (*name == 't')
                  *copyto++ = '\t';
              else
              {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
              }
              continue;
          }
          if (*name == sep)
              break;
          *copyto++ = *name;
      }
      *copyto = '\0';
      return name;
  }
  175. /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
  176. * character is whatever the first character of `regexp' is), by breaking it
  177. * up into null terminated strings, removing the separators, and expanding
  178. * '\t' into tabs. When complete, `regexp' points to the line matching
  179. * pattern, a pointer to the name matching pattern is written to `name', a
  180. * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
  181. * to the trailing flags is written to `flags'. If the pattern is not in the
  182. * correct format, a false value is returned.
  183. */
  184. static boolean parseTagRegex (
  185. char* const regexp, char** const name,
  186. char** const kinds, char** const flags)
  187. {
  188. boolean result = FALSE;
  189. const int separator = (unsigned char) regexp [0];
  190. *name = scanSeparators (regexp);
  191. if (*regexp == '\0')
  192. error (WARNING, "empty regexp");
  193. else if (**name != separator)
  194. error (WARNING, "%s: incomplete regexp", regexp);
  195. else
  196. {
  197. char* const third = scanSeparators (*name);
  198. if (**name == '\0')
  199. error (WARNING, "%s: regexp missing name pattern", regexp);
  200. if ((*name) [strlen (*name) - 1] == '\\')
  201. error (WARNING, "error in name pattern: \"%s\"", *name);
  202. if (*third != separator)
  203. error (WARNING, "%s: regexp missing final separator", regexp);
  204. else
  205. {
  206. char* const fourth = scanSeparators (third);
  207. if (*fourth == separator)
  208. {
  209. *kinds = third;
  210. scanSeparators (fourth);
  211. *flags = fourth;
  212. }
  213. else
  214. {
  215. *flags = third;
  216. *kinds = NULL;
  217. }
  218. result = TRUE;
  219. }
  220. }
  221. return result;
  222. }
  223. static void addCompiledTagPattern (
  224. const langType language, regex_t* const pattern,
  225. char* const name, const char kind, char* const kindName,
  226. char *const description)
  227. {
  228. patternSet* set;
  229. regexPattern *ptrn;
  230. if (language > SetUpper)
  231. {
  232. int i;
  233. Sets = xRealloc (Sets, (language + 1), patternSet);
  234. for (i = SetUpper + 1 ; i <= language ; ++i)
  235. {
  236. Sets [i].patterns = NULL;
  237. Sets [i].count = 0;
  238. }
  239. SetUpper = language;
  240. }
  241. set = Sets + language;
  242. set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
  243. ptrn = &set->patterns [set->count];
  244. set->count += 1;
  245. ptrn->pattern = pattern;
  246. ptrn->type = PTRN_TAG;
  247. ptrn->u.tag.name_pattern = name;
  248. ptrn->u.tag.kind.enabled = TRUE;
  249. ptrn->u.tag.kind.letter = kind;
  250. ptrn->u.tag.kind.name = kindName;
  251. ptrn->u.tag.kind.description = description;
  252. }
  253. static void addCompiledCallbackPattern (
  254. const langType language, regex_t* const pattern,
  255. const regexCallback callback)
  256. {
  257. patternSet* set;
  258. regexPattern *ptrn;
  259. if (language > SetUpper)
  260. {
  261. int i;
  262. Sets = xRealloc (Sets, (language + 1), patternSet);
  263. for (i = SetUpper + 1 ; i <= language ; ++i)
  264. {
  265. Sets [i].patterns = NULL;
  266. Sets [i].count = 0;
  267. }
  268. SetUpper = language;
  269. }
  270. set = Sets + language;
  271. set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
  272. ptrn = &set->patterns [set->count];
  273. set->count += 1;
  274. ptrn->pattern = pattern;
  275. ptrn->type = PTRN_CALLBACK;
  276. ptrn->u.callback.function = callback;
  277. }
  278. #if defined (POSIX_REGEX)
  279. static regex_t* compileRegex (const char* const regexp, const char* const flags)
  280. {
  281. int cflags = REG_EXTENDED | REG_NEWLINE;
  282. regex_t *result = NULL;
  283. int errcode;
  284. int i;
  285. for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
  286. {
  287. switch ((int) flags [i])
  288. {
  289. case 'b': cflags &= ~REG_EXTENDED; break;
  290. case 'e': cflags |= REG_EXTENDED; break;
  291. case 'i': cflags |= REG_ICASE; break;
  292. default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
  293. }
  294. }
  295. result = xMalloc (1, regex_t);
  296. errcode = regcomp (result, regexp, cflags);
  297. if (errcode != 0)
  298. {
  299. char errmsg[256];
  300. regerror (errcode, result, errmsg, 256);
  301. error (WARNING, "regcomp %s: %s", regexp, errmsg);
  302. regfree (result);
  303. eFree (result);
  304. result = NULL;
  305. }
  306. return result;
  307. }
  308. #endif
  /* Split a kind specification of the form "k", "Kind", "k,Kind" or
   * "k,Kind,description" into its parts.
   *
   * `*kind' receives the kind letter ('r' when none is given), `*kindName' a
   * newly allocated kind name ("regex" when none is given), and
   * `*description' a newly allocated description, or NULL when absent.
   * A NULL or empty `kinds' yields letter 'r' and name "regex".
   */
  static void parseKinds (
          const char* const kinds, char* const kind, char** const kindName,
          char **description)
  {
      const char* cursor;
      const char* sep;
      size_t nameLength;

      *kind = '\0';
      *kindName = NULL;
      *description = NULL;

      if (kinds == NULL || kinds [0] == '\0')
      {
          *kind = 'r';
          *kindName = eStrdup ("regex");
          return;
      }

      /* A single leading character followed by ',' or the end of the string
       * is the kind letter; anything else starts with the kind name.
       */
      cursor = kinds;
      if (cursor [0] != ',' && (cursor [1] == ',' || cursor [1] == '\0'))
      {
          *kind = cursor [0];
          ++cursor;
      }
      else
          *kind = 'r';

      if (*cursor == ',')
          ++cursor;

      if (*cursor == '\0')
      {
          *kindName = eStrdup ("regex");
          return;
      }

      sep = strchr (cursor, ',');
      if (sep == NULL)
      {
          *kindName = eStrdup (cursor);
          return;
      }

      /* Name up to the comma, optional description after it. */
      nameLength = (size_t) (sep - cursor);
      *kindName = (char*) eMalloc (nameLength + 1);
      memcpy (*kindName, cursor, nameLength);
      (*kindName) [nameLength] = '\0';
      if (sep [1] != '\0')
          *description = eStrdup (sep + 1);
  }
  349. static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
  350. {
  351. const struct sKind *const kind = &pat [i].u.tag.kind;
  352. const char *const indentation = indent ? " " : "";
  353. Assert (pat [i].type == PTRN_TAG);
  354. printf ("%s%c %s %s\n", indentation,
  355. kind->letter != '\0' ? kind->letter : '?',
  356. kind->description != NULL ? kind->description : kind->name,
  357. kind->enabled ? "" : " [off]");
  358. }
  359. static void processLanguageRegex (const langType language,
  360. const char* const parameter)
  361. {
  362. if (parameter == NULL || parameter [0] == '\0')
  363. clearPatternSet (language);
  364. else if (parameter [0] != '@')
  365. addLanguageRegex (language, parameter);
  366. else if (! doesFileExist (parameter + 1))
  367. error (WARNING, "cannot open regex file");
  368. else
  369. {
  370. const char* regexfile = parameter + 1;
  371. FILE* const fp = fopen (regexfile, "r");
  372. if (fp == NULL)
  373. error (WARNING | PERROR, regexfile);
  374. else
  375. {
  376. vString* const regex = vStringNew ();
  377. while (readLine (regex, fp))
  378. addLanguageRegex (language, vStringValue (regex));
  379. fclose (fp);
  380. vStringDelete (regex);
  381. }
  382. }
  383. }
  384. /*
  385. * Regex pattern matching
  386. */
  387. #if defined (POSIX_REGEX)
  388. static vString* substitute (
  389. const char* const in, const char* out,
  390. const int nmatch, const regmatch_t* const pmatch)
  391. {
  392. vString* result = vStringNew ();
  393. const char* p;
  394. for (p = out ; *p != '\0' ; p++)
  395. {
  396. if (*p == '\\' && isdigit ((int) *++p))
  397. {
  398. const int dig = *p - '0';
  399. if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
  400. {
  401. const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
  402. vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
  403. }
  404. }
  405. else if (*p != '\n' && *p != '\r')
  406. vStringPut (result, *p);
  407. }
  408. vStringTerminate (result);
  409. return result;
  410. }
  411. static void matchTagPattern (const vString* const line,
  412. const regexPattern* const patbuf,
  413. const regmatch_t* const pmatch)
  414. {
  415. vString *const name = substitute (vStringValue (line),
  416. patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
  417. vStringStripLeading (name);
  418. vStringStripTrailing (name);
  419. if (vStringLength (name) > 0)
  420. makeRegexTag (name, &patbuf->u.tag.kind);
  421. else
  422. error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
  423. getInputFileName (), getInputLineNumber (),
  424. patbuf->u.tag.name_pattern);
  425. vStringDelete (name);
  426. }
  427. static void matchCallbackPattern (
  428. const vString* const line, const regexPattern* const patbuf,
  429. const regmatch_t* const pmatch)
  430. {
  431. regexMatch matches [BACK_REFERENCE_COUNT];
  432. unsigned int count = 0;
  433. int i;
  434. for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
  435. {
  436. matches [i].start = pmatch [i].rm_so;
  437. matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
  438. ++count;
  439. }
  440. patbuf->u.callback.function (vStringValue (line), matches, count);
  441. }
  442. static boolean matchRegexPattern (const vString* const line,
  443. const regexPattern* const patbuf)
  444. {
  445. boolean result = FALSE;
  446. regmatch_t pmatch [BACK_REFERENCE_COUNT];
  447. const int match = regexec (patbuf->pattern, vStringValue (line),
  448. BACK_REFERENCE_COUNT, pmatch, 0);
  449. if (match == 0)
  450. {
  451. result = TRUE;
  452. if (patbuf->type == PTRN_TAG)
  453. matchTagPattern (line, patbuf, pmatch);
  454. else if (patbuf->type == PTRN_CALLBACK)
  455. matchCallbackPattern (line, patbuf, pmatch);
  456. else
  457. {
  458. Assert ("invalid pattern type" == NULL);
  459. result = FALSE;
  460. }
  461. }
  462. return result;
  463. }
  464. #endif
  465. /* PUBLIC INTERFACE */
  466. /* Match against all patterns for specified language. Returns true if at least
  467. * on pattern matched.
  468. */
  469. extern boolean matchRegex (const vString* const line, const langType language)
  470. {
  471. boolean result = FALSE;
  472. if (language != LANG_IGNORE && language <= SetUpper &&
  473. Sets [language].count > 0)
  474. {
  475. const patternSet* const set = Sets + language;
  476. unsigned int i;
  477. for (i = 0 ; i < set->count ; ++i)
  478. if (matchRegexPattern (line, set->patterns + i))
  479. result = TRUE;
  480. }
  481. return result;
  482. }
  /* Consume the current input file line by line until fileReadLine()
   * returns NULL; the lines themselves are not used here.
   */
  extern void findRegexTags (void)
  {
      for (;;)
      {
          if (fileReadLine () == NULL)
              break;
      }
  }
  489. #endif /* HAVE_REGEX */
  490. extern void addTagRegex (
  491. const langType language __unused__,
  492. const char* const regex __unused__,
  493. const char* const name __unused__,
  494. const char* const kinds __unused__,
  495. const char* const flags __unused__)
  496. {
  497. #ifdef HAVE_REGEX
  498. Assert (regex != NULL);
  499. Assert (name != NULL);
  500. if (! regexBroken)
  501. {
  502. regex_t* const cp = compileRegex (regex, flags);
  503. if (cp != NULL)
  504. {
  505. char kind;
  506. char* kindName;
  507. char* description;
  508. parseKinds (kinds, &kind, &kindName, &description);
  509. addCompiledTagPattern (language, cp, eStrdup (name),
  510. kind, kindName, description);
  511. }
  512. }
  513. #endif
  514. }
  515. extern void addCallbackRegex (
  516. const langType language __unused__,
  517. const char* const regex __unused__,
  518. const char* const flags __unused__,
  519. const regexCallback callback __unused__)
  520. {
  521. #ifdef HAVE_REGEX
  522. Assert (regex != NULL);
  523. if (! regexBroken)
  524. {
  525. regex_t* const cp = compileRegex (regex, flags);
  526. if (cp != NULL)
  527. addCompiledCallbackPattern (language, cp, callback);
  528. }
  529. #endif
  530. }
  531. extern void addLanguageRegex (
  532. const langType language __unused__, const char* const regex __unused__)
  533. {
  534. #ifdef HAVE_REGEX
  535. if (! regexBroken)
  536. {
  537. char *const regex_pat = eStrdup (regex);
  538. char *name, *kinds, *flags;
  539. if (parseTagRegex (regex_pat, &name, &kinds, &flags))
  540. {
  541. addTagRegex (language, regex_pat, name, kinds, flags);
  542. eFree (regex_pat);
  543. }
  544. }
  545. #endif
  546. }
  547. /*
  548. * Regex option parsing
  549. */
  550. extern boolean processRegexOption (const char *const option,
  551. const char *const parameter __unused__)
  552. {
  553. boolean handled = FALSE;
  554. const char* const dash = strchr (option, '-');
  555. if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
  556. {
  557. #ifdef HAVE_REGEX
  558. langType language;
  559. language = getNamedLanguage (dash + 1);
  560. if (language == LANG_IGNORE)
  561. error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
  562. else
  563. processLanguageRegex (language, parameter);
  564. #else
  565. error (WARNING, "regex support not available; required for --%s option",
  566. option);
  567. #endif
  568. handled = TRUE;
  569. }
  570. return handled;
  571. }
  572. extern void disableRegexKinds (const langType language __unused__)
  573. {
  574. #ifdef HAVE_REGEX
  575. if (language <= SetUpper && Sets [language].count > 0)
  576. {
  577. patternSet* const set = Sets + language;
  578. unsigned int i;
  579. for (i = 0 ; i < set->count ; ++i)
  580. if (set->patterns [i].type == PTRN_TAG)
  581. set->patterns [i].u.tag.kind.enabled = FALSE;
  582. }
  583. #endif
  584. }
  585. extern boolean enableRegexKind (
  586. const langType language __unused__,
  587. const int kind __unused__, const boolean mode __unused__)
  588. {
  589. boolean result = FALSE;
  590. #ifdef HAVE_REGEX
  591. if (language <= SetUpper && Sets [language].count > 0)
  592. {
  593. patternSet* const set = Sets + language;
  594. unsigned int i;
  595. for (i = 0 ; i < set->count ; ++i)
  596. if (set->patterns [i].type == PTRN_TAG &&
  597. set->patterns [i].u.tag.kind.letter == kind)
  598. {
  599. set->patterns [i].u.tag.kind.enabled = mode;
  600. result = TRUE;
  601. }
  602. }
  603. #endif
  604. return result;
  605. }
  606. extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
  607. {
  608. #ifdef HAVE_REGEX
  609. if (language <= SetUpper && Sets [language].count > 0)
  610. {
  611. patternSet* const set = Sets + language;
  612. unsigned int i;
  613. for (i = 0 ; i < set->count ; ++i)
  614. if (set->patterns [i].type == PTRN_TAG)
  615. printRegexKind (set->patterns, i, indent);
  616. }
  617. #endif
  618. }
  /* Release the patterns of every language and then the per-language table
   * itself, returning the module to its initial empty state.
   */
  extern void freeRegexResources (void)
  {
  #ifdef HAVE_REGEX
      int lang = 0;

      while (lang <= SetUpper)
      {
          clearPatternSet (lang);
          ++lang;
      }
      if (Sets != NULL)
          eFree (Sets);
      Sets = NULL;
      SetUpper = -1;
  #endif
  }
  /* Check for broken regcomp() on Cygwin. When the probe pattern fails to
   * compile, a warning is issued and regex support is disabled for the rest
   * of the run. Does nothing unless both HAVE_REGEX and CHECK_REGCOMP are
   * defined.
   */
  extern void checkRegex (void)
  {
  #if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
      regex_t patbuf;
      if (regcomp (&patbuf, "/hello/", 0) != 0)
      {
          error (WARNING, "Disabling broken regex");
          regexBroken = TRUE;
      }
      else
      {
          /* The probe compiled; release what regcomp() allocated. */
          regfree (&patbuf);
      }
  #endif
  }
  644. /* vi:set tabstop=4 shiftwidth=4: */