/mks-svn4458/ctags/ctags/lregex.c
C | 704 lines | 577 code | 60 blank | 67 comment | 159 complexity | 26526ff0cc3523db4a33f92e44fdfaa6 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, LGPL-3.0, Zlib, AGPL-1.0
- /*
- * $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $
- *
- * Copyright (c) 2000-2003, Darren Hiebert
- *
- * This source code is released for free distribution under the terms of the
- * GNU General Public License.
- *
- * This module contains functions for applying regular expression matching.
- *
- * The code for utlizing the Gnu regex package with regards to processing the
- * regex option and checking for regex matches was adapted from routines in
- * Gnu etags.
- */
- /*
- * INCLUDE FILES
- */
- #include "general.h" /* must always come first */
- #include <string.h>
- #ifdef HAVE_REGCOMP
- # include <ctype.h>
- # include <stddef.h>
- # ifdef HAVE_SYS_TYPES_H
- # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
- # endif
- # include <regex.h>
- #endif
- #include "debug.h"
- #include "entry.h"
- #include "parse.h"
- #include "read.h"
- #include "routines.h"
- #ifdef HAVE_REGEX
- /*
- * MACROS
- */
- /* Back-references \0 through \9 */
- #define BACK_REFERENCE_COUNT 10
- #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
- # define POSIX_REGEX
- #endif
- #define REGEX_NAME "Regex"
- /*
- * DATA DECLARATIONS
- */
- #if defined (POSIX_REGEX)
- struct sKind {
- boolean enabled;
- char letter;
- char* name;
- char* description;
- };
- enum pType { PTRN_TAG, PTRN_CALLBACK };
- typedef struct {
- regex_t *pattern;
- enum pType type;
- union {
- struct {
- char *name_pattern;
- struct sKind kind;
- } tag;
- struct {
- regexCallback function;
- } callback;
- } u;
- } regexPattern;
- #endif
- typedef struct {
- regexPattern *patterns;
- unsigned int count;
- } patternSet;
- /*
- * DATA DEFINITIONS
- */
- static boolean regexBroken = FALSE;
- /* Array of pattern sets, indexed by language */
- static patternSet* Sets = NULL;
- static int SetUpper = -1; /* upper language index in list */
- /*
- * FUNCTION DEFINITIONS
- */
- static void clearPatternSet (const langType language)
- {
- if (language <= SetUpper)
- {
- patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- {
- regexPattern *p = &set->patterns [i];
- #if defined (POSIX_REGEX)
- regfree (p->pattern);
- #endif
- eFree (p->pattern);
- p->pattern = NULL;
- if (p->type == PTRN_TAG)
- {
- eFree (p->u.tag.name_pattern);
- p->u.tag.name_pattern = NULL;
- eFree (p->u.tag.kind.name);
- p->u.tag.kind.name = NULL;
- if (p->u.tag.kind.description != NULL)
- {
- eFree (p->u.tag.kind.description);
- p->u.tag.kind.description = NULL;
- }
- }
- }
- if (set->patterns != NULL)
- eFree (set->patterns);
- set->patterns = NULL;
- set->count = 0;
- }
- }
- /*
- * Regex psuedo-parser
- */
- static void makeRegexTag (
- const vString* const name, const struct sKind* const kind)
- {
- if (kind->enabled)
- {
- tagEntryInfo e;
- Assert (name != NULL && vStringLength (name) > 0);
- Assert (kind != NULL);
- initTagEntry (&e, vStringValue (name));
- e.kind = kind->letter;
- e.kindName = kind->name;
- makeTagEntry (&e);
- }
- }
- /*
- * Regex pattern definition
- */
- /* Take a string like "/blah/" and turn it into "blah", making sure
- * that the first and last characters are the same, and handling
- * quoted separator characters. Actually, stops on the occurrence of
- * an unquoted separator. Also turns "\t" into a Tab character.
- * Returns pointer to terminating separator. Works in place. Null
- * terminates name string.
- */
- static char* scanSeparators (char* name)
- {
- char sep = name [0];
- char *copyto = name;
- boolean quoted = FALSE;
- for (++name ; *name != '\0' ; ++name)
- {
- if (quoted)
- {
- if (*name == sep)
- *copyto++ = sep;
- else if (*name == 't')
- *copyto++ = '\t';
- else
- {
- /* Something else is quoted, so preserve the quote. */
- *copyto++ = '\\';
- *copyto++ = *name;
- }
- quoted = FALSE;
- }
- else if (*name == '\\')
- quoted = TRUE;
- else if (*name == sep)
- {
- break;
- }
- else
- *copyto++ = *name;
- }
- *copyto = '\0';
- return name;
- }
- /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
- * character is whatever the first character of `regexp' is), by breaking it
- * up into null terminated strings, removing the separators, and expanding
- * '\t' into tabs. When complete, `regexp' points to the line matching
- * pattern, a pointer to the name matching pattern is written to `name', a
- * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
- * to the trailing flags is written to `flags'. If the pattern is not in the
- * correct format, a false value is returned.
- */
- static boolean parseTagRegex (
- char* const regexp, char** const name,
- char** const kinds, char** const flags)
- {
- boolean result = FALSE;
- const int separator = (unsigned char) regexp [0];
- *name = scanSeparators (regexp);
- if (*regexp == '\0')
- error (WARNING, "empty regexp");
- else if (**name != separator)
- error (WARNING, "%s: incomplete regexp", regexp);
- else
- {
- char* const third = scanSeparators (*name);
- if (**name == '\0')
- error (WARNING, "%s: regexp missing name pattern", regexp);
- if ((*name) [strlen (*name) - 1] == '\\')
- error (WARNING, "error in name pattern: \"%s\"", *name);
- if (*third != separator)
- error (WARNING, "%s: regexp missing final separator", regexp);
- else
- {
- char* const fourth = scanSeparators (third);
- if (*fourth == separator)
- {
- *kinds = third;
- scanSeparators (fourth);
- *flags = fourth;
- }
- else
- {
- *flags = third;
- *kinds = NULL;
- }
- result = TRUE;
- }
- }
- return result;
- }
- static void addCompiledTagPattern (
- const langType language, regex_t* const pattern,
- char* const name, const char kind, char* const kindName,
- char *const description)
- {
- patternSet* set;
- regexPattern *ptrn;
- if (language > SetUpper)
- {
- int i;
- Sets = xRealloc (Sets, (language + 1), patternSet);
- for (i = SetUpper + 1 ; i <= language ; ++i)
- {
- Sets [i].patterns = NULL;
- Sets [i].count = 0;
- }
- SetUpper = language;
- }
- set = Sets + language;
- set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
- ptrn = &set->patterns [set->count];
- set->count += 1;
- ptrn->pattern = pattern;
- ptrn->type = PTRN_TAG;
- ptrn->u.tag.name_pattern = name;
- ptrn->u.tag.kind.enabled = TRUE;
- ptrn->u.tag.kind.letter = kind;
- ptrn->u.tag.kind.name = kindName;
- ptrn->u.tag.kind.description = description;
- }
- static void addCompiledCallbackPattern (
- const langType language, regex_t* const pattern,
- const regexCallback callback)
- {
- patternSet* set;
- regexPattern *ptrn;
- if (language > SetUpper)
- {
- int i;
- Sets = xRealloc (Sets, (language + 1), patternSet);
- for (i = SetUpper + 1 ; i <= language ; ++i)
- {
- Sets [i].patterns = NULL;
- Sets [i].count = 0;
- }
- SetUpper = language;
- }
- set = Sets + language;
- set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
- ptrn = &set->patterns [set->count];
- set->count += 1;
- ptrn->pattern = pattern;
- ptrn->type = PTRN_CALLBACK;
- ptrn->u.callback.function = callback;
- }
- #if defined (POSIX_REGEX)
- static regex_t* compileRegex (const char* const regexp, const char* const flags)
- {
- int cflags = REG_EXTENDED | REG_NEWLINE;
- regex_t *result = NULL;
- int errcode;
- int i;
- for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
- {
- switch ((int) flags [i])
- {
- case 'b': cflags &= ~REG_EXTENDED; break;
- case 'e': cflags |= REG_EXTENDED; break;
- case 'i': cflags |= REG_ICASE; break;
- default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
- }
- }
- result = xMalloc (1, regex_t);
- errcode = regcomp (result, regexp, cflags);
- if (errcode != 0)
- {
- char errmsg[256];
- regerror (errcode, result, errmsg, 256);
- error (WARNING, "regcomp %s: %s", regexp, errmsg);
- regfree (result);
- eFree (result);
- result = NULL;
- }
- return result;
- }
- #endif
- static void parseKinds (
- const char* const kinds, char* const kind, char** const kindName,
- char **description)
- {
- *kind = '\0';
- *kindName = NULL;
- *description = NULL;
- if (kinds == NULL || kinds [0] == '\0')
- {
- *kind = 'r';
- *kindName = eStrdup ("regex");
- }
- else if (kinds [0] != '\0')
- {
- const char* k = kinds;
- if (k [0] != ',' && (k [1] == ',' || k [1] == '\0'))
- *kind = *k++;
- else
- *kind = 'r';
- if (*k == ',')
- ++k;
- if (k [0] == '\0')
- *kindName = eStrdup ("regex");
- else
- {
- const char *const comma = strchr (k, ',');
- if (comma == NULL)
- *kindName = eStrdup (k);
- else
- {
- *kindName = (char*) eMalloc (comma - k + 1);
- strncpy (*kindName, k, comma - k);
- (*kindName) [comma - k] = '\0';
- k = comma + 1;
- if (k [0] != '\0')
- *description = eStrdup (k);
- }
- }
- }
- }
- static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
- {
- const struct sKind *const kind = &pat [i].u.tag.kind;
- const char *const indentation = indent ? " " : "";
- Assert (pat [i].type == PTRN_TAG);
- printf ("%s%c %s %s\n", indentation,
- kind->letter != '\0' ? kind->letter : '?',
- kind->description != NULL ? kind->description : kind->name,
- kind->enabled ? "" : " [off]");
- }
- static void processLanguageRegex (const langType language,
- const char* const parameter)
- {
- if (parameter == NULL || parameter [0] == '\0')
- clearPatternSet (language);
- else if (parameter [0] != '@')
- addLanguageRegex (language, parameter);
- else if (! doesFileExist (parameter + 1))
- error (WARNING, "cannot open regex file");
- else
- {
- const char* regexfile = parameter + 1;
- FILE* const fp = fopen (regexfile, "r");
- if (fp == NULL)
- error (WARNING | PERROR, regexfile);
- else
- {
- vString* const regex = vStringNew ();
- while (readLine (regex, fp))
- addLanguageRegex (language, vStringValue (regex));
- fclose (fp);
- vStringDelete (regex);
- }
- }
- }
- /*
- * Regex pattern matching
- */
- #if defined (POSIX_REGEX)
- static vString* substitute (
- const char* const in, const char* out,
- const int nmatch, const regmatch_t* const pmatch)
- {
- vString* result = vStringNew ();
- const char* p;
- for (p = out ; *p != '\0' ; p++)
- {
- if (*p == '\\' && isdigit ((int) *++p))
- {
- const int dig = *p - '0';
- if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
- {
- const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
- vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
- }
- }
- else if (*p != '\n' && *p != '\r')
- vStringPut (result, *p);
- }
- vStringTerminate (result);
- return result;
- }
- static void matchTagPattern (const vString* const line,
- const regexPattern* const patbuf,
- const regmatch_t* const pmatch)
- {
- vString *const name = substitute (vStringValue (line),
- patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
- vStringStripLeading (name);
- vStringStripTrailing (name);
- if (vStringLength (name) > 0)
- makeRegexTag (name, &patbuf->u.tag.kind);
- else
- error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
- getInputFileName (), getInputLineNumber (),
- patbuf->u.tag.name_pattern);
- vStringDelete (name);
- }
- static void matchCallbackPattern (
- const vString* const line, const regexPattern* const patbuf,
- const regmatch_t* const pmatch)
- {
- regexMatch matches [BACK_REFERENCE_COUNT];
- unsigned int count = 0;
- int i;
- for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
- {
- matches [i].start = pmatch [i].rm_so;
- matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
- ++count;
- }
- patbuf->u.callback.function (vStringValue (line), matches, count);
- }
- static boolean matchRegexPattern (const vString* const line,
- const regexPattern* const patbuf)
- {
- boolean result = FALSE;
- regmatch_t pmatch [BACK_REFERENCE_COUNT];
- const int match = regexec (patbuf->pattern, vStringValue (line),
- BACK_REFERENCE_COUNT, pmatch, 0);
- if (match == 0)
- {
- result = TRUE;
- if (patbuf->type == PTRN_TAG)
- matchTagPattern (line, patbuf, pmatch);
- else if (patbuf->type == PTRN_CALLBACK)
- matchCallbackPattern (line, patbuf, pmatch);
- else
- {
- Assert ("invalid pattern type" == NULL);
- result = FALSE;
- }
- }
- return result;
- }
- #endif
- /* PUBLIC INTERFACE */
- /* Match against all patterns for specified language. Returns true if at least
- * on pattern matched.
- */
- extern boolean matchRegex (const vString* const line, const langType language)
- {
- boolean result = FALSE;
- if (language != LANG_IGNORE && language <= SetUpper &&
- Sets [language].count > 0)
- {
- const patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- if (matchRegexPattern (line, set->patterns + i))
- result = TRUE;
- }
- return result;
- }
- extern void findRegexTags (void)
- {
- /* merely read all lines of the file */
- while (fileReadLine () != NULL)
- ;
- }
- #endif /* HAVE_REGEX */
- extern void addTagRegex (
- const langType language __unused__,
- const char* const regex __unused__,
- const char* const name __unused__,
- const char* const kinds __unused__,
- const char* const flags __unused__)
- {
- #ifdef HAVE_REGEX
- Assert (regex != NULL);
- Assert (name != NULL);
- if (! regexBroken)
- {
- regex_t* const cp = compileRegex (regex, flags);
- if (cp != NULL)
- {
- char kind;
- char* kindName;
- char* description;
- parseKinds (kinds, &kind, &kindName, &description);
- addCompiledTagPattern (language, cp, eStrdup (name),
- kind, kindName, description);
- }
- }
- #endif
- }
- extern void addCallbackRegex (
- const langType language __unused__,
- const char* const regex __unused__,
- const char* const flags __unused__,
- const regexCallback callback __unused__)
- {
- #ifdef HAVE_REGEX
- Assert (regex != NULL);
- if (! regexBroken)
- {
- regex_t* const cp = compileRegex (regex, flags);
- if (cp != NULL)
- addCompiledCallbackPattern (language, cp, callback);
- }
- #endif
- }
- extern void addLanguageRegex (
- const langType language __unused__, const char* const regex __unused__)
- {
- #ifdef HAVE_REGEX
- if (! regexBroken)
- {
- char *const regex_pat = eStrdup (regex);
- char *name, *kinds, *flags;
- if (parseTagRegex (regex_pat, &name, &kinds, &flags))
- {
- addTagRegex (language, regex_pat, name, kinds, flags);
- eFree (regex_pat);
- }
- }
- #endif
- }
- /*
- * Regex option parsing
- */
- extern boolean processRegexOption (const char *const option,
- const char *const parameter __unused__)
- {
- boolean handled = FALSE;
- const char* const dash = strchr (option, '-');
- if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
- {
- #ifdef HAVE_REGEX
- langType language;
- language = getNamedLanguage (dash + 1);
- if (language == LANG_IGNORE)
- error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
- else
- processLanguageRegex (language, parameter);
- #else
- error (WARNING, "regex support not available; required for --%s option",
- option);
- #endif
- handled = TRUE;
- }
- return handled;
- }
- extern void disableRegexKinds (const langType language __unused__)
- {
- #ifdef HAVE_REGEX
- if (language <= SetUpper && Sets [language].count > 0)
- {
- patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- if (set->patterns [i].type == PTRN_TAG)
- set->patterns [i].u.tag.kind.enabled = FALSE;
- }
- #endif
- }
- extern boolean enableRegexKind (
- const langType language __unused__,
- const int kind __unused__, const boolean mode __unused__)
- {
- boolean result = FALSE;
- #ifdef HAVE_REGEX
- if (language <= SetUpper && Sets [language].count > 0)
- {
- patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- if (set->patterns [i].type == PTRN_TAG &&
- set->patterns [i].u.tag.kind.letter == kind)
- {
- set->patterns [i].u.tag.kind.enabled = mode;
- result = TRUE;
- }
- }
- #endif
- return result;
- }
- extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
- {
- #ifdef HAVE_REGEX
- if (language <= SetUpper && Sets [language].count > 0)
- {
- patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- if (set->patterns [i].type == PTRN_TAG)
- printRegexKind (set->patterns, i, indent);
- }
- #endif
- }
- extern void freeRegexResources (void)
- {
- #ifdef HAVE_REGEX
- int i;
- for (i = 0 ; i <= SetUpper ; ++i)
- clearPatternSet (i);
- if (Sets != NULL)
- eFree (Sets);
- Sets = NULL;
- SetUpper = -1;
- #endif
- }
- /* Check for broken regcomp() on Cygwin */
- extern void checkRegex (void)
- {
- #if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
- regex_t patbuf;
- int errcode;
- if (regcomp (&patbuf, "/hello/", 0) != 0)
- {
- error (WARNING, "Disabling broken regex");
- regexBroken = TRUE;
- }
- #endif
- }
- /* vi:set tabstop=4 shiftwidth=4: */