PageRenderTime 33ms CodeModel.GetById 7ms app.highlight 21ms RepoModel.GetById 2ms app.codeStats 0ms

/regexp.c

http://github.com/fizx/parsley
C | 359 lines | 259 code | 53 blank | 47 comment | 98 complexity | 988eb1855545776a35733577473ecc18 MD5 | raw file
  1/*
  2 * regexp.c: Implementation of the EXSLT -- Regular Expressions module
  3 *
  4 * References:
  5 *   http://exslt.org/regexp/index.html
  6 *
  7 * See Copyright for the status of this software.
  8 *
  9 * Authors:
 10 *   Joel W. Reed <joelwreed@gmail.com>
 11 *   Some modification by Kyle Maxwell
 12 *
 13 * TODO:
 14 * functions:
 15 *   regexp:match
 16 *   regexp:replace
 17 *   regexp:test
 18 */
 19#include "regexp.h"
 20
 21static void
 22exsltRegexpFlagsFromString(const xmlChar* flagstr, 
 23                           int* global, int* flags)
 24{
 25  const xmlChar* i = flagstr;
 26
 27  /* defaults */
 28  (*flags) = PCRE_UTF8;
 29  (*global) =  0;
 30
 31  while (*i != '\0')
 32    {
 33      if (*i == 'i') (*flags) |= PCRE_CASELESS;
 34      else if (*i == 'g') (*global)= 1;
 35      /* TODO: support other flags? */
 36      i++;
 37    }
 38}
 39
 40static int
 41exsltRegexpExecute(xmlXPathParserContextPtr ctxt, 
 42                   const xmlChar* haystack, const xmlChar* regexp,
 43                   int flags, int ovector[], int ovector_len)
 44{
 45  int haystack_len = 0;
 46  pcre *compiled_regexp = NULL;
 47  int rc = 0, erroffset = 0;
 48  const char *error = 0;
 49
 50  compiled_regexp = pcre_compile(regexp,      /* the pattern */
 51                                 flags,       /* default options */
 52                                 &error,      /* for error message */
 53                                 &erroffset,  /* for error offset */
 54                                 NULL);       /* use default character tables */
 55
 56  if (compiled_regexp == NULL) {
 57    xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
 58                        "exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
 59    return -1;
 60  }
 61
 62  haystack_len = xmlUTF8Strlen (haystack);
 63
 64  rc = pcre_exec(compiled_regexp,               /* result of pcre_compile() */
 65                 NULL,                          /* we didn't study the pattern */
 66                 haystack,                      /* the subject string */
 67                 haystack_len,                  /* the length of the subject string */
 68                 0,                             /* start at offset 0 in the subject */
 69                 0,                             /* default options */
 70                 (int*)ovector,                       /* vector of integers for substring information */
 71                 ovector_len);  /* number of elements in the vector  (NOT size in bytes) */
 72
 73  if (rc < -1) {
 74    xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
 75                        "exslt:regexp failed to execute %s for %s", regexp, haystack);
 76    rc = 0;
 77  }
 78  
 79  if (compiled_regexp != NULL) 
 80    pcre_free(compiled_regexp);
 81
 82  return rc;
 83}
 84
 85/**
 86 * exsltRegexpMatchFunction:
 87 * @ns:     
 88 *
 89 * Returns a node set of string matches
 90 */
 91
 92static void
 93exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
 94{
 95    xsltTransformContextPtr tctxt;
 96    xmlNodePtr node;
 97    xmlDocPtr container;
 98    xmlXPathObjectPtr ret = NULL;
 99    xmlChar *haystack, *regexp, *flagstr, *working, *match;
100    int rc, x, flags, global, ovector[30];
101
102    if ((nargs < 1) || (nargs > 3)) {
103        xmlXPathSetArityError(ctxt);
104        return;
105    }
106
107
108    if (nargs > 2) {
109      flagstr = xmlXPathPopString(ctxt);
110      if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
111          return;
112      }
113    } else {
114     flagstr = xmlStrdup("");
115    }
116    
117    regexp = xmlXPathPopString(ctxt);
118    if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
119        xmlFree(flagstr);
120        return;
121    }
122
123    haystack = xmlXPathPopString(ctxt);
124    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
125        xmlFree(regexp);
126        xmlFree(flagstr);
127        return;
128    }
129
130    /* Return a result tree fragment */
131    tctxt = xsltXPathGetTransformContext(ctxt);
132    if (tctxt == NULL) {
133      xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
134                         "exslt:regexp : internal error tctxt == NULL\n");
135      goto fail;
136    }
137
138    container = xsltCreateRVT(tctxt);
139    if (container != NULL) {
140      xsltRegisterTmpRVT(tctxt, container);
141      ret = xmlXPathNewNodeSet(NULL);
142      if (ret != NULL) {
143        ret->boolval = 0; 
144
145        exsltRegexpFlagsFromString(flagstr, &global, &flags);
146        working = haystack;
147        rc = exsltRegexpExecute(ctxt, working, regexp, flags, 
148                                ovector, sizeof(ovector)/sizeof(int));
149
150        while (rc > 0) {
151					for(int group = 0; group < rc; group++) {
152          	match = xmlStrsub(working, ovector[group*2], ovector[group*2+1]-ovector[group*2]);
153          	if (NULL == match) goto fail;
154
155	          node = xmlNewDocRawNode(container, NULL, "match", match);
156	          xmlFree(match);
157
158	          xmlAddChild((xmlNodePtr) container, node);
159	          xmlXPathNodeSetAddUnique(ret->nodesetval, node);
160					}
161          if (!global) break;
162
163          working = working + ovector[1];
164          rc = exsltRegexpExecute(ctxt, working, regexp, flags, 
165                                  ovector, sizeof(ovector)/sizeof(int));
166        }
167      }
168    }
169    
170 fail:
171    if (flagstr != NULL)
172      xmlFree(flagstr);
173    if (regexp != NULL)
174      xmlFree(regexp);
175    if (haystack != NULL)
176      xmlFree(haystack);
177
178    if (ret != NULL)
179      valuePush(ctxt, ret);
180    else
181      valuePush(ctxt, xmlXPathNewNodeSet(NULL));
182}
183
184/**
185 * exsltRegexpReplaceFunction:
186 * @ns:     
187 *
188 * Returns a node set of string matches
189 */
190
191static void
192exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
193{
194    xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
195    xmlChar *result = NULL, *working, *end;
196    int rc, x, flags, global, ovector[3];
197
198    if ((nargs < 1) || (nargs > 4)) {
199        xmlXPathSetArityError(ctxt);
200        return;
201    }
202
203    replace = xmlXPathPopString(ctxt);
204    if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
205        return;
206    }
207
208    flagstr = xmlXPathPopString(ctxt);
209    if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
210        xmlFree(replace);
211        return;
212    }
213
214    regexp = xmlXPathPopString(ctxt);
215    if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
216        xmlFree(flagstr);
217        xmlFree(replace);
218        return;
219    }
220
221    haystack = xmlXPathPopString(ctxt);
222    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
223        xmlFree(regexp);
224        xmlFree(flagstr);
225        xmlFree(replace);
226        return;
227    }
228
229    exsltRegexpFlagsFromString(flagstr, &global, &flags);
230
231    working = haystack;
232    rc = exsltRegexpExecute(ctxt, working, regexp, flags, 
233                            ovector, sizeof(ovector)/sizeof(int));
234
235    while (rc > 0 ) {
236      if (0==ovector[0]) {
237        if (NULL==result) result = xmlStrdup(replace);
238        else result = xmlStrcat(result, replace);
239      }
240      else {
241        tmp = xmlStrsub(working, 0, ovector[0]);
242        if (NULL==result) result = tmp;
243        else {
244          result = xmlStrcat(result, tmp);
245          xmlFree(tmp);
246        }
247        result = xmlStrcat(result, replace);
248      }
249      
250      working = working + ovector[1];
251
252      if (!global) break;
253      rc = exsltRegexpExecute(ctxt, working, regexp, flags, 
254                              ovector, sizeof(ovector)/sizeof(int));
255    }
256
257    end = haystack + xmlUTF8Strlen(haystack);
258    if (working < end ) {
259        if (NULL==result) result = xmlStrdup(working);
260        else {
261          result = xmlStrcat(result, working);
262        }
263    }
264
265fail:
266    if (replace != NULL)
267            xmlFree(replace);
268    if (flagstr != NULL)
269            xmlFree(flagstr);
270    if (regexp != NULL)
271            xmlFree(regexp);
272    if (haystack != NULL)
273            xmlFree(haystack);
274
275    xmlXPathReturnString(ctxt, result);
276}
277
278/**
279 * exsltRegexpTestFunction:
280 * @ns:     
281 *
282 * returns true if the string given as the first argument 
283 * matches the regular expression given as the second argument
284 * 
285 */
286
287static void
288exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
289{
290    xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
291    int rc = 0, flags, global, ovector[3];
292
293    if ((nargs < 1) || (nargs > 3)) {
294        xmlXPathSetArityError(ctxt);
295        return;
296    }
297
298    if(nargs > 2) {
299    flagstr = xmlXPathPopString(ctxt);
300      if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
301          return;
302      }
303    } else {
304      flagstr = xmlStrdup("");
305    }
306
307    regexp_middle = xmlXPathPopString(ctxt);
308    if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
309        xmlFree(flagstr);
310        return;
311    }
312
313    haystack = xmlXPathPopString(ctxt);
314    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
315        xmlFree(regexp_middle);
316        xmlFree(flagstr);
317        return;
318    }
319
320    /* build the regexp */
321    regexp = xmlStrdup("\\A");
322    regexp = xmlStrcat(regexp, regexp_middle);
323    regexp = xmlStrcat(regexp, "\\Z");
324
325    exsltRegexpFlagsFromString(flagstr, &global, &flags);
326    rc = exsltRegexpExecute(ctxt, haystack, regexp, flags, 
327                            ovector, sizeof(ovector)/sizeof(int));
328
329fail:
330    if (flagstr != NULL)
331            xmlFree(flagstr);
332    if (regexp != NULL)
333            xmlFree(regexp);
334    if (regexp_middle != NULL)
335            xmlFree(regexp_middle);
336    if (haystack != NULL)
337            xmlFree(haystack);
338
339    xmlXPathReturnBoolean(ctxt, (rc > 0));
340}
341
342/**
343 * exsltRegexpRegister:
344 *
345 * Registers the EXSLT - Regexp module
346 */
347void
348PLUGINPUBFUN exslt_org_regular_expressions_init (void)
349{
350    xsltRegisterExtModuleFunction ((const xmlChar *) "match",
351                                   (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
352                                   exsltRegexpMatchFunction);
353    xsltRegisterExtModuleFunction ((const xmlChar *) "replace",
354                                   (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
355                                   exsltRegexpReplaceFunction);
356    xsltRegisterExtModuleFunction ((const xmlChar *) "test",
357                                   (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
358                                   exsltRegexpTestFunction);
359}