/regexp.c

http://github.com/fizx/parsley · C · 359 lines · 259 code · 53 blank · 47 comment · 98 complexity · 988eb1855545776a35733577473ecc18 MD5 · raw file

  1. /*
  2. * regexp.c: Implementation of the EXSLT -- Regular Expressions module
  3. *
  4. * References:
  5. * http://exslt.org/regexp/index.html
  6. *
  7. * See Copyright for the status of this software.
  8. *
  9. * Authors:
  10. * Joel W. Reed <joelwreed@gmail.com>
  11. * Some modification by Kyle Maxwell
  12. *
  13. * TODO:
  14. * functions:
  15. * regexp:match
  16. * regexp:replace
  17. * regexp:test
  18. */
  19. #include "regexp.h"
  20. static void
  21. exsltRegexpFlagsFromString(const xmlChar* flagstr,
  22. int* global, int* flags)
  23. {
  24. const xmlChar* i = flagstr;
  25. /* defaults */
  26. (*flags) = PCRE_UTF8;
  27. (*global) = 0;
  28. while (*i != '\0')
  29. {
  30. if (*i == 'i') (*flags) |= PCRE_CASELESS;
  31. else if (*i == 'g') (*global)= 1;
  32. /* TODO: support other flags? */
  33. i++;
  34. }
  35. }
  36. static int
  37. exsltRegexpExecute(xmlXPathParserContextPtr ctxt,
  38. const xmlChar* haystack, const xmlChar* regexp,
  39. int flags, int ovector[], int ovector_len)
  40. {
  41. int haystack_len = 0;
  42. pcre *compiled_regexp = NULL;
  43. int rc = 0, erroffset = 0;
  44. const char *error = 0;
  45. compiled_regexp = pcre_compile(regexp, /* the pattern */
  46. flags, /* default options */
  47. &error, /* for error message */
  48. &erroffset, /* for error offset */
  49. NULL); /* use default character tables */
  50. if (compiled_regexp == NULL) {
  51. xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
  52. "exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
  53. return -1;
  54. }
  55. haystack_len = xmlUTF8Strlen (haystack);
  56. rc = pcre_exec(compiled_regexp, /* result of pcre_compile() */
  57. NULL, /* we didn't study the pattern */
  58. haystack, /* the subject string */
  59. haystack_len, /* the length of the subject string */
  60. 0, /* start at offset 0 in the subject */
  61. 0, /* default options */
  62. (int*)ovector, /* vector of integers for substring information */
  63. ovector_len); /* number of elements in the vector (NOT size in bytes) */
  64. if (rc < -1) {
  65. xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
  66. "exslt:regexp failed to execute %s for %s", regexp, haystack);
  67. rc = 0;
  68. }
  69. if (compiled_regexp != NULL)
  70. pcre_free(compiled_regexp);
  71. return rc;
  72. }
  73. /**
  74. * exsltRegexpMatchFunction:
  75. * @ns:
  76. *
  77. * Returns a node set of string matches
  78. */
  79. static void
  80. exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
  81. {
  82. xsltTransformContextPtr tctxt;
  83. xmlNodePtr node;
  84. xmlDocPtr container;
  85. xmlXPathObjectPtr ret = NULL;
  86. xmlChar *haystack, *regexp, *flagstr, *working, *match;
  87. int rc, x, flags, global, ovector[30];
  88. if ((nargs < 1) || (nargs > 3)) {
  89. xmlXPathSetArityError(ctxt);
  90. return;
  91. }
  92. if (nargs > 2) {
  93. flagstr = xmlXPathPopString(ctxt);
  94. if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
  95. return;
  96. }
  97. } else {
  98. flagstr = xmlStrdup("");
  99. }
  100. regexp = xmlXPathPopString(ctxt);
  101. if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
  102. xmlFree(flagstr);
  103. return;
  104. }
  105. haystack = xmlXPathPopString(ctxt);
  106. if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
  107. xmlFree(regexp);
  108. xmlFree(flagstr);
  109. return;
  110. }
  111. /* Return a result tree fragment */
  112. tctxt = xsltXPathGetTransformContext(ctxt);
  113. if (tctxt == NULL) {
  114. xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
  115. "exslt:regexp : internal error tctxt == NULL\n");
  116. goto fail;
  117. }
  118. container = xsltCreateRVT(tctxt);
  119. if (container != NULL) {
  120. xsltRegisterTmpRVT(tctxt, container);
  121. ret = xmlXPathNewNodeSet(NULL);
  122. if (ret != NULL) {
  123. ret->boolval = 0;
  124. exsltRegexpFlagsFromString(flagstr, &global, &flags);
  125. working = haystack;
  126. rc = exsltRegexpExecute(ctxt, working, regexp, flags,
  127. ovector, sizeof(ovector)/sizeof(int));
  128. while (rc > 0) {
  129. for(int group = 0; group < rc; group++) {
  130. match = xmlStrsub(working, ovector[group*2], ovector[group*2+1]-ovector[group*2]);
  131. if (NULL == match) goto fail;
  132. node = xmlNewDocRawNode(container, NULL, "match", match);
  133. xmlFree(match);
  134. xmlAddChild((xmlNodePtr) container, node);
  135. xmlXPathNodeSetAddUnique(ret->nodesetval, node);
  136. }
  137. if (!global) break;
  138. working = working + ovector[1];
  139. rc = exsltRegexpExecute(ctxt, working, regexp, flags,
  140. ovector, sizeof(ovector)/sizeof(int));
  141. }
  142. }
  143. }
  144. fail:
  145. if (flagstr != NULL)
  146. xmlFree(flagstr);
  147. if (regexp != NULL)
  148. xmlFree(regexp);
  149. if (haystack != NULL)
  150. xmlFree(haystack);
  151. if (ret != NULL)
  152. valuePush(ctxt, ret);
  153. else
  154. valuePush(ctxt, xmlXPathNewNodeSet(NULL));
  155. }
  156. /**
  157. * exsltRegexpReplaceFunction:
  158. * @ns:
  159. *
  160. * Returns a node set of string matches
  161. */
  162. static void
  163. exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
  164. {
  165. xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
  166. xmlChar *result = NULL, *working, *end;
  167. int rc, x, flags, global, ovector[3];
  168. if ((nargs < 1) || (nargs > 4)) {
  169. xmlXPathSetArityError(ctxt);
  170. return;
  171. }
  172. replace = xmlXPathPopString(ctxt);
  173. if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
  174. return;
  175. }
  176. flagstr = xmlXPathPopString(ctxt);
  177. if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
  178. xmlFree(replace);
  179. return;
  180. }
  181. regexp = xmlXPathPopString(ctxt);
  182. if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
  183. xmlFree(flagstr);
  184. xmlFree(replace);
  185. return;
  186. }
  187. haystack = xmlXPathPopString(ctxt);
  188. if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
  189. xmlFree(regexp);
  190. xmlFree(flagstr);
  191. xmlFree(replace);
  192. return;
  193. }
  194. exsltRegexpFlagsFromString(flagstr, &global, &flags);
  195. working = haystack;
  196. rc = exsltRegexpExecute(ctxt, working, regexp, flags,
  197. ovector, sizeof(ovector)/sizeof(int));
  198. while (rc > 0 ) {
  199. if (0==ovector[0]) {
  200. if (NULL==result) result = xmlStrdup(replace);
  201. else result = xmlStrcat(result, replace);
  202. }
  203. else {
  204. tmp = xmlStrsub(working, 0, ovector[0]);
  205. if (NULL==result) result = tmp;
  206. else {
  207. result = xmlStrcat(result, tmp);
  208. xmlFree(tmp);
  209. }
  210. result = xmlStrcat(result, replace);
  211. }
  212. working = working + ovector[1];
  213. if (!global) break;
  214. rc = exsltRegexpExecute(ctxt, working, regexp, flags,
  215. ovector, sizeof(ovector)/sizeof(int));
  216. }
  217. end = haystack + xmlUTF8Strlen(haystack);
  218. if (working < end ) {
  219. if (NULL==result) result = xmlStrdup(working);
  220. else {
  221. result = xmlStrcat(result, working);
  222. }
  223. }
  224. fail:
  225. if (replace != NULL)
  226. xmlFree(replace);
  227. if (flagstr != NULL)
  228. xmlFree(flagstr);
  229. if (regexp != NULL)
  230. xmlFree(regexp);
  231. if (haystack != NULL)
  232. xmlFree(haystack);
  233. xmlXPathReturnString(ctxt, result);
  234. }
  235. /**
  236. * exsltRegexpTestFunction:
  237. * @ns:
  238. *
  239. * returns true if the string given as the first argument
  240. * matches the regular expression given as the second argument
  241. *
  242. */
  243. static void
  244. exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
  245. {
  246. xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
  247. int rc = 0, flags, global, ovector[3];
  248. if ((nargs < 1) || (nargs > 3)) {
  249. xmlXPathSetArityError(ctxt);
  250. return;
  251. }
  252. if(nargs > 2) {
  253. flagstr = xmlXPathPopString(ctxt);
  254. if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
  255. return;
  256. }
  257. } else {
  258. flagstr = xmlStrdup("");
  259. }
  260. regexp_middle = xmlXPathPopString(ctxt);
  261. if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
  262. xmlFree(flagstr);
  263. return;
  264. }
  265. haystack = xmlXPathPopString(ctxt);
  266. if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
  267. xmlFree(regexp_middle);
  268. xmlFree(flagstr);
  269. return;
  270. }
  271. /* build the regexp */
  272. regexp = xmlStrdup("\\A");
  273. regexp = xmlStrcat(regexp, regexp_middle);
  274. regexp = xmlStrcat(regexp, "\\Z");
  275. exsltRegexpFlagsFromString(flagstr, &global, &flags);
  276. rc = exsltRegexpExecute(ctxt, haystack, regexp, flags,
  277. ovector, sizeof(ovector)/sizeof(int));
  278. fail:
  279. if (flagstr != NULL)
  280. xmlFree(flagstr);
  281. if (regexp != NULL)
  282. xmlFree(regexp);
  283. if (regexp_middle != NULL)
  284. xmlFree(regexp_middle);
  285. if (haystack != NULL)
  286. xmlFree(haystack);
  287. xmlXPathReturnBoolean(ctxt, (rc > 0));
  288. }
  289. /**
  290. * exsltRegexpRegister:
  291. *
  292. * Registers the EXSLT - Regexp module
  293. */
  294. void
  295. PLUGINPUBFUN exslt_org_regular_expressions_init (void)
  296. {
  297. xsltRegisterExtModuleFunction ((const xmlChar *) "match",
  298. (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
  299. exsltRegexpMatchFunction);
  300. xsltRegisterExtModuleFunction ((const xmlChar *) "replace",
  301. (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
  302. exsltRegexpReplaceFunction);
  303. xsltRegisterExtModuleFunction ((const xmlChar *) "test",
  304. (const xmlChar *) EXSLT_REGEXP_NAMESPACE,
  305. exsltRegexpTestFunction);
  306. }