PageRenderTime 47ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/modules/regexp/regexp.c

https://github.com/x86-64/frozen
C | 248 lines | 168 code | 41 blank | 39 comment | 37 complexity | 8a763b334152d4b7cb3099ac873c692d MD5 | raw file
  1. #include <libfrozen.h>
  2. #include <regex.h>
  3. #include <regexp_t.h>
  4. #include <errors_list.c>
  5. /**
  6. * @ingroup machine
  7. * @addtogroup mod_machine_regexp data/regexp
  8. */
  9. /**
  10. * @ingroup mod_machine_regexp
  11. * @page page_regexp_info Description
  12. *
  13. * This module uses POSIX regular expressions to match data.
  14. *
  15. * Input that is not TYPE_STRINGT is converted to a string before matching, so matching such input is slower than matching plain strings.
  16. */
  17. /**
  18. * @ingroup mod_machine_regexp
  19. * @page page_regexp_config Configuration
  20. *
  21. * Accepted configuration:
  22. * @code
  23. * {
  24. * class = "data/regexp",
  25. * regexp = "aaa.*", # regexp for matching, default ".*"
  26. * input = "url", # input key for string to match, default "buffer"
  27. * extended = (uint_t)'0', # 1 - use extended regexp, 0 - basic, default 0
  28. * icase = (uint_t)'0', # 1 - case-insensitive matching, 0 - case-sensitive matching, default 0
  29. * newline = (uint_t)'0', # see "man regcomp", default 0
  30. * notbol = (uint_t)'0', # see "man regcomp", default 0
  31. * noteol = (uint_t)'0', # see "man regcomp", default 0
  32. * marker = (hashkey_t)'marker',# on match - pass request with this key set
  33. * marker_data = (uint_t)'1', # value stored under the marker key, default (uint_t)'1'
  34. * capture = { # capture key names
  35. * key_global = (void_t)'', # - key for whole match
  36. * key1 = (void_t)'', # - key for first capture braces
  37. * key2 = (void_t)'', # - key for second capture braces
  38. * ....
  39. * }
  40. * }
  41. * @endcode
  42. */
  43. typedef struct regexp_userdata {
  44. char *regexp_str;
  45. uintmax_t cflags;
  46. uintmax_t eflags;
  47. regmatch_t *regmatch;
  48. hash_t *capture;
  49. hashkey_t input;
  50. hashkey_t marker;
  51. data_t marker_data;
  52. uintmax_t compiled;
  53. uintmax_t ncaptures;
  54. regex_t regex;
  55. } regexp_userdata;
  56. data_t marker_default = DATA_UINTT(1);
  57. static void config_updateflag(hash_t *config, hashkey_t key, uintmax_t value, uintmax_t *flag){ // {{{
  58. ssize_t ret;
  59. uintmax_t new_value;
  60. hash_data_get(ret, TYPE_UINTT, new_value, config, key);
  61. if(ret == 0){
  62. if(new_value == 0){
  63. *flag &= ~value;
  64. }else{
  65. *flag |= value;
  66. }
  67. }
  68. } // }}}
  69. static ssize_t config_newregexp(regexp_userdata *userdata){ // {{{
  70. if(regcomp(&userdata->regex, userdata->regexp_str, userdata->cflags) != 0)
  71. return error("invalid regexp supplied - compilation error");
  72. userdata->compiled = 1;
  73. return 0;
  74. } // }}}
  75. static void config_freeregexp(regexp_userdata *userdata){ // {{{
  76. if(userdata->compiled == 1)
  77. regfree(&userdata->regex);
  78. } // }}}
  79. static ssize_t config_newmarkerdata(regexp_userdata *userdata, data_t *marker_data){ // {{{
  80. ssize_t ret;
  81. holder_consume(ret, userdata->marker_data, marker_data);
  82. return 0;
  83. } // }}}
  84. static void config_freemarkerdata(regexp_userdata *userdata){ // {{{
  85. if(memcmp(&userdata->marker_data, &marker_default, sizeof(data_t)) != 0){
  86. data_free(&userdata->marker_data);
  87. }
  88. } // }}}
  89. static ssize_t regexp_init(machine_t *machine){ // {{{
  90. regexp_userdata *userdata;
  91. if((userdata = machine->userdata = calloc(1, sizeof(regexp_userdata))) == NULL)
  92. return error("calloc failed");
  93. userdata->input = HDK(buffer);
  94. userdata->marker = HDK(marker);
  95. userdata->marker_data = marker_default;
  96. userdata->regexp_str = strdup(".*");
  97. return 0;
  98. } // }}}
  99. static ssize_t regexp_destroy(machine_t *machine){ // {{{
  100. regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
  101. config_freeregexp(userdata);
  102. config_freemarkerdata(userdata);
  103. if(userdata->regmatch)
  104. free(userdata->regmatch);
  105. if(userdata->regexp_str)
  106. free(userdata->regexp_str);
  107. hash_free(userdata->capture);
  108. free(userdata);
  109. return 0;
  110. } // }}}
  111. static ssize_t regexp_configure(machine_t *machine, hash_t *config){ // {{{
  112. ssize_t ret;
  113. data_t *marker_data;
  114. char *regexp_str = NULL;
  115. regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
  116. config_updateflag(config, HDK(extended), REG_EXTENDED, &userdata->cflags);
  117. config_updateflag(config, HDK(icase), REG_ICASE, &userdata->cflags);
  118. config_updateflag(config, HDK(newline), REG_NEWLINE, &userdata->cflags);
  119. config_updateflag(config, HDK(notbol), REG_NOTBOL, &userdata->eflags);
  120. config_updateflag(config, HDK(noteol), REG_NOTEOL, &userdata->eflags);
  121. hash_data_consume(ret, TYPE_HASHT, userdata->capture, config, HDK(capture));
  122. userdata->ncaptures = hash_nelements(userdata->capture); // nelements return 0 on null hash, 1 on hash_end, 2 on element + hash_end, so on
  123. if(userdata->ncaptures > 1){
  124. userdata->ncaptures--;
  125. if( (userdata->regmatch = malloc(sizeof(regmatch_t) * userdata->ncaptures)) == NULL)
  126. return errorn(ENOMEM);
  127. }
  128. hash_data_get(ret, TYPE_HASHKEYT, userdata->input, config, HDK(input));
  129. hash_data_get(ret, TYPE_HASHKEYT, userdata->marker, config, HDK(marker));
  130. hash_data_convert(ret, TYPE_STRINGT, regexp_str, config, HDK(regexp));
  131. if(ret == 0){
  132. free(userdata->regexp_str);
  133. userdata->regexp_str = regexp_str;
  134. }
  135. config_freeregexp(userdata);
  136. if( (ret = config_newregexp(userdata)) != 0)
  137. return ret;
  138. if( (marker_data = hash_data_find(config, HDK(marker_data))) != NULL){
  139. config_freemarkerdata(userdata);
  140. if( (ret = config_newmarkerdata(userdata, marker_data)) != 0)
  141. return ret;
  142. }
  143. return 0;
  144. } // }}}
  145. static ssize_t regexp_matched(machine_t *machine, request_t *request, data_t *input, uintmax_t capture_id){ // {{{
  146. regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
  147. if(capture_id == 0){
  148. request_t r_next[] = {
  149. { userdata->marker, userdata->marker_data },
  150. hash_inline(request),
  151. hash_end
  152. };
  153. return machine_pass(machine, r_next);
  154. }
  155. uintmax_t sl_soffset = MAX(userdata->regmatch[capture_id - 1].rm_so, 0);
  156. uintmax_t sl_eoffset = MAX(userdata->regmatch[capture_id - 1].rm_eo, 0);
  157. data_t sl_input = DATA_SLICET(input, sl_soffset, sl_eoffset - sl_soffset);
  158. request_t r_next[] = {
  159. { userdata->capture[userdata->ncaptures - capture_id].key, sl_input },
  160. hash_inline(request),
  161. hash_end
  162. };
  163. return regexp_matched(machine, r_next, input, capture_id - 1);
  164. } // }}}
  165. static ssize_t regexp_handler(machine_t *machine, request_t *request){ // {{{
  166. ssize_t ret;
  167. data_t *input;
  168. char *input_str = NULL;
  169. uintmax_t freeme = 0;
  170. regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
  171. if( (input = hash_data_find(request, userdata->input)) == NULL)
  172. return error("no input string in request");
  173. if(input->type != TYPE_STRINGT){
  174. data_convert(ret, TYPE_STRINGT, input_str, input);
  175. if(ret != 0)
  176. return error("can not convert data to string");
  177. freeme = 1;
  178. }else{
  179. input_str = (char *)input->ptr;
  180. }
  181. ret = regexec(&userdata->regex, input_str, userdata->ncaptures, userdata->regmatch, userdata->eflags);
  182. if(freeme == 1){
  183. data_t d_string = DATA_PTR_STRING(input_str);
  184. static fastcall_free r_free = { { 2, ACTION_FREE } };
  185. data_query(&d_string, &r_free);
  186. }
  187. if(ret == 0)
  188. return regexp_matched(machine, request, input, userdata->ncaptures);
  189. return machine_pass(machine, request);
  190. } // }}}
  191. machine_t regexp_proto = {
  192. .class = "data/regexp",
  193. .supported_api = API_HASH,
  194. .func_init = &regexp_init,
  195. .func_configure = &regexp_configure,
  196. .func_destroy = &regexp_destroy,
  197. .machine_type_hash = {
  198. .func_handler = &regexp_handler
  199. }
  200. };
  201. int main(void){
  202. errors_register((err_item *)&errs_list, &emodule);
  203. //data_register(&regexp_t_proto);
  204. class_register(&regexp_proto);
  205. type_regexpt = datatype_t_getid_byname(REGEXPT_NAME, NULL);
  206. return 0;
  207. }