/src/modules/regexp/regexp.c
C | 248 lines | 168 code | 41 blank | 39 comment | 37 complexity | 8a763b334152d4b7cb3099ac873c692d MD5 | raw file
- #include <libfrozen.h>
- #include <regex.h>
- #include <regexp_t.h>
- #include <errors_list.c>
- /**
- * @ingroup machine
- * @addtogroup mod_machine_regexp data/regexp
- */
- /**
- * @ingroup mod_machine_regexp
- * @page page_regexp_info Description
- *
- * This module uses POSIX regular expressions to match data.
- *
- * Input that is not TYPE_STRINGT is converted to a string before matching, so matching such data is slower.
- */
- /**
- * @ingroup mod_machine_regexp
- * @page page_regexp_config Configuration
- *
- * Accepted configuration:
- * @code
- * {
- * class = "data/regexp",
- * regexp = "aaa.*", # regexp for matching, default ".*"
- * input = "url", # input key for string to match, default "buffer"
- * extended = (uint_t)'0', # 1 - use extended regexp, 0 - basic, default 0
- * icase = (uint_t)'0', # 1 - case-insensitive matching, 0 - case-sensitive matching, default 0
- * newline = (uint_t)'0', # see "man regcomp", default 0
- * notbol = (uint_t)'0', # see "man regcomp", default 0
- * noteol = (uint_t)'0', # see "man regcomp", default 0
- * marker = (hashkey_t)'marker',# on match - pass request with this key set
- * marker_data = (uint_t)'1', # value stored under the marker key, default 1
- * capture = { # capture key names
- * key_global = (void_t)'', # - key for whole match
- * key1 = (void_t)'', # - key for first capture braces
- * key2 = (void_t)'', # - key for second capture braces
- * ....
- * }
- * }
- * @endcode
- */
- typedef struct regexp_userdata {
- char *regexp_str;
- uintmax_t cflags;
- uintmax_t eflags;
- regmatch_t *regmatch;
- hash_t *capture;
- hashkey_t input;
- hashkey_t marker;
- data_t marker_data;
- uintmax_t compiled;
- uintmax_t ncaptures;
- regex_t regex;
- } regexp_userdata;
- data_t marker_default = DATA_UINTT(1);
- static void config_updateflag(hash_t *config, hashkey_t key, uintmax_t value, uintmax_t *flag){ // {{{
- ssize_t ret;
- uintmax_t new_value;
-
- hash_data_get(ret, TYPE_UINTT, new_value, config, key);
- if(ret == 0){
- if(new_value == 0){
- *flag &= ~value;
- }else{
- *flag |= value;
- }
- }
- } // }}}
- static ssize_t config_newregexp(regexp_userdata *userdata){ // {{{
- if(regcomp(&userdata->regex, userdata->regexp_str, userdata->cflags) != 0)
- return error("invalid regexp supplied - compilation error");
-
- userdata->compiled = 1;
- return 0;
- } // }}}
- static void config_freeregexp(regexp_userdata *userdata){ // {{{
- if(userdata->compiled == 1)
- regfree(&userdata->regex);
- } // }}}
- static ssize_t config_newmarkerdata(regexp_userdata *userdata, data_t *marker_data){ // {{{
- ssize_t ret;
- holder_consume(ret, userdata->marker_data, marker_data);
- return 0;
- } // }}}
- static void config_freemarkerdata(regexp_userdata *userdata){ // {{{
- if(memcmp(&userdata->marker_data, &marker_default, sizeof(data_t)) != 0){
- data_free(&userdata->marker_data);
- }
- } // }}}
- static ssize_t regexp_init(machine_t *machine){ // {{{
- regexp_userdata *userdata;
- if((userdata = machine->userdata = calloc(1, sizeof(regexp_userdata))) == NULL)
- return error("calloc failed");
-
- userdata->input = HDK(buffer);
- userdata->marker = HDK(marker);
- userdata->marker_data = marker_default;
- userdata->regexp_str = strdup(".*");
- return 0;
- } // }}}
- static ssize_t regexp_destroy(machine_t *machine){ // {{{
- regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
-
- config_freeregexp(userdata);
- config_freemarkerdata(userdata);
-
- if(userdata->regmatch)
- free(userdata->regmatch);
-
- if(userdata->regexp_str)
- free(userdata->regexp_str);
-
- hash_free(userdata->capture);
- free(userdata);
- return 0;
- } // }}}
- static ssize_t regexp_configure(machine_t *machine, hash_t *config){ // {{{
- ssize_t ret;
- data_t *marker_data;
- char *regexp_str = NULL;
- regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
-
- config_updateflag(config, HDK(extended), REG_EXTENDED, &userdata->cflags);
- config_updateflag(config, HDK(icase), REG_ICASE, &userdata->cflags);
- config_updateflag(config, HDK(newline), REG_NEWLINE, &userdata->cflags);
-
- config_updateflag(config, HDK(notbol), REG_NOTBOL, &userdata->eflags);
- config_updateflag(config, HDK(noteol), REG_NOTEOL, &userdata->eflags);
-
- hash_data_consume(ret, TYPE_HASHT, userdata->capture, config, HDK(capture));
- userdata->ncaptures = hash_nelements(userdata->capture); // nelements return 0 on null hash, 1 on hash_end, 2 on element + hash_end, so on
- if(userdata->ncaptures > 1){
- userdata->ncaptures--;
- if( (userdata->regmatch = malloc(sizeof(regmatch_t) * userdata->ncaptures)) == NULL)
- return errorn(ENOMEM);
- }
-
- hash_data_get(ret, TYPE_HASHKEYT, userdata->input, config, HDK(input));
- hash_data_get(ret, TYPE_HASHKEYT, userdata->marker, config, HDK(marker));
-
- hash_data_convert(ret, TYPE_STRINGT, regexp_str, config, HDK(regexp));
- if(ret == 0){
- free(userdata->regexp_str);
- userdata->regexp_str = regexp_str;
- }
-
- config_freeregexp(userdata);
- if( (ret = config_newregexp(userdata)) != 0)
- return ret;
-
- if( (marker_data = hash_data_find(config, HDK(marker_data))) != NULL){
- config_freemarkerdata(userdata);
- if( (ret = config_newmarkerdata(userdata, marker_data)) != 0)
- return ret;
- }
- return 0;
- } // }}}
- static ssize_t regexp_matched(machine_t *machine, request_t *request, data_t *input, uintmax_t capture_id){ // {{{
- regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
-
- if(capture_id == 0){
- request_t r_next[] = {
- { userdata->marker, userdata->marker_data },
- hash_inline(request),
- hash_end
- };
- return machine_pass(machine, r_next);
- }
-
- uintmax_t sl_soffset = MAX(userdata->regmatch[capture_id - 1].rm_so, 0);
- uintmax_t sl_eoffset = MAX(userdata->regmatch[capture_id - 1].rm_eo, 0);
- data_t sl_input = DATA_SLICET(input, sl_soffset, sl_eoffset - sl_soffset);
-
- request_t r_next[] = {
- { userdata->capture[userdata->ncaptures - capture_id].key, sl_input },
- hash_inline(request),
- hash_end
- };
- return regexp_matched(machine, r_next, input, capture_id - 1);
- } // }}}
- static ssize_t regexp_handler(machine_t *machine, request_t *request){ // {{{
- ssize_t ret;
- data_t *input;
- char *input_str = NULL;
- uintmax_t freeme = 0;
- regexp_userdata *userdata = (regexp_userdata *)machine->userdata;
-
- if( (input = hash_data_find(request, userdata->input)) == NULL)
- return error("no input string in request");
-
- if(input->type != TYPE_STRINGT){
- data_convert(ret, TYPE_STRINGT, input_str, input);
- if(ret != 0)
- return error("can not convert data to string");
-
- freeme = 1;
- }else{
- input_str = (char *)input->ptr;
- }
-
- ret = regexec(&userdata->regex, input_str, userdata->ncaptures, userdata->regmatch, userdata->eflags);
-
- if(freeme == 1){
- data_t d_string = DATA_PTR_STRING(input_str);
- static fastcall_free r_free = { { 2, ACTION_FREE } };
- data_query(&d_string, &r_free);
- }
-
- if(ret == 0)
- return regexp_matched(machine, request, input, userdata->ncaptures);
-
- return machine_pass(machine, request);
- } // }}}
- machine_t regexp_proto = {
- .class = "data/regexp",
- .supported_api = API_HASH,
- .func_init = ®exp_init,
- .func_configure = ®exp_configure,
- .func_destroy = ®exp_destroy,
- .machine_type_hash = {
- .func_handler = ®exp_handler
- }
- };
- int main(void){
- errors_register((err_item *)&errs_list, &emodule);
- //data_register(®exp_t_proto);
- class_register(®exp_proto);
-
- type_regexpt = datatype_t_getid_byname(REGEXPT_NAME, NULL);
- return 0;
- }