/regcomp.c
C | 6314 lines | 5385 code | 864 blank | 65 comment | 1386 complexity | 7830f555aaa385edcde24e93f132e8e9 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
Large files files are truncated, but you can click here to view the full file
- /**********************************************************************
- regcomp.c - Oniguruma (regular expression library)
- **********************************************************************/
- /*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include "regparse.h"
- OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
- extern OnigCaseFoldType
- onig_get_default_case_fold_flag(void)
- {
- return OnigDefaultCaseFoldFlag;
- }
- extern int
- onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
- {
- OnigDefaultCaseFoldFlag = case_fold_flag;
- return 0;
- }
- #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
- #endif
- static UChar*
- str_dup(UChar* s, UChar* end)
- {
- ptrdiff_t len = end - s;
- if (len > 0) {
- UChar* r = (UChar* )xmalloc(len + 1);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, len);
- r[len] = (UChar )0;
- return r;
- }
- else return NULL;
- }
- static void
- swap_node(Node* a, Node* b)
- {
- Node c;
- c = *a; *a = *b; *b = c;
- if (NTYPE(a) == NT_STR) {
- StrNode* sn = NSTR(a);
- if (sn->capa == 0) {
- size_t len = sn->end - sn->s;
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
- if (NTYPE(b) == NT_STR) {
- StrNode* sn = NSTR(b);
- if (sn->capa == 0) {
- size_t len = sn->end - sn->s;
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
- }
- static OnigDistance
- distance_add(OnigDistance d1, OnigDistance d2)
- {
- if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
- return ONIG_INFINITE_DISTANCE;
- else {
- if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
- else return ONIG_INFINITE_DISTANCE;
- }
- }
- static OnigDistance
- distance_multiply(OnigDistance d, int m)
- {
- if (m == 0) return 0;
- if (d < ONIG_INFINITE_DISTANCE / m)
- return d * m;
- else
- return ONIG_INFINITE_DISTANCE;
- }
- static int
- bitset_is_empty(BitSetRef bs)
- {
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) {
- if (bs[i] != 0) return 0;
- }
- return 1;
- }
- #ifdef ONIG_DEBUG
- static int
- bitset_on_num(BitSetRef bs)
- {
- int i, n;
- n = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) n++;
- }
- return n;
- }
- #endif
- extern int
- onig_bbuf_init(BBuf* buf, int size)
- {
- if (size <= 0) {
- size = 0;
- buf->p = NULL;
- }
- else {
- buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
- }
- buf->alloc = size;
- buf->used = 0;
- return 0;
- }
- #ifdef USE_SUBEXP_CALL
- static int
- unset_addr_list_init(UnsetAddrList* uslist, int size)
- {
- UnsetAddr* p;
- p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
- CHECK_NULL_RETURN_MEMERR(p);
- uslist->num = 0;
- uslist->alloc = size;
- uslist->us = p;
- return 0;
- }
- static void
- unset_addr_list_end(UnsetAddrList* uslist)
- {
- if (IS_NOT_NULL(uslist->us))
- xfree(uslist->us);
- }
- static int
- unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
- {
- UnsetAddr* p;
- int size;
- if (uslist->num >= uslist->alloc) {
- size = uslist->alloc * 2;
- p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
- CHECK_NULL_RETURN_MEMERR(p);
- uslist->alloc = size;
- uslist->us = p;
- }
- uslist->us[uslist->num].offset = offset;
- uslist->us[uslist->num].target = node;
- uslist->num++;
- return 0;
- }
- #endif /* USE_SUBEXP_CALL */
- static int
- add_opcode(regex_t* reg, int opcode)
- {
- BBUF_ADD1(reg, opcode);
- return 0;
- }
- #ifdef USE_COMBINATION_EXPLOSION_CHECK
- static int
- add_state_check_num(regex_t* reg, int num)
- {
- StateCheckNumType n = (StateCheckNumType )num;
- BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
- return 0;
- }
- #endif
- static int
- add_rel_addr(regex_t* reg, int addr)
- {
- RelAddrType ra = (RelAddrType )addr;
- BBUF_ADD(reg, &ra, SIZE_RELADDR);
- return 0;
- }
- static int
- add_abs_addr(regex_t* reg, int addr)
- {
- AbsAddrType ra = (AbsAddrType )addr;
- BBUF_ADD(reg, &ra, SIZE_ABSADDR);
- return 0;
- }
- static int
- add_length(regex_t* reg, int len)
- {
- LengthType l = (LengthType )len;
- BBUF_ADD(reg, &l, SIZE_LENGTH);
- return 0;
- }
- static int
- add_mem_num(regex_t* reg, int num)
- {
- MemNumType n = (MemNumType )num;
- BBUF_ADD(reg, &n, SIZE_MEMNUM);
- return 0;
- }
- static int
- add_pointer(regex_t* reg, void* addr)
- {
- PointerType ptr = (PointerType )addr;
- BBUF_ADD(reg, &ptr, SIZE_POINTER);
- return 0;
- }
- static int
- add_option(regex_t* reg, OnigOptionType option)
- {
- BBUF_ADD(reg, &option, SIZE_OPTION);
- return 0;
- }
- static int
- add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
- {
- int r;
- r = add_opcode(reg, opcode);
- if (r) return r;
- r = add_rel_addr(reg, addr);
- return r;
- }
- static int
- add_bytes(regex_t* reg, UChar* bytes, int len)
- {
- BBUF_ADD(reg, bytes, len);
- return 0;
- }
- static int
- add_bitset(regex_t* reg, BitSetRef bs)
- {
- BBUF_ADD(reg, bs, SIZE_BITSET);
- return 0;
- }
- static int
- add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
- {
- int r;
- r = add_opcode(reg, opcode);
- if (r) return r;
- r = add_option(reg, option);
- return r;
- }
- static int compile_length_tree(Node* node, regex_t* reg);
- static int compile_tree(Node* node, regex_t* reg);
- #define IS_NEED_STR_LEN_OP_EXACT(op) \
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
- static int
- select_str_opcode(int mb_len, int str_len, int ignore_case)
- {
- int op;
- if (ignore_case) {
- switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
- }
- }
- else {
- switch (mb_len) {
- case 1:
- switch (str_len) {
- case 1: op = OP_EXACT1; break;
- case 2: op = OP_EXACT2; break;
- case 3: op = OP_EXACT3; break;
- case 4: op = OP_EXACT4; break;
- case 5: op = OP_EXACT5; break;
- default: op = OP_EXACTN; break;
- }
- break;
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
- case 3:
- op = OP_EXACTMB3N;
- break;
- default:
- op = OP_EXACTMBN;
- break;
- }
- }
- return op;
- }
- static int
- compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
- {
- int r;
- int saved_num_null_check = reg->num_null_check;
- if (empty_info != 0) {
- r = add_opcode(reg, OP_NULL_CHECK_START);
- if (r) return r;
- r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
- if (r) return r;
- reg->num_null_check++;
- }
- r = compile_tree(node, reg);
- if (r) return r;
- if (empty_info != 0) {
- if (empty_info == NQ_TARGET_IS_EMPTY)
- r = add_opcode(reg, OP_NULL_CHECK_END);
- else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
- else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
- if (r) return r;
- r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
- }
- return r;
- }
- #ifdef USE_SUBEXP_CALL
- static int
- compile_call(CallNode* node, regex_t* reg)
- {
- int r;
- r = add_opcode(reg, OP_CALL);
- if (r) return r;
- r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
- node->target);
- if (r) return r;
- r = add_abs_addr(reg, 0 /*dummy addr.*/);
- return r;
- }
- #endif
- static int
- compile_tree_n_times(Node* node, int n, regex_t* reg)
- {
- int i, r;
- for (i = 0; i < n; i++) {
- r = compile_tree(node, reg);
- if (r) return r;
- }
- return 0;
- }
- static int
- add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
- regex_t* reg ARG_UNUSED, int ignore_case)
- {
- int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
- len = SIZE_OPCODE;
- if (op == OP_EXACTMBN) len += SIZE_LENGTH;
- if (IS_NEED_STR_LEN_OP_EXACT(op))
- len += SIZE_LENGTH;
- len += mb_len * str_len;
- return len;
- }
- static int
- add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
- {
- int op = select_str_opcode(mb_len, str_len, ignore_case);
- add_opcode(reg, op);
- if (op == OP_EXACTMBN)
- add_length(reg, mb_len);
- if (IS_NEED_STR_LEN_OP_EXACT(op)) {
- if (op == OP_EXACTN_IC)
- add_length(reg, mb_len * str_len);
- else
- add_length(reg, str_len);
- }
- add_bytes(reg, s, mb_len * str_len);
- return 0;
- }
- static int
- compile_length_string_node(Node* node, regex_t* reg)
- {
- int rlen, r, len, prev_len, slen, ambig;
- OnigEncoding enc = reg->enc;
- UChar *p, *prev;
- StrNode* sn;
- sn = NSTR(node);
- if (sn->end <= sn->s)
- return 0;
- ambig = NSTRING_IS_AMBIG(node);
- p = prev = sn->s;
- prev_len = enclen(enc, p, sn->end);
- p += prev_len;
- slen = 1;
- rlen = 0;
- for (; p < sn->end; ) {
- len = enclen(enc, p, sn->end);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- prev = p;
- slen = 1;
- prev_len = len;
- }
- p += len;
- }
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- return rlen;
- }
- static int
- compile_length_string_raw_node(StrNode* sn, regex_t* reg)
- {
- if (sn->end <= sn->s)
- return 0;
- return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
- }
- static int
- compile_string_node(Node* node, regex_t* reg)
- {
- int r, len, prev_len, slen, ambig;
- OnigEncoding enc = reg->enc;
- UChar *p, *prev, *end;
- StrNode* sn;
- sn = NSTR(node);
- if (sn->end <= sn->s)
- return 0;
- end = sn->end;
- ambig = NSTRING_IS_AMBIG(node);
- p = prev = sn->s;
- prev_len = enclen(enc, p, end);
- p += prev_len;
- slen = 1;
- for (; p < end; ) {
- len = enclen(enc, p, end);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
- if (r) return r;
- prev = p;
- slen = 1;
- prev_len = len;
- }
- p += len;
- }
- return add_compile_string(prev, prev_len, slen, reg, ambig);
- }
- static int
- compile_string_raw_node(StrNode* sn, regex_t* reg)
- {
- if (sn->end <= sn->s)
- return 0;
- return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
- }
- static int
- add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
- {
- #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- add_length(reg, mbuf->used);
- return add_bytes(reg, mbuf->p, mbuf->used);
- #else
- int r, pad_size;
- UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
- GET_ALIGNMENT_PAD_SIZE(p, pad_size);
- add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
- r = add_bytes(reg, mbuf->p, mbuf->used);
- /* padding for return value from compile_length_cclass_node() to be fix. */
- pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
- return r;
- #endif
- }
- static int
- compile_length_cclass_node(CClassNode* cc, regex_t* reg)
- {
- int len;
- if (IS_NCCLASS_SHARE(cc)) {
- len = SIZE_OPCODE + SIZE_POINTER;
- return len;
- }
- if (IS_NULL(cc->mbuf)) {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
- else {
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- len = SIZE_OPCODE;
- }
- else {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
- #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- len += SIZE_LENGTH + cc->mbuf->used;
- #else
- len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
- #endif
- }
- return len;
- }
- static int
- compile_cclass_node(CClassNode* cc, regex_t* reg)
- {
- int r;
- if (IS_NCCLASS_SHARE(cc)) {
- add_opcode(reg, OP_CCLASS_NODE);
- r = add_pointer(reg, cc);
- return r;
- }
- if (IS_NULL(cc->mbuf)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_NOT);
- else
- add_opcode(reg, OP_CCLASS);
- r = add_bitset(reg, cc->bs);
- }
- else {
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MB_NOT);
- else
- add_opcode(reg, OP_CCLASS_MB);
- r = add_multi_byte_cclass(cc->mbuf, reg);
- }
- else {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MIX_NOT);
- else
- add_opcode(reg, OP_CCLASS_MIX);
- r = add_bitset(reg, cc->bs);
- if (r) return r;
- r = add_multi_byte_cclass(cc->mbuf, reg);
- }
- }
- return r;
- }
- static int
- entry_repeat_range(regex_t* reg, int id, int lower, int upper)
- {
- #define REPEAT_RANGE_ALLOC 4
- OnigRepeatRange* p;
- if (reg->repeat_range_alloc == 0) {
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
- }
- else if (reg->repeat_range_alloc <= id) {
- int n;
- n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = n;
- }
- else {
- p = reg->repeat_range;
- }
- p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
- return 0;
- }
- static int
- compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
- regex_t* reg)
- {
- int r;
- int num_repeat = reg->num_repeat;
- r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
- if (r) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
- reg->num_repeat++;
- if (r) return r;
- r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
- if (r) return r;
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- if (
- #ifdef USE_SUBEXP_CALL
- reg->num_call > 0 ||
- #endif
- IS_QUANTIFIER_IN_REPEAT(qn)) {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
- }
- else {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
- }
- if (r) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
- return r;
- }
- static int
- is_anychar_star_quantifier(QtfrNode* qn)
- {
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == NT_CANY)
- return 1;
- else
- return 0;
- }
- #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
- #define CKN_ON (ckn > 0)
- #ifdef USE_COMBINATION_EXPLOSION_CHECK
- static int
- compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
- {
- int len, mod_tlen, cklen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
- cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
- /* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
- if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
- }
- }
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1)
- len = SIZE_OP_JUMP;
- else
- len = 0;
- len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
- }
- else {
- if (qn->lower == 0)
- len = SIZE_OP_JUMP;
- else
- len = 0;
- len += mod_tlen + SIZE_OP_PUSH + cklen;
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- else
- len = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- len = SIZE_OP_STATE_CHECK_PUSH + tlen;
- }
- else {
- len = SIZE_OP_PUSH + tlen;
- }
- }
- else {
- len = tlen;
- }
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
- if (CKN_ON)
- len += SIZE_OP_STATE_CHECK;
- }
- return len;
- }
- static int
- compile_quantifier_node(QtfrNode* qn, regex_t* reg)
- {
- int r, mod_tlen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
- if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
- if (CKN_ON) {
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- }
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- }
- else {
- if (IS_MULTILINE(reg->options)) {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_ML_STAR
- : OP_ANYCHAR_ML_STAR));
- }
- else {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_STAR
- : OP_ANYCHAR_STAR));
- }
- if (r) return r;
- if (CKN_ON)
- r = add_state_check_num(reg, ckn);
- return r;
- }
- }
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1) {
- r = add_opcode_rel_addr(reg, OP_JUMP,
- (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
- if (r) return r;
- }
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- }
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP
- + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
- }
- else {
- if (qn->lower == 0) {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
- }
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg,
- -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
- }
- else
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else
- r = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, tlen);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
- }
- if (r) return r;
- }
- r = compile_tree(qn->target, reg);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- }
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
- if (CKN_ON) {
- if (r) return r;
- r = add_opcode(reg, OP_STATE_CHECK);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- }
- }
- return r;
- }
- #else /* USE_COMBINATION_EXPLOSION_CHECK */
- static int
- compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
- {
- int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
- /* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
- if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
- }
- }
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
- if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- len = SIZE_OP_JUMP;
- }
- else {
- len = tlen * qn->lower;
- }
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
- else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
- }
- else
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
- }
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- }
- else if (!infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- len = tlen * qn->lower;
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
- }
- return len;
- }
- static int
- compile_quantifier_node(QtfrNode* qn, regex_t* reg)
- {
- int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
- if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- if (IS_NOT_NULL(qn->next_head_exact)) {
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- }
- else {
- if (IS_MULTILINE(reg->options))
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);
- else
- return add_opcode(reg, OP_ANYCHAR_STAR);
- }
- }
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
- if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
- else
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
- }
- if (r) return r;
- }
- else {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- }
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
- }
- else if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
- }
- }
- else {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
- }
- }
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else if (!infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- int n = qn->upper - qn->lower;
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- for (i = 0; i < n; i++) {
- r = add_opcode_rel_addr(reg, OP_PUSH,
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
- }
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
- }
- return r;
- }
- #endif /* USE_COMBINATION_EXPLOSION_CHECK */
- static int
- compile_length_option_node(EncloseNode* node, regex_t* reg)
- {
- int tlen;
- OnigOptionType prev = reg->options;
- reg->options = node->option;
- tlen = compile_length_tree(node->target, reg);
- reg->options = prev;
- if (tlen < 0) return tlen;
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
- + tlen + SIZE_OP_SET_OPTION;
- }
- else
- return tlen;
- }
- static int
- compile_option_node(EncloseNode* node, regex_t* reg)
- {
- int r;
- OnigOptionType prev = reg->options;
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
- if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL);
- if (r) return r;
- }
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
- }
- return r;
- }
- static int
- compile_length_enclose_node(EncloseNode* node, regex_t* reg)
- {
- int len;
- int tlen;
- if (node->type == ENCLOSE_OPTION)
- return compile_length_option_node(node, reg);
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
- if (tlen < 0) return tlen;
- }
- else
- tlen = 0;
- switch (node->type) {
- case ENCLOSE_MEMORY:
- #ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
- }
- else
- #endif
- {
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
- else
- len = SIZE_OP_MEMORY_START;
- len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
- }
- break;
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
- len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
- }
- else {
- len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
- }
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- return len;
- }
- static int get_char_length_tree(Node* node, regex_t* reg, int* len);
- static int
- compile_enclose_node(EncloseNode* node, regex_t* reg)
- {
- int r, len;
- if (node->type == ENCLOSE_OPTION)
- return compile_option_node(node, reg);
- switch (node->type) {
- case ENCLOSE_MEMORY:
- #ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- r = add_opcode(reg, OP_CALL);
- if (r) return r;
- node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
- node->state |= NST_ADDR_FIXED;
- r = add_abs_addr(reg, (int )node->call_addr);
- if (r) return r;
- len = compile_length_tree(node->target, reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) return r;
- }
- #endif
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- r = add_opcode(reg, OP_MEMORY_START_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_START);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- #ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
- else
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = add_opcode(reg, OP_RETURN);
- }
- else
- #endif
- {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_END);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- }
- break;
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- len = compile_length_tree(qn->target, reg);
- if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
- }
- else {
- r = add_opcode(reg, OP_PUSH_STOP_BT);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP_STOP_BT);
- }
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- return r;
- }
- static int
- compile_length_anchor_node(AnchorNode* node, regex_t* reg)
- {
- int len;
- int tlen = 0;
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
- if (tlen < 0) return tlen;
- }
- switch (node->type) {
- case ANCHOR_PREC_READ:
- len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
- break;
- case ANCHOR_PREC_READ_NOT:
- len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
- break;
- case ANCHOR_LOOK_BEHIND:
- len = SIZE_OP_LOOK_BEHIND + tlen;
- break;
- case ANCHOR_LOOK_BEHIND_NOT:
- len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
- break;
- default:
- len = SIZE_OPCODE;
- break;
- }
- return len;
- }
- static int
- compile_anchor_node(AnchorNode* node, regex_t* reg)
- {
- int r, len;
- switch (node->type) {
- case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
- case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
- case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
- case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
- case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
- case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
- case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
- case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
- #ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
- case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
- #endif
- case ANCHOR_PREC_READ:
- r = add_opcode(reg, OP_PUSH_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP_POS);
- break;
- case ANCHOR_PREC_READ_NOT:
- len = compile_length_tree(node->target, reg);
- if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL_POS);
- break;
- case ANCHOR_LOOK_BEHIND:
- {
- int n;
- r = add_opcode(reg, OP_LOOK_BEHIND);
- if (r) return r;
- if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
- r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
- }
- break;
- case ANCHOR_LOOK_BEHIND_NOT:
- {
- int n;
- len = compile_length_tree(node->target, reg);
- r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
- len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
- if (r) return r;
- if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
- r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
- }
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- return r;
- }
- static int
- compile_length_tree(Node* node, regex_t* reg)
- {
- int len, type, r;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- len = 0;
- do {
- r = compile_length_tree(NCAR(node), reg);
- if (r < 0) return r;
- len += r;
- } while (IS_NOT_NULL(node = NCDR(node)));
- r = len;
- break;
- case NT_ALT:
- {
- int n;
- n = r = 0;
- do {
- r += compile_length_tree(NCAR(node), reg);
- n++;
- } while (IS_NOT_NULL(node = NCDR(node)));
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
- }
- break;
- case NT_STR:
- if (NSTRING_IS_RAW(node))
- r = compile_length_string_raw_node(NSTR(node), reg);
- else
- r = compile_length_string_node(node, reg);
- break;
- case NT_CCLASS:
- r = compile_length_cclass_node(NCCLASS(node), reg);
- break;
- case NT_CTYPE:
- case NT_CANY:
- r = SIZE_OPCODE;
- break;
- case NT_BREF:
- {
- BRefNode* br = NBREF(node);
- #ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
- SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else
- #endif
- if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
- }
- else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- }
- break;
- #ifdef USE_SUBEXP_CALL
- case NT_CALL:
- r = SIZE_OP_CALL;
- break;
- #endif
- case NT_QTFR:
- r = compile_length_quantifier_node(NQTFR(node), reg);
- break;
- case NT_ENCLOSE:
- r = compile_length_enclose_node(NENCLOSE(node), reg);
- break;
- case NT_ANCHOR:
- r = compile_length_anchor_node(NANCHOR(node), reg);
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- return r;
- }
- static int
- compile_tree(Node* node, regex_t* reg)
- {
- int n, type, len, pos, r = 0;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- do {
- r = compile_tree(NCAR(node), reg);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_ALT:
- {
- Node* x = node;
- len = 0;
- do {
- len += compile_length_tree(NCAR(x), reg);
- if (NCDR(x) != NULL) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
- }
- } while (IS_NOT_NULL(x = NCDR(x)));
- pos = reg->used + len; /* goal position */
- do {
- len = compile_length_tree(NCAR(node), reg);
- if (IS_NOT_NULL(NCDR(node))) {
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
- if (r) break;
- }
- r = compile_tree(NCAR(node), reg);
- if (r) break;
- if (IS_NOT_NULL(NCDR(node))) {
- len = pos - (reg->used + SIZE_OP_JUMP);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) break;
- }
- } while (IS_NOT_NULL(node = NCDR(node)));
- }
- break;
- case NT_STR:
- if (NSTRING_IS_RAW(node))
- r = compile_string_raw_node(NSTR(node), reg);
- else
- r = compile_string_node(node, reg);
- break;
- case NT_CCLASS:
- r = compile_cclass_node(NCCLASS(node), reg);
- break;
- case NT_CTYPE:
- {
- int op;
- switch (NCTYPE(node)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
- else op = OP_WORD;
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- r = add_opcode(reg, op);
- }
- break;
- case NT_CANY:
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML);
- else
- r = add_opcode(reg, OP_ANYCHAR);
- break;
- case NT_BREF:
- {
- BRefNode* br = NBREF(node);
- #ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r) return r;
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r) return r;
- r = add_length(reg, br->nest_level);
- if (r) return r;
- goto add_bacref_mems;
- }
- else
- #endif
- if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREFN_IC);
- if (r) return r;
- r = add_mem_num(reg, n);
- }
- else {
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
- if (r) return r;
- r = add_mem_num(reg, n);
- break;
- }
- }
- }
- else {
- int i;
- int* p;
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);
- }
- else {
- r = add_opcode(reg, OP_BACKREF_MULTI);
- }
- if (r) return r;
- #ifdef USE_BACKREF_WITH_LEVEL
- add_bacref_mems:
- #endif
- r = add_length(reg, br->back_num);
- if (r) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
- if (r) return r;
- }
- }
- }
- break;
- #ifdef USE_SUBEXP_CALL
- case NT_CALL:
- r = compile_call(NCALL(node), reg);
- break;
- #endif
- case NT_QTFR:
- r = compile_quantifier_node(NQTFR(node), reg);
- break;
- case NT_ENCLOSE:
- r = compile_enclose_node(NENCLOSE(node), reg);
- break;
- case NT_ANCHOR:
- r = compile_anchor_node(NANCHOR(node), reg);
- break;
- default:
- #ifdef ONIG_DEBUG
- fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
- #endif
- break;
- }
- return r;
- }
- #ifdef USE_NAMED_GROUP
- static int
- noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
- {
- int r = 0;
- Node* node = *plink;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = noname_disable_map(&(NCAR(node)), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- {
- Node** ptarget = &(NQTFR(node)->target);
- Node* old = *ptarget;
- r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
- onig_reduce_nested_quantifier(node, *ptarget);
- }
- }
- break;
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_MEMORY) {
- if (IS_ENCLOSE_NAMED_GROUP(en)) {
- (*counter)++;
- map[en->regnum].new_val = *counter;
- en->regnum = *counter;
- r = noname_disable_map(&(en->target), map, counter);
- }
- else {
- *plink = en->target;
- en->target = NULL_NODE;
- onig_node_free(node);
- r = noname_disable_map(plink, map, counter);
- }
- }
- else
- r = noname_disable_map(&(en->target), map, counter);
- }
- break;
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = noname_disable_map(&(an->target), map, counter);
- break;
- }
- }
- break;
- default:
- break;
- }
- return r;
- }
- static int
- renumber_node_backref(Node* node, GroupNumRemap* map)
- {
- int i, pos, n, old_num;
- int *backs;
- BRefNode* bn = NBREF(node);
- if (! IS_BACKREF_NAME_REF(bn))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- old_num = bn->back_num;
- if (IS_NULL(bn->back_dynamic))
- backs = bn->back_static;
- else
- backs = bn->back_dynamic;
- for (i = 0, pos = 0; i < old_num; i++) {
- n = map[backs[i]].new_val;
- if (n > 0) {
- backs[pos] = n;
- pos++;
- }
- }
- bn->back_num = pos;
- return 0;
- }
- static int
- renumber_by_map(Node* node, GroupNumRemap* map)
- {
- int r = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = renumber_by_map(NCAR(node), map);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- r = renumber_by_map(NQTFR(node)->target, map);
- break;
- case NT_ENCLOSE:
- r = renumber_by_map(NENCLOSE(node)->target, map);
- break;
- case NT_BREF:
- r = renumber_node_backref(node, map);
- break;
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = renumber_by_map(an->target, map);
- break;
- }
- }
- break;
- default:
- break;
- }
- return r;
- }
- static int
- numbered_ref_check(Node* node)
- {
- int r = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = numbered_ref_check(NCAR(node));
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- r = numbered_ref_check(NQTFR(node)->target);
- break;
- case NT_ENCLOSE:
- r = numbered_ref_check(NENCLOSE(node)->target);
- break;
- case NT_BREF:
- if (! IS_BACKREF_NAME_REF(NBREF(node)))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- break;
- default:
- break;
- }
- return r;
- }
- static int
- disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
- {
- int r, i, pos, counter;
- BitStatusType loc;
- GroupNumRemap* map;
- map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
- CHECK_NULL_RETURN_MEMERR(map);
- for (i = 1; i <= env->num_mem; i++) {
- map[i].new_val = 0;
- }
- counter = 0;
- r = noname_disable_map(root, map, &counter);
- if (r != 0) return r;
- r = renumber_by_map(*root, map);
- if (r != 0) return r;
- for (i = 1, pos = 1; i <= env->num_mem; i++) {
- if (map[i].new_val > 0) {
- SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
- pos++;
- }
- }
- loc = env->capture_history;
- BIT_STATUS_CLEAR(env->capture_history);
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(loc, i)) {
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
- }
- }
- env->num_mem = env->num_named;
- reg->num_mem = env->num_named;
- return onig_renumber_name_table(reg, map);
- }
- #endif /* USE_NAMED_GROUP */
- #ifdef USE_SUBEXP_CALL
- static int
- unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
- {
- int i, offset;
- EncloseNode* en;
- AbsAddrType addr;
- for (i = 0; i < uslist->num; i++) {
- en = NENCLOSE(uslist->us[i].target);
- if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
- addr = en->call_addr;
- offset = uslist->us[i].offset;
- BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
- }
- return 0;
- }
- #endif
- #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- static int
- quantifiers_memory_node_info(Node* node)
- {
- int r = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- {
- int v;
- do {
- v = quantifiers_memory_node_info(NCAR(node));
- if (v > r) r = v;
- } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
- #ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
- }
- else
- r = quantifiers_memory_node_info(NCALL(node)->target);
- break;
- #endif
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->upper != 0) {
- r = quantifiers_memory_node_info(qn->target);
- }
- }
- break;
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
- return NQ_TARGET_IS_EMPTY_MEM;
- break;
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = quantifiers_memory_node_info(en->target);
- break;
- default:
- break;
- }
- }
- break;
- case NT_BREF:
- case NT_STR:
- case NT_CTYPE:
- case NT_CCLASS:
- case NT_CANY:
- case NT_ANCHOR:
- default:
- break;
- }
- return r;
- }
- #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
- static int
- get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
- {
- OnigDistance tmin;
- int r = 0;
- *min = 0;
- switch (NTYPE(node)) {
- case NT_BREF:
- {
- int i;
- int* backs;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- if (br->state & NST_RECURSION) break;
- backs = BACKREFS_P(br);
- if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[0]], min, env);
- if (r != 0) break;
- for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[i]], &tmin, env);
- if (r != 0) break;
- if (*min > tmin) *min = tmin;
- }
- }
- break;
- #ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- EncloseNode* en = NENCLOSE(NCALL(node)->target);
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- }
- else
- r = get_min_match_length(NCALL(node)->target, min, env);
- break;
- #endif
- case NT_LIST:
- do {
- r = get_min_match_length(NCAR(node), &tmin, env);
- if (r == 0) *min += tmin;
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_ALT:
- {
- Node *x, *y;
- y = node;
- do {
- x = NCAR(y);
- r = get_min_match_length(x, &tmin, env);
- if (r != 0) break;
- if (y == node) *min = tmin;
- else if (*min > tmin) *min = tmin;
- } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
- }
- break;
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
- *min = sn->end - sn->s;
- }
- break;
- case NT_CTYPE:
- *min = 1;
- break;
- case NT_CCLASS:
- case NT_CANY:
- *min = 1;
- break;
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->lower > 0) {
- r = get_min_match_length(qn->target, min, env);
- if (r == 0)
- *min = distance_multiply(*min, qn->lower);
- }
- }
- break;
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
- #ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- else {
- r = get_min_match_length(en->target, min, env);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
- }
- }
- break;
- #endif
- cas…
Large files files are truncated, but you can click here to view the full file