/tags/beta3/harbour/source/hbpcre/pcreexec.c
C | 2089 lines | 1344 code | 323 blank | 422 comment | 484 complexity | bba09a04d1133e5b6c51b277f9fa9264 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
Large files files are truncated, but you can click here to view the full file
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Copyright (c) 1997-2005 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- /* This module contains pcre_exec(), the externally visible function that does
- pattern matching using an NFA algorithm, trying to mimic Perl as closely as
- possible. There are also some static supporting functions. */
- #include "pcreinal.h"
- /* Structure for building a chain of data that actually lives on the
- stack, for holding the values of the subject pointer at the start of each
- subpattern, so as to detect when an empty string has been matched by a
- subpattern - to break infinite loops. When NO_RECURSE is set, these blocks
- are on the heap, not on the stack. */
- typedef struct eptrblock {
- struct eptrblock *epb_prev;
- const uschar *epb_saved_eptr;
- } eptrblock;
/* Flag bits for the "flags" argument of the match() function */

#define match_condassert   0x01    /* Called to check a condition assertion */
#define match_isgroup      0x02    /* Set if start of bracketed group */

/* Non-error returns from the match() function. Error returns are the
externally defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Maximum number of ints of offset to save on the stack for recursive calls;
malloc is used when the offset vector is bigger than this. The value should be
a multiple of 3, because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
/* Minimum and maximum repeat counts for the common repeats. In the table of
maxima, 0 stands for "infinity".
NOTE(review): the six entries presumably follow the order *, *?, +, +?, ?, ??
of the repeat opcodes - confirm where the tables are indexed. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
- #ifdef DEBUG
- /*************************************************
- * Debugging function to print chars *
- *************************************************/
- /* Print a sequence of chars in printable format, stopping at the end of the
- subject if the requested.
- Arguments:
- p points to characters
- length number to print
- is_subject TRUE if printing from within md->start_subject
- md pointer to matching data block, if is_subject is TRUE
- Returns: nothing
- */
- static void
- pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
- {
- int c;
- if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
- while (length-- > 0)
- if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
- }
- #endif
- /*************************************************
- * Match a back-reference *
- *************************************************/
- /* If a back reference hasn't been set, the length that is passed is greater
- than the number of characters left in the string, so the match fails.
- Arguments:
- offset index into the offset vector
- eptr points into the subject
- length length to be matched
- md points to match data block
- ims the ims flags
- Returns: TRUE if matched
- */
- static BOOL
- match_ref(int offset, register const uschar *eptr, int length, match_data *md,
- unsigned long int ims)
- {
- const uschar *p = md->start_subject + md->offset_vector[offset];
- #ifdef DEBUG
- if (eptr >= md->end_subject)
- printf("matching subject <null>");
- else
- {
- printf("matching subject ");
- pchars(eptr, length, TRUE, md);
- }
- printf(" against backref ");
- pchars(p, length, FALSE, md);
- printf("\n");
- #endif
- /* Always fail if not enough characters left */
- if (length > md->end_subject - eptr) return FALSE;
- /* Separate the caselesss case for speed */
- if ((ims & PCRE_CASELESS) != 0)
- {
- while (length-- > 0)
- if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
- }
- else
- { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
- return TRUE;
- }
- /***************************************************************************
- ****************************************************************************
- RECURSION IN THE match() FUNCTION
- The match() function is highly recursive. Some regular expressions can cause
- it to recurse thousands of times. I was writing for Unix, so I just let it
- call itself recursively. This uses the stack for saving everything that has
- to be saved for a recursive call. On Unix, the stack can be large, and this
- works fine.
- It turns out that on non-Unix systems there are problems with programs that
- use a lot of stack. (This despite the fact that every last chip has oodles
- of memory these days, and techniques for extending the stack have been known
- for decades.) So....
- There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
- calls by keeping local variables that need to be preserved in blocks of memory
- obtained from malloc instead of on the stack. Macros are used to
- achieve this so that the actual code doesn't look very different to what it
- always used to.
- ****************************************************************************
- ***************************************************************************/
- /* These versions of the macros use the stack, as normal */
- #ifndef NO_RECURSE
/* REGISTER keeps the "register" hint on match()'s pointer arguments */
#define REGISTER register

/* A "recursion" is a genuine recursive call of match(); the result goes
into rx */
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  rx = match(ra, rb, rc, rd, re, rf, rg)

/* Returning from a level of "recursion" is an ordinary return */
#define RRETURN(ra) return ra
- #else
/* These versions of the macros manage a private stack on the heap. Note
that the rd argument of RMATCH isn't actually used. It's the md argument of
match(), which never changes. */

#define REGISTER

/* RMATCH obtains a new frame with pcre_stack_malloc, records the resumption
point in the current frame with setjmp(), copies the "arguments" into the new
frame, makes it the current frame, and jumps to HEAP_RECURSE. When the
"called" level finishes, RRETURN longjmps back here; the else branch then
reloads the current frame from md->thisframe and puts the result into rx.

If the new frame cannot be obtained, the current level fails with
PCRE_ERROR_NOMEMORY instead of dereferencing a NULL pointer. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL)\
    {\
    RRETURN(PCRE_ERROR_NOMEMORY);\
    }\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }
/* RRETURN ends one level of "recursion". The current frame is released with
pcre_stack_free and its predecessor becomes current again. If there is a
predecessor, the result is stored in it, md->thisframe is pointed at it, and
control goes back via longjmp() to the point its RMATCH recorded. If there is
no predecessor, this was the top level, so match() itself returns. */

#define RRETURN(ra)\
  {\
  heapframe *finished = frame;\
  frame = finished->Xprevframe;\
  (pcre_stack_free)(finished);\
  if (frame != NULL)\
    {\
    frame->Xresult = ra;\
    md->thisframe = frame;\
    longjmp(frame->Xwhere, 1);\
    }\
  return ra;\
  }
- /* Structure for remembering the local variables in a private frame */
- typedef struct heapframe {
- struct heapframe *Xprevframe;
- /* Function arguments that may change */
- const uschar *Xeptr;
- const uschar *Xecode;
- int Xoffset_top;
- long int Xims;
- eptrblock *Xeptrb;
- int Xflags;
- /* Function local variables */
- const uschar *Xcallpat;
- const uschar *Xcharptr;
- const uschar *Xdata;
- const uschar *Xnext;
- const uschar *Xpp;
- const uschar *Xprev;
- const uschar *Xsaved_eptr;
- recursion_info Xnew_recursive;
- BOOL Xcur_is_word;
- BOOL Xcondition;
- BOOL Xminimize;
- BOOL Xprev_is_word;
- unsigned long int Xoriginal_ims;
- #ifdef SUPPORT_UCP
- int Xprop_type;
- int Xprop_fail_result;
- int Xprop_category;
- int Xprop_chartype;
- int Xprop_othercase;
- int Xprop_test_against;
- int *Xprop_test_variable;
- #endif
- int Xctype;
- int Xfc;
- int Xfi;
- int Xlength;
- int Xmax;
- int Xmin;
- int Xnumber;
- int Xoffset;
- int Xop;
- int Xsave_capture_last;
- int Xsave_offset1, Xsave_offset2, Xsave_offset3;
- int Xstacksave[REC_STACK_SAVE_MAX];
- eptrblock Xnewptrb;
- /* Place to pass back result, and where to jump back to */
- int Xresult;
- jmp_buf Xwhere;
- } heapframe;
- #endif
- /***************************************************************************
- ***************************************************************************/
- /*************************************************
- * Match from current position *
- *************************************************/
- /* On entry ecode points to the first opcode, and eptr to the first character
- in the subject string, while eptrb holds the value of eptr at the start of the
- last bracketed group - used for breaking infinite loops matching zero-length
- strings. This function is called recursively in many circumstances. Whenever it
- returns a negative (error) response, the outer incarnation must also return the
- same response.
- Performance note: It might be tempting to extract commonly used fields from the
- md structure (e.g. utf8, end_subject) into individual variables to improve
- performance. Tests using gcc on a SPARC disproved this; in the first case, it
- made performance worse.
- Arguments:
- eptr pointer in subject
- ecode position in code
- offset_top current top pointer
- md pointer to "static" info for the match
- ims current /i, /m, and /s options
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- flags can contain
- match_condassert - this is an assertion condition
- match_isgroup - this is the start of a bracketed group
- Returns: MATCH_MATCH if matched ) these values are >= 0
- MATCH_NOMATCH if failed to match )
- a negative PCRE_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by recursion limit)
- */
- static int
- match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,
- int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
- int flags)
- {
- /* These variables do not need to be preserved over recursion in this function,
- so they can be ordinary variables in all cases. Mark them with "register"
- because they are used a lot in loops. */
- register int rrc; /* Returns from recursive calls */
- register int i; /* Used for loops not involving calls to RMATCH() */
- register int c; /* Character values not kept over RMATCH() calls */
- register BOOL utf8; /* Local copy of UTF-8 flag for speed */
- /* When recursion is not being used, all "local" variables that have to be
- preserved over calls to RMATCH() are part of a "frame" which is obtained from
- heap storage. Set up the top-level frame here; others are obtained from the
- heap whenever RMATCH() does a "recursion". See the macro definitions above. */
- #ifdef NO_RECURSE
- heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
- frame->Xprevframe = NULL; /* Marks the top level */
- /* Copy in the original argument variables */
- frame->Xeptr = eptr;
- frame->Xecode = ecode;
- frame->Xoffset_top = offset_top;
- frame->Xims = ims;
- frame->Xeptrb = eptrb;
- frame->Xflags = flags;
- /* This is where control jumps back to to effect "recursion" */
- HEAP_RECURSE:
- /* Macros make the argument variables come from the current frame */
- #define eptr frame->Xeptr
- #define ecode frame->Xecode
- #define offset_top frame->Xoffset_top
- #define ims frame->Xims
- #define eptrb frame->Xeptrb
- #define flags frame->Xflags
- /* Ditto for the local variables */
- #ifdef SUPPORT_UTF8
- #define charptr frame->Xcharptr
- #endif
- #define callpat frame->Xcallpat
- #define data frame->Xdata
- #define next frame->Xnext
- #define pp frame->Xpp
- #define prev frame->Xprev
- #define saved_eptr frame->Xsaved_eptr
- #define new_recursive frame->Xnew_recursive
- #define cur_is_word frame->Xcur_is_word
- #define condition frame->Xcondition
- #define minimize frame->Xminimize
- #define prev_is_word frame->Xprev_is_word
- #define original_ims frame->Xoriginal_ims
- #ifdef SUPPORT_UCP
- #define prop_type frame->Xprop_type
- #define prop_fail_result frame->Xprop_fail_result
- #define prop_category frame->Xprop_category
- #define prop_chartype frame->Xprop_chartype
- #define prop_othercase frame->Xprop_othercase
- #define prop_test_against frame->Xprop_test_against
- #define prop_test_variable frame->Xprop_test_variable
- #endif
- #define ctype frame->Xctype
- #define fc frame->Xfc
- #define fi frame->Xfi
- #define length frame->Xlength
- #define max frame->Xmax
- #define min frame->Xmin
- #define number frame->Xnumber
- #define offset frame->Xoffset
- #define op frame->Xop
- #define save_capture_last frame->Xsave_capture_last
- #define save_offset1 frame->Xsave_offset1
- #define save_offset2 frame->Xsave_offset2
- #define save_offset3 frame->Xsave_offset3
- #define stacksave frame->Xstacksave
- #define newptrb frame->Xnewptrb
- /* When recursion is being used, local variables are allocated on the stack and
- get preserved during recursion in the normal way. In this environment, fi and
- i, and fc and c, can be the same variables. */
- #else
- #define fi i
- #define fc c
- #ifdef SUPPORT_UTF8 /* Many of these variables are used ony */
- const uschar *charptr; /* small blocks of the code. My normal */
- #endif /* style of coding would have declared */
- const uschar *callpat; /* them within each of those blocks. */
- const uschar *data; /* However, in order to accommodate the */
- const uschar *next; /* version of this code that uses an */
- const uschar *pp; /* external "stack" implemented on the */
- const uschar *prev; /* heap, it is easier to declare them */
- const uschar *saved_eptr; /* all here, so the declarations can */
- /* be cut out in a block. The only */
- recursion_info new_recursive; /* declarations within blocks below are */
- /* for variables that do not have to */
- BOOL cur_is_word; /* be preserved over a recursive call */
- BOOL condition; /* to RMATCH(). */
- BOOL minimize;
- BOOL prev_is_word;
- unsigned long int original_ims;
- #ifdef SUPPORT_UCP
- int prop_type;
- int prop_fail_result;
- int prop_category;
- int prop_chartype;
- int prop_othercase;
- int prop_test_against;
- int *prop_test_variable;
- #endif
- int ctype;
- int length;
- int max;
- int min;
- int number;
- int offset;
- int op;
- int save_capture_last;
- int save_offset1, save_offset2, save_offset3;
- int stacksave[REC_STACK_SAVE_MAX];
- eptrblock newptrb;
- #endif
- /* These statements are here to stop the compiler complaining about uninitialized
- variables. */
- #ifdef SUPPORT_UCP
- prop_fail_result = 0;
- prop_test_against = 0;
- prop_test_variable = NULL;
- #endif
- /* OK, now we can get on with the real code of the function. Recursion is
- specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,
- these just turn into a recursive call to match() and a "return", respectively.
- However, RMATCH isn't like a function call because it's quite a complicated
- macro. It has to be used in one particular way. This shouldn't, however, impact
- performance when true recursion is being used. */
- if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
- original_ims = ims; /* Save for resetting on ')' */
- utf8 = md->utf8; /* Local copy of the flag */
- /* At the start of a bracketed group, add the current subject pointer to the
- stack of such pointers, to be re-instated at the end of the group when we hit
- the closing ket. When match() is called in other circumstances, we don't add to
- this stack. */
- if ((flags & match_isgroup) != 0)
- {
- newptrb.epb_prev = eptrb;
- newptrb.epb_saved_eptr = eptr;
- eptrb = &newptrb;
- }
- /* Now start processing the operations. */
- for (;;)
- {
- op = *ecode;
- minimize = FALSE;
- /* For partial matching, remember if we ever hit the end of the subject after
- matching at least one subject character. */
- if (md->partial &&
- eptr >= md->end_subject &&
- eptr > md->start_match)
- md->hitend = TRUE;
- /* Opening capturing bracket. If there is space in the offset vector, save
- the current subject position in the working slot at the top of the vector. We
- mustn't change the current values of the data slot, because they may be set
- from a previous iteration of this group, and be referred to by a reference
- inside the group.
- If the bracket fails to match, we need to restore this value and also the
- values of the final offsets, in case they were set by a previous iteration of
- the same bracket.
- If there isn't enough space in the offset vector, treat this as if it were a
- non-capturing bracket. Don't worry about setting the flag for the error case
- here; that is handled in the code for KET. */
- if (op > OP_BRA)
- {
- number = op - OP_BRA;
- /* For extended extraction brackets (large number), we have to fish out the
- number from a dummy opcode at the start. */
- if (number > EXTRACT_BASIC_MAX)
- number = GET2(ecode, 2+LINK_SIZE);
- offset = number << 1;
- #ifdef DEBUG
- printf("start bracket %d subject=", number);
- pchars(eptr, 16, TRUE, md);
- printf("\n");
- #endif
- if (offset < md->offset_max)
- {
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
- md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
- do
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
- match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT);
- DPRINTF(("bracket %d failed\n", number));
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
- RRETURN(MATCH_NOMATCH);
- }
- /* Insufficient room for saving captured contents */
- else op = OP_BRA;
- }
- /* Other types of node can be handled by a switch */
- switch(op)
- {
- case OP_BRA: /* Non-capturing bracket: optimized */
- DPRINTF(("start bracket 0\n"));
- do
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
- match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT);
- DPRINTF(("bracket 0 failed\n"));
- RRETURN(MATCH_NOMATCH);
- /* Conditional group: compilation checked that there are no more than
- two branches. If the condition is false, skipping the first branch takes us
- past the end if there is only one branch, but that's OK because that is
- exactly what going to the ket would do. */
- case OP_COND:
- if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */
- {
- offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
- condition = (offset == CREF_RECURSE * 2)?
- (md->recursive != NULL) :
- (offset < offset_top && md->offset_vector[offset] >= 0);
- RMATCH(rrc, eptr, ecode + (condition?
- (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),
- offset_top, md, ims, eptrb, match_isgroup);
- RRETURN(rrc);
- }
- /* The condition is an assertion. Call match() to evaluate it - setting
- the final argument TRUE causes it to stop at the end of an assertion. */
- else
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
- match_condassert | match_isgroup);
- if (rrc == MATCH_MATCH)
- {
- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);
- while (*ecode == OP_ALT) ecode += GET(ecode, 1);
- }
- else if (rrc != MATCH_NOMATCH)
- {
- RRETURN(rrc); /* Need braces because of following else */
- }
- else ecode += GET(ecode, 1);
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
- match_isgroup);
- RRETURN(rrc);
- }
- /* Control never reaches here */
- /* Skip over conditional reference or large extraction number data if
- encountered. */
- case OP_CREF:
- case OP_BRANUMBER:
- ecode += 3;
- break;
- /* End of the pattern. If we are in a recursion, we should restore the
- offsets appropriately and continue from after the call. */
- case OP_END:
- if (md->recursive != NULL && md->recursive->group_num == 0)
- {
- recursion_info *rec = md->recursive;
- DPRINTF(("Hit the end in a (?0) recursion\n"));
- md->recursive = rec->prevrec;
- memmove(md->offset_vector, rec->offset_save,
- rec->saved_max * sizeof(int));
- md->start_match = rec->save_start;
- ims = original_ims;
- ecode = rec->after_call;
- break;
- }
- /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
- string - backtracking will then try other alternatives, if any. */
- if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
- md->end_match_ptr = eptr; /* Record where we ended */
- md->end_offset_top = offset_top; /* and how many extracts were taken */
- RRETURN(MATCH_MATCH);
- /* Change option settings */
- case OP_OPT:
- ims = ecode[1];
- ecode += 2;
- DPRINTF(("ims set to %02lx\n", ims));
- break;
- /* Assertion brackets. Check the alternative branches in turn - the
- matching won't pass the KET for an assertion. If any one branch matches,
- the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
- start of each branch to move the current point backwards, so the code at
- this level is identical to the lookahead case. */
- case OP_ASSERT:
- case OP_ASSERTBACK:
- do
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
- match_isgroup);
- if (rrc == MATCH_MATCH) break;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT);
- if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
- /* If checking an assertion for a condition, return MATCH_MATCH. */
- if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
- /* Continue from after the assertion, updating the offsets high water
- mark, since extracts may have been taken during the assertion. */
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- ecode += 1 + LINK_SIZE;
- offset_top = md->end_offset_top;
- continue;
- /* Negative assertion: all branches must fail to match */
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK_NOT:
- do
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
- match_isgroup);
- if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
- if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
- ecode += 1 + LINK_SIZE;
- continue;
- /* Move the subject pointer back. This occurs only at the start of
- each branch of a lookbehind assertion. If we are too close to the start to
- move back, this match function fails. When working with UTF-8 we move
- back a number of characters, not bytes. */
- case OP_REVERSE:
- #ifdef SUPPORT_UTF8
- if (utf8)
- {
- c = GET(ecode,1);
- for (i = 0; i < c; i++)
- {
- eptr--;
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- BACKCHAR(eptr)
- }
- }
- else
- #endif
- /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
- {
- eptr -= GET(ecode,1);
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- }
- /* Skip to next op code */
- ecode += 1 + LINK_SIZE;
- break;
- /* The callout item calls an external function, if one is provided, passing
- details of the match so far. This is mainly for debugging, though the
- function is able to force a failure. */
- case OP_CALLOUT:
- if (pcre_callout != NULL)
- {
- pcre_callout_block cb;
- cb.version = 1; /* Version 1 of the callout block */
- cb.callout_number = ecode[1];
- cb.offset_vector = md->offset_vector;
- cb.subject = (const char *)md->start_subject;
- cb.subject_length = md->end_subject - md->start_subject;
- cb.start_match = md->start_match - md->start_subject;
- cb.current_position = eptr - md->start_subject;
- cb.pattern_position = GET(ecode, 2);
- cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
- cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
- cb.callout_data = md->callout_data;
- if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
- ecode += 2 + 2*LINK_SIZE;
- break;
- /* Recursion either matches the current regex, or some subexpression. The
- offset data is the offset to the starting bracket from the start of the
- whole pattern. (This is so that it works from duplicated subpatterns.)
- If there are any capturing brackets started but not finished, we have to
- save their starting points and reinstate them after the recursion. However,
- we don't know how many such there are (offset_top records the completed
- total) so we just have to save all the potential data. There may be up to
- 65535 such values, which is too large to put on the stack, but using malloc
- for small numbers seems expensive. As a compromise, the stack is used when
- there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
- is used. A problem is what to do if the malloc fails ... there is no way of
- returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
- values on the stack, and accept that the rest may be wrong.
- There are also other values that have to be saved. We use a chained
- sequence of blocks that actually live on the stack. Thanks to Robin Houston
- for the original version of this logic. */
- case OP_RECURSE:
- {
- callpat = md->start_code + GET(ecode, 1);
- new_recursive.group_num = *callpat - OP_BRA;
- /* For extended extraction brackets (large number), we have to fish out
- the number from a dummy opcode at the start. */
- if (new_recursive.group_num > EXTRACT_BASIC_MAX)
- new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);
- /* Add to "recursing stack" */
- new_recursive.prevrec = md->recursive;
- md->recursive = &new_recursive;
- /* Find where to continue from afterwards */
- ecode += 1 + LINK_SIZE;
- new_recursive.after_call = ecode;
- /* Now save the offset data. */
- new_recursive.saved_max = md->offset_end;
- if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
- new_recursive.offset_save = stacksave;
- else
- {
- new_recursive.offset_save =
- (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
- if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
- }
- memcpy(new_recursive.offset_save, md->offset_vector,
- new_recursive.saved_max * sizeof(int));
- new_recursive.save_start = md->start_match;
- md->start_match = eptr;
- /* OK, now we can do the recursion. For each top-level alternative we
- restore the offset and recursion data. */
- DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
- do
- {
- RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,
- eptrb, match_isgroup);
- if (rrc == MATCH_MATCH)
- {
- md->recursive = new_recursive.prevrec;
- if (new_recursive.offset_save != stacksave)
- (pcre_free)(new_recursive.offset_save);
- RRETURN(MATCH_MATCH);
- }
- else if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->recursive = &new_recursive;
- memcpy(md->offset_vector, new_recursive.offset_save,
- new_recursive.saved_max * sizeof(int));
- callpat += GET(callpat, 1);
- }
- while (*callpat == OP_ALT);
- DPRINTF(("Recursion didn't match\n"));
- md->recursive = new_recursive.prevrec;
- if (new_recursive.offset_save != stacksave)
- (pcre_free)(new_recursive.offset_save);
- RRETURN(MATCH_NOMATCH);
- }
- /* Control never reaches here */
- /* "Once" brackets are like assertion brackets except that after a match,
- the point in the subject string is not moved back. Thus there can never be
- a move back into the brackets. Friedl calls these "atomic" subpatterns.
- Check the alternative branches in turn - the matching won't pass the KET
- for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer. */
- case OP_ONCE:
- {
- prev = ecode;
- saved_eptr = eptr;
- do
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
- eptrb, match_isgroup);
- if (rrc == MATCH_MATCH) break;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
- /* If hit the end of the group (which could be repeated), fail */
- if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
- /* Continue as from after the assertion, updating the offsets high water
- mark, since extracts may have been taken. */
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. If there is an options reset, it will get obeyed in the normal
- course of events. */
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1+LINK_SIZE;
- break;
- }
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. We need to reset any options
- that changed within the bracket before re-running it, so check the next
- opcode. */
- if (ecode[1+LINK_SIZE] == OP_OPT)
- {
- ims = (ims & ~PCRE_IMS) | ecode[4];
- DPRINTF(("ims set to %02lx at group repeat\n", ims));
- }
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- else /* OP_KETRMAX */
- {
- RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- }
- RRETURN(MATCH_NOMATCH);
- /* An alternation is the end of a branch; scan along to find the end of the
- bracketed group and go to there. */
- case OP_ALT:
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- break;
- /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
- that it may occur zero times. It may repeat infinitely, or not at all -
- i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
- repeat limits are compiled as a number of copies, with the optional ones
- preceded by BRAZERO or BRAMINZERO. */
- case OP_BRAZERO:
- {
- next = ecode+1;
- RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- do next += GET(next,1); while (*next == OP_ALT);
- ecode = next + 1+LINK_SIZE;
- }
- break;
- case OP_BRAMINZERO:
- {
- next = ecode+1;
- do next += GET(next,1); while (*next == OP_ALT);
- RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,
- match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode++;
- }
- break;
- /* End of a group, repeated or non-repeating. If we are at the end of
- an assertion "group", stop matching and return MATCH_MATCH, but record the
- current high water mark for use by positive assertions. Do this also
- for the "once" (no backing up) groups. */
- case OP_KET:
- case OP_KETRMIN:
- case OP_KETRMAX:
- {
- prev = ecode - GET(ecode, 1);
- saved_eptr = eptrb->epb_saved_eptr;
- /* Back up the stack of bracket start pointers. */
- eptrb = eptrb->epb_prev;
- if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
- *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
- *prev == OP_ONCE)
- {
- md->end_match_ptr = eptr; /* For ONCE */
- md->end_offset_top = offset_top;
- RRETURN(MATCH_MATCH);
- }
- /* In all other cases except a conditional group we have to check the
- group number back at the start and if necessary complete handling an
- extraction by setting the offsets and bumping the high water mark. */
- if (*prev != OP_COND)
- {
- number = *prev - OP_BRA;
- /* For extended extraction brackets (large number), we have to fish out
- the number from a dummy opcode at the start. */
- if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
- offset = number << 1;
- #ifdef DEBUG
- printf("end bracket %d", number);
- printf("\n");
- #endif
- /* Test for a numbered group. This includes groups called as a result
- of recursion. Note that whole-pattern recursion is coded as a recurse
- into group 0, so it won't be picked up here. Instead, we catch it when
- the OP_END is reached. */
- if (number > 0)
- {
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
- {
- md->offset_vector[offset] =
- md->offset_vector[md->offset_end - number];
- md->offset_vector[offset+1] = eptr - md->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
- }
- /* Handle a recursively called group. Restore the offsets
- appropriately and continue from after the call. */
- if (md->recursive != NULL && md->recursive->group_num == number)
- {
- recursion_info *rec = md->recursive;
- DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
- md->recursive = rec->prevrec;
- md->start_match = rec->save_start;
- memcpy(md->offset_vector, rec->offset_save,
- rec->saved_max * sizeof(int));
- ecode = rec->after_call;
- ims = original_ims;
- break;
- }
- }
- }
- /* Reset the value of the ims flags, in case they got changed during
- the group. */
- ims = original_ims;
- DPRINTF(("ims reset to %02lx\n", ims));
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. If there is an options reset, it will get obeyed in the normal
- course of events. */
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1 + LINK_SIZE;
- break;
- }
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. */
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- else /* OP_KETRMAX */
- {
- RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- }
- RRETURN(MATCH_NOMATCH);
- /* Start of subject unless notbol, or after internal newline if multiline */
- case OP_CIRC:
- if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
- if ((ims & PCRE_MULTILINE) != 0)
- {
- if (eptr != md->start_subject && eptr[-1] != NEWLINE)
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- }
- /* ... else fall through */
- /* Start of subject assertion */
- case OP_SOD:
- if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- /* Start of match assertion */
- case OP_SOM:
- if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- /* Assert before internal newline if multiline, or before a terminating
- newline unless endonly is set, else end of subject unless noteol is set. */
- case OP_DOLL:
- if ((ims & PCRE_MULTILINE) != 0)
- {
- if (eptr < md->end_subject)
- { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }
- else
- { if (md->noteol) RRETURN(MATCH_NOMATCH); }
- ecode++;
- break;
- }
- else
- {
- if (md->noteol) RRETURN(MATCH_NOMATCH);
- if (!md->endonly)
- {
- if (eptr < md->end_subject - 1 ||
- (eptr == md->end_subject - 1 && *eptr != NEWLINE))
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- }
- }
- /* ... else fall through */
- /* End of subject assertion (\z) */
- case OP_EOD:
- if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- /* End of subject or ending \n assertion (\Z) */
- case OP_EODN:
- if (eptr < md->end_subject - 1 ||
- (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- /* Word boundary assertions */
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- {
- /* Find out if the previous and current characters are "word" characters.
- It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
- be "non-word" characters. */
- #ifdef SUPPORT_UTF8
- if (utf8)
- {
- if (eptr == md->start_subject) prev_is_word = FALSE; else
- {
- const uschar *lastptr = eptr - 1;
- while((*lastptr & 0xc0) == 0x80) lastptr--;
- GETCHAR(c, lastptr);
- prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
- }
- if (eptr >= md->end_subject) cur_is_word = FALSE; else
- {
- GETCHAR(c, eptr);
- cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
- }
- }
- else
- #endif
- /* More streamlined when not in UTF-8 mode */
- {
- prev_is_word = (eptr != md->start_subject) &&
- ((md->ctypes[eptr[-1]] & ctype_word) != 0);
- cur_is_word = (eptr < md->end_subject) &&
- ((md->ctypes[*eptr] & ctype_word) != 0);
- }
- /* Now see if the situation is what we want */
- if ((*ecode++ == OP_WORD_BOUNDARY)?
- cur_is_word == prev_is_word : cur_is_word != prev_is_word)
- RRETURN(MATCH_NOMATCH);
- }
- break;
- /* Match a single character type; inline for speed */
- case OP_ANY:
- if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
- RRETURN(MATCH_NOMATCH);
- if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
- #ifdef SUPPORT_UTF8
- if (utf8)
- while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
- #endif
- ecode++;
- break;
- /* Match a single byte, even in UTF-8 mode. This opcode really does match
- any byte, even newline, independent of the setting of PCRE_DOTALL. */
- case OP_ANYBYTE:
- if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_NOT_DIGIT:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c < 256 &&
- #endif
- (md->ctypes[c] & ctype_digit) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_DIGIT:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c >= 256 ||
- #endif
- (md->ctypes[c] & ctype_digit) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_NOT_WHITESPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c < 256 &&
- #endif
- (md->ctypes[c] & ctype_space) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_WHITESPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c >= 256 ||
- #endif
- (md->ctypes[c] & ctype_space) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_NOT_WORDCHAR:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c < 256 &&
- #endif
- (md->ctypes[c] & ctype_word) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- case OP_WORDCHAR:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- if (
- #ifdef SUPPORT_UTF8
- c >= 256 ||
- #endif
- (md->ctypes[c] & ctype_word) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
- #ifdef SUPPORT_UCP
- /* Check the next character by Unicode property. We will get here only
- if the support is in the binary; otherwise a compile-time error occurs. */
- case OP_PROP:
- case OP_NOTPROP:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- {
- int chartype, rqdtype;
- int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
- rqdtype = *(++ecode);
- ecode++;
- if (rqdtype >= 128)
- {
- if ((rqdtype - 128 != category) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- }
- else
- {
- if ((rqdtype != chartype) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- }
- }
- break;
- /* Match an extended Unicode sequence. We will get here only if the support
- is in the binary; otherwise a compile-time error occurs. */
- case OP_EXTUNI:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- GETCHARINCTEST(c, eptr);
- {
- int chartype;
- int othercase;
- int category = ucp_findchar(c, &chartype, &othercase);
- if (category == ucp_M) RRETURN(MATCH_NOMATCH);
- while (eptr < md->end_subject)
- {
- int len = 1;
- if (!utf8) c = *eptr; else
- {
- GETCHARLEN(c, eptr, len);
- }
- category = ucp_findchar(c, &chartype, &othercase);
- if (category != ucp_M) break;
- eptr += len;
- }
- }
- ecode++;
- break;
- #endif
- /* Match a back reference, possibly repeatedly. Look past the end of the
- item to see if there is repeat information following. The code is similar
- to that for character classes, but repeated for efficiency. Then obey
- similar code to character type repeats - written out again for speed.
- However, if the referenced string is the empty string, always treat
- it as matched, any number of times (otherwise there could be infinite
- loops). */
- case OP_REF:
- {
- offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- ecode += 3; /* Advance past item */
- /* If the reference is unset, set the length to be longer than the amount
- of subject left; this ensures that every attempt at a match fails. We
- can't just fail here, because of the possibility of quantifiers with zero
- minima. */
- length = (offset >= offset_top || md->offset_vector[offset] < 0)?
- md->end_subject - eptr + 1 :
- md->offset_vector[offset+1] - md->offset_vector[offset];
- /* Set up for repetition, or handle the non-repeated case */
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
- break;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
- min = GET2(ecode, 1);
- max = GET2(ecode, 3);
- if (max == 0) max = INT_MAX;
- ecode += 5;
- break;
- default: /* No repeat follows */
- if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
- eptr += length;
- continue; /* With the main loop */
- }
- /* If the length of the reference is zero, just continue with the
- main loop. */
- if (length == 0) continue;
- /* First, ensure the minimum number of matches are present. We get back
- the length of the reference string explicitly rather than passing the
- address of eptr, so that eptr can be a register variable. */
- for (i = 1; i <= min; i++)
- {
- if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
- eptr += length;
- }
- /* If min = max, continue at the same level without recursion.
- They are not both allowed to be zero. */
- if (min == max) continue;
- /* If minimizing, keep trying and advancing the pointer */
- if (minimize)
- {
- for (fi = min;; fi++)
- {
- RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || !match_ref(offset, eptr, length, md, ims))
- RRETURN(MATCH_NOMATCH);
- eptr += length;
- }
- /* Control never gets here */
- }
- /* If maximizing, find the longest string and work backwards */
- else
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
- if (!match_ref(offset, eptr, length, md, ims)) break;
- eptr += length;
- }
- while (eptr >= pp)
- {
- RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr -= length;
- }
- RRETURN(MATCH_NOMATCH);
- }
- }
- /* Control never gets here */
- /* Match a bit-mapped character class, possibly repeatedly. This op code is
- used when all the characters in the class have values in the range 0-255,
- and either the matching is caseful, or the characters are in the range
- 0-127 when UTF-8 processing is enabled. The only difference between
- OP_CLASS and OP_NCLASS occurs when a data character outside the range is
- encountered.
- First, look past the end of the item to see if there is repeat information
- following. Then obey similar code to character type repeats - written out
- again for speed. */
- case OP_NCLASS:
- case OP_CLASS:
- {
- data = ecode + 1; /* Save for matching */
- ecode += 33; /* Advance past the item */
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max ==…
Large files files are truncated, but you can click here to view the full file