/tags/harbour-2.0.0/external/pcre/pcreexec.c
C | 2073 lines | 1329 code | 316 blank | 428 comment | 390 complexity | d6aa033f5405db69dcebe3bf35ce5e24 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
Large files files are truncated, but you can click here to view the full file
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- /* This module contains pcre_exec(), the externally visible function that does
- pattern matching using an NFA algorithm, trying to mimic Perl as closely as
- possible. There are also some static supporting functions. */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #define NLBLOCK md /* Block containing newline information */
- #define PSSTART start_subject /* Field containing processed string start */
- #define PSEND end_subject /* Field containing processed string end */
- #include "pcreinal.h"
- /* Undefine some potentially clashing cpp symbols */
- #undef min
- #undef max
/* Bits that may be set in the "flags" argument of match(). */

#define match_condassert  (1 << 0)  /* Called to check a condition assertion */
#define match_cbegroup    (1 << 1)  /* Could-be-empty unlimited repeat group */

/* Ordinary (non-error) results of match(). Errors are reported with the
externally defined PCRE_ERROR_xxx codes, every one of which is negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Internal-only results of match(). They are pushed far below zero so that
they cannot collide with the external error codes. */

#define MATCH_COMMIT       (-999)
#define MATCH_PRUNE        (-998)
#define MATCH_SKIP         (-997)
#define MATCH_THEN         (-996)

/* Upper bound on the number of offset-vector ints that are saved on the stack
for a recursive call; larger vectors are saved with malloc instead. Keep this
a multiple of 3, because the offset vector length is always a multiple of 3. */

#define REC_STACK_SAVE_MAX 30
/* Minimum and maximum repeat counts for the common repeats. The two tables are
indexed in parallel; a maximum of 0 means "no upper limit". */

static const char rep_min[] = {
  0, 0,     /* min 0, max unlimited */
  1, 1,     /* min 1, max unlimited */
  0, 0      /* min 0, max 1 */
};

static const char rep_max[] = {
  0, 0,     /* unlimited */
  0, 0,     /* unlimited */
  1, 1      /* at most one */
};
- #ifdef DEBUG
- /*************************************************
- * Debugging function to print chars *
- *************************************************/
- /* Print a sequence of chars in printable format, stopping at the end of the
- subject if the requested.
- Arguments:
- p points to characters
- length number to print
- is_subject TRUE if printing from within md->start_subject
- md pointer to matching data block, if is_subject is TRUE
- Returns: nothing
- */
- static void
- pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
- {
- unsigned int c;
- if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
- while (length-- > 0)
- if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
- }
- #endif
- /*************************************************
- * Match a back-reference *
- *************************************************/
- /* If a back reference hasn't been set, the length that is passed is greater
- than the number of characters left in the string, so the match fails.
- Arguments:
- offset index into the offset vector
- eptr points into the subject
- length length to be matched
- md points to match data block
- ims the ims flags
- Returns: TRUE if matched
- */
- static BOOL
- match_ref(int offset, register USPTR eptr, int length, match_data *md,
- unsigned long int ims)
- {
- USPTR p = md->start_subject + md->offset_vector[offset];
- #ifdef DEBUG
- if (eptr >= md->end_subject)
- printf("matching subject <null>");
- else
- {
- printf("matching subject ");
- pchars(eptr, length, TRUE, md);
- }
- printf(" against backref ");
- pchars(p, length, FALSE, md);
- printf("\n");
- #endif
- /* Always fail if not enough characters left */
- if (length > md->end_subject - eptr) return FALSE;
- /* Separate the caseless case for speed. In UTF-8 mode we can only do this
- properly if Unicode properties are supported. Otherwise, we can check only
- ASCII characters. */
- if ((ims & PCRE_CASELESS) != 0)
- {
- #ifdef SUPPORT_UTF8
- #ifdef SUPPORT_UCP
- if (md->utf8)
- {
- USPTR endptr = eptr + length;
- while (eptr < endptr)
- {
- int c, d;
- GETCHARINC(c, eptr);
- GETCHARINC(d, p);
- if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
- }
- }
- else
- #endif
- #endif
- /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
- is no UCP support. */
- while (length-- > 0)
- { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
- }
- /* In the caseful case, we can just compare the bytes, whether or not we
- are in UTF-8 mode. */
- else
- { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
- return TRUE;
- }
- /***************************************************************************
- ****************************************************************************
- RECURSION IN THE match() FUNCTION
- The match() function is highly recursive, though not every recursive call
- increases the recursive depth. Nevertheless, some regular expressions can cause
- it to recurse to a great depth. I was writing for Unix, so I just let it call
- itself recursively. This uses the stack for saving everything that has to be
- saved for a recursive call. On Unix, the stack can be large, and this works
- fine.
- It turns out that on some non-Unix-like systems there are problems with
- programs that use a lot of stack. (This despite the fact that every last chip
- has oodles of memory these days, and techniques for extending the stack have
- been known for decades.) So....
- There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
- calls by keeping local variables that need to be preserved in blocks of memory
- obtained from malloc() instead of on the stack. Macros are used to
- achieve this so that the actual code doesn't look very different to what it
- always used to.
- The original heap-recursive code used longjmp(). However, it seems that this
- can be very slow on some operating systems. Following a suggestion from Stan
- Switzer, the use of longjmp() has been abolished, at the cost of having to
- provide a unique number for each call to RMATCH. There is no way of generating
- a sequence of numbers at compile time in C. I have given them names, to make
- them stand out more clearly.
- Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
- FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
- tests. Furthermore, not using longjmp() means that local dynamic variables
- don't have indeterminate values; this has meant that the frame size can be
- reduced because the result can be "passed back" by straight setting of the
- variable instead of being passed in the frame.
- ****************************************************************************
- ***************************************************************************/
/* Identifying numbers, one per RMATCH call site, starting at 1. If this list
is altered, the code at HEAP_RETURN below has to be altered to match. */

enum {
  RM1 = 1,
  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19,
  RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28,
  RM29, RM30, RM31, RM32, RM33, RM34, RM35, RM36, RM37,
  RM38, RM39, RM40, RM41, RM42, RM43, RM44, RM45, RM46,
  RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54
};
- /* These versions of the macros use the stack, as normal. There are debugging
- versions and production versions. Note that the "rw" argument of RMATCH isn't
- actuall used in this definition. */
- #ifndef NO_RECURSE
- #define REGISTER register
- #ifdef DEBUG
- #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
- { \
- printf("match() called in line %d\n", __LINE__); \
- rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
- printf("to line %d\n", __LINE__); \
- }
- #define RRETURN(ra) \
- { \
- printf("match() returned %d from line %d ", ra, __LINE__); \
- return ra; \
- }
- #else
- #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
- rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
- #define RRETURN(ra) return ra
- #endif
- #else
- /* These versions of the macros manage a private stack on the heap. Note that
- the "rd" argument of RMATCH isn't actually used in this definition. It's the md
- argument of match(), which never changes. */
- #define REGISTER
- #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
- {\
- heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
- frame->Xwhere = rw; \
- newframe->Xeptr = ra;\
- newframe->Xecode = rb;\
- newframe->Xmstart = mstart;\
- newframe->Xoffset_top = rc;\
- newframe->Xims = re;\
- newframe->Xeptrb = rf;\
- newframe->Xflags = rg;\
- newframe->Xrdepth = frame->Xrdepth + 1;\
- newframe->Xprevframe = frame;\
- frame = newframe;\
- DPRINTF(("restarting from line %d\n", __LINE__));\
- goto HEAP_RECURSE;\
- L_##rw:\
- DPRINTF(("jumped back to line %d\n", __LINE__));\
- }
- #define RRETURN(ra)\
- {\
- heapframe *newframe = frame;\
- frame = newframe->Xprevframe;\
- (pcre_stack_free)(newframe);\
- if (frame != NULL)\
- {\
- rrc = ra;\
- goto HEAP_RETURN;\
- }\
- return ra;\
- }
- /* Structure for remembering the local variables in a private frame */
- typedef struct heapframe {
- struct heapframe *Xprevframe;
- /* Function arguments that may change */
- USPTR Xeptr;
- const uschar *Xecode;
- USPTR Xmstart;
- int Xoffset_top;
- long int Xims;
- eptrblock *Xeptrb;
- int Xflags;
- unsigned int Xrdepth;
- /* Function local variables */
- USPTR Xcallpat;
- #ifdef SUPPORT_UTF8
- USPTR Xcharptr;
- #endif
- USPTR Xdata;
- USPTR Xnext;
- USPTR Xpp;
- USPTR Xprev;
- USPTR Xsaved_eptr;
- recursion_info Xnew_recursive;
- BOOL Xcur_is_word;
- BOOL Xcondition;
- BOOL Xprev_is_word;
- unsigned long int Xoriginal_ims;
- #ifdef SUPPORT_UCP
- int Xprop_type;
- int Xprop_value;
- int Xprop_fail_result;
- int Xprop_category;
- int Xprop_chartype;
- int Xprop_script;
- int Xoclength;
- uschar Xocchars[8];
- #endif
- int Xcodelink;
- int Xctype;
- unsigned int Xfc;
- int Xfi;
- int Xlength;
- int Xmax;
- int Xmin;
- int Xnumber;
- int Xoffset;
- int Xop;
- int Xsave_capture_last;
- int Xsave_offset1, Xsave_offset2, Xsave_offset3;
- int Xstacksave[REC_STACK_SAVE_MAX];
- eptrblock Xnewptrb;
- /* Where to jump back to */
- int Xwhere;
- } heapframe;
- #endif
- /***************************************************************************
- ***************************************************************************/
- /*************************************************
- * Match from current position *
- *************************************************/
- /* This function is called recursively in many circumstances. Whenever it
- returns a negative (error) response, the outer incarnation must also return the
- same response. */
/* These macros pack up the tests that are used for partial matching, and which
appear several times in the code. The "hit end" flag is set if the pointer is
at the end of the subject and also past the start of the match (i.e. something
has been matched). For hard partial matching (md->partial > 1), we then return
PCRE_ERROR_PARTIAL immediately. SCHECK_PARTIAL is for use when it is already
known that we are past the end of the subject, so it omits the end test. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) { RRETURN(PCRE_ERROR_PARTIAL); }\
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) { RRETURN(PCRE_ERROR_PARTIAL); }\
    }
- /* Performance note: It might be tempting to extract commonly used fields from
- the md structure (e.g. utf8, end_subject) into individual variables to improve
- performance. Tests using gcc on a SPARC disproved this; in the first case, it
- made performance worse.
- Arguments:
- eptr pointer to current character in subject
- ecode pointer to current position in compiled code
- mstart pointer to the current match start position (can be modified
- by encountering \K)
- offset_top current top pointer
- md pointer to "static" info for the match
- ims current /i, /m, and /s options
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- flags can contain
- match_condassert - this is an assertion condition
- match_cbegroup - this is the start of an unlimited repeat
- group that can match an empty string
- rdepth the recursion depth
- Returns: MATCH_MATCH if matched ) these values are >= 0
- MATCH_NOMATCH if failed to match )
- a negative PCRE_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by repeated call or recursion limit)
- */
- static int
- match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
- int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
- int flags, unsigned int rdepth)
- {
- /* These variables do not need to be preserved over recursion in this function,
- so they can be ordinary variables in all cases. Mark some of them with
- "register" because they are used a lot in loops. */
- register int rrc; /* Returns from recursive calls */
- register int i; /* Used for loops not involving calls to RMATCH() */
- register unsigned int c; /* Character values not kept over RMATCH() calls */
- register BOOL utf8; /* Local copy of UTF-8 flag for speed */
- BOOL minimize, possessive; /* Quantifier options */
- int condcode;
- /* When recursion is not being used, all "local" variables that have to be
- preserved over calls to RMATCH() are part of a "frame" which is obtained from
- heap storage. Set up the top-level frame here; others are obtained from the
- heap whenever RMATCH() does a "recursion". See the macro definitions above. */
- #ifdef NO_RECURSE
- heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
- frame->Xprevframe = NULL; /* Marks the top level */
- /* Copy in the original argument variables */
- frame->Xeptr = eptr;
- frame->Xecode = ecode;
- frame->Xmstart = mstart;
- frame->Xoffset_top = offset_top;
- frame->Xims = ims;
- frame->Xeptrb = eptrb;
- frame->Xflags = flags;
- frame->Xrdepth = rdepth;
- /* This is where control jumps back to to effect "recursion" */
- HEAP_RECURSE:
- /* Macros make the argument variables come from the current frame */
- #define eptr frame->Xeptr
- #define ecode frame->Xecode
- #define mstart frame->Xmstart
- #define offset_top frame->Xoffset_top
- #define ims frame->Xims
- #define eptrb frame->Xeptrb
- #define flags frame->Xflags
- #define rdepth frame->Xrdepth
- /* Ditto for the local variables */
- #ifdef SUPPORT_UTF8
- #define charptr frame->Xcharptr
- #endif
- #define callpat frame->Xcallpat
- #define codelink frame->Xcodelink
- #define data frame->Xdata
- #define next frame->Xnext
- #define pp frame->Xpp
- #define prev frame->Xprev
- #define saved_eptr frame->Xsaved_eptr
- #define new_recursive frame->Xnew_recursive
- #define cur_is_word frame->Xcur_is_word
- #define condition frame->Xcondition
- #define prev_is_word frame->Xprev_is_word
- #define original_ims frame->Xoriginal_ims
- #ifdef SUPPORT_UCP
- #define prop_type frame->Xprop_type
- #define prop_value frame->Xprop_value
- #define prop_fail_result frame->Xprop_fail_result
- #define prop_category frame->Xprop_category
- #define prop_chartype frame->Xprop_chartype
- #define prop_script frame->Xprop_script
- #define oclength frame->Xoclength
- #define occhars frame->Xocchars
- #endif
- #define ctype frame->Xctype
- #define fc frame->Xfc
- #define fi frame->Xfi
- #define length frame->Xlength
- #define max frame->Xmax
- #define min frame->Xmin
- #define number frame->Xnumber
- #define offset frame->Xoffset
- #define op frame->Xop
- #define save_capture_last frame->Xsave_capture_last
- #define save_offset1 frame->Xsave_offset1
- #define save_offset2 frame->Xsave_offset2
- #define save_offset3 frame->Xsave_offset3
- #define stacksave frame->Xstacksave
- #define newptrb frame->Xnewptrb
- /* When recursion is being used, local variables are allocated on the stack and
- get preserved during recursion in the normal way. In this environment, fi and
- i, and fc and c, can be the same variables. */
- #else /* NO_RECURSE not defined */
- #define fi i
- #define fc c
- #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
- const uschar *charptr; /* in small blocks of the code. My normal */
- #endif /* style of coding would have declared */
- const uschar *callpat; /* them within each of those blocks. */
- const uschar *data; /* However, in order to accommodate the */
- const uschar *next; /* version of this code that uses an */
- USPTR pp; /* external "stack" implemented on the */
- const uschar *prev; /* heap, it is easier to declare them all */
- USPTR saved_eptr; /* here, so the declarations can be cut */
- /* out in a block. The only declarations */
- recursion_info new_recursive; /* within blocks below are for variables */
- /* that do not have to be preserved over */
- BOOL cur_is_word; /* a recursive call to RMATCH(). */
- BOOL condition;
- BOOL prev_is_word;
- unsigned long int original_ims;
- #ifdef SUPPORT_UCP
- int prop_type;
- int prop_value;
- int prop_fail_result;
- int prop_category;
- int prop_chartype;
- int prop_script;
- int oclength;
- uschar occhars[8];
- #endif
- int codelink;
- int ctype;
- int length;
- int max;
- int min;
- int number;
- int offset;
- int op;
- int save_capture_last;
- int save_offset1, save_offset2, save_offset3;
- int stacksave[REC_STACK_SAVE_MAX];
- eptrblock newptrb;
- #endif /* NO_RECURSE */
- /* These statements are here to stop the compiler complaining about uninitialized
- variables. */
- #ifdef SUPPORT_UCP
- prop_value = 0;
- prop_fail_result = 0;
- #endif
- /* This label is used for tail recursion, which is used in a few cases even
- when NO_RECURSE is not defined, in order to reduce the amount of stack that is
- used. Thanks to Ian Taylor for noticing this possibility and sending the
- original patch. */
- TAIL_RECURSE:
- /* OK, now we can get on with the real code of the function. Recursive calls
- are specified by the macro RMATCH and RRETURN is used to return. When
- NO_RECURSE is *not* defined, these just turn into a recursive call to match()
- and a "return", respectively (possibly with some debugging if DEBUG is
- defined). However, RMATCH isn't like a function call because it's quite a
- complicated macro. It has to be used in one particular way. This shouldn't,
- however, impact performance when true recursion is being used. */
- #ifdef SUPPORT_UTF8
- utf8 = md->utf8; /* Local copy of the flag */
- #else
- utf8 = FALSE;
- #endif
- /* First check that we haven't called match() too many times, or that we
- haven't exceeded the recursive call limit. */
- if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
- if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
- original_ims = ims; /* Save for resetting on ')' */
- /* At the start of a group with an unlimited repeat that may match an empty
- string, the match_cbegroup flag is set. When this is the case, add the current
- subject pointer to the chain of such remembered pointers, to be checked when we
- hit the closing ket, in order to break infinite loops that match no characters.
- When match() is called in other circumstances, don't add to the chain. The
- match_cbegroup flag must NOT be used with tail recursion, because the memory
- block that is used is on the stack, so a new one may be required for each
- match(). */
- if ((flags & match_cbegroup) != 0)
- {
- newptrb.epb_saved_eptr = eptr;
- newptrb.epb_prev = eptrb;
- eptrb = &newptrb;
- }
- /* Now start processing the opcodes. */
- for (;;)
- {
- minimize = possessive = FALSE;
- op = *ecode;
- switch(op)
- {
- case OP_FAIL:
- RRETURN(MATCH_NOMATCH);
- case OP_PRUNE:
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
- ims, eptrb, flags, RM51);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
- case OP_COMMIT:
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
- ims, eptrb, flags, RM52);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_COMMIT);
- case OP_SKIP:
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
- ims, eptrb, flags, RM53);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = eptr; /* Pass back current position */
- RRETURN(MATCH_SKIP);
- case OP_THEN:
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
- ims, eptrb, flags, RM54);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_THEN);
- /* Handle a capturing bracket. If there is space in the offset vector, save
- the current subject position in the working slot at the top of the vector.
- We mustn't change the current values of the data slot, because they may be
- set from a previous iteration of this group, and be referred to by a
- reference inside the group.
- If the bracket fails to match, we need to restore this value and also the
- values of the final offsets, in case they were set by a previous iteration
- of the same bracket.
- If there isn't enough space in the offset vector, treat this as if it were
- a non-capturing bracket. Don't worry about setting the flag for the error
- case here; that is handled in the code for KET. */
- case OP_CBRA:
- case OP_SCBRA:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
- #ifdef DEBUG
- printf("start bracket %d\n", number);
- printf("subject=");
- pchars(eptr, 16, TRUE, md);
- printf("\n");
- #endif
- if (offset < md->offset_max)
- {
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
- md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
- flags = (op == OP_SCBRA)? match_cbegroup : 0;
- do
- {
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
- ims, eptrb, flags, RM1);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT);
- DPRINTF(("bracket %d failed\n", number));
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
- RRETURN(MATCH_NOMATCH);
- }
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- DPRINTF(("insufficient capture room: treat as non-capturing\n"));
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* Non-capturing bracket. Loop for all the alternatives. When we get to the
- final alternative within the brackets, we would return the result of a
- recursive call to match() whatever happened. We can reduce stack usage by
- turning this into a tail recursion, except in the case when match_cbegroup
- is set.*/
- case OP_BRA:
- case OP_SBRA:
- DPRINTF(("start non-capturing bracket\n"));
- flags = (op >= OP_SBRA)? match_cbegroup : 0;
- for (;;)
- {
- if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
- {
- if (flags == 0) /* Not a possibly empty group */
- {
- ecode += _pcre_OP_lengths[*ecode];
- DPRINTF(("bracket 0 tail recursion\n"));
- goto TAIL_RECURSE;
- }
- /* Possibly empty group; can't use tail recursion. */
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
- eptrb, flags, RM48);
- RRETURN(rrc);
- }
- /* For non-final alternatives, continue the loop for a NOMATCH result;
- otherwise return. */
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
- eptrb, flags, RM2);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
- /* Control never reaches here. */
- /* Conditional group: compilation checked that there are no more than
- two branches. If the condition is false, skipping the first branch takes us
- past the end if there is only one branch, but that's OK because that is
- exactly what going to the ket would do. As there is only one branch to be
- obeyed, we can use tail recursion to avoid using another stack frame. */
- case OP_COND:
- case OP_SCOND:
- codelink= GET(ecode, 1);
- /* Because of the way auto-callout works during compile, a callout item is
- inserted between OP_COND and an assertion condition. */
- if (ecode[LINK_SIZE+1] == OP_CALLOUT)
- {
- if (pcre_callout != NULL)
- {
- pcre_callout_block cb;
- cb.version = 1; /* Version 1 of the callout block */
- cb.callout_number = ecode[LINK_SIZE+2];
- cb.offset_vector = md->offset_vector;
- cb.subject = (PCRE_SPTR)md->start_subject;
- cb.subject_length = md->end_subject - md->start_subject;
- cb.start_match = mstart - md->start_subject;
- cb.current_position = eptr - md->start_subject;
- cb.pattern_position = GET(ecode, LINK_SIZE + 3);
- cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
- cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
- cb.callout_data = md->callout_data;
- if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
- ecode += _pcre_OP_lengths[OP_CALLOUT];
- }
- condcode = ecode[LINK_SIZE+1];
- /* Now see what the actual condition is */
- if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
- {
- if (md->recursive == NULL) /* Not recursing => FALSE */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- else
- {
- int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
- condition = (recno == RREF_ANY || recno == md->recursive->group_num);
- /* If the test is for recursion into a specific subpattern, and it is
- false, but the test was set up by name, scan the table to see if the
- name refers to any other numbers, and test them. The condition is true
- if any one is set. */
- if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
- {
- uschar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == recno) break;
- slotA += md->name_entry_size;
- }
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
- if (i < md->name_count)
- {
- uschar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- /* Scan up for duplicates */
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
- /* Choose branch according to the condition */
- ecode += condition? 3 : GET(ecode, 1);
- }
- }
- else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
- {
- offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
- condition = offset < offset_top && md->offset_vector[offset] >= 0;
- /* If the numbered capture is unset, but the reference was by name,
- scan the table to see if the name refers to any other numbers, and test
- them. The condition is true if any one is set. This is tediously similar
- to the code above, but not close enough to try to amalgamate. */
- if (!condition && condcode == OP_NCREF)
- {
- int refno = offset >> 1;
- uschar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == refno) break;
- slotA += md->name_entry_size;
- }
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
- if (i < md->name_count)
- {
- uschar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- /* Scan up for duplicates */
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
- /* Choose branch according to the condition */
- ecode += condition? 3 : GET(ecode, 1);
- }
- else if (condcode == OP_DEF) /* DEFINE - always false */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- /* The condition is an assertion. Call match() to evaluate it - setting
- the final argument match_condassert causes it to stop at the end of an
- assertion. */
- else
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
- match_condassert, RM3);
- if (rrc == MATCH_MATCH)
- {
- condition = TRUE;
- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
- while (*ecode == OP_ALT) ecode += GET(ecode, 1);
- }
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
- {
- RRETURN(rrc); /* Need braces because of following else */
- }
- else
- {
- condition = FALSE;
- ecode += codelink;
- }
- }
- /* We are now at the branch that is to be obeyed. As there is only one,
- we can use tail recursion to avoid using another stack frame, except when
- match_cbegroup is required for an unlimited repeat of a possibly empty
- group. If the second alternative doesn't exist, we can just plough on. */
- if (condition || *ecode == OP_ALT)
- {
- ecode += 1 + LINK_SIZE;
- if (op == OP_SCOND) /* Possibly empty group */
- {
- RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
- RRETURN(rrc);
- }
- else /* Group must match something */
- {
- flags = 0;
- goto TAIL_RECURSE;
- }
- }
- else /* Condition false & no alternative */
- {
- ecode += 1 + LINK_SIZE;
- }
- break;
- /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
- to close any currently open capturing brackets. */
- case OP_CLOSE:
- number = GET2(ecode, 1);
- offset = number << 1;
- #ifdef DEBUG
- printf("end bracket %d at *ACCEPT", number);
- printf("\n");
- #endif
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
- {
- md->offset_vector[offset] =
- md->offset_vector[md->offset_end - number];
- md->offset_vector[offset+1] = eptr - md->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
- }
- ecode += 3;
- break;
- /* End of the pattern, either real or forced. If we are in a top-level
- recursion, we should restore the offsets appropriately and continue from
- after the call. */
- case OP_ACCEPT:
- case OP_END:
- if (md->recursive != NULL && md->recursive->group_num == 0)
- {
- recursion_info *rec = md->recursive;
- DPRINTF(("End of pattern in a (?0) recursion\n"));
- md->recursive = rec->prevrec;
- memmove(md->offset_vector, rec->offset_save,
- rec->saved_max * sizeof(int));
- offset_top = rec->save_offset_top;
- mstart = rec->save_start;
- ims = original_ims;
- ecode = rec->after_call;
- break;
- }
- /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
- set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
- the subject. In both cases, backtracking will then try other alternatives,
- if any. */
- if (eptr == mstart &&
- (md->notempty ||
- (md->notempty_atstart &&
- mstart == md->start_subject + md->start_offset)))
- RRETURN(MATCH_NOMATCH);
- /* Otherwise, we have a match. */
- md->end_match_ptr = eptr; /* Record where we ended */
- md->end_offset_top = offset_top; /* and how many extracts were taken */
- md->start_match_ptr = mstart; /* and the start (\K can modify) */
- RRETURN(MATCH_MATCH);
- /* Change option settings */
- case OP_OPT:
- ims = ecode[1];
- ecode += 2;
- DPRINTF(("ims set to %02lx\n", ims));
- break;
- /* Assertion brackets. Check the alternative branches in turn - the
- matching won't pass the KET for an assertion. If any one branch matches,
- the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
- start of each branch to move the current point backwards, so the code at
- this level is identical to the lookahead case. */
- case OP_ASSERT:
- case OP_ASSERTBACK:
- do
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
- RM4);
- if (rrc == MATCH_MATCH) break;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT);
- if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
- /* If checking an assertion for a condition, return MATCH_MATCH. */
- if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
- /* Continue from after the assertion, updating the offsets high water
- mark, since extracts may have been taken during the assertion. */
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- ecode += 1 + LINK_SIZE;
- offset_top = md->end_offset_top;
- continue;
- /* Negative assertion: all branches must fail to match */
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK_NOT:
- do
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
- RM5);
- if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
- if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
- ecode += 1 + LINK_SIZE;
- continue;
- /* Move the subject pointer back. This occurs only at the start of
- each branch of a lookbehind assertion. If we are too close to the start to
- move back, this match function fails. When working with UTF-8 we move
- back a number of characters, not bytes. */
- case OP_REVERSE:
- #ifdef SUPPORT_UTF8
- if (utf8)
- {
- i = GET(ecode, 1);
- while (i-- > 0)
- {
- eptr--;
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- BACKCHAR(eptr);
- }
- }
- else
- #endif
- /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
- {
- eptr -= GET(ecode, 1);
- if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
- }
- /* Save the earliest consulted character, then skip to next op code */
- if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
- ecode += 1 + LINK_SIZE;
- break;
- /* The callout item calls an external function, if one is provided, passing
- details of the match so far. This is mainly for debugging, though the
- function is able to force a failure. */
- case OP_CALLOUT:
- if (pcre_callout != NULL)
- {
- pcre_callout_block cb;
- cb.version = 1; /* Version 1 of the callout block */
- cb.callout_number = ecode[1];
- cb.offset_vector = md->offset_vector;
- cb.subject = (PCRE_SPTR)md->start_subject;
- cb.subject_length = md->end_subject - md->start_subject;
- cb.start_match = mstart - md->start_subject;
- cb.current_position = eptr - md->start_subject;
- cb.pattern_position = GET(ecode, 2);
- cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
- cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
- cb.callout_data = md->callout_data;
- if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
- ecode += 2 + 2*LINK_SIZE;
- break;
- /* Recursion either matches the current regex, or some subexpression. The
- offset data is the offset to the starting bracket from the start of the
- whole pattern. (This is so that it works from duplicated subpatterns.)
- If there are any capturing brackets started but not finished, we have to
- save their starting points and reinstate them after the recursion. However,
- we don't know how many such there are (offset_top records the completed
- total) so we just have to save all the potential data. There may be up to
- 65535 such values, which is too large to put on the stack, but using malloc
- for small numbers seems expensive. As a compromise, the stack is used when
- there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
- is used. If the malloc fails, the saved data cannot be kept, so this call of
- match() gives up and returns PCRE_ERROR_NOMEMORY.
- There are also other values that have to be saved. We use a chained
- sequence of blocks that actually live on the stack. Thanks to Robin Houston
- for the original version of this logic. */
- case OP_RECURSE:
- {
- callpat = md->start_code + GET(ecode, 1);
- new_recursive.group_num = (callpat == md->start_code)? 0 :
- GET2(callpat, 1 + LINK_SIZE);
- /* Add to "recursing stack" */
- new_recursive.prevrec = md->recursive;
- md->recursive = &new_recursive;
- /* Find where to continue from afterwards */
- ecode += 1 + LINK_SIZE;
- new_recursive.after_call = ecode;
- /* Now save the offset data. */
- new_recursive.saved_max = md->offset_end;
- if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
- new_recursive.offset_save = stacksave;
- else
- {
- new_recursive.offset_save =
- (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
- if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
- }
- memcpy(new_recursive.offset_save, md->offset_vector,
- new_recursive.saved_max * sizeof(int));
- new_recursive.save_start = mstart;
- new_recursive.save_offset_top = offset_top;
- mstart = eptr;
- /* OK, now we can do the recursion. For each top-level alternative we
- restore the offset and recursion data. */
- DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
- flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
- do
- {
- RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
- md, ims, eptrb, flags, RM6);
- if (rrc == MATCH_MATCH)
- {
- DPRINTF(("Recursion matched\n"));
- md->recursive = new_recursive.prevrec;
- if (new_recursive.offset_save != stacksave)
- (pcre_free)(new_recursive.offset_save);
- RRETURN(MATCH_MATCH);
- }
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
- {
- DPRINTF(("Recursion gave error %d\n", rrc));
- if (new_recursive.offset_save != stacksave)
- (pcre_free)(new_recursive.offset_save);
- RRETURN(rrc);
- }
- md->recursive = &new_recursive;
- memcpy(md->offset_vector, new_recursive.offset_save,
- new_recursive.saved_max * sizeof(int));
- callpat += GET(callpat, 1);
- }
- while (*callpat == OP_ALT);
- DPRINTF(("Recursion didn't match\n"));
- md->recursive = new_recursive.prevrec;
- if (new_recursive.offset_save != stacksave)
- (pcre_free)(new_recursive.offset_save);
- RRETURN(MATCH_NOMATCH);
- }
- /* Control never reaches here */
- /* "Once" brackets are like assertion brackets except that after a match,
- the point in the subject string is not moved back. Thus there can never be
- a move back into the brackets. Friedl calls these "atomic" subpatterns.
- Check the alternative branches in turn - the matching won't pass the KET
- for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer. */
- case OP_ONCE:
- prev = ecode;
- saved_eptr = eptr;
- do
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
- if (rrc == MATCH_MATCH) break;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
- /* If we hit the end of the group (which could be repeated), fail */
- if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
- /* Continue as from after the assertion, updating the offsets high water
- mark, since extracts may have been taken. */
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. If there is an options reset, it will get obeyed in the normal
- course of events. */
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1+LINK_SIZE;
- break;
- }
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. The second "call" of match()
- uses tail recursion, to avoid using another stack frame. We need to reset
- any options that changed within the bracket before re-running it, so
- check the next opcode. */
- if (ecode[1+LINK_SIZE] == OP_OPT)
- {
- ims = (ims & ~PCRE_IMS) | ecode[4];
- DPRINTF(("ims set to %02lx at group repeat\n", ims));
- }
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode = prev;
- flags = 0;
- goto TAIL_RECURSE;
- }
- else /* OP_KETRMAX */
- {
- RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += 1 + LINK_SIZE;
- flags = 0;
- goto TAIL_RECURSE;
- }
- /* Control never gets here */
- /* An alternation is the end of a branch; scan along to find the end of the
- bracketed group and go to there. */
- case OP_ALT:
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- break;
- /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
- indicating that it may occur zero times. It may repeat infinitely, or not
- at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
- with fixed upper repeat limits are compiled as a number of copies, with the
- optional ones preceded by BRAZERO or BRAMINZERO. */
- case OP_BRAZERO:
- {
- next = ecode+1;
- RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- do next += GET(next,1); while (*next == OP_ALT);
- ecode = next + 1 + LINK_SIZE;
- }
- break;
- case OP_BRAMINZERO:
- {
- next = ecode+1;
- do next += GET(next, 1); while (*next == OP_ALT);
- RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode++;
- }
- break;
- case OP_SKIPZERO:
- {
- next = ecode+1;
- do next += GET(next,1); while (*next == OP_ALT);
- ecode = next + 1 + LINK_SIZE;
- }
- break;
- /* End of a group, repeated or non-repeating. */
- case OP_KET:
- case OP_KETRMIN:
- case OP_KETRMAX:
- prev = ecode - GET(ecode, 1);
- /* If this was a group that remembered the subject start, in order to break
- infinite repeats of empty string matches, retrieve the subject start from
- the chain. Otherwise, set it NULL. */
- if (*prev >= OP_SBRA)
- {
- saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
- eptrb = eptrb->epb_prev; /* Backup to previous group */
- }
- else saved_eptr = NULL;
- /* If we are at the end of an assertion group, stop matching and return
- MATCH_MATCH, but record the current high water mark for use by positive
- assertions. Do this also for the "once" (atomic) groups. */
- if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
- *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
- *prev == OP_ONCE)
- {
- md->end_match_ptr = eptr; /* For ONCE */
- md->end_offset_top = offset_top;
- RRETURN(MATCH_MATCH);
- }
- /* For capturing groups we have to check the group number back at the start
- and if necessary complete handling an extraction by setting the offsets and
- bumping the high water mark. Note that whole-pattern recursion is coded as
- a recurse into group 0, so it won't be picked up here. Instead, we catch it
- when the OP_END is reached. Other recursion is handled here. */
- if (*prev == OP_CBRA || *prev == OP_SCBRA)
- {
- number = GET2(prev, 1+LINK_SIZE);
- offset = number << 1;
- #ifdef DEBUG
- printf("end bracket %d", number);
- printf("\n");
- #endif
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
- {
- md->offset_vector[offset] =
- md->offset_vector[md->offset_end - number];
- md->offset_vector[offset+1] = eptr - md->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
- }
- /* Handle a recursively called group. Restore the offsets
- appropriately and continue from after the call. */
- if (md->recursive != NULL && md->recursive->group_num == number)
- {
- recursion_info *rec = md->…
Large files are truncated, but you can click here to view the full file