/trunk/harbour/src/3rd/pcre/pcreexec.c
C | 1904 lines | 1079 code | 321 blank | 504 comment | 396 complexity | d80314669e22e9bcba3ac4a3ebca5239 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
Large files are truncated, but you can click here to view the full file
- /*************************************************
- * Perl-Compatible Regular Expressions *
- *************************************************/
- /* PCRE is a library of functions to support regular expressions whose syntax
- and semantics are as close as possible to those of the Perl 5 language.
- Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
- -----------------------------------------------------------------------------
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the University of Cambridge nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- -----------------------------------------------------------------------------
- */
- /* This module contains pcre_exec(), the externally visible function that does
- pattern matching using an NFA algorithm, trying to mimic Perl as closely as
- possible. There are also some static supporting functions. */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #define NLBLOCK md /* Block containing newline information */
- #define PSSTART start_subject /* Field containing processed string start */
- #define PSEND end_subject /* Field containing processed string end */
- #include "pcreinal.h"
- /* Undefine some potentially clashing cpp symbols */
- #undef min
- #undef max
- /* Values for setting in md->match_function_type to indicate two special types
- of call to match(). We do it this way to save on using another stack variable,
- as stack usage is to be discouraged. */
- #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
- #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
- /* Non-error returns from the match() function. Error returns are externally
- defined PCRE_ERROR_xxx codes, which are all negative. */
- #define MATCH_MATCH 1
- #define MATCH_NOMATCH 0
- /* Special internal returns from the match() function. Make them sufficiently
- negative to avoid the external error codes. */
- #define MATCH_ACCEPT (-999)
- #define MATCH_COMMIT (-998)
- #define MATCH_KETRPOS (-997)
- #define MATCH_ONCE (-996)
- #define MATCH_PRUNE (-995)
- #define MATCH_SKIP (-994)
- #define MATCH_SKIP_ARG (-993)
- #define MATCH_THEN (-992)
- /* Maximum number of ints of offset to save on the stack for recursive calls.
- If the offset vector is bigger, malloc is used. This should be a multiple of 3,
- because the offset vector is always a multiple of 3 long. */
- #define REC_STACK_SAVE_MAX 30
- /* Min and max values for the common repeats; for the maxima, 0 => infinity.
- The two tables are parallel: entry i of rep_min and entry i of rep_max describe
- the same repeat. Read pairwise, the six entries are {0,inf}, {0,inf}, {1,inf},
- {1,inf}, {0,1} and {0,1}. NOTE(review): presumably each adjacent pair is the
- greedy and the minimizing form of the same quantifier - confirm against the
- code that indexes these tables, which is outside this excerpt. */
- static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
- static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
- #ifdef PCRE_DEBUG
- /*************************************************
- * Debugging function to print chars *
- *************************************************/
- /* Print a sequence of chars in printable format, stopping at the end of the
- subject if requested. Characters for which isprint() is true are written as
- themselves; all others are written as a backslash-x hexadecimal escape.
- Arguments:
- p points to characters
- length number to print
- is_subject TRUE if printing from within md->start_subject
- md pointer to matching data block, if is_subject is TRUE
- Returns: nothing
- */
- static void
- pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
- {
-   unsigned int ch;
-   /* When printing from the subject, never run past its end. */
-   if (is_subject && length > md->end_subject - p)
-     length = md->end_subject - p;
-   for (; length > 0; length--)
-     {
-     ch = *p++;
-     if (isprint(ch))
-       printf("%c", ch);
-     else
-       printf("\\x%02x", ch);
-     }
- }
- #endif
- /*************************************************
- * Match a back-reference *
- *************************************************/
- /* Normally, if a back reference hasn't been set, the length that is passed is
- negative, so the match always fails. However, in JavaScript compatibility mode,
- the length passed is zero. Note that in caseless UTF-8 mode, the number of
- subject bytes matched may be different to the number of reference bytes.
- The tests for "does the reference fit into what is left of the subject" compare
- lengths (length > md->end_subject - eptr) instead of computing eptr + length,
- so that a pointer lying beyond the end of the subject is never formed; such a
- pointer is undefined behaviour in C and could wrap round for a huge length.
- Arguments:
- offset index into the offset vector
- eptr pointer into the subject
- length length of reference to be matched (number of bytes)
- md points to match data block
- caseless TRUE if caseless
- Returns: < 0 if not matched, otherwise the number of subject bytes matched
- */
- static int
- match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
-   BOOL caseless)
- {
- PCRE_PUCHAR eptr_start = eptr;   /* Remembered to compute the return value */
- register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
- #ifdef PCRE_DEBUG
- if (eptr >= md->end_subject)
-   printf("matching subject <null>");
- else
-   {
-   printf("matching subject ");
-   pchars(eptr, length, TRUE, md);
-   }
- printf(" against backref ");
- pchars(p, length, FALSE, md);
- printf("\n");
- #endif
- /* Always fail if reference not set (and not JavaScript compatible). */
- if (length < 0) return -1;
- /* Separate the caseless case for speed. In UTF-8 mode we can only do this
- properly if Unicode properties are supported. Otherwise, we can check only
- ASCII characters. */
- if (caseless)
-   {
- #ifdef SUPPORT_UTF
- #ifdef SUPPORT_UCP
-   if (md->utf)
-     {
-     /* Match characters up to the end of the reference. NOTE: the number of
-     bytes matched may differ, because there are some characters whose upper and
-     lower case versions code as different numbers of bytes. For example, U+023A
-     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
-     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
-     the latter. It is important, therefore, to check the length along the
-     reference, not along the subject (earlier code did this wrong). */
-     PCRE_PUCHAR endptr = p + length;
-     while (p < endptr)
-       {
-       int c, d;
-       if (eptr >= md->end_subject) return -1;
-       GETCHARINC(c, eptr);
-       GETCHARINC(d, p);
-       if (c != d && c != UCD_OTHERCASE(d)) return -1;
-       }
-     }
-   else
- #endif
- #endif
-   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
-   is no UCP support. */
-     {
-     /* Fail if fewer than "length" code units remain in the subject. */
-     if (length > md->end_subject - eptr) return -1;
-     while (length-- > 0)
-       {
-       if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
-       p++;
-       eptr++;
-       }
-     }
-   }
- /* In the caseful case, we can just compare the bytes, whether or not we
- are in UTF-8 mode. */
- else
-   {
-   /* Fail if fewer than "length" code units remain in the subject. */
-   if (length > md->end_subject - eptr) return -1;
-   while (length-- > 0) if (*p++ != *eptr++) return -1;
-   }
- return (int)(eptr - eptr_start);
- }
- /***************************************************************************
- ****************************************************************************
- RECURSION IN THE match() FUNCTION
- The match() function is highly recursive, though not every recursive call
- increases the recursive depth. Nevertheless, some regular expressions can cause
- it to recurse to a great depth. I was writing for Unix, so I just let it call
- itself recursively. This uses the stack for saving everything that has to be
- saved for a recursive call. On Unix, the stack can be large, and this works
- fine.
- It turns out that on some non-Unix-like systems there are problems with
- programs that use a lot of stack. (This despite the fact that every last chip
- has oodles of memory these days, and techniques for extending the stack have
- been known for decades.) So....
- There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
- calls by keeping local variables that need to be preserved in blocks of memory
- obtained from malloc() instead of on the stack. Macros are used to
- achieve this so that the actual code doesn't look very different to what it
- always used to.
- The original heap-recursive code used longjmp(). However, it seems that this
- can be very slow on some operating systems. Following a suggestion from Stan
- Switzer, the use of longjmp() has been abolished, at the cost of having to
- provide a unique number for each call to RMATCH. There is no way of generating
- a sequence of numbers at compile time in C. I have given them names, to make
- them stand out more clearly.
- Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
- FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
- tests. Furthermore, not using longjmp() means that local dynamic variables
- don't have indeterminate values; this has meant that the frame size can be
- reduced because the result can be "passed back" by straight setting of the
- variable instead of being passed in the frame.
- ****************************************************************************
- ***************************************************************************/
- /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
- below must be updated in sync. The values start at 1 and run consecutively up
- to RM66. One of these names is passed as the last ("rw") argument of each
- RMATCH call: the NO_RECURSE version of that macro stores it in frame->Xwhere
- and pastes it into a label name (L_RM1, L_RM2, ...), which is why each call
- must use a different number. The version that uses true recursion ignores
- it. */
- enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
- RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
- RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
- RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
- RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
- RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66 };
- /* These versions of the macros use the stack, as normal. There are debugging
- versions and production versions. Note that the "rw" argument of RMATCH isn't
- actually used in this definition. RMATCH leaves the result of the recursive
- call in the variable rrc, and it passes mstart and rdepth+1 to match() without
- their being macro arguments, so rrc, mstart and rdepth must all be in scope
- wherever it is used. In the debugging versions, the message printed by RRETURN
- has no newline; when the return is to an RMATCH call, the line is completed by
- the "to line" message that RMATCH prints after match() comes back. */
- #ifndef NO_RECURSE
- #define REGISTER register
- #ifdef PCRE_DEBUG
- #define RMATCH(ra,rb,rc,rd,re,rw) \
- { \
- printf("match() called in line %d\n", __LINE__); \
- rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
- printf("to line %d\n", __LINE__); \
- }
- #define RRETURN(ra) \
- { \
- printf("match() returned %d from line %d ", ra, __LINE__); \
- return ra; \
- }
- #else
- #define RMATCH(ra,rb,rc,rd,re,rw) \
- rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
- #define RRETURN(ra) return ra
- #endif
- #else
- /* These versions of the macros manage a private stack on the heap. Note that
- the "rd" argument of RMATCH isn't actually used in this definition. It's the md
- argument of match(), which never changes.
- RMATCH obtains a new heapframe from PUBL(stack_malloc), giving up with
- PCRE_ERROR_NOMEMORY (through RRETURN) if that fails. It then records rw in the
- current frame's Xwhere, fills in the argument fields of the new frame, links
- the new frame back to the current one through Xprevframe, makes it the current
- frame, and jumps to HEAP_RECURSE. The label L_rw that follows is the point at
- which this "call" resumes. NOTE(review): the code at HEAP_RETURN that
- dispatches on Xwhere is outside this excerpt.
- RRETURN steps back to the previous frame and releases the old one with
- PUBL(stack_free), unless the old one is frame_zero, which is a local variable
- of match() and not heap memory. If there is a previous frame, the result is
- put in rrc and control jumps to HEAP_RETURN; otherwise this is the top level
- and the macro really does return from match(). */
- #define REGISTER
- #define RMATCH(ra,rb,rc,rd,re,rw)\
- {\
- heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
- if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
- frame->Xwhere = rw; \
- newframe->Xeptr = ra;\
- newframe->Xecode = rb;\
- newframe->Xmstart = mstart;\
- newframe->Xoffset_top = rc;\
- newframe->Xeptrb = re;\
- newframe->Xrdepth = frame->Xrdepth + 1;\
- newframe->Xprevframe = frame;\
- frame = newframe;\
- DPRINTF(("restarting from line %d\n", __LINE__));\
- goto HEAP_RECURSE;\
- L_##rw:\
- DPRINTF(("jumped back to line %d\n", __LINE__));\
- }
- #define RRETURN(ra)\
- {\
- heapframe *oldframe = frame;\
- frame = oldframe->Xprevframe;\
- if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
- if (frame != NULL)\
- {\
- rrc = ra;\
- goto HEAP_RETURN;\
- }\
- return ra;\
- }
- /* Structure for remembering the local variables in a private frame. It is
- used only when NO_RECURSE is defined. Every field is named "X" followed by the
- name by which match() refers to the value: inside match() a set of #defines
- maps each plain name (eptr, ecode, length, ...) onto frame->Xname, so the body
- of the function reads the same in both builds. Xprevframe points to the frame
- of the "caller" and is NULL in the top-level frame; Xwhere holds the RMn
- number that RMATCH stored before switching to a new frame. */
- typedef struct heapframe {
- struct heapframe *Xprevframe;
- /* Function arguments that may change */
- PCRE_PUCHAR Xeptr;
- const pcre_uchar *Xecode;
- PCRE_PUCHAR Xmstart;
- int Xoffset_top;
- eptrblock *Xeptrb;
- unsigned int Xrdepth;
- /* Function local variables */
- PCRE_PUCHAR Xcallpat;
- #ifdef SUPPORT_UTF
- PCRE_PUCHAR Xcharptr;
- #endif
- PCRE_PUCHAR Xdata;
- PCRE_PUCHAR Xnext;
- PCRE_PUCHAR Xpp;
- PCRE_PUCHAR Xprev;
- PCRE_PUCHAR Xsaved_eptr;
- recursion_info Xnew_recursive;
- BOOL Xcur_is_word;
- BOOL Xcondition;
- BOOL Xprev_is_word;
- #ifdef SUPPORT_UCP
- int Xprop_type;
- int Xprop_value;
- int Xprop_fail_result;
- int Xoclength;
- pcre_uchar Xocchars[6];
- #endif
- int Xcodelink;
- int Xctype;
- unsigned int Xfc;
- int Xfi;
- int Xlength;
- int Xmax;
- int Xmin;
- int Xnumber;
- int Xoffset;
- int Xop;
- int Xsave_capture_last;
- int Xsave_offset1, Xsave_offset2, Xsave_offset3;
- int Xstacksave[REC_STACK_SAVE_MAX];
- eptrblock Xnewptrb;
- /* Where to jump back to */
- int Xwhere;
- } heapframe;
- #endif
- /***************************************************************************
- ***************************************************************************/
- /*************************************************
- * Match from current position *
- *************************************************/
- /* This function is called recursively in many circumstances. Whenever it
- returns a negative (error) response, the outer incarnation must also return the
- same response. */
- /* These macros pack up tests that are used for partial matching, and which
- appear several times in the code. We set the "hit end" flag if the pointer is
- at the end of the subject and also past the start of the subject (i.e.
- something has been matched). For hard partial matching, we then return
- immediately. The second one is used when we already know we are past the end of
- the subject. In both macros, any non-zero md->partial enables the test, and a
- value greater than 1 is the "hard" case that returns PCRE_ERROR_PARTIAL
- through RRETURN. Each macro expands to an unbraced "if" statement with no
- "else", and uses md and eptr from the enclosing scope, so take care if one is
- ever used as the body of another if/else. */
- #define CHECK_PARTIAL()\
- if (md->partial != 0 && eptr >= md->end_subject && \
- eptr > md->start_used_ptr) \
- { \
- md->hitend = TRUE; \
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
- }
- #define SCHECK_PARTIAL()\
- if (md->partial != 0 && eptr > md->start_used_ptr) \
- { \
- md->hitend = TRUE; \
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
- }
- /* Performance note: It might be tempting to extract commonly used fields from
- the md structure (e.g. utf, end_subject) into individual variables to improve
- performance. Tests using gcc on a SPARC disproved this; in the first case, it
- made performance worse.
- Arguments:
- eptr pointer to current character in subject
- ecode pointer to current position in compiled code
- mstart pointer to the current match start position (can be modified
- by encountering \K)
- offset_top current top pointer
- md pointer to "static" info for the match
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- rdepth the recursion depth
- Returns: MATCH_MATCH if matched ) these values are >= 0
- MATCH_NOMATCH if failed to match )
- a negative MATCH_xxx value for PRUNE, SKIP, etc
- a negative PCRE_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by repeated call or recursion limit)
- */
- static int
- match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
- PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
- unsigned int rdepth)
- {
- /* These variables do not need to be preserved over recursion in this function,
- so they can be ordinary variables in all cases. Mark some of them with
- "register" because they are used a lot in loops. */
- register int rrc; /* Returns from recursive calls */
- register int i; /* Used for loops not involving calls to RMATCH() */
- register unsigned int c; /* Character values not kept over RMATCH() calls */
- register BOOL utf; /* Local copy of UTF flag for speed */
- BOOL minimize, possessive; /* Quantifier options */
- BOOL caseless;
- int condcode;
- /* When recursion is not being used, all "local" variables that have to be
- preserved over calls to RMATCH() are part of a "frame". We set up the top-level
- frame on the stack here; subsequent instantiations are obtained from the heap
- whenever RMATCH() does a "recursion". See the macro definitions above. Putting
- the top-level on the stack rather than malloc-ing them all gives a performance
- boost in many cases where there is not much "recursion". */
- #ifdef NO_RECURSE
- heapframe frame_zero;
- heapframe *frame = &frame_zero;
- frame->Xprevframe = NULL; /* Marks the top level */
- /* Copy in the original argument variables */
- frame->Xeptr = eptr;
- frame->Xecode = ecode;
- frame->Xmstart = mstart;
- frame->Xoffset_top = offset_top;
- frame->Xeptrb = eptrb;
- frame->Xrdepth = rdepth;
- /* This is where control jumps back to to effect "recursion" */
- HEAP_RECURSE:
- /* Macros make the argument variables come from the current frame */
- #define eptr frame->Xeptr
- #define ecode frame->Xecode
- #define mstart frame->Xmstart
- #define offset_top frame->Xoffset_top
- #define eptrb frame->Xeptrb
- #define rdepth frame->Xrdepth
- /* Ditto for the local variables */
- #ifdef SUPPORT_UTF
- #define charptr frame->Xcharptr
- #endif
- #define callpat frame->Xcallpat
- #define codelink frame->Xcodelink
- #define data frame->Xdata
- #define next frame->Xnext
- #define pp frame->Xpp
- #define prev frame->Xprev
- #define saved_eptr frame->Xsaved_eptr
- #define new_recursive frame->Xnew_recursive
- #define cur_is_word frame->Xcur_is_word
- #define condition frame->Xcondition
- #define prev_is_word frame->Xprev_is_word
- #ifdef SUPPORT_UCP
- #define prop_type frame->Xprop_type
- #define prop_value frame->Xprop_value
- #define prop_fail_result frame->Xprop_fail_result
- #define oclength frame->Xoclength
- #define occhars frame->Xocchars
- #endif
- #define ctype frame->Xctype
- #define fc frame->Xfc
- #define fi frame->Xfi
- #define length frame->Xlength
- #define max frame->Xmax
- #define min frame->Xmin
- #define number frame->Xnumber
- #define offset frame->Xoffset
- #define op frame->Xop
- #define save_capture_last frame->Xsave_capture_last
- #define save_offset1 frame->Xsave_offset1
- #define save_offset2 frame->Xsave_offset2
- #define save_offset3 frame->Xsave_offset3
- #define stacksave frame->Xstacksave
- #define newptrb frame->Xnewptrb
- /* When recursion is being used, local variables are allocated on the stack and
- get preserved during recursion in the normal way. In this environment, fi and
- i, and fc and c, can be the same variables. */
- #else /* NO_RECURSE not defined */
- #define fi i
- #define fc c
- /* Many of the following variables are used only in small blocks of the code.
- My normal style of coding would have declared them within each of those blocks.
- However, in order to accommodate the version of this code that uses an external
- "stack" implemented on the heap, it is easier to declare them all here, so the
- declarations can be cut out in a block. The only declarations within blocks
- below are for variables that do not have to be preserved over a recursive call
- to RMATCH(). */
- #ifdef SUPPORT_UTF
- const pcre_uchar *charptr;
- #endif
- const pcre_uchar *callpat;
- const pcre_uchar *data;
- const pcre_uchar *next;
- PCRE_PUCHAR pp;
- const pcre_uchar *prev;
- PCRE_PUCHAR saved_eptr;
- recursion_info new_recursive;
- BOOL cur_is_word;
- BOOL condition;
- BOOL prev_is_word;
- #ifdef SUPPORT_UCP
- int prop_type;
- int prop_value;
- int prop_fail_result;
- int oclength;
- pcre_uchar occhars[6];
- #endif
- int codelink;
- int ctype;
- int length;
- int max;
- int min;
- int number;
- int offset;
- int op;
- int save_capture_last;
- int save_offset1, save_offset2, save_offset3;
- int stacksave[REC_STACK_SAVE_MAX];
- eptrblock newptrb;
- /* There is a special fudge for calling match() in a way that causes it to
- measure the size of its basic stack frame when the stack is being used for
- recursion. The second argument (ecode) being NULL triggers this behaviour. It
- cannot normally ever be NULL. The return is the negated value of the frame
- size. */
- if (ecode == NULL)
- {
- if (rdepth == 0)
- return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
- else
- {
- int len = (char *)&rdepth - (char *)eptr;
- return (len > 0)? -len : len;
- }
- }
- #endif /* NO_RECURSE */
- /* To save space on the stack and in the heap frame, I have doubled up on some
- of the local variables that are used only in localised parts of the code, but
- still need to be preserved over recursive calls of match(). These macros define
- the alternative names that are used. */
- #define allow_zero cur_is_word
- #define cbegroup condition
- #define code_offset codelink
- #define condassert condition
- #define matched_once prev_is_word
- #define foc number
- #define save_mark data
- /* These statements are here to stop the compiler complaining about uninitialized
- variables. */
- #ifdef SUPPORT_UCP
- prop_value = 0;
- prop_fail_result = 0;
- #endif
- /* This label is used for tail recursion, which is used in a few cases even
- when NO_RECURSE is not defined, in order to reduce the amount of stack that is
- used. Thanks to Ian Taylor for noticing this possibility and sending the
- original patch. */
- TAIL_RECURSE:
- /* OK, now we can get on with the real code of the function. Recursive calls
- are specified by the macro RMATCH and RRETURN is used to return. When
- NO_RECURSE is *not* defined, these just turn into a recursive call to match()
- and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
- defined). However, RMATCH isn't like a function call because it's quite a
- complicated macro. It has to be used in one particular way. This shouldn't,
- however, impact performance when true recursion is being used. */
- #ifdef SUPPORT_UTF
- utf = md->utf; /* Local copy of the flag */
- #else
- utf = FALSE;
- #endif
- /* First check that we haven't called match() too many times, or that we
- haven't exceeded the recursive call limit. */
- if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
- if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
- /* At the start of a group with an unlimited repeat that may match an empty
- string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
- done this way to save having to use another function argument, which would take
- up space on the stack. See also MATCH_CONDASSERT below.
- When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
- such remembered pointers, to be checked when we hit the closing ket, in order
- to break infinite loops that match no characters. When match() is called in
- other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
- NOT be used with tail recursion, because the memory block that is used is on
- the stack, so a new one may be required for each match(). */
- if (md->match_function_type == MATCH_CBEGROUP)
- {
- newptrb.epb_saved_eptr = eptr;
- newptrb.epb_prev = eptrb;
- eptrb = &newptrb;
- md->match_function_type = 0;
- }
- /* Now start processing the opcodes. */
- for (;;)
- {
- minimize = possessive = FALSE;
- op = *ecode;
- switch(op)
- {
- case OP_MARK:
- md->nomatch_mark = ecode + 2;
- md->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
- eptrb, RM55);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- md->mark == NULL) md->mark = ecode + 2;
- /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
- argument, and we must check whether that argument matches this MARK's
- argument. It is passed back in md->start_match_ptr (an overloading of that
- variable). If it does match, we reset that variable to the current subject
- position and return MATCH_SKIP. Otherwise, pass back the return code
- unaltered. */
- else if (rrc == MATCH_SKIP_ARG &&
- STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
- {
- md->start_match_ptr = eptr;
- RRETURN(MATCH_SKIP);
- }
- RRETURN(rrc);
- case OP_FAIL:
- RRETURN(MATCH_NOMATCH);
- /* COMMIT overrides PRUNE, SKIP, and THEN */
- case OP_COMMIT:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM52);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
- rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
- rrc != MATCH_THEN)
- RRETURN(rrc);
- RRETURN(MATCH_COMMIT);
- /* PRUNE overrides THEN */
- case OP_PRUNE:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM51);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
- case OP_PRUNE_ARG:
- md->nomatch_mark = ecode + 2;
- md->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
- eptrb, RM56);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
- /* SKIP overrides PRUNE and THEN */
- case OP_SKIP:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM53);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
- md->start_match_ptr = eptr; /* Pass back current position */
- RRETURN(MATCH_SKIP);
- /* Note that, for Perl compatibility, SKIP with an argument does NOT set
- nomatch_mark. There is a flag that disables this opcode when re-matching a
- pattern that ended with a SKIP for which there was not a matching MARK. */
- case OP_SKIP_ARG:
- if (md->ignore_skip_arg)
- {
- ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
- break;
- }
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
- eptrb, RM57);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
- /* Pass back the current skip name by overloading md->start_match_ptr and
- returning the special MATCH_SKIP_ARG return code. This will either be
- caught by a matching MARK, or get to the top, where it causes a rematch
- with the md->ignore_skip_arg flag set. */
- md->start_match_ptr = ecode + 2;
- RRETURN(MATCH_SKIP_ARG);
- /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
- the branch in which it occurs can be determined. Overload the start of
- match pointer to do this. */
- case OP_THEN:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM54);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = ecode;
- RRETURN(MATCH_THEN);
- case OP_THEN_ARG:
- md->nomatch_mark = ecode + 2;
- md->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
- md, eptrb, RM58);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = ecode;
- RRETURN(MATCH_THEN);
- /* Handle an atomic group that does not contain any capturing parentheses.
- This can be handled like an assertion. Prior to 8.13, all atomic groups
- were handled this way. In 8.13, the code was changed as below for ONCE, so
- that backups pass through the group and thereby reset captured values.
- However, this uses a lot more stack, so in 8.20, atomic groups that do not
- contain any captures generate OP_ONCE_NC, which can be handled in the old,
- less stack intensive way.
- Check the alternative branches in turn - the matching won't pass the KET
- for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer, but resetting
- the start-of-match value in case it was changed by \K. */
- case OP_ONCE_NC:
- prev = ecode;
- saved_eptr = eptr;
- save_mark = md->mark;
- do
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
- if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
- {
- mstart = md->start_match_ptr;
- break;
- }
- if (rrc == MATCH_THEN)
- {
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode,1);
- md->mark = save_mark;
- }
- while (*ecode == OP_ALT);
- /* If hit the end of the group (which could be repeated), fail */
- if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
- /* Continue as from after the group, updating the offsets high water
- mark, since extracts may have been taken. */
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. */
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1+LINK_SIZE;
- break;
- }
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. The second "call" of match()
- uses tail recursion, to avoid using another stack frame. */
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode = prev;
- goto TAIL_RECURSE;
- }
- else /* OP_KETRMAX */
- {
- md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += 1 + LINK_SIZE;
- goto TAIL_RECURSE;
- }
- /* Control never gets here */
- /* Handle a capturing bracket, other than those that are possessive with an
- unlimited repeat. If there is space in the offset vector, save the current
- subject position in the working slot at the top of the vector. We mustn't
- change the current values of the data slot, because they may be set from a
- previous iteration of this group, and be referred to by a reference inside
- the group. A failure to match might occur after the group has succeeded,
- if something later on doesn't match. For this reason, we need to restore
- the working value and also the values of the final offsets, in case they
- were set by a previous iteration of the same bracket.
- If there isn't enough space in the offset vector, treat this as if it were
- a non-capturing bracket. Don't worry about setting the flag for the error
- case here; that is handled in the code for KET. */
- case OP_CBRA:
- case OP_SCBRA:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
- #ifdef PCRE_DEBUG
- printf("start bracket %d\n", number);
- printf("subject=");
- pchars(eptr, 16, TRUE, md);
- printf("\n");
- #endif
- if (offset < md->offset_max)
- {
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
- save_mark = md->mark;
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
- md->offset_vector[md->offset_end - number] =
- (int)(eptr - md->start_subject);
- for (;;)
- {
- if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM1);
- if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
- /* If we backed up to a THEN, check whether it is within the current
- branch by comparing the address of the THEN that is passed back with
- the end of the branch. If it is within the current branch, and the
- branch is one of two or more alternatives (it either starts or ends
- with OP_ALT), we have reached the limit of THEN's action, so convert
- the return code to NOMATCH, which will cause normal backtracking to
- happen from now on. Otherwise, THEN is passed back to an outer
- alternative. This implements Perl's treatment of parenthesized groups,
- where a group not containing | does not affect the current alternative,
- that is, (X) is NOT the same as (X|(*F)). */
- if (rrc == MATCH_THEN)
- {
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- /* Anything other than NOMATCH is passed back. */
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- md->mark = save_mark;
- if (*ecode != OP_ALT) break;
- }
- DPRINTF(("bracket %d failed\n", number));
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
- /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
- RRETURN(rrc);
- }
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- DPRINTF(("insufficient capture room: treat as non-capturing\n"));
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* Non-capturing or atomic group, except for possessive with unlimited
- repeat and ONCE group with no captures. Loop for all the alternatives.
- When we get to the final alternative within the brackets, we used to return
- the result of a recursive call to match() whatever happened so it was
- possible to reduce stack usage by turning this into a tail recursion,
- except in the case of a possibly empty group. However, now that there is
- the possibility of (*THEN) occurring in the final alternative, this
- optimization is no longer always possible.
- We can optimize if we know there are no (*THEN)s in the pattern; at present
- this is the best that can be done.
- MATCH_ONCE is returned when the end of an atomic group is successfully
- reached, but subsequent matching fails. It passes back up the tree (causing
- captured values to be reset) until the original atomic group level is
- reached. This is tested by comparing md->once_target with the start of the
- group. At this point, the return is converted into MATCH_NOMATCH so that
- previous backup points can be taken. */
- case OP_ONCE:
- case OP_BRA:
- case OP_SBRA:
- DPRINTF(("start non-capturing bracket\n"));
- for (;;)
- {
- if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
- /* If this is not a possibly empty group, and there are no (*THEN)s in
- the pattern, and this is the final alternative, optimize as described
- above. */
- else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
- {
- ecode += PRIV(OP_lengths)[*ecode];
- goto TAIL_RECURSE;
- }
- /* In all other cases, we have to make another call to match(). */
- save_mark = md->mark;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
- RM2);
- /* See comment in the code for capturing groups above about handling
- THEN. */
- if (rrc == MATCH_THEN)
- {
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- if (rrc != MATCH_NOMATCH)
- {
- if (rrc == MATCH_ONCE)
- {
- const pcre_uchar *scode = ecode;
- if (*scode != OP_ONCE) /* If not at start, find it */
- {
- while (*scode == OP_ALT) scode += GET(scode, 1);
- scode -= GET(scode, 1);
- }
- if (md->once_target == scode) rrc = MATCH_NOMATCH;
- }
- RRETURN(rrc);
- }
- ecode += GET(ecode, 1);
- md->mark = save_mark;
- if (*ecode != OP_ALT) break;
- }
- RRETURN(MATCH_NOMATCH);
- /* Handle possessive capturing brackets with an unlimited repeat. We come
- here from BRAZERO with allow_zero set TRUE. The offset_vector values are
- handled similarly to the normal case above. However, the matching is
- different. The end of these brackets will always be OP_KETRPOS, which
- returns MATCH_KETRPOS without going further in the pattern. By this means
- we can handle the group by iteration rather than recursion, thereby
- reducing the amount of stack needed. */
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- allow_zero = FALSE;
- POSSESSIVE_CAPTURE:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
- #ifdef PCRE_DEBUG
- printf("start possessive bracket %d\n", number);
- printf("subject=");
- pchars(eptr, 16, TRUE, md);
- printf("\n");
- #endif
- if (offset < md->offset_max)
- {
- matched_once = FALSE;
- code_offset = (int)(ecode - md->start_code);
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
- /* Each time round the loop, save the current subject position for use
- when the group matches. For MATCH_MATCH, the group has matched, so we
- restart it with a new subject starting position, remembering that we had
- at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
- usual. If we haven't matched any alternatives in any iteration, check to
- see if a previous iteration matched. If so, the group has matched;
- continue from afterwards. Otherwise it has failed; restore the previous
- capture values before returning NOMATCH. */
- for (;;)
- {
- md->offset_vector[md->offset_end - number] =
- (int)(eptr - md->start_subject);
- if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM63);
- if (rrc == MATCH_KETRPOS)
- {
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- ecode = md->start_code + code_offset;
- save_capture_last = md->capture_last;
- matched_once = TRUE;
- continue;
- }
- /* See comment in the code for capturing groups above about handling
- THEN. */
- if (rrc == MATCH_THEN)
- {
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- if (*ecode != OP_ALT) break;
- }
- if (!matched_once)
- {
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
- }
- if (allow_zero || matched_once)
- {
- ecode += 1 + LINK_SIZE;
- break;
- }
- RRETURN(MATCH_NOMATCH);
- }
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- DPRINTF(("insufficient capture room: treat as non-capturing\n"));
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* Non-capturing possessive bracket with unlimited repeat. We come here
- from BRAZERO with allow_zero = TRUE. The code is similar to the above,
- without the capturing complication. It is written out separately for speed
- and cleanliness. */
- case OP_BRAPOS:
- case OP_SBRAPOS:
- allow_zero = FALSE;
- POSSESSIVE_NON_CAPTURE:
- matched_once = FALSE;
- code_offset = (int)(ecode - md->start_code);
- for (;;)
- {
- if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM48);
- if (rrc == MATCH_KETRPOS)
- {
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- ecode = md->start_code + code_offset;
- matched_once = TRUE;
- continue;
- }
- /* See comment in the code for capturing groups above about handling
- THEN. */
- if (rrc == MATCH_THEN)
- {
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- if (*ecode != OP_ALT) break;
- }
- if (matched_once || allow_zero)
- {
- ecode += 1 + LINK_SIZE;
- break;
- }
- RRETURN(MATCH_NOMATCH);
- /* Control never reaches here. */
- /* Conditional group: compilation checked that there are no more than
- two branches. If the condition is false, skipping the first branch takes us
- past the end if there is only one branch, but that's OK because that is
- exactly what going to the ket would do. */
- case OP_COND:
- case OP_SCOND:
- codelink = GET(ecode, 1);
- /* Because of the way auto-callout works during compile, a callout item is
- inserted between OP_COND and an assertion condition. */
- if (ecode[LINK_SIZE+1] == OP_CALLOUT)
- {
- if (PUBL(callout) != NULL)
- {
- PUBL(callout_block) cb;
- cb.version = 2; /* Version 1 of the callout block */
- cb.callout_number = ecode[LINK_SIZE+2];
- cb.offset_vector = md->offset_vector;
- #ifdef COMPILE_PCRE8
- cb.subject = (PCRE_SPTR)md->start_subject;
- #else
- cb.subject = (PCRE_SPTR16)md->start_subject;
- #endif
- cb.subject_length = (int)(md->end_subject - md->start_subject);
- cb.start_match = (int)(mstart - md->start_subject);
- cb.current_position = (int)(eptr - md->start_subject);
- cb.pattern_position = GET(ecode, LINK_SIZE + 3);
- cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
- cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
- cb.callout_data = md->callout_data;
- cb.mark = md->nomatch_mark;
- if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
- ecode += PRIV(OP_lengths)[OP_CALLOUT];
- }
- condcode = ecode[LINK_SIZE+1];
- /* Now see what the actual condition is */
- if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
- {
- if (md->recursive == NULL) /* Not recursing => FALSE */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- else
- {
- int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
- condition = (recno == RREF_ANY || recno == md->recursive->group_num);
- /* If the test is for recursion into a specific subpattern, and it is
- false, but the test was set up by name, scan the table to see if the
- name refers to any other numbers, and test them. The condition is true
- if any one is set. */
- if (!condition && condcode == OP_NRREF)
- {
- pcre_uchar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == recno) break;
- slotA += md->name_entry_size;
- }
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- /* Scan up for duplicates */
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
- /* Choose branch according to the condition */
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
- }
- }
- else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
- {
- offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
- condition = offset < offset_top && md->offset_vector[offset] >= 0;
- /* If the numbered capture is unset, but the reference was by name,
- scan the table to see if the name refers to any other numbers, and test
- them. The condition is true if any one is set. This is tediously similar
- to the code above, but not close enough to try to amalgamate. */
- if (!condition && condcode == OP_NCREF)
- {
- int refno = offset >> 1;
- pcre_uchar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == refno) break;
- slotA += md->name_entry_size;
- }
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- /* Scan up for duplicates */
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
- /* Choose branch according to the condition */
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
- }
- else if (condcode == OP_DEF) /* DEFINE - always false */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- /* The condition is an assertion. Call match() to evaluate it - setting
- md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
- an assertion. */
- else
- {
- md->match_function_type = MATCH_CONDASSERT;
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
- if (rrc == MATCH_MATCH)
- {
- if (md->end_offset_top > offset_top)
- offset_top = md->end_offset_top; /* Captures may have happened */
- condition = TRUE;
- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
- while (*ecode == OP_ALT) ecode += GET(ecode, 1);
- }
- /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
- assertion; it is therefore treated as NOMATCH. */
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
- {
- RRETURN(rrc); /* Need braces because of following else */
- }
- else
- {
- condition = FALSE;
- ecode += codelink;
- }
- }
- /* We are now at the branch that is to be obeyed. As there is only one, can
- use tail recursion to avoid using another stack frame, except when there is
- unlimited repeat of a possibly empty group. In the latter case, a recursive
- call to match() is always required, unless the second alternative doesn't
- exist, in which case we can just plough on. Note that…
Large files files are truncated, but you can click here to view the full file