/lib/regex.c
C | 6328 lines | 3947 code | 971 blank | 1410 comment | 985 complexity | 3689ba3b841d387b5ac2efcd77106f5c MD5 | raw file
Large files files are truncated, but you can click here to view the full file
- /*
- * regex.c -- POSIX-conformant regular expression function set for the lsof
- * library
- *
- * This file is used when the UNIX dialect does not have a POSIX-conformant
- * regular expression function set. In that case USE_LIB_REGEX is defined.
- *
- * V. Abell <abe@purdue.edu>
- * Purdue University Computing Center
- */
- /*
- * Copyright 2000 Purdue Research Foundation, West Lafayette, Indiana
- * 47907. All rights reserved.
- *
- * Written by Victor A. Abell
- *
- * This software is not subject to any license of the American Telephone
- * and Telegraph Company or the Regents of the University of California.
- *
- * This software has been adapted from snprintf.c in sendmail 8.9.3. It
- * is subject to the sendmail copyright statements listed below, and the
- * sendmail licensing terms stated in the sendmail LICENSE file comment
- * section of this file.
- *
- * Permission is granted to anyone to use this software for any purpose on
- * any computer system, and to alter it and redistribute it freely, subject
- * to the following restrictions:
- *
- * 1. Neither the authors nor Purdue University are responsible for any
- * consequences of the use of this software.
- *
- * 2. The origin of this software must not be misrepresented, either by
- * explicit claim or by omission. Credit to the authors and Purdue
- * University must appear in documentation and sources.
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- *
- * 4. This notice may not be removed or altered.
- */
- #include "../machine.h"
- #ifdef USE_LIB_REGEX
- /*
- * This file comes from GLIBC 2.2. It is used when the UNIX dialect does not
- * have a POSIX-conformant regular expression function set. In that case
- * USE_LIB_REGEX is defined.
- */
- /* Extended regular expression matching and search library,
- version 0.12.
- (Implements POSIX draft P1003.2/D11.2, except for some of the
- internationalization features.)
- Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
- /* AIX requires this to be the first thing in the file. */
- #if defined _AIX && !defined REGEX_MALLOC
- #pragma alloca
- #endif
- #undef _GNU_SOURCE
- #define _GNU_SOURCE
- #ifdef HAVE_CONFIG_H
- # include <config.h>
- #endif
- #ifndef PARAMS
- # if defined __GNUC__ || (defined __STDC__ && __STDC__)
- # define PARAMS(args) args
- # else
- # define PARAMS(args) ()
- # endif /* GCC. */
- #endif /* Not PARAMS. */
- #if defined STDC_HEADERS && !defined emacs
- # include <stddef.h>
- #else
- /* We need this for `regex.h', and perhaps for the Emacs include files. */
- # include <sys/types.h>
- #endif
- #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
- /* For platform which support the ISO C amendement 1 functionality we
- support user defined character classes. */
- #if defined _LIBC || WIDE_CHAR_SUPPORT
- /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
- # include <wchar.h>
- # include <wctype.h>
- #endif
- #ifdef _LIBC
- /* We have to keep the namespace clean. */
- # define regfree(preg) __regfree (preg)
- # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
- # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
- # define regerror(errcode, preg, errbuf, errbuf_size) \
- __regerror(errcode, preg, errbuf, errbuf_size)
- # define re_set_registers(bu, re, nu, st, en) \
- __re_set_registers (bu, re, nu, st, en)
- # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
- __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
- # define re_match(bufp, string, size, pos, regs) \
- __re_match (bufp, string, size, pos, regs)
- # define re_search(bufp, string, size, startpos, range, regs) \
- __re_search (bufp, string, size, startpos, range, regs)
- # define re_compile_pattern(pattern, length, bufp) \
- __re_compile_pattern (pattern, length, bufp)
- # define re_set_syntax(syntax) __re_set_syntax (syntax)
- # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
- __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
- # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
- # define btowc __btowc
- /* We are also using some library internals. */
- # include <locale/localeinfo.h>
- # include <locale/elem-hash.h>
- # include <langinfo.h>
- #endif
- /* This is for other GNU distributions with internationalized messages. */
- #if HAVE_LIBINTL_H || defined _LIBC
- # include <libintl.h>
- # ifdef _LIBC
- # undef gettext
- # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
- # endif
- #else
- # define gettext(msgid) (msgid)
- #endif
- #ifndef gettext_noop
- /* This define is so xgettext can find the internationalizable
- strings. */
- # define gettext_noop(String) String
- #endif
- /* The `emacs' switch turns on certain matching commands
- that make sense only in Emacs. */
- #ifdef emacs
- # include "lisp.h"
- # include "buffer.h"
- # include "syntax.h"
- #else /* not emacs */
- /* If we are not linking with Emacs proper,
- we can't use the relocating allocator
- even if config.h says that we can. */
- # undef REL_ALLOC
- # if defined STDC_HEADERS || defined _LIBC
- # include <stdlib.h>
- # else
- char *malloc ();
- char *realloc ();
- # endif
- /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
- If nothing else has been done, use the method below. */
- # ifdef INHIBIT_STRING_HEADER
- # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
- # if !defined bzero && !defined bcopy
- # undef INHIBIT_STRING_HEADER
- # endif
- # endif
- # endif
- /* This is the normal way of making sure we have a bcopy and a bzero.
- This is used in most programs--a few other programs avoid this
- by defining INHIBIT_STRING_HEADER. */
- # ifndef INHIBIT_STRING_HEADER
- # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
- # include <string.h>
- # ifndef bzero
- # ifndef _LIBC
- # define bzero(s, n) (memset (s, '\0', n), (s))
- # else
- # define bzero(s, n) __bzero (s, n)
- # endif
- # endif
- # else
- # include <strings.h>
- # ifndef memcmp
- # define memcmp(s1, s2, n) bcmp (s1, s2, n)
- # endif
- # ifndef memcpy
- # define memcpy(d, s, n) (bcopy (s, d, n), (d))
- # endif
- # endif
- # endif
- /* Define the syntax stuff for \<, \>, etc. */
- /* This must be nonzero for the wordchar and notwordchar pattern
- commands in re_match_2. */
- # ifndef Sword
- # define Sword 1
- # endif
- # ifdef SWITCH_ENUM_BUG
- # define SWITCH_ENUM_CAST(x) ((int)(x))
- # else
- # define SWITCH_ENUM_CAST(x) (x)
- # endif
- #endif /* not emacs */
- #if defined _LIBC || HAVE_LIMITS_H
- # include <limits.h>
- #endif
- #ifndef MB_LEN_MAX
- # define MB_LEN_MAX 1
- #endif
- /* Get the interface, including the syntax bits. */
- /* Disabled by V. Abell on January 29, 2001: #include <regex.h> */
- #include "../regex.h"
- /* isalpha etc. are used for the character classes. */
- #include <ctype.h>
- /* Jim Meyering writes:
- "... Some ctype macros are valid only for character codes that
- isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
- using /bin/cc or gcc but without giving an ansi option). So, all
- ctype uses should be through macros like ISPRINT... If
- STDC_HEADERS is defined, then autoconf has verified that the ctype
- macros don't need to be guarded with references to isascii. ...
- Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding."
- Solaris defines some of these symbols so we must undefine them first. */
- #undef ISASCII
- #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
- # define ISASCII(c) 1
- #else
- # define ISASCII(c) isascii(c)
- #endif
- #ifdef isblank
- # define ISBLANK(c) (ISASCII (c) && isblank (c))
- #else
- # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
- #endif
- #ifdef isgraph
- # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
- #else
- # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
- #endif
- #undef ISPRINT
- #define ISPRINT(c) (ISASCII (c) && isprint (c))
- #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
- #define ISALNUM(c) (ISASCII (c) && isalnum (c))
- #define ISALPHA(c) (ISASCII (c) && isalpha (c))
- #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
- #define ISLOWER(c) (ISASCII (c) && islower (c))
- #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
- #define ISSPACE(c) (ISASCII (c) && isspace (c))
- #define ISUPPER(c) (ISASCII (c) && isupper (c))
- #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
- #ifdef _tolower
- # define TOLOWER(c) _tolower(c)
- #else
- # define TOLOWER(c) tolower(c)
- #endif
- #ifndef NULL
- # define NULL (void *)0
- #endif
- /* We remove any previous definition of `SIGN_EXTEND_CHAR',
- since ours (we hope) works properly with all combinations of
- machines, compilers, `char' and `unsigned char' argument types.
- (Per Bothner suggested the basic approach.) */
- #undef SIGN_EXTEND_CHAR
- #if __STDC__
- # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
- #else /* not __STDC__ */
- /* As in Harbison and Steele. */
- # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
- #endif
- #ifndef emacs
- /* How many characters in the character set. */
- # define CHAR_SET_SIZE 256
- # ifdef SYNTAX_TABLE
- extern char *re_syntax_table;
- # else /* not SYNTAX_TABLE */
- static char re_syntax_table[CHAR_SET_SIZE];
- static void
- init_syntax_once ()
- {
- register int c;
- static int done = 0;
- if (done)
- return;
- bzero (re_syntax_table, sizeof re_syntax_table);
- for (c = 0; c < CHAR_SET_SIZE; ++c)
- if (ISALNUM (c))
- re_syntax_table[c] = Sword;
- re_syntax_table['_'] = Sword;
- done = 1;
- }
- # endif /* not SYNTAX_TABLE */
- # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
- #endif /* emacs */
- /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- use `alloca' instead of `malloc'. This is because using malloc in
- re_search* or re_match* could cause memory leaks when C-g is used in
- Emacs; also, malloc is slower and causes storage fragmentation. On
- the other hand, malloc is more portable, and easier to debug.
- Because we sometimes use alloca, some routines have to be macros,
- not functions -- `alloca'-allocated space disappears at the end of the
- function it is called in. */
- #ifdef REGEX_MALLOC
- # define REGEX_ALLOCATE malloc
- # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
- # define REGEX_FREE free
- #else /* not REGEX_MALLOC */
- /* Emacs already defines alloca, sometimes. */
- # ifndef alloca
- /* Make alloca work the best possible way. */
- # ifdef __GNUC__
- # define alloca __builtin_alloca
- # else /* not __GNUC__ */
- # if HAVE_ALLOCA_H
- # include <alloca.h>
- # endif /* HAVE_ALLOCA_H */
- # endif /* not __GNUC__ */
- # endif /* not alloca */
- # define REGEX_ALLOCATE alloca
- /* Assumes a `char *destination' variable. */
- # define REGEX_REALLOCATE(source, osize, nsize) \
- (destination = (char *) alloca (nsize), \
- memcpy (destination, source, osize))
- /* No need to do anything to free, after alloca. */
- # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
- #endif /* not REGEX_MALLOC */
- /* Define how to allocate the failure stack. */
- #if defined REL_ALLOC && defined REGEX_MALLOC
- # define REGEX_ALLOCATE_STACK(size) \
- r_alloc (&failure_stack_ptr, (size))
- # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
- r_re_alloc (&failure_stack_ptr, (nsize))
- # define REGEX_FREE_STACK(ptr) \
- r_alloc_free (&failure_stack_ptr)
- #else /* not using relocating allocator */
- # ifdef REGEX_MALLOC
- # define REGEX_ALLOCATE_STACK malloc
- # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
- # define REGEX_FREE_STACK free
- # else /* not REGEX_MALLOC */
- # define REGEX_ALLOCATE_STACK alloca
- # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
- REGEX_REALLOCATE (source, osize, nsize)
- /* No need to explicitly free anything. */
- # define REGEX_FREE_STACK(arg)
- # endif /* not REGEX_MALLOC */
- #endif /* not using relocating allocator */
- /* True if `size1' is non-NULL and PTR is pointing anywhere inside
- `string1' or just past its end. This works if PTR is NULL, which is
- a good thing. */
- #define FIRST_STRING_P(ptr) \
- (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
- /* (Re)Allocate N items of type T using malloc, or fail. */
- #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
- #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
- #define RETALLOC_IF(addr, n, t) \
- if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
- #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
- #define BYTEWIDTH 8 /* In bits. */
- #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
- #undef MAX
- #undef MIN
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
- typedef char boolean;
- #define false 0
- #define true 1
- static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
- const char *string1, int size1,
- const char *string2, int size2,
- int pos,
- struct re_registers *regs,
- int stop));
- /* These are the command codes that appear in compiled regular
- expressions. Some opcodes are followed by argument bytes. A
- command code can specify any interpretation whatsoever for its
- arguments. Zero bytes may appear in the compiled regular expression. */
- typedef enum
- {
- no_op = 0,
- /* Succeed right away--no more backtracking. */
- succeed,
- /* Followed by one byte giving n, then by n literal bytes. */
- exactn,
- /* Matches any (more or less) character. */
- anychar,
- /* Matches any one char belonging to specified set. First
- following byte is number of bitmap bytes. Then come bytes
- for a bitmap saying which chars are in. Bits in each byte
- are ordered low-bit-first. A character is in the set if its
- bit is 1. A character too large to have a bit in the map is
- automatically not in the set. */
- charset,
- /* Same parameters as charset, but match any character that is
- not one of those specified. */
- charset_not,
- /* Start remembering the text that is matched, for storing in a
- register. Followed by one byte with the register number, in
- the range 0 to one less than the pattern buffer's re_nsub
- field. Then followed by one byte with the number of groups
- inner to this one. (This last has to be part of the
- start_memory only because we need it in the on_failure_jump
- of re_match_2.) */
- start_memory,
- /* Stop remembering the text that is matched and store it in a
- memory register. Followed by one byte with the register
- number, in the range 0 to one less than `re_nsub' in the
- pattern buffer, and one byte with the number of inner groups,
- just like `start_memory'. (We need the number of inner
- groups here because we don't have any easy way of finding the
- corresponding start_memory when we're at a stop_memory.) */
- stop_memory,
- /* Match a duplicate of something remembered. Followed by one
- byte containing the register number. */
- duplicate,
- /* Fail unless at beginning of line. */
- begline,
- /* Fail unless at end of line. */
- endline,
- /* Succeeds if at beginning of buffer (if emacs) or at beginning
- of string to be matched (if not). */
- begbuf,
- /* Analogously, for end of buffer/string. */
- endbuf,
- /* Followed by two byte relative address to which to jump. */
- jump,
- /* Same as jump, but marks the end of an alternative. */
- jump_past_alt,
- /* Followed by two-byte relative address of place to resume at
- in case of failure. */
- on_failure_jump,
- /* Like on_failure_jump, but pushes a placeholder instead of the
- current string position when executed. */
- on_failure_keep_string_jump,
- /* Throw away latest failure point and then jump to following
- two-byte relative address. */
- pop_failure_jump,
- /* Change to pop_failure_jump if know won't have to backtrack to
- match; otherwise change to jump. This is used to jump
- back to the beginning of a repeat. If what follows this jump
- clearly won't match what the repeat does, such that we can be
- sure that there is no use backtracking out of repetitions
- already matched, then we change it to a pop_failure_jump.
- Followed by two-byte address. */
- maybe_pop_jump,
- /* Jump to following two-byte address, and push a dummy failure
- point. This failure point will be thrown away if an attempt
- is made to use it for a failure. A `+' construct makes this
- before the first repeat. Also used as an intermediary kind
- of jump when compiling an alternative. */
- dummy_failure_jump,
- /* Push a dummy failure point and continue. Used at the end of
- alternatives. */
- push_dummy_failure,
- /* Followed by two-byte relative address and two-byte number n.
- After matching N times, jump to the address upon failure. */
- succeed_n,
- /* Followed by two-byte relative address, and two-byte number n.
- Jump to the address N times, then fail. */
- jump_n,
- /* Set the following two-byte relative address to the
- subsequent two-byte number. The address *includes* the two
- bytes of number. */
- set_number_at,
- wordchar, /* Matches any word-constituent character. */
- notwordchar, /* Matches any char that is not a word-constituent. */
- wordbeg, /* Succeeds if at word beginning. */
- wordend, /* Succeeds if at word end. */
- wordbound, /* Succeeds if at a word boundary. */
- notwordbound /* Succeeds if not at a word boundary. */
- #ifdef emacs
- ,before_dot, /* Succeeds if before point. */
- at_dot, /* Succeeds if at point. */
- after_dot, /* Succeeds if after point. */
- /* Matches any character whose syntax is specified. Followed by
- a byte which contains a syntax code, e.g., Sword. */
- syntaxspec,
- /* Matches any character whose syntax is not that specified. */
- notsyntaxspec
- #endif /* emacs */
- } re_opcode_t;
- /* Common operations on the compiled pattern. */
- /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
- #define STORE_NUMBER(destination, number) \
- do { \
- (destination)[0] = (number) & 0377; \
- (destination)[1] = (number) >> 8; \
- } while (0)
- /* Same as STORE_NUMBER, except increment DESTINATION to
- the byte after where the number is stored. Therefore, DESTINATION
- must be an lvalue. */
- #define STORE_NUMBER_AND_INCR(destination, number) \
- do { \
- STORE_NUMBER (destination, number); \
- (destination) += 2; \
- } while (0)
- /* Put into DESTINATION a number stored in two contiguous bytes starting
- at SOURCE. */
- #define EXTRACT_NUMBER(destination, source) \
- do { \
- (destination) = *(source) & 0377; \
- (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
- } while (0)
- #ifdef DEBUG
- static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
- static void
- extract_number (dest, source)
- int *dest;
- unsigned char *source;
- {
- int temp = SIGN_EXTEND_CHAR (*(source + 1));
- *dest = *source & 0377;
- *dest += temp << 8;
- }
- # ifndef EXTRACT_MACROS /* To debug the macros. */
- # undef EXTRACT_NUMBER
- # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
- # endif /* not EXTRACT_MACROS */
- #endif /* DEBUG */
- /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
- SOURCE must be an lvalue. */
- #define EXTRACT_NUMBER_AND_INCR(destination, source) \
- do { \
- EXTRACT_NUMBER (destination, source); \
- (source) += 2; \
- } while (0)
- #ifdef DEBUG
- static void extract_number_and_incr _RE_ARGS ((int *destination,
- unsigned char **source));
- static void
- extract_number_and_incr (destination, source)
- int *destination;
- unsigned char **source;
- {
- extract_number (destination, *source);
- *source += 2;
- }
- # ifndef EXTRACT_MACROS
- # undef EXTRACT_NUMBER_AND_INCR
- # define EXTRACT_NUMBER_AND_INCR(dest, src) \
- extract_number_and_incr (&dest, &src)
- # endif /* not EXTRACT_MACROS */
- #endif /* DEBUG */
- /* If DEBUG is defined, Regex prints many voluminous messages about what
- it is doing (if the variable `debug' is nonzero). If linked with the
- main program in `iregex.c', you can enter patterns and strings
- interactively. And if linked with the main program in `main.c' and
- the other test files, you can run the already-written tests. */
- #ifdef DEBUG
- /* We use standard I/O for debugging. */
- # include <stdio.h>
- /* It is useful to test things that ``must'' be true when debugging. */
- # include <assert.h>
- static int debug;
- # define DEBUG_STATEMENT(e) e
- # define DEBUG_PRINT1(x) if (debug) printf (x)
- # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
- # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
- # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
- # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
- if (debug) print_partial_compiled_pattern (s, e)
- # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
- if (debug) print_double_string (w, s1, sz1, s2, sz2)
- /* Print the fastmap in human-readable form. */
- void
- print_fastmap (fastmap)
- char *fastmap;
- {
- unsigned was_a_range = 0;
- unsigned i = 0;
- while (i < (1 << BYTEWIDTH))
- {
- if (fastmap[i++])
- {
- was_a_range = 0;
- putchar (i - 1);
- while (i < (1 << BYTEWIDTH) && fastmap[i])
- {
- was_a_range = 1;
- i++;
- }
- if (was_a_range)
- {
- printf ("-");
- putchar (i - 1);
- }
- }
- }
- putchar ('\n');
- }
- /* Print a compiled pattern string in human-readable form, starting at
- the START pointer into it and ending just before the pointer END. */
- void
- print_partial_compiled_pattern (start, end)
- unsigned char *start;
- unsigned char *end;
- {
- int mcnt, mcnt2;
- unsigned char *p1;
- unsigned char *p = start;
- unsigned char *pend = end;
- if (start == NULL)
- {
- printf ("(null)\n");
- return;
- }
- /* Loop over pattern commands. */
- while (p < pend)
- {
- #ifdef _LIBC
- printf ("%t:\t", p - start);
- #else
- printf ("%ld:\t", (long int) (p - start));
- #endif
- switch ((re_opcode_t) *p++)
- {
- case no_op:
- printf ("/no_op");
- break;
- case exactn:
- mcnt = *p++;
- printf ("/exactn/%d", mcnt);
- do
- {
- putchar ('/');
- putchar (*p++);
- }
- while (--mcnt);
- break;
- case start_memory:
- mcnt = *p++;
- printf ("/start_memory/%d/%d", mcnt, *p++);
- break;
- case stop_memory:
- mcnt = *p++;
- printf ("/stop_memory/%d/%d", mcnt, *p++);
- break;
- case duplicate:
- printf ("/duplicate/%d", *p++);
- break;
- case anychar:
- printf ("/anychar");
- break;
- case charset:
- case charset_not:
- {
- register int c, last = -100;
- register int in_range = 0;
- printf ("/charset [%s",
- (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
- assert (p + *p < pend);
- for (c = 0; c < 256; c++)
- if (c / 8 < *p
- && (p[1 + (c/8)] & (1 << (c % 8))))
- {
- /* Are we starting a range? */
- if (last + 1 == c && ! in_range)
- {
- putchar ('-');
- in_range = 1;
- }
- /* Have we broken a range? */
- else if (last + 1 != c && in_range)
- {
- putchar (last);
- in_range = 0;
- }
- if (! in_range)
- putchar (c);
- last = c;
- }
- if (in_range)
- putchar (last);
- putchar (']');
- p += 1 + *p;
- }
- break;
- case begline:
- printf ("/begline");
- break;
- case endline:
- printf ("/endline");
- break;
- case on_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/on_failure_jump to %t", p + mcnt - start);
- #else
- printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case on_failure_keep_string_jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/on_failure_keep_string_jump to %t", p + mcnt - start);
- #else
- printf ("/on_failure_keep_string_jump to %ld",
- (long int) (p + mcnt - start));
- #endif
- break;
- case dummy_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/dummy_failure_jump to %t", p + mcnt - start);
- #else
- printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case push_dummy_failure:
- printf ("/push_dummy_failure");
- break;
- case maybe_pop_jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/maybe_pop_jump to %t", p + mcnt - start);
- #else
- printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case pop_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/pop_failure_jump to %t", p + mcnt - start);
- #else
- printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case jump_past_alt:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/jump_past_alt to %t", p + mcnt - start);
- #else
- printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case jump:
- extract_number_and_incr (&mcnt, &p);
- #ifdef _LIBC
- printf ("/jump to %t", p + mcnt - start);
- #else
- printf ("/jump to %ld", (long int) (p + mcnt - start));
- #endif
- break;
- case succeed_n:
- extract_number_and_incr (&mcnt, &p);
- p1 = p + mcnt;
- extract_number_and_incr (&mcnt2, &p);
- #ifdef _LIBC
- printf ("/succeed_n to %t, %d times", p1 - start, mcnt2);
- #else
- printf ("/succeed_n to %ld, %d times",
- (long int) (p1 - start), mcnt2);
- #endif
- break;
- case jump_n:
- extract_number_and_incr (&mcnt, &p);
- p1 = p + mcnt;
- extract_number_and_incr (&mcnt2, &p);
- printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
- break;
- case set_number_at:
- extract_number_and_incr (&mcnt, &p);
- p1 = p + mcnt;
- extract_number_and_incr (&mcnt2, &p);
- #ifdef _LIBC
- printf ("/set_number_at location %t to %d", p1 - start, mcnt2);
- #else
- printf ("/set_number_at location %ld to %d",
- (long int) (p1 - start), mcnt2);
- #endif
- break;
- case wordbound:
- printf ("/wordbound");
- break;
- case notwordbound:
- printf ("/notwordbound");
- break;
- case wordbeg:
- printf ("/wordbeg");
- break;
- case wordend:
- printf ("/wordend");
- # ifdef emacs
- case before_dot:
- printf ("/before_dot");
- break;
- case at_dot:
- printf ("/at_dot");
- break;
- case after_dot:
- printf ("/after_dot");
- break;
- case syntaxspec:
- printf ("/syntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
- case notsyntaxspec:
- printf ("/notsyntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
- # endif /* emacs */
- case wordchar:
- printf ("/wordchar");
- break;
- case notwordchar:
- printf ("/notwordchar");
- break;
- case begbuf:
- printf ("/begbuf");
- break;
- case endbuf:
- printf ("/endbuf");
- break;
- default:
- printf ("?%d", *(p-1));
- }
- putchar ('\n');
- }
- #ifdef _LIBC
- printf ("%t:\tend of pattern.\n", p - start);
- #else
- printf ("%ld:\tend of pattern.\n", (long int) (p - start));
- #endif
- }
- void
- print_compiled_pattern (bufp)
- struct re_pattern_buffer *bufp;
- {
- unsigned char *buffer = bufp->buffer;
- print_partial_compiled_pattern (buffer, buffer + bufp->used);
- printf ("%ld bytes used/%ld bytes allocated.\n",
- bufp->used, bufp->allocated);
- if (bufp->fastmap_accurate && bufp->fastmap)
- {
- printf ("fastmap: ");
- print_fastmap (bufp->fastmap);
- }
- #ifdef _LIBC
- printf ("re_nsub: %Zd\t", bufp->re_nsub);
- #else
- printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
- #endif
- printf ("regs_alloc: %d\t", bufp->regs_allocated);
- printf ("can_be_null: %d\t", bufp->can_be_null);
- printf ("newline_anchor: %d\n", bufp->newline_anchor);
- printf ("no_sub: %d\t", bufp->no_sub);
- printf ("not_bol: %d\t", bufp->not_bol);
- printf ("not_eol: %d\t", bufp->not_eol);
- printf ("syntax: %lx\n", bufp->syntax);
- /* Perhaps we should print the translate table? */
- }
- void
- print_double_string (where, string1, size1, string2, size2)
- const char *where;
- const char *string1;
- const char *string2;
- int size1;
- int size2;
- {
- int this_char;
- if (where == NULL)
- printf ("(null)");
- else
- {
- if (FIRST_STRING_P (where))
- {
- for (this_char = where - string1; this_char < size1; this_char++)
- putchar (string1[this_char]);
- where = string2;
- }
- for (this_char = where - string2; this_char < size2; this_char++)
- putchar (string2[this_char]);
- }
- }
- void
- printchar (c)
- int c;
- {
- putc (c, stderr);
- }
- #else /* not DEBUG */
- # undef assert
- # define assert(e)
- # define DEBUG_STATEMENT(e)
- # define DEBUG_PRINT1(x)
- # define DEBUG_PRINT2(x1, x2)
- # define DEBUG_PRINT3(x1, x2, x3)
- # define DEBUG_PRINT4(x1, x2, x3, x4)
- # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
- # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
- #endif /* not DEBUG */
- /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- also be assigned to arbitrarily: each pattern buffer stores its own
- syntax, so it can be changed between regex compilations. */
- /* This has no initializer because initialized variables in Emacs
- become read-only after dumping. */
- reg_syntax_t re_syntax_options;
- /* Specify the precise syntax of regexps for compilation. This provides
- for compatibility for various utilities which historically have
- different, incompatible syntaxes.
- The argument SYNTAX is a bit mask comprised of the various bits
- defined in regex.h. We return the old syntax. */
- reg_syntax_t
- re_set_syntax (syntax)
- reg_syntax_t syntax;
- {
- reg_syntax_t ret = re_syntax_options;
- re_syntax_options = syntax;
- #ifdef DEBUG
- if (syntax & RE_DEBUG)
- debug = 1;
- else if (debug) /* was on but now is not */
- debug = 0;
- #endif /* DEBUG */
- return ret;
- }
- #ifdef _LIBC
- weak_alias (__re_set_syntax, re_set_syntax)
- #endif
- /* This table gives an error message for each of the error codes listed
- in regex.h. Obviously the order here has to be same as there.
- POSIX doesn't require that we do anything for REG_NOERROR,
- but why not be nice? */
- static const char re_error_msgid[] =
- {
- #define REG_NOERROR_IDX 0
- gettext_noop ("Success") /* REG_NOERROR */
- "\0"
- #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
- gettext_noop ("No match") /* REG_NOMATCH */
- "\0"
- #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
- gettext_noop ("Invalid regular expression") /* REG_BADPAT */
- "\0"
- #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
- gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
- "\0"
- #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
- gettext_noop ("Invalid character class name") /* REG_ECTYPE */
- "\0"
- #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
- gettext_noop ("Trailing backslash") /* REG_EESCAPE */
- "\0"
- #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
- gettext_noop ("Invalid back reference") /* REG_ESUBREG */
- "\0"
- #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
- gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
- "\0"
- #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
- gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
- "\0"
- #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
- gettext_noop ("Unmatched \\{") /* REG_EBRACE */
- "\0"
- #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
- gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
- "\0"
- #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
- gettext_noop ("Invalid range end") /* REG_ERANGE */
- "\0"
- #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
- gettext_noop ("Memory exhausted") /* REG_ESPACE */
- "\0"
- #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
- gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
- "\0"
- #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
- gettext_noop ("Premature end of regular expression") /* REG_EEND */
- "\0"
- #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
- gettext_noop ("Regular expression too big") /* REG_ESIZE */
- "\0"
- #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
- gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
- };
- static const size_t re_error_msgid_idx[] =
- {
- REG_NOERROR_IDX,
- REG_NOMATCH_IDX,
- REG_BADPAT_IDX,
- REG_ECOLLATE_IDX,
- REG_ECTYPE_IDX,
- REG_EESCAPE_IDX,
- REG_ESUBREG_IDX,
- REG_EBRACK_IDX,
- REG_EPAREN_IDX,
- REG_EBRACE_IDX,
- REG_BADBR_IDX,
- REG_ERANGE_IDX,
- REG_ESPACE_IDX,
- REG_BADRPT_IDX,
- REG_EEND_IDX,
- REG_ESIZE_IDX,
- REG_ERPAREN_IDX
- };
- /* Avoiding alloca during matching, to placate r_alloc. */
- /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
- searching and matching functions should not call alloca. On some
- systems, alloca is implemented in terms of malloc, and if we're
- using the relocating allocator routines, then malloc could cause a
- relocation, which might (if the strings being searched are in the
- ralloc heap) shift the data out from underneath the regexp
- routines.
- Here's another reason to avoid allocation: Emacs
- processes input from X in a signal handler; processing X input may
- call malloc; if input arrives while a matching routine is calling
- malloc, then we're scrod. But Emacs can't just block input while
- calling matching routines; then we don't notice interrupts when
- they come in. So, Emacs blocks input around all regexp calls
- except the matching calls, which it leaves unprotected, in the
- faith that they will not malloc. */
- /* Normally, this is fine. */
- #define MATCH_MAY_ALLOCATE
- /* When using GNU C, we are not REALLY using the C alloca, no matter
- what config.h may say. So don't take precautions for it. */
- #ifdef __GNUC__
- # undef C_ALLOCA
- #endif
- /* The match routines may not allocate if (1) they would do it with malloc
- and (2) it's not safe for them to use malloc.
- Note that if REL_ALLOC is defined, matching would not use malloc for the
- failure stack, but we would still use it for the register vectors;
- so REL_ALLOC should not affect this. */
- #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
- # undef MATCH_MAY_ALLOCATE
- #endif
- /* Failure stack declarations and macros; both re_compile_fastmap and
- re_match_2 use a failure stack. These have to be macros because of
- REGEX_ALLOCATE_STACK. */
- /* Number of failure points for which to initially allocate space
- when matching. If this number is exceeded, we allocate more
- space, so it is not a hard limit. */
- #ifndef INIT_FAILURE_ALLOC
- # define INIT_FAILURE_ALLOC 5
- #endif
- /* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
- This is a variable only so users of regex can assign to it; we never
- change it ourselves. */
- #ifdef INT_IS_16BIT
- # if defined MATCH_MAY_ALLOCATE
- /* 4400 was enough to cause a crash on Alpha OSF/1,
- whose default stack limit is 2mb. */
- long int re_max_failures = 4000;
- # else
- long int re_max_failures = 2000;
- # endif
- union fail_stack_elt
- {
- unsigned char *pointer;
- long int integer;
- };
- typedef union fail_stack_elt fail_stack_elt_t;
- typedef struct
- {
- fail_stack_elt_t *stack;
- unsigned long int size;
- unsigned long int avail; /* Offset of next open position. */
- } fail_stack_type;
- #else /* not INT_IS_16BIT */
- # if defined MATCH_MAY_ALLOCATE
- /* 4400 was enough to cause a crash on Alpha OSF/1,
- whose default stack limit is 2mb. */
- int re_max_failures = 4000;
- # else
- int re_max_failures = 2000;
- # endif
- union fail_stack_elt
- {
- unsigned char *pointer;
- int integer;
- };
- typedef union fail_stack_elt fail_stack_elt_t;
- typedef struct
- {
- fail_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
- } fail_stack_type;
- #endif /* INT_IS_16BIT */
- #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
- #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
- #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
- /* Define macros to initialize and free the failure stack.
- Do `return -2' if the alloc fails. */
- #ifdef MATCH_MAY_ALLOCATE
- # define INIT_FAIL_STACK() \
- do { \
- fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
- \
- if (fail_stack.stack == NULL) \
- return -2; \
- \
- fail_stack.size = INIT_FAILURE_ALLOC; \
- fail_stack.avail = 0; \
- } while (0)
- # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
- #else
- # define INIT_FAIL_STACK() \
- do { \
- fail_stack.avail = 0; \
- } while (0)
- # define RESET_FAIL_STACK()
- #endif
- /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
- Return 1 if succeeds, and 0 if either ran out of memory
- allocating space for it or it was already too large.
- REGEX_REALLOCATE_STACK requires `destination' be declared. */
- #define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
- ? 0 \
- : ((fail_stack).stack = (fail_stack_elt_t *) \
- REGEX_REALLOCATE_STACK ((fail_stack).stack, \
- (fail_stack).size * sizeof (fail_stack_elt_t), \
- ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
- \
- (fail_stack).stack == NULL \
- ? 0 \
- : ((fail_stack).size <<= 1, \
- 1)))
- /* Push pointer POINTER on FAIL_STACK.
- Return 1 if was able to do so and 0 if ran out of memory allocating
- space to do so. */
- #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
- ((FAIL_STACK_FULL () \
- && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
- ? 0 \
- : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
- 1))
- /* Push a pointer value onto the failure stack.
- Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
- #define PUSH_FAILURE_POINTER(item) \
- fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
- /* This pushes an integer-valued item onto the failure stack.
- Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
- #define PUSH_FAILURE_INT(item) \
- fail_stack.stack[fail_stack.avail++].integer = (item)
- /* Push a fail_stack_elt_t value onto the failure stack.
- Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
- #define PUSH_FAILURE_ELT(item) \
- fail_stack.stack[fail_stack.avail++] = (item)
- /* These three POP... operations complement the three PUSH... operations.
- All assume that `fail_stack' is nonempty. */
- #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
- #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
- #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
- /* Used to omit pushing failure point id's when we're not debugging. */
- #ifdef DEBUG
- # define DEBUG_PUSH PUSH_FAILURE_INT
- # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
- #else
- # define DEBUG_PUSH(item)
- # define DEBUG_POP(item_addr)
- #endif
- /* Push the information about the state we will need
- if we ever fail back to it.
- Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
- be declared.
- Does `return FAILURE_CODE' if runs out of memory. */
- #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
- do { \
- char *destination; \
- /* Must be int, so when we don't save any registers, the arithmetic \
- of 0 + -1 isn't done as unsigned. */ \
- /* Can't be int, since there is not a shred of a guarantee that int \
- is wide enough to hold a value of something to which pointer can \
- be assigned */ \
- active_reg_t this_reg; \
- \
- DEBUG_STATEMENT (failure_id++); \
- DEBUG_STATEMENT (nfailure_points_pushed++); \
- DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
- \
- DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
- DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
- \
- /* Ensure we have enough space allocated for what we will push. */ \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- { \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
- return failure_code; \
- \
- DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (fail_stack).size); \
- DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
- } \
- \
- /* Push the info, starting with the registers. */ \
- DEBUG_PRINT1 ("\n"); \
- \
- if (1) \
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
- \
- DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
- PUSH_FAILURE_POINTER (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
- PUSH_FAILURE_POINTER (regend[this_reg]); \
- \
- DEBUG_PRINT2 (" info: %p\n ", \
- reg_info[this_reg].word.pointer); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_ELT (reg_info[this_reg].word); \
- } \
- \
- DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
- PUSH_FAILURE_INT (lowest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
- PUSH_FAILURE_INT (highest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
- PUSH_FAILURE_POINTER (pattern_place); \
- \
- DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
- DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
- size2); \
- DEBUG_PRINT1 ("'\n"); \
- PUSH_FAILURE_POINTER (string_place); \
- \
- DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
- DEBUG_PUSH (failure_id); \
- } while (0)
- /* This is the number of items that are pushed and popped on the stack
- for each register. */
- #define NUM_REG_ITEMS 3
- /* Individual items aside from the registers. */
- #ifdef DEBUG
- # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
- #else
- # define NUM_NONREG_ITEMS 4
- #endif
- /* We push at most this many items on the stack. */
- /* We used to use (num_regs - 1), which is the number of registers
- this regexp will save; but that was changed to 5
- to avoid stack overflow for a regexp with lots of parens. */
- #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
- /* We actually push this many items. */
- #define NUM_FAILURE_ITEMS \
- (((0 \
- ? 0 : highest_active_reg - lowest_active_reg + 1) \
- * NUM_REG_ITEMS) \
- + NUM_NONREG_ITEMS)
- /* How many items can still be added to the stack without overflowing it. */
- #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
- /* Pops what PUSH_FAIL_STACK pushes.
- We restore into the parameters, all of which should be lvalues:
- STR -- the saved data position.
- PAT -- the saved pattern position.
- LOW_REG, HIGH_REG -- the highest and lowest active registers.
- REGSTART, REGEND -- arrays of string positions.
- REG_INFO -- array of information about each subexpression.
- Also assumes the variables `fail_stack' and (if debugging), `bufp',
- `pend', `string1', `size1', `string2', and `size2'. */
- #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
- { \
- DEBUG_STATEMENT (unsigned failure_id;) \
- active_reg_t this_reg; \
- const unsigned char *string_temp; \
- \
- assert (!FAIL_STACK_EMPTY ()); \
- \
- /* Remove failure points and point to how many regs pushed. */ \
- DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
- DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
- \
- assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
- \
- DEBUG_POP (&failure_id); \
- DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
- \
- /* If the saved string location is NULL, it came from an \
- on_failure_keep_string_jump opcode, and we want to throw away the \
- saved NULL, thus retaining our current position in the string. */ \
- string_temp = POP_FAILURE_POINTER (); \
- if (string_temp != NULL) \
- str = (const char *) string_temp; \
- \
- DEBUG_PRINT2 (" Popping string %p: `", str); \
- DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
- DEBUG_PRINT1 ("'\n"); \
- \
- pat = (unsigned char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
- \
- /* Restore register info. */ \
- high_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
- \
- low_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
- \
- if (1) \
- for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
- { \
- DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
- \
- reg_info[this_reg].word = POP_FAILURE_ELT (); \
- DEBUG_PRINT2 (" info: %p\n", \
- reg_info[this_reg].word.pointer); \
- \
- regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
- \
- regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
- } \
- else \
- { \
- for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
- { \
- reg_info[this_reg].word.integer = 0; \
- regend[this_reg] = 0; \
- regstart[this_reg] = 0; \
- } \
- highest_active_reg = high_reg; \
- } \
- \
- set_regs_matched_done = 0; \
- DEBUG_STATEMENT (nfailure_points_popped++); \
- } /* POP_FAILURE_POINT */
- /* Structure for per-register (a.k.a. per-group) information.
- Other register information, such as the
- starting and ending positions (which are addresses), and the list of
- inner groups (which is a bits list) are maintained in separate
- variables.
- We are making a (strictly speaking) nonportable assumption here: that
- the compiler will pack our bit fields into something that fits into
- the type of `word', i.e., is something that fits into one item on the
- failure stack. */
- /* Declarations and macros for re_match_2. */
- typedef union
- {
- fail_stack_elt_t word;
- struct
- {
- /* This field is one if this group can match the empty string,
- zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
- #define MATCH_NULL_UNSET_VALUE 3
- unsigned match_null_string_p : 2;
- unsigned is_active : 1;
- unsigned matched_something : 1;
- unsigned ever_matched_something : 1;
- } bits;
- } register_info_type;
- #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
- #define IS_ACTIVE(R) ((R).bits.is_active)
- #def…
Large files files are truncated, but you can click here to view the full file