PageRenderTime 116ms CodeModel.GetById 35ms app.highlight 61ms RepoModel.GetById 1ms app.codeStats 1ms

/contrib/cvs/lib/regex.c

https://bitbucket.org/freebsd/freebsd-head/
C | 6375 lines | 3925 code | 1031 blank | 1419 comment | 970 complexity | 31ee443b3692f366763a0c58c9d6d303 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* Extended regular expression matching and search library, version
   2   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
   3   internationalization features.)
   4
   5   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
   6
   7   This program is free software; you can redistribute it and/or modify
   8   it under the terms of the GNU General Public License as published by
   9   the Free Software Foundation; either version 2, or (at your option)
  10   any later version.
  11
  12   This program is distributed in the hope that it will be useful,
  13   but WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
  15   GNU General Public License for more details.
  16
  17   You should have received a copy of the GNU General Public License
  18   along with this program; if not, write to the Free Software
  19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  20   USA.	 */
  21
  22/* AIX requires this to be the first thing in the file. */
  23#if defined (_AIX) && !defined (REGEX_MALLOC)
  24  #pragma alloca
  25#endif
  26
  27#undef	_GNU_SOURCE
  28#define _GNU_SOURCE
  29
  30#ifdef emacs
  31/* Converts the pointer to the char to BEG-based offset from the start.	 */
  32#define PTR_TO_OFFSET(d)						\
  33	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
  34			  ? (d) - string1 : (d) - (string2 - size1))
  35#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
  36#else
  37#define PTR_TO_OFFSET(d) 0
  38#endif
  39
  40#ifdef HAVE_CONFIG_H
  41#include <config.h>
  42#endif
  43
  44/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
  45#include <sys/types.h>
  46
  47/* This is for other GNU distributions with internationalized messages.	 */
  48#if HAVE_LIBINTL_H || defined (_LIBC)
  49# include <libintl.h>
  50#else
  51# define gettext(msgid) (msgid)
  52#endif
  53
  54#ifndef gettext_noop
  55/* This define is so xgettext can find the internationalizable
  56   strings.  */
  57#define gettext_noop(String) String
  58#endif
  59
  60/* The `emacs' switch turns on certain matching commands
  61   that make sense only in Emacs. */
  62#ifdef emacs
  63
  64#include "lisp.h"
  65#include "buffer.h"
  66
  67/* Make syntax table lookup grant data in gl_state.  */
  68#define SYNTAX_ENTRY_VIA_PROPERTY
  69
  70#include "syntax.h"
  71#include "charset.h"
  72#include "category.h"
  73
  74#define malloc xmalloc
  75#define realloc xrealloc
  76#define free xfree
  77
  78#else  /* not emacs */
  79
  80/* If we are not linking with Emacs proper,
  81   we can't use the relocating allocator
  82   even if config.h says that we can.  */
  83#undef REL_ALLOC
  84
  85#if defined (STDC_HEADERS) || defined (_LIBC)
  86#include <stdlib.h>
  87#else
  88char *malloc ();
  89char *realloc ();
  90#endif
  91
  92/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
  93   If nothing else has been done, use the method below.	 */
  94#ifdef INHIBIT_STRING_HEADER
  95#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
  96#if !defined (bzero) && !defined (bcopy)
  97#undef INHIBIT_STRING_HEADER
  98#endif
  99#endif
 100#endif
 101
 102/* This is the normal way of making sure we have a bcopy and a bzero.
 103   This is used in most programs--a few other programs avoid this
 104   by defining INHIBIT_STRING_HEADER.  */
 105#ifndef INHIBIT_STRING_HEADER
 106#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
 107#include <string.h>
 108#ifndef bcmp
 109#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
 110#endif
 111#ifndef bcopy
 112#define bcopy(s, d, n)	memcpy ((d), (s), (n))
 113#endif
 114#ifndef bzero
 115#define bzero(s, n)	memset ((s), 0, (n))
 116#endif
 117#else
 118#include <strings.h>
 119#endif
 120#endif
 121
 122/* Define the syntax stuff for \<, \>, etc.  */
 123
 124/* This must be nonzero for the wordchar and notwordchar pattern
 125   commands in re_match_2.  */
 126#ifndef Sword
 127#define Sword 1
 128#endif
 129
 130#ifdef SWITCH_ENUM_BUG
 131#define SWITCH_ENUM_CAST(x) ((int)(x))
 132#else
 133#define SWITCH_ENUM_CAST(x) (x)
 134#endif
 135
 136#ifdef SYNTAX_TABLE
 137
 138extern char *re_syntax_table;
 139
 140#else /* not SYNTAX_TABLE */
 141
 142/* How many characters in the character set.  */
 143#define CHAR_SET_SIZE 256
 144
 145static char re_syntax_table[CHAR_SET_SIZE];
 146
 147static void
 148init_syntax_once ()
 149{
 150   register int c;
 151   static int done = 0;
 152
 153   if (done)
 154     return;
 155
 156   bzero (re_syntax_table, sizeof re_syntax_table);
 157
 158   for (c = 'a'; c <= 'z'; c++)
 159     re_syntax_table[c] = Sword;
 160
 161   for (c = 'A'; c <= 'Z'; c++)
 162     re_syntax_table[c] = Sword;
 163
 164   for (c = '0'; c <= '9'; c++)
 165     re_syntax_table[c] = Sword;
 166
 167   re_syntax_table['_'] = Sword;
 168
 169   done = 1;
 170}
 171
 172#endif /* not SYNTAX_TABLE */
 173
 174#define SYNTAX(c) re_syntax_table[c]
 175
 176/* Dummy macros for non-Emacs environments.  */
 177#define BASE_LEADING_CODE_P(c) (0)
 178#define WORD_BOUNDARY_P(c1, c2) (0)
 179#define CHAR_HEAD_P(p) (1)
 180#define SINGLE_BYTE_CHAR_P(c) (1)
 181#define SAME_CHARSET_P(c1, c2) (1)
 182#define MULTIBYTE_FORM_LENGTH(p, s) (1)
 183#define STRING_CHAR(p, s) (*(p))
 184#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
 185#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
 186  (c = ((p) == (end1) ? *(str2) : *(p)))
 187#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
 188  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 189#endif /* not emacs */
 190
 191/* Get the interface, including the syntax bits.  */
 192#include "regex.h"
 193
 194/* isalpha etc. are used for the character classes.  */
 195#include <ctype.h>
 196
 197/* Jim Meyering writes:
 198
 199   "... Some ctype macros are valid only for character codes that
 200   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
 201   using /bin/cc or gcc but without giving an ansi option).  So, all
 202   ctype uses should be through macros like ISPRINT...	If
 203   STDC_HEADERS is defined, then autoconf has verified that the ctype
 204   macros don't need to be guarded with references to isascii. ...
 205   Defining isascii to 1 should let any compiler worth its salt
 206   eliminate the && through constant folding."	*/
 207
 208#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
 209#define ISASCII(c) 1
 210#else
 211#define ISASCII(c) isascii(c)
 212#endif
 213
 214#ifdef isblank
 215#define ISBLANK(c) (ISASCII (c) && isblank (c))
 216#else
 217#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 218#endif
 219#ifdef isgraph
 220#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
 221#else
 222#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
 223#endif
 224
 225#define ISPRINT(c) (ISASCII (c) && isprint (c))
 226#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
 227#define ISALNUM(c) (ISASCII (c) && isalnum (c))
 228#define ISALPHA(c) (ISASCII (c) && isalpha (c))
 229#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
 230#define ISLOWER(c) (ISASCII (c) && islower (c))
 231#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
 232#define ISSPACE(c) (ISASCII (c) && isspace (c))
 233#define ISUPPER(c) (ISASCII (c) && isupper (c))
 234#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
 235
 236#ifndef NULL
 237#define NULL (void *)0
 238#endif
 239
 240/* We remove any previous definition of `SIGN_EXTEND_CHAR',
 241   since ours (we hope) works properly with all combinations of
 242   machines, compilers, `char' and `unsigned char' argument types.
 243   (Per Bothner suggested the basic approach.)	*/
 244#undef SIGN_EXTEND_CHAR
 245#if __STDC__
 246#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
 247#else  /* not __STDC__ */
 248/* As in Harbison and Steele.  */
 249#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
 250#endif
 251
 252/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 253   use `alloca' instead of `malloc'.  This is because using malloc in
 254   re_search* or re_match* could cause memory leaks when C-g is used in
 255   Emacs; also, malloc is slower and causes storage fragmentation.  On
 256   the other hand, malloc is more portable, and easier to debug.
 257
 258   Because we sometimes use alloca, some routines have to be macros,
 259   not functions -- `alloca'-allocated space disappears at the end of the
 260   function it is called in.  */
 261
 262#ifdef REGEX_MALLOC
 263
 264#define REGEX_ALLOCATE malloc
 265#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
 266#define REGEX_FREE free
 267
 268#else /* not REGEX_MALLOC  */
 269
 270/* Emacs already defines alloca, sometimes.  */
 271#ifndef alloca
 272
 273/* Make alloca work the best possible way.  */
 274#ifdef __GNUC__
 275#define alloca __builtin_alloca
 276#else /* not __GNUC__ */
 277#if HAVE_ALLOCA_H
 278#include <alloca.h>
 279#else /* not __GNUC__ or HAVE_ALLOCA_H */
 280#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
 281#ifndef _AIX /* Already did AIX, up at the top.	 */
 282char *alloca ();
 283#endif /* not _AIX */
 284#endif
 285#endif /* not HAVE_ALLOCA_H */
 286#endif /* not __GNUC__ */
 287
 288#endif /* not alloca */
 289
 290#define REGEX_ALLOCATE alloca
 291
 292/* Assumes a `char *destination' variable.  */
 293#define REGEX_REALLOCATE(source, osize, nsize)				\
 294  (destination = (char *) alloca (nsize),				\
 295   bcopy (source, destination, osize),					\
 296   destination)
 297
 298/* No need to do anything to free, after alloca.  */
 299#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
 300
 301#endif /* not REGEX_MALLOC */
 302
 303/* Define how to allocate the failure stack.  */
 304
 305#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
 306
 307#define REGEX_ALLOCATE_STACK(size)				\
 308  r_alloc (&failure_stack_ptr, (size))
 309#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
 310  r_re_alloc (&failure_stack_ptr, (nsize))
 311#define REGEX_FREE_STACK(ptr)					\
 312  r_alloc_free (&failure_stack_ptr)
 313
 314#else /* not using relocating allocator */
 315
 316#ifdef REGEX_MALLOC
 317
 318#define REGEX_ALLOCATE_STACK malloc
 319#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
 320#define REGEX_FREE_STACK free
 321
 322#else /* not REGEX_MALLOC */
 323
 324#define REGEX_ALLOCATE_STACK alloca
 325
 326#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
 327   REGEX_REALLOCATE (source, osize, nsize)
 328/* No need to explicitly free anything.	 */
 329#define REGEX_FREE_STACK(arg)
 330
 331#endif /* not REGEX_MALLOC */
 332#endif /* not using relocating allocator */
 333
 334
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
 338#define FIRST_STRING_P(ptr)					\
 339  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 340
 341/* (Re)Allocate N items of type T using malloc, or fail.  */
 342#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 343#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
 344#define RETALLOC_IF(addr, n, t) \
 345  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
 346#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 347
 348#define BYTEWIDTH 8 /* In bits.	 */
 349
 350#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 351
 352#undef MAX
 353#undef MIN
 354#define MAX(a, b) ((a) > (b) ? (a) : (b))
 355#define MIN(a, b) ((a) < (b) ? (a) : (b))
 356
 357typedef char boolean;
 358#define false 0
 359#define true 1
 360
 361static int re_match_2_internal ();
 362
 363/* These are the command codes that appear in compiled regular
 364   expressions.	 Some opcodes are followed by argument bytes.  A
 365   command code can specify any interpretation whatsoever for its
 366   arguments.  Zero bytes may appear in the compiled regular expression.  */
 367
 368typedef enum
 369{
 370  no_op = 0,
 371
 372  /* Succeed right away--no more backtracking.	*/
 373  succeed,
 374
 375	/* Followed by one byte giving n, then by n literal bytes.  */
 376  exactn,
 377
 378	/* Matches any (more or less) character.  */
 379  anychar,
 380
 381	/* Matches any one char belonging to specified set.  First
 382	   following byte is number of bitmap bytes.  Then come bytes
 383	   for a bitmap saying which chars are in.  Bits in each byte
 384	   are ordered low-bit-first.  A character is in the set if its
 385	   bit is 1.  A character too large to have a bit in the map is
 386	   automatically not in the set.  */
 387  charset,
 388
 389	/* Same parameters as charset, but match any character that is
 390	   not one of those specified.	*/
 391  charset_not,
 392
 393	/* Start remembering the text that is matched, for storing in a
 394	   register.  Followed by one byte with the register number, in
 395	   the range 0 to one less than the pattern buffer's re_nsub
 396	   field.  Then followed by one byte with the number of groups
 397	   inner to this one.  (This last has to be part of the
 398	   start_memory only because we need it in the on_failure_jump
 399	   of re_match_2.)  */
 400  start_memory,
 401
 402	/* Stop remembering the text that is matched and store it in a
 403	   memory register.  Followed by one byte with the register
 404	   number, in the range 0 to one less than `re_nsub' in the
 405	   pattern buffer, and one byte with the number of inner groups,
 406	   just like `start_memory'.  (We need the number of inner
 407	   groups here because we don't have any easy way of finding the
 408	   corresponding start_memory when we're at a stop_memory.)  */
 409  stop_memory,
 410
 411	/* Match a duplicate of something remembered. Followed by one
 412	   byte containing the register number.	 */
 413  duplicate,
 414
 415	/* Fail unless at beginning of line.  */
 416  begline,
 417
 418	/* Fail unless at end of line.	*/
 419  endline,
 420
 421	/* Succeeds if at beginning of buffer (if emacs) or at beginning
 422	   of string to be matched (if not).  */
 423  begbuf,
 424
 425	/* Analogously, for end of buffer/string.  */
 426  endbuf,
 427
 428	/* Followed by two byte relative address to which to jump.  */
 429  jump,
 430
 431	/* Same as jump, but marks the end of an alternative.  */
 432  jump_past_alt,
 433
 434	/* Followed by two-byte relative address of place to resume at
 435	   in case of failure.	*/
 436  on_failure_jump,
 437
 438	/* Like on_failure_jump, but pushes a placeholder instead of the
 439	   current string position when executed.  */
 440  on_failure_keep_string_jump,
 441
 442	/* Throw away latest failure point and then jump to following
 443	   two-byte relative address.  */
 444  pop_failure_jump,
 445
 446	/* Change to pop_failure_jump if know won't have to backtrack to
 447	   match; otherwise change to jump.  This is used to jump
 448	   back to the beginning of a repeat.  If what follows this jump
 449	   clearly won't match what the repeat does, such that we can be
 450	   sure that there is no use backtracking out of repetitions
 451	   already matched, then we change it to a pop_failure_jump.
 452	   Followed by two-byte address.  */
 453  maybe_pop_jump,
 454
 455	/* Jump to following two-byte address, and push a dummy failure
 456	   point. This failure point will be thrown away if an attempt
 457	   is made to use it for a failure.  A `+' construct makes this
 458	   before the first repeat.  Also used as an intermediary kind
 459	   of jump when compiling an alternative.  */
 460  dummy_failure_jump,
 461
 462	/* Push a dummy failure point and continue.  Used at the end of
 463	   alternatives.  */
 464  push_dummy_failure,
 465
 466	/* Followed by two-byte relative address and two-byte number n.
 467	   After matching N times, jump to the address upon failure.  */
 468  succeed_n,
 469
 470	/* Followed by two-byte relative address, and two-byte number n.
 471	   Jump to the address N times, then fail.  */
 472  jump_n,
 473
 474	/* Set the following two-byte relative address to the
 475	   subsequent two-byte number.	The address *includes* the two
 476	   bytes of number.  */
 477  set_number_at,
 478
 479  wordchar,	/* Matches any word-constituent character.  */
 480  notwordchar,	/* Matches any char that is not a word-constituent.  */
 481
 482  wordbeg,	/* Succeeds if at word beginning.  */
 483  wordend,	/* Succeeds if at word end.  */
 484
 485  wordbound,	/* Succeeds if at a word boundary.  */
 486  notwordbound	/* Succeeds if not at a word boundary.	*/
 487
 488#ifdef emacs
 489  ,before_dot,	/* Succeeds if before point.  */
 490  at_dot,	/* Succeeds if at point.  */
 491  after_dot,	/* Succeeds if after point.  */
 492
 493	/* Matches any character whose syntax is specified.  Followed by
 494	   a byte which contains a syntax code, e.g., Sword.  */
 495  syntaxspec,
 496
 497	/* Matches any character whose syntax is not that specified.  */
 498  notsyntaxspec,
 499
 500  /* Matches any character whose category-set contains the specified
 501     category.	The operator is followed by a byte which contains a
 502     category code (mnemonic ASCII character).	*/
 503  categoryspec,
 504
 505  /* Matches any character whose category-set does not contain the
 506     specified category.  The operator is followed by a byte which
 507     contains the category code (mnemonic ASCII character).  */
 508  notcategoryspec
 509#endif /* emacs */
 510} re_opcode_t;
 511
 512/* Common operations on the compiled pattern.  */
 513
 514/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 515
 516#define STORE_NUMBER(destination, number)				\
 517  do {									\
 518    (destination)[0] = (number) & 0377;					\
 519    (destination)[1] = (number) >> 8;					\
 520  } while (0)
 521
 522/* Same as STORE_NUMBER, except increment DESTINATION to
 523   the byte after where the number is stored.  Therefore, DESTINATION
 524   must be an lvalue.  */
 525
 526#define STORE_NUMBER_AND_INCR(destination, number)			\
 527  do {									\
 528    STORE_NUMBER (destination, number);					\
 529    (destination) += 2;							\
 530  } while (0)
 531
 532/* Put into DESTINATION a number stored in two contiguous bytes starting
 533   at SOURCE.  */
 534
 535#define EXTRACT_NUMBER(destination, source)				\
 536  do {									\
 537    (destination) = *(source) & 0377;					\
 538    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
 539  } while (0)
 540
 541#ifdef DEBUG
 542static void
 543extract_number (dest, source)
 544    int *dest;
 545    unsigned char *source;
 546{
 547  int temp = SIGN_EXTEND_CHAR (*(source + 1));
 548  *dest = *source & 0377;
 549  *dest += temp << 8;
 550}
 551
 552#ifndef EXTRACT_MACROS /* To debug the macros.	*/
 553#undef EXTRACT_NUMBER
 554#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
 555#endif /* not EXTRACT_MACROS */
 556
 557#endif /* DEBUG */
 558
 559/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 560   SOURCE must be an lvalue.  */
 561
 562#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
 563  do {									\
 564    EXTRACT_NUMBER (destination, source);				\
 565    (source) += 2;							\
 566  } while (0)
 567
 568#ifdef DEBUG
 569static void
 570extract_number_and_incr (destination, source)
 571    int *destination;
 572    unsigned char **source;
 573{
 574  extract_number (destination, *source);
 575  *source += 2;
 576}
 577
 578#ifndef EXTRACT_MACROS
 579#undef EXTRACT_NUMBER_AND_INCR
 580#define EXTRACT_NUMBER_AND_INCR(dest, src) \
 581  extract_number_and_incr (&dest, &src)
 582#endif /* not EXTRACT_MACROS */
 583
 584#endif /* DEBUG */
 585
 586/* Store a multibyte character in three contiguous bytes starting
 587   DESTINATION, and increment DESTINATION to the byte after where the
 588   character is stored.	 Therefore, DESTINATION must be an lvalue.  */
 589
 590#define STORE_CHARACTER_AND_INCR(destination, character)	\
 591  do {								\
 592    (destination)[0] = (character) & 0377;			\
 593    (destination)[1] = ((character) >> 8) & 0377;		\
 594    (destination)[2] = (character) >> 16;			\
 595    (destination) += 3;						\
 596  } while (0)
 597
 598/* Put into DESTINATION a character stored in three contiguous bytes
 599   starting at SOURCE.	*/
 600
 601#define EXTRACT_CHARACTER(destination, source)	\
 602  do {						\
 603    (destination) = ((source)[0]		\
 604		     | ((source)[1] << 8)	\
 605		     | ((source)[2] << 16));	\
 606  } while (0)
 607
 608
 609/* Macros for charset. */
 610
 611/* Size of bitmap of charset P in bytes.  P is a start of charset,
 612   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
 613#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
 614
 615/* Nonzero if charset P has range table.  */
 616#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
 617
/* Return the address of the range table of charset P.  This is not the
   start of the table proper but the place just before it, where the
   number of ranges is stored.  `2 +' means to skip re_opcode_t and the
   size of the bitmap.  */
 621#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
 622
 623/* Test if C is listed in the bitmap of charset P.  */
 624#define CHARSET_LOOKUP_BITMAP(p, c)				\
 625  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
 626   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
 627
 628/* Return the address of end of RANGE_TABLE.  COUNT is number of
 629   ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
 630   is start of range and end of range.	`* 3' is size of each start
 631   and end.  */
 632#define CHARSET_RANGE_TABLE_END(range_table, count)	\
 633  ((range_table) + (count) * 2 * 3)
 634
 635/* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
 636   COUNT is number of ranges in RANGE_TABLE.  */
 637#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
 638  do									\
 639    {									\
 640      int range_start, range_end;					\
 641      unsigned char *p;							\
 642      unsigned char *range_table_end					\
 643	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
 644									\
 645      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
 646	{								\
 647	  EXTRACT_CHARACTER (range_start, p);				\
 648	  EXTRACT_CHARACTER (range_end, p + 3);				\
 649									\
 650	  if (range_start <= (c) && (c) <= range_end)			\
 651	    {								\
 652	      (not) = !(not);						\
 653	      break;							\
 654	    }								\
 655	}								\
 656    }									\
 657  while (0)
 658
 659/* Test if C is in range table of CHARSET.  The flag NOT is negated if
 660   C is listed in it.  */
 661#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
 662  do									\
 663    {									\
 664      /* Number of ranges in range table. */				\
 665      int count;							\
 666      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
 667									\
 668      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
 669      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
 670    }									\
 671  while (0)
 672
 673/* If DEBUG is defined, Regex prints many voluminous messages about what
 674   it is doing (if the variable `debug' is nonzero).  If linked with the
 675   main program in `iregex.c', you can enter patterns and strings
 676   interactively.  And if linked with the main program in `main.c' and
 677   the other test files, you can run the already-written tests.	 */
 678
 679#ifdef DEBUG
 680
 681/* We use standard I/O for debugging.  */
 682#include <stdio.h>
 683
 684/* It is useful to test things that ``must'' be true when debugging.  */
 685#include <assert.h>
 686
 687static int debug = 0;
 688
 689#define DEBUG_STATEMENT(e) e
 690#define DEBUG_PRINT1(x) if (debug) printf (x)
 691#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
 692#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
 693#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
 694#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
 695  if (debug) print_partial_compiled_pattern (s, e)
 696#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
 697  if (debug) print_double_string (w, s1, sz1, s2, sz2)
 698
 699
 700/* Print the fastmap in human-readable form.  */
 701
 702void
 703print_fastmap (fastmap)
 704    char *fastmap;
 705{
 706  unsigned was_a_range = 0;
 707  unsigned i = 0;
 708
 709  while (i < (1 << BYTEWIDTH))
 710    {
 711      if (fastmap[i++])
 712	{
 713	  was_a_range = 0;
 714	  putchar (i - 1);
 715	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 716	    {
 717	      was_a_range = 1;
 718	      i++;
 719	    }
 720	  if (was_a_range)
 721	    {
 722	      printf ("-");
 723	      putchar (i - 1);
 724	    }
 725	}
 726    }
 727  putchar ('\n');
 728}
 729
 730
 731/* Print a compiled pattern string in human-readable form, starting at
 732   the START pointer into it and ending just before the pointer END.  */
 733
 734void
 735print_partial_compiled_pattern (start, end)
 736    unsigned char *start;
 737    unsigned char *end;
 738{
 739  int mcnt, mcnt2;
 740  unsigned char *p = start;
 741  unsigned char *pend = end;
 742
 743  if (start == NULL)
 744    {
 745      printf ("(null)\n");
 746      return;
 747    }
 748
 749  /* Loop over pattern commands.  */
 750  while (p < pend)
 751    {
 752      printf ("%d:\t", p - start);
 753
 754      switch ((re_opcode_t) *p++)
 755	{
 756	case no_op:
 757	  printf ("/no_op");
 758	  break;
 759
 760	case exactn:
 761	  mcnt = *p++;
 762	  printf ("/exactn/%d", mcnt);
 763	  do
 764	    {
 765	      putchar ('/');
 766	      putchar (*p++);
 767	    }
 768	  while (--mcnt);
 769	  break;
 770
 771	case start_memory:
 772	  mcnt = *p++;
 773	  printf ("/start_memory/%d/%d", mcnt, *p++);
 774	  break;
 775
 776	case stop_memory:
 777	  mcnt = *p++;
 778	  printf ("/stop_memory/%d/%d", mcnt, *p++);
 779	  break;
 780
 781	case duplicate:
 782	  printf ("/duplicate/%d", *p++);
 783	  break;
 784
 785	case anychar:
 786	  printf ("/anychar");
 787	  break;
 788
 789	case charset:
 790	case charset_not:
 791	  {
 792	    register int c, last = -100;
 793	    register int in_range = 0;
 794
 795	    printf ("/charset [%s",
 796		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
 797
 798	    assert (p + *p < pend);
 799
 800	    for (c = 0; c < 256; c++)
 801	      if (c / 8 < *p
 802		  && (p[1 + (c/8)] & (1 << (c % 8))))
 803		{
 804		  /* Are we starting a range?  */
 805		  if (last + 1 == c && ! in_range)
 806		    {
 807		      putchar ('-');
 808		      in_range = 1;
 809		    }
 810		  /* Have we broken a range?  */
 811		  else if (last + 1 != c && in_range)
 812	      {
 813		      putchar (last);
 814		      in_range = 0;
 815		    }
 816
 817		  if (! in_range)
 818		    putchar (c);
 819
 820		  last = c;
 821	      }
 822
 823	    if (in_range)
 824	      putchar (last);
 825
 826	    putchar (']');
 827
 828	    p += 1 + *p;
 829	  }
 830	  break;
 831
 832	case begline:
 833	  printf ("/begline");
 834	  break;
 835
 836	case endline:
 837	  printf ("/endline");
 838	  break;
 839
 840	case on_failure_jump:
 841	  extract_number_and_incr (&mcnt, &p);
 842	  printf ("/on_failure_jump to %d", p + mcnt - start);
 843	  break;
 844
 845	case on_failure_keep_string_jump:
 846	  extract_number_and_incr (&mcnt, &p);
 847	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
 848	  break;
 849
 850	case dummy_failure_jump:
 851	  extract_number_and_incr (&mcnt, &p);
 852	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
 853	  break;
 854
 855	case push_dummy_failure:
 856	  printf ("/push_dummy_failure");
 857	  break;
 858
 859	case maybe_pop_jump:
 860	  extract_number_and_incr (&mcnt, &p);
 861	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
 862	  break;
 863
 864	case pop_failure_jump:
 865	  extract_number_and_incr (&mcnt, &p);
 866	  printf ("/pop_failure_jump to %d", p + mcnt - start);
 867	  break;
 868
 869	case jump_past_alt:
 870	  extract_number_and_incr (&mcnt, &p);
 871	  printf ("/jump_past_alt to %d", p + mcnt - start);
 872	  break;
 873
 874	case jump:
 875	  extract_number_and_incr (&mcnt, &p);
 876	  printf ("/jump to %d", p + mcnt - start);
 877	  break;
 878
 879	case succeed_n:
 880	  extract_number_and_incr (&mcnt, &p);
 881	  extract_number_and_incr (&mcnt2, &p);
 882	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
 883	  break;
 884
 885	case jump_n:
 886	  extract_number_and_incr (&mcnt, &p);
 887	  extract_number_and_incr (&mcnt2, &p);
 888	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
 889	  break;
 890
 891	case set_number_at:
 892	  extract_number_and_incr (&mcnt, &p);
 893	  extract_number_and_incr (&mcnt2, &p);
 894	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
 895	  break;
 896
 897	case wordbound:
 898	  printf ("/wordbound");
 899	  break;
 900
 901	case notwordbound:
 902	  printf ("/notwordbound");
 903	  break;
 904
 905	case wordbeg:
 906	  printf ("/wordbeg");
 907	  break;
 908
 909	case wordend:
 910	  printf ("/wordend");
 911
 912#ifdef emacs
 913	case before_dot:
 914	  printf ("/before_dot");
 915	  break;
 916
 917	case at_dot:
 918	  printf ("/at_dot");
 919	  break;
 920
 921	case after_dot:
 922	  printf ("/after_dot");
 923	  break;
 924
 925	case syntaxspec:
 926	  printf ("/syntaxspec");
 927	  mcnt = *p++;
 928	  printf ("/%d", mcnt);
 929	  break;
 930
 931	case notsyntaxspec:
 932	  printf ("/notsyntaxspec");
 933	  mcnt = *p++;
 934	  printf ("/%d", mcnt);
 935	  break;
 936#endif /* emacs */
 937
 938	case wordchar:
 939	  printf ("/wordchar");
 940	  break;
 941
 942	case notwordchar:
 943	  printf ("/notwordchar");
 944	  break;
 945
 946	case begbuf:
 947	  printf ("/begbuf");
 948	  break;
 949
 950	case endbuf:
 951	  printf ("/endbuf");
 952	  break;
 953
 954	default:
 955	  printf ("?%d", *(p-1));
 956	}
 957
 958      putchar ('\n');
 959    }
 960
 961  printf ("%d:\tend of pattern.\n", p - start);
 962}
 963
 964
 965void
 966print_compiled_pattern (bufp)
 967    struct re_pattern_buffer *bufp;
 968{
 969  unsigned char *buffer = bufp->buffer;
 970
 971  print_partial_compiled_pattern (buffer, buffer + bufp->used);
 972  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
 973
 974  if (bufp->fastmap_accurate && bufp->fastmap)
 975    {
 976      printf ("fastmap: ");
 977      print_fastmap (bufp->fastmap);
 978    }
 979
 980  printf ("re_nsub: %d\t", bufp->re_nsub);
 981  printf ("regs_alloc: %d\t", bufp->regs_allocated);
 982  printf ("can_be_null: %d\t", bufp->can_be_null);
 983  printf ("newline_anchor: %d\n", bufp->newline_anchor);
 984  printf ("no_sub: %d\t", bufp->no_sub);
 985  printf ("not_bol: %d\t", bufp->not_bol);
 986  printf ("not_eol: %d\t", bufp->not_eol);
 987  printf ("syntax: %d\n", bufp->syntax);
 988  /* Perhaps we should print the translate table?  */
 989}
 990
 991
 992void
 993print_double_string (where, string1, size1, string2, size2)
 994    const char *where;
 995    const char *string1;
 996    const char *string2;
 997    int size1;
 998    int size2;
 999{
1000  unsigned this_char;
1001
1002  if (where == NULL)
1003    printf ("(null)");
1004  else
1005    {
1006      if (FIRST_STRING_P (where))
1007	{
1008	  for (this_char = where - string1; this_char < size1; this_char++)
1009	    putchar (string1[this_char]);
1010
1011	  where = string2;
1012	}
1013
1014      for (this_char = where - string2; this_char < size2; this_char++)
1015	putchar (string2[this_char]);
1016    }
1017}
1018
1019#else /* not DEBUG */
1020
/* Debugging support is compiled out in this branch: `assert' and every
   DEBUG_* hook below expand to nothing, so any arguments written at a
   use site are discarded by the preprocessor and never evaluated.  */
#undef assert
#define assert(e)

#define DEBUG_STATEMENT(e)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1031
1032#endif /* not DEBUG */
1033
1034/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1035   also be assigned to arbitrarily: each pattern buffer stores its own
1036   syntax, so it can be changed between regex compilations.  */
1037/* This has no initializer because initialized variables in Emacs
1038   become read-only after dumping.  */
1039reg_syntax_t re_syntax_options;
1040
1041
1042/* Specify the precise syntax of regexps for compilation.  This provides
1043   for compatibility for various utilities which historically have
1044   different, incompatible syntaxes.
1045
1046   The argument SYNTAX is a bit mask comprised of the various bits
1047   defined in regex.h.	We return the old syntax.  */
1048
1049reg_syntax_t
1050re_set_syntax (syntax)
1051    reg_syntax_t syntax;
1052{
1053  reg_syntax_t ret = re_syntax_options;
1054
1055  re_syntax_options = syntax;
1056  return ret;
1057}
1058
/* This table gives an error message for each of the error codes listed
   in regex.h.	Obviously the order here has to be same as there; the
   trailing comment on each entry names the code it stands for.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?

   Every string is wrapped in gettext_noop, which is defined elsewhere.
   Presumably it only marks the literal for message extraction and the
   actual translation happens where the message is used -- confirm
   against the macro's definition.  */

static const char *re_error_msgid[] =
  {
    gettext_noop ("Success"),	/* REG_NOERROR */
    gettext_noop ("No match"),	/* REG_NOMATCH */
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
  };
1084
1085/* Avoiding alloca during matching, to placate r_alloc.	 */
1086
1087/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1088   searching and matching functions should not call alloca.  On some
1089   systems, alloca is implemented in terms of malloc, and if we're
1090   using the relocating allocator routines, then malloc could cause a
1091   relocation, which might (if the strings being searched are in the
1092   ralloc heap) shift the data out from underneath the regexp
1093   routines.
1094
1095   Here's another reason to avoid allocation: Emacs
1096   processes input from X in a signal handler; processing X input may
1097   call malloc; if input arrives while a matching routine is calling
1098   malloc, then we're scrod.  But Emacs can't just block input while
1099   calling matching routines; then we don't notice interrupts when
1100   they come in.  So, Emacs blocks input around all regexp calls
1101   except the matching calls, which it leaves unprotected, in the
1102   faith that they will not malloc.  */
1103
/* Normally, this is fine.  Allocation during matching is allowed by
   default; the test further down withdraws the permission again for
   the configurations where it is unsafe.  */
#define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
#ifdef __GNUC__
#undef C_ALLOCA
#endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.

   Net effect of the test below: MATCH_MAY_ALLOCATE stays defined unless
   `emacs' is defined together with C_ALLOCA or REGEX_MALLOC.  */
#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
#undef MATCH_MAY_ALLOCATE
#endif
1121
1122
1123/* Failure stack declarations and macros; both re_compile_fastmap and
1124   re_match_2 use a failure stack.  These have to be macros because of
1125   REGEX_ALLOCATE_STACK.  */
1126
1127
/* Approximate number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  The #ifndef guard lets a build
   supply its own value before this point.  */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 20
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.	 The initial value depends on whether the
   matcher is allowed to allocate (MATCH_MAY_ALLOCATE).  */
#if defined (MATCH_MAY_ALLOCATE)
/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  In order for a larger
   value to work reliably, you have to try to make it accord
   with the process stack limit.
   NOTE(review): the value chosen below is 40000, far above the 4400
   this note warns about -- confirm that the limit is intended.  */
int re_max_failures = 40000;
#else
int re_max_failures = 4000;
#endif
1148
/* One slot of the failure stack.  The same storage holds either a
   pointer or an integer; which member is meaningful depends on what
   was pushed into that slot.  */
union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack itself: an array of slots plus its bookkeeping.  */
typedef struct
{
  fail_stack_elt_t *stack;		/* Base of the slot array.  */
  unsigned size;			/* Slot count compared against `avail' by FAIL_STACK_FULL.  */
  unsigned avail;			/* Offset of next open position.  */
} fail_stack_type;

/* Emptiness and fullness tests.  They take no argument: they are
   hard-wired to a variable named `fail_stack' (or, for the PTR
   variant, a pointer named `fail_stack_ptr') in the expanding scope.  */
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1167
1168
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack' in the expanding
   function.  When MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK obtains
   storage through REGEX_ALLOCATE_STACK and RESET_FAIL_STACK hands it to
   REGEX_FREE_STACK.  Otherwise INIT_FAIL_STACK only resets `avail'
   (leaving `stack' and `size' untouched) and RESET_FAIL_STACK expands
   to nothing.

   NOTE(review): the allocating variant requests room for
   INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE elements but records a
   `size' of only INIT_FAILURE_ALLOC -- confirm that this
   under-reporting is intended.  */

#ifdef MATCH_MAY_ALLOCATE
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
			    * sizeof (fail_stack_elt_t));		\
									\
    if (fail_stack.stack == NULL)					\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
    fail_stack.avail = 0;						\
  } while (0)

#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
#else
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;						\
  } while (0)

#define RESET_FAIL_STACK()
#endif
1195
1196
/* Grow FAIL_STACK by a factor of FAIL_STACK_GROWTH_FACTOR, up to a limit
   which allows approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.	*/

/* Factor to increase the failure stack size by
   when we increase it.
   This used to be 2, but 2 was too wasteful
   because the old discarded stacks added up to as much space
   as the ultimate, maximum-size stack.  */
#define FAIL_STACK_GROWTH_FACTOR 4

/* The limit test and the new size are both computed in bytes
   (size * sizeof (fail_stack_elt_t)) and capped at
   re_max_failures * TYPICAL_FAILURE_SIZE; the stored `size' is then
   converted back to a count of elements.  The result of
   REGEX_REALLOCATE_STACK is assigned straight to `stack', so after a
   failed reallocation `stack' holds NULL.  The FAIL_STACK argument is
   expanded several times and must therefore be free of side effects.  */
#define GROW_FAIL_STACK(fail_stack)					\
  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
   ? 0									\
   : ((fail_stack).stack						\
      = (fail_stack_elt_t *)						\
	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
		* FAIL_STACK_GROWTH_FACTOR))),				\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size						\
	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
		  * FAIL_STACK_GROWTH_FACTOR))				\
	    / sizeof (fail_stack_elt_t)),				\
	 1)))
1232
1233
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.
   NOTE(review): the fullness test goes through FAIL_STACK_FULL, which
   inspects the variable `fail_stack' rather than the FAIL_STACK
   argument -- the two agree only when the argument is `fail_stack'.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((FAIL_STACK_FULL ()							\
    && !GROW_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
      1))

/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.	Performs no capacity
   check of its own.  */
#define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.	Performs no capacity
   check of its own.  */
#define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.	Performs no capacity
   check of its own.  */
#define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] =  (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each decrements `avail'
   and yields the slot that was on top.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG defined the id travels on the stack as an integer slot;
   otherwise both macros expand to nothing.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_INT
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
1276
1277
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs be declared.  The expansion also reads lowest_active_reg
   and highest_active_reg.  GROW_FAIL_STACK requires `destination' be
   declared, which this macro does itself.

   Does `return FAILURE_CODE' if runs out of memory.

   Push order (POP_FAILURE_POINT undoes it in reverse): for each
   register from lowest_active_reg up to highest_active_reg its start,
   end and info word; then lowest_active_reg, highest_active_reg,
   PATTERN_PLACE, STRING_PLACE and, when DEBUG is defined, the failure
   id.  */

#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    int this_reg;							\
									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
	if (!GROW_FAIL_STACK (fail_stack))				\
	  return failure_code;						\
									\
	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
1361
/* This is the number of items that are pushed and popped on the stack
   for each register (its start, its end and its info word).  */
#define NUM_REG_ITEMS  3

/* Individual items aside from the registers: the low and high active
   register numbers, the pattern position and the string position.  */
#ifdef DEBUG
#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#else
#define NUM_NONREG_ITEMS 4
#endif

/* Estimate the size of data pushed by a typical failure stack entry.
   An estimate is all we need, because all we use this for
   is to choose a limit for how big to make the failure stack.  */

#define TYPICAL_FAILURE_SIZE 20

/* This is how many items we actually use for a failure point.
   It depends on the regexp.  The `0 ? 0 : ...' conditional is constant,
   so the register count always comes from the active-register range.
   Assumes the variables `highest_active_reg' and `lowest_active_reg'.  */
#define NUM_FAILURE_ITEMS				\
  (((0							\
     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
    * NUM_REG_ITEMS)					\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.
   Assumes the variable `fail_stack'.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1389
1390
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.	The expansion
   additionally clears `set_regs_matched_done'.

   The macro expands to a bare brace block, not a `do ... while (0)'
   statement.  Because of the constant `if (1)', the `else' arm that
   resets registers above HIGH_REG is never executed.  */

#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
  int this_reg;								\
  const unsigned char *string_temp;					\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const char *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (unsigned) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
									\
  low_reg = (unsigned) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
									\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
									\
	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
									\
	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1471
1472
1473
1474/* Structure for per-register (a.k.a. per-group) information.
1475   Other register information, such as the
1476   starting and ending positions (which are addresses), and the list of
1477   inner groups (which is a bits list) are maintained in separate
1478   variables.
1479
1480   We are making a (strictly speaking) nonportable assumption here: that
1481   the compiler will pack our bit fields into something that fits into
1482   the type of `word', i.e., is something that fits into one item on the
1483   failure stack.  */
1484
typedef union
{
  /* The whole record viewed as a single failure-stack element; this is
     the member PUSH_FAILURE_POINT saves and POP_FAILURE_POINT restores.  */
  fail_stack_elt_t word;
  struct
  {
      /* This field is one if this group can match the empty string,
	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
#define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    /* The next two flags are both set to 1 by SET_REGS_MATCHED.  */
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} register_info_type;

/* Accessors for the individual bit fields of a register_info_type R.
   Each expands to the field itself, so it can be read or assigned.  */
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#define IS_ACTIVE(R)  ((R).bits.is_active)
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1504
1505
/* Call this when have matched a real character; it sets `matched' flags
   for the subexpressions which we are currently inside.  Also records
   that those subexprs have matched.

   Assumes the variables `set_regs_matched_done', `lowest_active_reg',
   `highest_active_reg' and `reg_info'.  The work is skipped while
   `set_regs_matched_done' is nonzero; the macro sets that flag itself
   before walking the registers from lowest_active_reg to
   highest_active_reg.  */
#define SET_REGS_MATCHED()						\
  do									\
    {									\
      if (!set_regs_matched_done)					\
	{								\
	  unsigned r;							\
	  set_regs_matched_done = 1;					\
	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
	    {								\
	      MATCHED_SOMETHING (reg_info[r])				\
		= EVER_MATCHED_SOMETHING (reg_info[r])			\
		= 1;							\
	    }								\
	}								\
    }									\
  while (0)
1525
/* Registers are set to a sentinel when they haven't yet matched.
   The sentinel is the address of a file-local dummy object;
   REG_UNSET (e) tests a register value against it.  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1530
/* Subroutine declarations and macros for regex_compile.
   These are old-style declarations with empty parameter lists, so the
   compiler does not check argument counts or types at the call sites.  */

static void store_op1 (), store_op2 ();
static void insert_op1 (), insert_op2 ();
static boolean at_begline_loc_p (), at_endline_loc_p ();
static boolean group_in_compile_stack ();
1537
/* Fetch the next character in the uncompiled pattern---translating it
   if necessary.  Also cast from a signed character in the constant
   string passed to us by the user to an unsigned char that we can use
   as an array index (in, e.g., `translate').

   Assumes the variables `p', `pend' and `translate' of the expanding
   function, and makes that function `return REG_EEND' when the pattern
   is already exhausted.  The #ifndef lets an earlier definition of
   PATFETCH take precedence.  */
#ifndef PATFETCH
#define PATFETCH(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
  } while (0)
#endif

/* Fetch the next character in the uncompiled pattern, with no
   translation.	 Like PATFETCH, it uses `p' and `pend' and returns
   REG_EEND from the expanding function at the end of the pattern.  */
#define PATFETCH_RAW(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
  } while (0)

/* Go backwards one character in the pattern.  No check is made against
   the start of the pattern.  */
#define PATUNFETCH p--
1559
1560
/* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
   `char *', to avoid warnings when a string constant is passed.  But
   when we use a character as a subscript we must make it unsigned.

   Assumes the variable `translate'; D is expanded twice textually but
   only one arm of the conditional is evaluated.
   NOTE(review): the cast is to `unsigned', not `unsigned char', so a
   negative plain-char D would become a very large index -- confirm that
   callers only pass values already in unsigned-char range.  */
#ifndef TRANSLATE
#define TRANSLATE(d) \
  (RE_TRANSLATE_P (translate) \
   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
#endif
1570
1571
/* Macros for outputting the compiled pattern into `buffer'.
   They all assume the variables `b' (the current write position) and
   `bufp' (the pattern buffer being filled).  */

/* If the buffer isn't allocated when it comes in, use this.  */
#define INIT_BUF_SIZE  32

/* Make sure we have at least N more bytes of space in buffer.
   Expands to a bare `while' statement (not wrapped in do/while) that
   keeps invoking EXTEND_BUFFER until the request fits.  */
#define GET_BUFFER_SPACE(n)						\
    while (b - bufp->buffer + (n) > bufp->allocated)			\
      EXTEND_BUFFER ()

/* Make sure we have one more byte of buffer space and then add C to it.  */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.	*/
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)
1607
1608
/* Store a jump with opcode OP at LOC to location TO.  We store a
   relative address offset by the three bytes the jump itself occupies.
   LOC appears twice in each of the four expansions below, so it must
   be free of side effects.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 (op, loc, (to) - (loc) - 3)

/* Likewise, for a two-argument jump.  ARG is passed through to
   store_op2 unchanged.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 (op, loc, (to) - (loc) - 3, arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 (op, loc, (to) - (loc) - 3, b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1625
1626
1627/* This is not an arbitrary limit: the arguments which represent offsets
1628   into the pattern are two bytes long.	 So if 2^16 bytes turns out to
1629   be too small, many things would have to change.  */
1630#define MAX_BUF_SIZE (1L << 16)
1631
1632
1633/* E…

Large files files are truncated, but you can click here to view the full file