PageRenderTime 126ms CodeModel.GetById 45ms app.highlight 68ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/pcre/pcre_dfa_exec.c

http://github.com/feyeleanor/RubyGoLightly
C | 2920 lines | 2135 code | 374 blank | 411 comment | 717 complexity | 5a75071cbd89cdc47950d9729812e6fe MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*************************************************
   2*      Perl-Compatible Regular Expressions       *
   3*************************************************/
   4
   5/* PCRE is a library of functions to support regular expressions whose syntax
   6and semantics are as close as possible to those of the Perl 5 language.
   7
   8                       Written by Philip Hazel
   9           Copyright (c) 1997-2008 University of Cambridge
  10
  11-----------------------------------------------------------------------------
  12Redistribution and use in source and binary forms, with or without
  13modification, are permitted provided that the following conditions are met:
  14
  15    * Redistributions of source code must retain the above copyright notice,
  16      this list of conditions and the following disclaimer.
  17
  18    * Redistributions in binary form must reproduce the above copyright
  19      notice, this list of conditions and the following disclaimer in the
  20      documentation and/or other materials provided with the distribution.
  21
  22    * Neither the name of the University of Cambridge nor the names of its
  23      contributors may be used to endorse or promote products derived from
  24      this software without specific prior written permission.
  25
  26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36POSSIBILITY OF SUCH DAMAGE.
  37-----------------------------------------------------------------------------
  38*/
  39
  40
  41/* This module contains the external function pcre_dfa_exec(), which is an
  42alternative matching function that uses a sort of DFA algorithm (not a true
  43FSM). This is NOT Perl-compatible, but it has advantages in certain
  44applications. */
  45
  46
  47#ifdef HAVE_CONFIG_H
  48#include "config.h"
  49#endif
  50
  51#define NLBLOCK md             /* Block containing newline information */
  52#define PSSTART start_subject  /* Field containing processed string start */
  53#define PSEND   end_subject    /* Field containing processed string end */
  54
  55#include "pcre_internal.h"
  56
  57
  58/* For use to indent debugging output */
  59
  60#define SP "                   "
  61
  62
  63
  64/*************************************************
  65*      Code parameters and static tables         *
  66*************************************************/
  67
  68/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
  69into others, under special conditions. A gap of 20 between the blocks should be
  70enough. The resulting opcodes don't have to be less than 256 because they are
  71never stored, so we push them well clear of the normal opcodes. */
  72
  73#define OP_PROP_EXTRA       300
  74#define OP_EXTUNI_EXTRA     320
  75#define OP_ANYNL_EXTRA      340
  76#define OP_HSPACE_EXTRA     360
  77#define OP_VSPACE_EXTRA     380
  78
  79
  80/* This table identifies those opcodes that are followed immediately by a
  81character that is to be tested in some way. This makes it possible to
  82centralize the loading of these characters. In the case of Type * etc, the
  83"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
  84small value. ***NOTE*** If the start of this table is modified, the two tables
  85that follow must also be modified. */
  86
  87static const uschar coptable[] = {
  88  0,                             /* End                                    */
  89  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
  90  0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
  91  0, 0, 0,                       /* Any, AllAny, Anybyte                   */
  92  0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
  93  0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
  94  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
  95  1,                             /* Char                                   */
  96  1,                             /* Charnc                                 */
  97  1,                             /* not                                    */
  98  /* Positive single-char repeats                                          */
  99  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
 100  3, 3, 3,                       /* upto, minupto, exact                   */
 101  1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
 102  /* Negative single-char repeats - only for chars < 256                   */
 103  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
 104  3, 3, 3,                       /* NOT upto, minupto, exact               */
 105  1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */
 106  /* Positive type repeats                                                 */
 107  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
 108  3, 3, 3,                       /* Type upto, minupto, exact              */
 109  1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
 110  /* Character class & ref repeats                                         */
 111  0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
 112  0, 0,                          /* CRRANGE, CRMINRANGE                    */
 113  0,                             /* CLASS                                  */
 114  0,                             /* NCLASS                                 */
 115  0,                             /* XCLASS - variable length               */
 116  0,                             /* REF                                    */
 117  0,                             /* RECURSE                                */
 118  0,                             /* CALLOUT                                */
 119  0,                             /* Alt                                    */
 120  0,                             /* Ket                                    */
 121  0,                             /* KetRmax                                */
 122  0,                             /* KetRmin                                */
 123  0,                             /* Assert                                 */
 124  0,                             /* Assert not                             */
 125  0,                             /* Assert behind                          */
 126  0,                             /* Assert behind not                      */
 127  0,                             /* Reverse                                */
 128  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
 129  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
 130  0,                             /* CREF                                   */
 131  0,                             /* RREF                                   */
 132  0,                             /* DEF                                    */
 133  0, 0,                          /* BRAZERO, BRAMINZERO                    */
 134  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
 135  0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
 136};
 137
 138/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
 139and \w */
 140
 141static const uschar toptable1[] = {
 142  0, 0, 0, 0, 0, 0,
 143  ctype_digit, ctype_digit,
 144  ctype_space, ctype_space,
 145  ctype_word,  ctype_word,
 146  0, 0                            /* OP_ANY, OP_ALLANY */
 147};
 148
 149static const uschar toptable2[] = {
 150  0, 0, 0, 0, 0, 0,
 151  ctype_digit, 0,
 152  ctype_space, 0,
 153  ctype_word,  0,
 154  1, 1                            /* OP_ANY, OP_ALLANY */
 155};
 156
 157
 158/* Structure for holding data about a particular state, which is in effect the
 159current data for an active path through the match tree. It must consist
 160entirely of ints because the working vector we are passed, and which we put
 161these structures in, is a vector of ints. */
 162
 163typedef struct stateblock {
 164  int offset;                     /* Offset to opcode */
 165  int count;                      /* Count for repeats */
 166  int ims;                        /* ims flag bits */
 167  int data;                       /* Some use extra data */
 168} stateblock;
 169
 170#define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
 171
 172
 173#ifdef DEBUG
 174/*************************************************
 175*             Print character string             *
 176*************************************************/
 177
 178/* Character string printing function for debugging.
 179
 180Arguments:
 181  p            points to string
 182  length       number of bytes
 183  f            where to print
 184
 185Returns:       nothing
 186*/
 187
 188static void
 189pchars(unsigned char *p, int length, FILE *f)
 190{
 191int c;
 192while (length-- > 0)
 193  {
 194  if (isprint(c = *(p++)))
 195    fprintf(f, "%c", c);
 196  else
 197    fprintf(f, "\\x%02x", c);
 198  }
 199}
 200#endif
 201
 202
 203
 204/*************************************************
 205*    Execute a Regular Expression - DFA engine   *
 206*************************************************/
 207
 208/* This internal function applies a compiled pattern to a subject string,
 209starting at a given point, using a DFA engine. This function is called from the
 210external one, possibly multiple times if the pattern is not anchored. The
 211function calls itself recursively for some kinds of subpattern.
 212
 213Arguments:
 214  md                the match_data block with fixed information
 215  this_start_code   the opening bracket of this subexpression's code
 216  current_subject   where we currently are in the subject string
 217  start_offset      start offset in the subject string
 218  offsets           vector to contain the matching string offsets
 219  offsetcount       size of same
 220  workspace         vector of workspace
 221  wscount           size of same
 222  ims               the current ims flags
 223  rlevel            function call recursion level
 224  recursing         regex recursive call level
 225
 226Returns:            > 0 => number of match offset pairs placed in offsets
 227                    = 0 => offsets overflowed; longest matches are present
 228                     -1 => failed to match
 229                   < -1 => some kind of unexpected problem
 230
 231The following macros are used for adding states to the two state vectors (one
 232for the current character, one for the following character). */
 233
 234#define ADD_ACTIVE(x,y) \
 235  if (active_count++ < wscount) \
 236    { \
 237    next_active_state->offset = (x); \
 238    next_active_state->count  = (y); \
 239    next_active_state->ims    = ims; \
 240    next_active_state++; \
 241    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
 242    } \
 243  else return PCRE_ERROR_DFA_WSSIZE
 244
 245#define ADD_ACTIVE_DATA(x,y,z) \
 246  if (active_count++ < wscount) \
 247    { \
 248    next_active_state->offset = (x); \
 249    next_active_state->count  = (y); \
 250    next_active_state->ims    = ims; \
 251    next_active_state->data   = (z); \
 252    next_active_state++; \
 253    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
 254    } \
 255  else return PCRE_ERROR_DFA_WSSIZE
 256
 257#define ADD_NEW(x,y) \
 258  if (new_count++ < wscount) \
 259    { \
 260    next_new_state->offset = (x); \
 261    next_new_state->count  = (y); \
 262    next_new_state->ims    = ims; \
 263    next_new_state++; \
 264    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
 265    } \
 266  else return PCRE_ERROR_DFA_WSSIZE
 267
 268#define ADD_NEW_DATA(x,y,z) \
 269  if (new_count++ < wscount) \
 270    { \
 271    next_new_state->offset = (x); \
 272    next_new_state->count  = (y); \
 273    next_new_state->ims    = ims; \
 274    next_new_state->data   = (z); \
 275    next_new_state++; \
 276    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
 277    } \
 278  else return PCRE_ERROR_DFA_WSSIZE
 279
 280/* And now, here is the code */
 281
 282static int
 283internal_dfa_exec(
 284  dfa_match_data *md,
 285  const uschar *this_start_code,
 286  const uschar *current_subject,
 287  int start_offset,
 288  int *offsets,
 289  int offsetcount,
 290  int *workspace,
 291  int wscount,
 292  int ims,
 293  int  rlevel,
 294  int  recursing)
 295{
 296stateblock *active_states, *new_states, *temp_states;
 297stateblock *next_active_state, *next_new_state;
 298
 299const uschar *ctypes, *lcc, *fcc;
 300const uschar *ptr;
 301const uschar *end_code, *first_op;
 302
 303int active_count, new_count, match_count;
 304
 305/* Some fields in the md block are frequently referenced, so we load them into
 306independent variables in the hope that this will perform better. */
 307
 308const uschar *start_subject = md->start_subject;
 309const uschar *end_subject = md->end_subject;
 310const uschar *start_code = md->start_code;
 311
 312#ifdef SUPPORT_UTF8
 313BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
 314#else
 315BOOL utf8 = FALSE;
 316#endif
 317
 318rlevel++;
 319offsetcount &= (-2);
 320
 321wscount -= 2;
 322wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
 323          (2 * INTS_PER_STATEBLOCK);
 324
 325DPRINTF(("\n%.*s---------------------\n"
 326  "%.*sCall to internal_dfa_exec f=%d r=%d\n",
 327  rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
 328
 329ctypes = md->tables + ctypes_offset;
 330lcc = md->tables + lcc_offset;
 331fcc = md->tables + fcc_offset;
 332
 333match_count = PCRE_ERROR_NOMATCH;   /* A negative number */
 334
 335active_states = (stateblock *)(workspace + 2);
 336next_new_state = new_states = active_states + wscount;
 337new_count = 0;
 338
 339first_op = this_start_code + 1 + LINK_SIZE +
 340  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
 341
 342/* The first thing in any (sub) pattern is a bracket of some sort. Push all
 343the alternative states onto the list, and find out where the end is. This
 344makes it possible to use this function recursively, when we want to stop at a
 345matching internal ket rather than at the end.
 346
 347If the first opcode in the first alternative is OP_REVERSE, we are dealing with
 348a backward assertion. In that case, we have to find out the maximum amount to
 349move back, and set up each alternative appropriately. */
 350
 351if (*first_op == OP_REVERSE)
 352  {
 353  int max_back = 0;
 354  int gone_back;
 355
 356  end_code = this_start_code;
 357  do
 358    {
 359    int back = GET(end_code, 2+LINK_SIZE);
 360    if (back > max_back) max_back = back;
 361    end_code += GET(end_code, 1);
 362    }
 363  while (*end_code == OP_ALT);
 364
 365  /* If we can't go back the amount required for the longest lookbehind
 366  pattern, go back as far as we can; some alternatives may still be viable. */
 367
 368#ifdef SUPPORT_UTF8
 369  /* In character mode we have to step back character by character */
 370
 371  if (utf8)
 372    {
 373    for (gone_back = 0; gone_back < max_back; gone_back++)
 374      {
 375      if (current_subject <= start_subject) break;
 376      current_subject--;
 377      while (current_subject > start_subject &&
 378             (*current_subject & 0xc0) == 0x80)
 379        current_subject--;
 380      }
 381    }
 382  else
 383#endif
 384
 385  /* In byte-mode we can do this quickly. */
 386
 387    {
 388    gone_back = (current_subject - max_back < start_subject)?
 389      current_subject - start_subject : max_back;
 390    current_subject -= gone_back;
 391    }
 392
 393  /* Now we can process the individual branches. */
 394
 395  end_code = this_start_code;
 396  do
 397    {
 398    int back = GET(end_code, 2+LINK_SIZE);
 399    if (back <= gone_back)
 400      {
 401      int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
 402      ADD_NEW_DATA(-bstate, 0, gone_back - back);
 403      }
 404    end_code += GET(end_code, 1);
 405    }
 406  while (*end_code == OP_ALT);
 407 }
 408
 409/* This is the code for a "normal" subpattern (not a backward assertion). The
 410start of a whole pattern is always one of these. If we are at the top level,
 411we may be asked to restart matching from the same point that we reached for a
 412previous partial match. We still have to scan through the top-level branches to
 413find the end state. */
 414
 415else
 416  {
 417  end_code = this_start_code;
 418
 419  /* Restarting */
 420
 421  if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
 422    {
 423    do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
 424    new_count = workspace[1];
 425    if (!workspace[0])
 426      memcpy(new_states, active_states, new_count * sizeof(stateblock));
 427    }
 428
 429  /* Not restarting */
 430
 431  else
 432    {
 433    int length = 1 + LINK_SIZE +
 434      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
 435    do
 436      {
 437      ADD_NEW(end_code - start_code + length, 0);
 438      end_code += GET(end_code, 1);
 439      length = 1 + LINK_SIZE;
 440      }
 441    while (*end_code == OP_ALT);
 442    }
 443  }
 444
 445workspace[0] = 0;    /* Bit indicating which vector is current */
 446
 447DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
 448
 449/* Loop for scanning the subject */
 450
 451ptr = current_subject;
 452for (;;)
 453  {
 454  int i, j;
 455  int clen, dlen;
 456  unsigned int c, d;
 457
 458  /* Make the new state list into the active state list and empty the
 459  new state list. */
 460
 461  temp_states = active_states;
 462  active_states = new_states;
 463  new_states = temp_states;
 464  active_count = new_count;
 465  new_count = 0;
 466
 467  workspace[0] ^= 1;              /* Remember for the restarting feature */
 468  workspace[1] = active_count;
 469
 470#ifdef DEBUG
 471  printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
 472  pchars((uschar *)ptr, strlen((char *)ptr), stdout);
 473  printf("\"\n");
 474
 475  printf("%.*sActive states: ", rlevel*2-2, SP);
 476  for (i = 0; i < active_count; i++)
 477    printf("%d/%d ", active_states[i].offset, active_states[i].count);
 478  printf("\n");
 479#endif
 480
 481  /* Set the pointers for adding new states */
 482
 483  next_active_state = active_states + active_count;
 484  next_new_state = new_states;
 485
 486  /* Load the current character from the subject outside the loop, as many
 487  different states may want to look at it, and we assume that at least one
 488  will. */
 489
 490  if (ptr < end_subject)
 491    {
 492    clen = 1;        /* Number of bytes in the character */
 493#ifdef SUPPORT_UTF8
 494    if (utf8) { GETCHARLEN(c, ptr, clen); } else
 495#endif  /* SUPPORT_UTF8 */
 496    c = *ptr;
 497    }
 498  else
 499    {
 500    clen = 0;        /* This indicates the end of the subject */
 501    c = NOTACHAR;    /* This value should never actually be used */
 502    }
 503
 504  /* Scan up the active states and act on each one. The result of an action
 505  may be to add more states to the currently active list (e.g. on hitting a
 506  parenthesis) or it may be to put states on the new list, for considering
 507  when we move the character pointer on. */
 508
 509  for (i = 0; i < active_count; i++)
 510    {
 511    stateblock *current_state = active_states + i;
 512    const uschar *code;
 513    int state_offset = current_state->offset;
 514    int count, codevalue;
 515
 516#ifdef DEBUG
 517    printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
 518    if (clen == 0) printf("EOL\n");
 519      else if (c > 32 && c < 127) printf("'%c'\n", c);
 520        else printf("0x%02x\n", c);
 521#endif
 522
 523    /* This variable is referred to implicitly in the ADD_xxx macros. */
 524
 525    ims = current_state->ims;
 526
 527    /* A negative offset is a special case meaning "hold off going to this
 528    (negated) state until the number of characters in the data field have
 529    been skipped". */
 530
 531    if (state_offset < 0)
 532      {
 533      if (current_state->data > 0)
 534        {
 535        DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
 536        ADD_NEW_DATA(state_offset, current_state->count,
 537          current_state->data - 1);
 538        continue;
 539        }
 540      else
 541        {
 542        current_state->offset = state_offset = -state_offset;
 543        }
 544      }
 545
 546    /* Check for a duplicate state with the same count, and skip if found. */
 547
 548    for (j = 0; j < i; j++)
 549      {
 550      if (active_states[j].offset == state_offset &&
 551          active_states[j].count == current_state->count)
 552        {
 553        DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
 554        goto NEXT_ACTIVE_STATE;
 555        }
 556      }
 557
 558    /* The state offset is the offset to the opcode */
 559
 560    code = start_code + state_offset;
 561    codevalue = *code;
 562
 563    /* If this opcode is followed by an inline character, load it. It is
 564    tempting to test for the presence of a subject character here, but that
 565    is wrong, because sometimes zero repetitions of the subject are
 566    permitted.
 567
 568    We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
 569    argument that is not a data character - but is always one byte long. We
 570    have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
 571    this case. To keep the other cases fast, convert these ones to new opcodes.
 572    */
 573
 574    if (coptable[codevalue] > 0)
 575      {
 576      dlen = 1;
 577#ifdef SUPPORT_UTF8
 578      if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
 579#endif  /* SUPPORT_UTF8 */
 580      d = code[coptable[codevalue]];
 581      if (codevalue >= OP_TYPESTAR)
 582        {
 583        switch(d)
 584          {
 585          case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
 586          case OP_NOTPROP:
 587          case OP_PROP: codevalue += OP_PROP_EXTRA; break;
 588          case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
 589          case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
 590          case OP_NOT_HSPACE:
 591          case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
 592          case OP_NOT_VSPACE:
 593          case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
 594          default: break;
 595          }
 596        }
 597      }
 598    else
 599      {
 600      dlen = 0;         /* Not strictly necessary, but compilers moan */
 601      d = NOTACHAR;     /* if these variables are not set. */
 602      }
 603
 604
 605    /* Now process the individual opcodes */
 606
 607    switch (codevalue)
 608      {
 609
 610/* ========================================================================== */
 611      /* Reached a closing bracket. If not at the end of the pattern, carry
 612      on with the next opcode. Otherwise, unless we have an empty string and
 613      PCRE_NOTEMPTY is set, save the match data, shifting up all previous
 614      matches so we always have the longest first. */
 615
 616      case OP_KET:
 617      case OP_KETRMIN:
 618      case OP_KETRMAX:
 619      if (code != end_code)
 620        {
 621        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
 622        if (codevalue != OP_KET)
 623          {
 624          ADD_ACTIVE(state_offset - GET(code, 1), 0);
 625          }
 626        }
 627      else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
 628        {
 629        if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
 630          else if (match_count > 0 && ++match_count * 2 >= offsetcount)
 631            match_count = 0;
 632        count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
 633        if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
 634        if (offsetcount >= 2)
 635          {
 636          offsets[0] = current_subject - start_subject;
 637          offsets[1] = ptr - start_subject;
 638          DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
 639            offsets[1] - offsets[0], current_subject));
 640          }
 641        if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
 642          {
 643          DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
 644            "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
 645            match_count, rlevel*2-2, SP));
 646          return match_count;
 647          }
 648        }
 649      break;
 650
 651/* ========================================================================== */
 652      /* These opcodes add to the current list of states without looking
 653      at the current character. */
 654
 655      /*-----------------------------------------------------------------*/
 656      case OP_ALT:
 657      do { code += GET(code, 1); } while (*code == OP_ALT);
 658      ADD_ACTIVE(code - start_code, 0);
 659      break;
 660
 661      /*-----------------------------------------------------------------*/
 662      case OP_BRA:
 663      case OP_SBRA:
 664      do
 665        {
 666        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
 667        code += GET(code, 1);
 668        }
 669      while (*code == OP_ALT);
 670      break;
 671
 672      /*-----------------------------------------------------------------*/
 673      case OP_CBRA:
 674      case OP_SCBRA:
 675      ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
 676      code += GET(code, 1);
 677      while (*code == OP_ALT)
 678        {
 679        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
 680        code += GET(code, 1);
 681        }
 682      break;
 683
 684      /*-----------------------------------------------------------------*/
 685      case OP_BRAZERO:
 686      case OP_BRAMINZERO:
 687      ADD_ACTIVE(state_offset + 1, 0);
 688      code += 1 + GET(code, 2);
 689      while (*code == OP_ALT) code += GET(code, 1);
 690      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
 691      break;
 692
 693      /*-----------------------------------------------------------------*/
 694      case OP_SKIPZERO:
 695      code += 1 + GET(code, 2);
 696      while (*code == OP_ALT) code += GET(code, 1);
 697      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
 698      break;
 699
 700      /*-----------------------------------------------------------------*/
 701      case OP_CIRC:
 702      if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
 703          ((ims & PCRE_MULTILINE) != 0 &&
 704            ptr != end_subject &&
 705            WAS_NEWLINE(ptr)))
 706        { ADD_ACTIVE(state_offset + 1, 0); }
 707      break;
 708
 709      /*-----------------------------------------------------------------*/
 710      case OP_EOD:
 711      if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
 712      break;
 713
 714      /*-----------------------------------------------------------------*/
 715      case OP_OPT:
 716      ims = code[1];
 717      ADD_ACTIVE(state_offset + 2, 0);
 718      break;
 719
 720      /*-----------------------------------------------------------------*/
 721      case OP_SOD:
 722      if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
 723      break;
 724
 725      /*-----------------------------------------------------------------*/
 726      case OP_SOM:
 727      if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
 728      break;
 729
 730
 731/* ========================================================================== */
 732      /* These opcodes inspect the next subject character, and sometimes
 733      the previous one as well, but do not have an argument. The variable
 734      clen contains the length of the current character and is zero if we are
 735      at the end of the subject. */
 736
 737      /*-----------------------------------------------------------------*/
 738      case OP_ANY:
 739      if (clen > 0 && !IS_NEWLINE(ptr))
 740        { ADD_NEW(state_offset + 1, 0); }
 741      break;
 742
 743      /*-----------------------------------------------------------------*/
 744      case OP_ALLANY:
 745      if (clen > 0)
 746        { ADD_NEW(state_offset + 1, 0); }
 747      break;
 748
 749      /*-----------------------------------------------------------------*/
 750      case OP_EODN:
 751      if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
 752        { ADD_ACTIVE(state_offset + 1, 0); }
 753      break;
 754
 755      /*-----------------------------------------------------------------*/
 756      case OP_DOLL:
 757      if ((md->moptions & PCRE_NOTEOL) == 0)
 758        {
 759        if (clen == 0 ||
 760            (IS_NEWLINE(ptr) &&
 761               ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
 762            ))
 763          { ADD_ACTIVE(state_offset + 1, 0); }
 764        }
 765      else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
 766        { ADD_ACTIVE(state_offset + 1, 0); }
 767      break;
 768
 769      /*-----------------------------------------------------------------*/
 770
 771      case OP_DIGIT:
 772      case OP_WHITESPACE:
 773      case OP_WORDCHAR:
 774      if (clen > 0 && c < 256 &&
 775            ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
 776        { ADD_NEW(state_offset + 1, 0); }
 777      break;
 778
 779      /*-----------------------------------------------------------------*/
 780      case OP_NOT_DIGIT:
 781      case OP_NOT_WHITESPACE:
 782      case OP_NOT_WORDCHAR:
 783      if (clen > 0 && (c >= 256 ||
 784            ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
 785        { ADD_NEW(state_offset + 1, 0); }
 786      break;
 787
 788      /*-----------------------------------------------------------------*/
 789      case OP_WORD_BOUNDARY:
 790      case OP_NOT_WORD_BOUNDARY:
 791        {
 792        int left_word, right_word;
 793
 794        if (ptr > start_subject)
 795          {
 796          const uschar *temp = ptr - 1;
 797#ifdef SUPPORT_UTF8
 798          if (utf8) BACKCHAR(temp);
 799#endif
 800          GETCHARTEST(d, temp);
 801          left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
 802          }
 803        else left_word = 0;
 804
 805        if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
 806          else right_word = 0;
 807
 808        if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
 809          { ADD_ACTIVE(state_offset + 1, 0); }
 810        }
 811      break;
 812
 813
 814      /*-----------------------------------------------------------------*/
 815      /* Check the next character by Unicode property. We will get here only
 816      if the support is in the binary; otherwise a compile-time error occurs.
 817      */
 818
 819#ifdef SUPPORT_UCP
 820      case OP_PROP:
 821      case OP_NOTPROP:
 822      if (clen > 0)
 823        {
 824        BOOL OK;
 825        const ucd_record * prop = GET_UCD(c);
 826        switch(code[1])
 827          {
 828          case PT_ANY:
 829          OK = TRUE;
 830          break;
 831
 832          case PT_LAMP:
 833          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
 834          break;
 835
 836          case PT_GC:
 837          OK = _pcre_ucp_gentype[prop->chartype] == code[2];
 838          break;
 839
 840          case PT_PC:
 841          OK = prop->chartype == code[2];
 842          break;
 843
 844          case PT_SC:
 845          OK = prop->script == code[2];
 846          break;
 847
 848          /* Should never occur, but keep compilers from grumbling. */
 849
 850          default:
 851          OK = codevalue != OP_PROP;
 852          break;
 853          }
 854
 855        if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
 856        }
 857      break;
 858#endif
 859
 860
 861
 862/* ========================================================================== */
 863      /* These opcodes likewise inspect the subject character, but have an
 864      argument that is not a data character. It is one of these opcodes:
 865      OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
 866      OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
 867
 868      case OP_TYPEPLUS:
 869      case OP_TYPEMINPLUS:
 870      case OP_TYPEPOSPLUS:
 871      count = current_state->count;  /* Already matched */
 872      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
 873      if (clen > 0)
 874        {
 875        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 876            (c < 256 &&
 877              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 878              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 879          {
 880          if (count > 0 && codevalue == OP_TYPEPOSPLUS)
 881            {
 882            active_count--;            /* Remove non-match possibility */
 883            next_active_state--;
 884            }
 885          count++;
 886          ADD_NEW(state_offset, count);
 887          }
 888        }
 889      break;
 890
 891      /*-----------------------------------------------------------------*/
 892      case OP_TYPEQUERY:
 893      case OP_TYPEMINQUERY:
 894      case OP_TYPEPOSQUERY:
 895      ADD_ACTIVE(state_offset + 2, 0);
 896      if (clen > 0)
 897        {
 898        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 899            (c < 256 &&
 900              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 901              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 902          {
 903          if (codevalue == OP_TYPEPOSQUERY)
 904            {
 905            active_count--;            /* Remove non-match possibility */
 906            next_active_state--;
 907            }
 908          ADD_NEW(state_offset + 2, 0);
 909          }
 910        }
 911      break;
 912
 913      /*-----------------------------------------------------------------*/
 914      case OP_TYPESTAR:
 915      case OP_TYPEMINSTAR:
 916      case OP_TYPEPOSSTAR:
 917      ADD_ACTIVE(state_offset + 2, 0);
 918      if (clen > 0)
 919        {
 920        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 921            (c < 256 &&
 922              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 923              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 924          {
 925          if (codevalue == OP_TYPEPOSSTAR)
 926            {
 927            active_count--;            /* Remove non-match possibility */
 928            next_active_state--;
 929            }
 930          ADD_NEW(state_offset, 0);
 931          }
 932        }
 933      break;
 934
 935      /*-----------------------------------------------------------------*/
 936      case OP_TYPEEXACT:
 937      count = current_state->count;  /* Number already matched */
 938      if (clen > 0)
 939        {
 940        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 941            (c < 256 &&
 942              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 943              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 944          {
 945          if (++count >= GET2(code, 1))
 946            { ADD_NEW(state_offset + 4, 0); }
 947          else
 948            { ADD_NEW(state_offset, count); }
 949          }
 950        }
 951      break;
 952
 953      /*-----------------------------------------------------------------*/
 954      case OP_TYPEUPTO:
 955      case OP_TYPEMINUPTO:
 956      case OP_TYPEPOSUPTO:
 957      ADD_ACTIVE(state_offset + 4, 0);
 958      count = current_state->count;  /* Number already matched */
 959      if (clen > 0)
 960        {
 961        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
 962            (c < 256 &&
 963              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
 964              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
 965          {
 966          if (codevalue == OP_TYPEPOSUPTO)
 967            {
 968            active_count--;           /* Remove non-match possibility */
 969            next_active_state--;
 970            }
 971          if (++count >= GET2(code, 1))
 972            { ADD_NEW(state_offset + 4, 0); }
 973          else
 974            { ADD_NEW(state_offset, count); }
 975          }
 976        }
 977      break;
 978
 979/* ========================================================================== */
 980      /* These are virtual opcodes that are used when something like
 981      OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_EXTUNI, OP_ANYNL, or a vertical or
 982      horizontal whitespace type (e.g. OP_VSPACE, OP_HSPACE) as its argument.
 983      This keeps the code above fast for the other cases. The argument is in d. */
 984
 985#ifdef SUPPORT_UCP
 986      case OP_PROP_EXTRA + OP_TYPEPLUS:
 987      case OP_PROP_EXTRA + OP_TYPEMINPLUS:
 988      case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
 989      count = current_state->count;           /* Already matched */
 990      if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
 991      if (clen > 0)
 992        {
 993        BOOL OK;
 994        const ucd_record * prop = GET_UCD(c);
 995        switch(code[2])
 996          {
 997          case PT_ANY:
 998          OK = TRUE;
 999          break;
1000
1001          case PT_LAMP:
1002          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1003          break;
1004
1005          case PT_GC:
1006          OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1007          break;
1008
1009          case PT_PC:
1010          OK = prop->chartype == code[3];
1011          break;
1012
1013          case PT_SC:
1014          OK = prop->script == code[3];
1015          break;
1016
1017          /* Should never occur, but keep compilers from grumbling. */
1018
1019          default:
1020          OK = codevalue != OP_PROP;
1021          break;
1022          }
1023
1024        if (OK == (d == OP_PROP))
1025          {
1026          if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1027            {
1028            active_count--;           /* Remove non-match possibility */
1029            next_active_state--;
1030            }
1031          count++;
1032          ADD_NEW(state_offset, count);
1033          }
1034        }
1035      break;
1036
1037      /*-----------------------------------------------------------------*/
1038      case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1039      case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1040      case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1041      count = current_state->count;  /* Already matched */
1042      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1043      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1044        {
1045        const uschar *nptr = ptr + clen;
1046        int ncount = 0;
1047        if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1048          {
1049          active_count--;           /* Remove non-match possibility */
1050          next_active_state--;
1051          }
1052        while (nptr < end_subject)
1053          {
1054          int nd;
1055          int ndlen = 1;
1056          GETCHARLEN(nd, nptr, ndlen);
1057          if (UCD_CATEGORY(nd) != ucp_M) break;
1058          ncount++;
1059          nptr += ndlen;
1060          }
1061        count++;
1062        ADD_NEW_DATA(-state_offset, count, ncount);
1063        }
1064      break;
1065#endif
1066
1067      /*-----------------------------------------------------------------*/
1068      case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1069      case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1070      case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1071      count = current_state->count;  /* Already matched */
1072      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1073      if (clen > 0)
1074        {
1075        int ncount = 0;
1076        switch (c)
1077          {
1078          case 0x000b:
1079          case 0x000c:
1080          case 0x0085:
1081          case 0x2028:
1082          case 0x2029:
1083          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1084          goto ANYNL01;
1085
1086          case 0x000d:
1087          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1088          /* Fall through */
1089
1090          ANYNL01:
1091          case 0x000a:
1092          if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1093            {
1094            active_count--;           /* Remove non-match possibility */
1095            next_active_state--;
1096            }
1097          count++;
1098          ADD_NEW_DATA(-state_offset, count, ncount);
1099          break;
1100
1101          default:
1102          break;
1103          }
1104        }
1105      break;
1106
1107      /*-----------------------------------------------------------------*/
1108      case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1109      case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1110      case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1111      count = current_state->count;  /* Already matched */
1112      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1113      if (clen > 0)
1114        {
1115        BOOL OK;
1116        switch (c)
1117          {
1118          case 0x000a:
1119          case 0x000b:
1120          case 0x000c:
1121          case 0x000d:
1122          case 0x0085:
1123          case 0x2028:
1124          case 0x2029:
1125          OK = TRUE;
1126          break;
1127
1128          default:
1129          OK = FALSE;
1130          break;
1131          }
1132
1133        if (OK == (d == OP_VSPACE))
1134          {
1135          if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1136            {
1137            active_count--;           /* Remove non-match possibility */
1138            next_active_state--;
1139            }
1140          count++;
1141          ADD_NEW_DATA(-state_offset, count, 0);
1142          }
1143        }
1144      break;
1145
1146      /*-----------------------------------------------------------------*/
1147      case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1148      case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1149      case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1150      count = current_state->count;  /* Already matched */
1151      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1152      if (clen > 0)
1153        {
1154        BOOL OK;
1155        switch (c)
1156          {
1157          case 0x09:      /* HT */
1158          case 0x20:      /* SPACE */
1159          case 0xa0:      /* NBSP */
1160          case 0x1680:    /* OGHAM SPACE MARK */
1161          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1162          case 0x2000:    /* EN QUAD */
1163          case 0x2001:    /* EM QUAD */
1164          case 0x2002:    /* EN SPACE */
1165          case 0x2003:    /* EM SPACE */
1166          case 0x2004:    /* THREE-PER-EM SPACE */
1167          case 0x2005:    /* FOUR-PER-EM SPACE */
1168          case 0x2006:    /* SIX-PER-EM SPACE */
1169          case 0x2007:    /* FIGURE SPACE */
1170          case 0x2008:    /* PUNCTUATION SPACE */
1171          case 0x2009:    /* THIN SPACE */
1172          case 0x200A:    /* HAIR SPACE */
1173          case 0x202f:    /* NARROW NO-BREAK SPACE */
1174          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1175          case 0x3000:    /* IDEOGRAPHIC SPACE */
1176          OK = TRUE;
1177          break;
1178
1179          default:
1180          OK = FALSE;
1181          break;
1182          }
1183
1184        if (OK == (d == OP_HSPACE))
1185          {
1186          if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1187            {
1188            active_count--;           /* Remove non-match possibility */
1189            next_active_state--;
1190            }
1191          count++;
1192          ADD_NEW_DATA(-state_offset, count, 0);
1193          }
1194        }
1195      break;
1196
1197      /*-----------------------------------------------------------------*/
1198#ifdef SUPPORT_UCP
1199      case OP_PROP_EXTRA + OP_TYPEQUERY:
1200      case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1201      case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1202      count = 4;
1203      goto QS1;
1204
1205      case OP_PROP_EXTRA + OP_TYPESTAR:
1206      case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1207      case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1208      count = 0;
1209
1210      QS1:
1211
1212      ADD_ACTIVE(state_offset + 4, 0);
1213      if (clen > 0)
1214        {
1215        BOOL OK;
1216        const ucd_record * prop = GET_UCD(c);
1217        switch(code[2])
1218          {
1219          case PT_ANY:
1220          OK = TRUE;
1221          break;
1222
1223          case PT_LAMP:
1224          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1225          break;
1226
1227          case PT_GC:
1228          OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1229          break;
1230
1231          case PT_PC:
1232          OK = prop->chartype == code[3];
1233          break;
1234
1235          case PT_SC:
1236          OK = prop->script == code[3];
1237          break;
1238
1239          /* Should never occur, but keep compilers from grumbling. */
1240
1241          default:
1242          OK = codevalue != OP_PROP;
1243          break;
1244          }
1245
1246        if (OK == (d == OP_PROP))
1247          {
1248          if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1249              codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1250            {
1251            active_count--;           /* Remove non-match possibility */
1252            next_active_state--;
1253            }
1254          ADD_NEW(state_offset + count, 0);
1255          }
1256        }
1257      break;
1258
1259      /*-----------------------------------------------------------------*/
1260      case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1261      case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1262      case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1263      count = 2;
1264      goto QS2;
1265
1266      case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1267      case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1268      case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1269      count = 0;
1270
1271      QS2:
1272
1273      ADD_ACTIVE(state_offset + 2, 0);
1274      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1275        {
1276        const uschar *nptr = ptr + clen;
1277        int ncount = 0;
1278        if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1279            codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1280          {
1281          active_count--;           /* Remove non-match possibility */
1282          next_active_state--;
1283          }
1284        while (nptr < end_subject)
1285          {
1286          int nd;
1287          int ndlen = 1;
1288          GETCHARLEN(nd, nptr, ndlen);
1289          if (UCD_CATEGORY(nd) != ucp_M) break;
1290          ncount++;
1291          nptr += ndlen;
1292          }
1293        ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1294        }
1295      break;
1296#endif
1297
1298      /*-----------------------------------------------------------------*/
1299      case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1300      case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1301      case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1302      count = 2;
1303      goto QS3;
1304
1305      case OP_ANYNL_EXTRA + OP_TYPESTAR:
1306      case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1307      case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1308      count = 0;
1309
1310      QS3:
1311      ADD_ACTIVE(state_offset + 2, 0);
1312      if (clen > 0)
1313        {
1314        int ncount = 0;
1315        switch (c)
1316          {
1317          case 0x000b:
1318          case 0x000c:
1319          case 0x0085:
1320          case 0x2028:
1321          case 0x2029:
1322          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1323          goto ANYNL02;
1324
1325          case 0x000d:
1326          if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1327          /* Fall through */
1328
1329          ANYNL02:
1330          case 0x000a:
1331          if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1332              codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1333            {
1334            active_count--;           /* Remove non-match possibility */
1335            next_active_state--;
1336            }
1337          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1338          break;
1339
1340          default:
1341          break;
1342          }
1343        }
1344      break;
1345
1346      /*-----------------------------------------------------------------*/
1347      case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1348      case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1349      case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1350      count = 2;
1351      goto QS4;
1352
1353      case OP_VSPACE_EXTRA + OP_TYPESTAR:
1354      case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1355      case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1356      count = 0;
1357
1358      QS4:
1359      ADD_ACTIVE(state_offset + 2, 0);
1360      if (clen > 0)
1361        {
1362        BOOL OK;
1363        switch (c)
1364          {
1365          case 0x000a:
1366          case 0x000b:
1367          case 0x000c:
1368          case 0x000d:
1369          case 0x0085:
1370          case 0x2028:
1371          case 0x2029:
1372          OK = TRUE;
1373          break;
1374
1375          default:
1376          OK = FALSE;
1377          break;
1378          }
1379        if (OK == (d == OP_VSPACE))
1380          {
1381          if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1382              codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1383            {
1384            active_count--;           /* Remove non-match possibility */
1385            next_active_state--;
1386            }
1387          ADD_NEW_DATA(-(state_offset + count), 0, 0);
1388          }
1389        }
1390      break;
1391
1392      /*-----------------------------------------------------------------*/
1393      case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1394      case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1395      case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1396      count = 2;
1397      goto QS5;
1398
1399      case OP_HSPACE_EXTRA + OP_TYPESTAR:
1400      case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1401      case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1402      count = 0;
1403
1404      QS5:
1405      ADD_ACTIVE(state_offset + 2, 0);
1406      if (clen > 0)
1407        {
1408        BOOL OK;
1409        switch (c)
1410          {
1411          case 0x09:      /* HT */
1412          case 0x20:      /* SPACE */
1413          case 0xa0:      /* NBSP */
1414          case 0x1680:    /* OGHAM SPACE MARK */
1415          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1416          case 0x2000:    /* EN QUAD */
1417          case 0x2001:    /* EM QUAD */
1418          case 0x2002:    /* EN SPACE */
1419          case 0x2003:    /* EM SPACE */
1420          case 0x2004:    /* THREE-PER-EM SPACE */
1421          case 0x2005:    /* FOUR-PER-EM SPACE */
1422          case 0x2006:    /* SIX-PER-EM SPACE */
1423          case 0x2007:    /* FIGURE SPACE */
1424          case 0x2008:    /* PUNCTUATION SPACE */
1425          case 0x2009:    /* THIN SPACE */
1426          case 0x200A:    /* HAIR SPACE */
1427          case 0x202f:    /* NARROW NO-BREAK SPACE */
1428          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1429          case 0x3000:    /* IDEOGRAPHIC SPACE */
1430          OK = TRUE;
1431          break;
1432
1433          default:
1434          OK = FALSE;
1435          break;
1436          }
1437
1438        if (OK == (d == OP_HSPACE))
1439          {
1440          if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1441              codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1442            {
1443            active_count--;           /* Remove non-match possibility */
1444            next_active_state--;
1445            }
1446          ADD_NEW_DATA(-(state_offset + count), 0, 0);
1447          }
1448        }
1449      break;
1450
1451      /*-----------------------------------------------------------------*/
1452#ifdef SUPPORT_UCP
1453      case OP_PROP_EXTRA + OP_TYPEEXACT:
1454      case OP_PROP_EXTRA + OP_TYPEUPTO:
1455      case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1456      case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1457      if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1458        { ADD_ACTIVE(state_offset + 6, 0); }
1459      count = current_state->count;  /* Number already matched */
1460      if (clen > 0)
1461        {
1462        BOOL OK;
1463        const ucd_record * prop = GET_UCD(c);
1464        switch(code[4])
1465          {
1466          case PT_ANY:
1467          OK = TRUE;
1468          break;
1469
1470          case PT_LAMP:
1471          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1472          break;
1473
1474          case PT_GC:
1475          OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1476          break;
1477
1478          case PT_PC:
1479          OK = prop->chartype == code[5];
1480          break;
1481
1482          case PT_SC:
1483          OK = prop->script == code[5];
1484          break;
1485
1486          /* Should never occur, but keep compilers from grumbling. */
1487
1488          default:
1489          OK = codevalue != OP_PROP;
1490          break;
1491          }
1492
1493        if (OK == (d == OP_PROP))
1494          {
1495          if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1496            {
1497            active_count--;           /* Remove non-match possibility */
1498            next_active_state--;
1499            }
1500          if (++count >= GET2(code, 1))
1501            { ADD_NEW(state_offset + 6, 0); }
1502          else
1503            { ADD_NEW(state_offset, count); }
1504          }
1505        }
1506      break;
1507
1508      /*-----------------------------------------------------------------*/
1509      case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1510      case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1511      case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1512      case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1513      if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1514        { ADD_ACTIVE(state_offset + 4, 0); }
1515      count = current_state->count;  /* Number already matched */
1516      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1517        {
1518        const uschar *nptr = ptr + clen;
1519        int ncount = 0;
1520        if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1521          {
1522          active_count--;           /* Remove non-match possibility */
1523          next_active_state--;
1524          }
1525        while (nptr < end_subject)
1526          {
1527          int nd;
1528          int ndlen = 1;
1529          GETCHARLEN(nd, nptr, ndlen);
1530          if (UCD_CATEGORY(nd) != ucp_M) break;
1531          ncount++;
1532          nptr += ndlen;
1533          }
1534        if (++count >= GET2(code, 1))
1535          { 

Large files are truncated, but you can click here to view the full file