PageRenderTime 474ms CodeModel.GetById 81ms app.highlight 274ms RepoModel.GetById 62ms app.codeStats 2ms

/pypy/module/cpyext/test/_sre.c

https://bitbucket.org/kcr/pypy
C | 3908 lines | 2992 code | 556 blank | 360 comment | 613 complexity | 30e62d123b897db7a9341ac71625a5d9 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Secret Labs' Regular Expression Engine
   3 *
   4 * regular expression matching engine
   5 *
   6 * partial history:
   7 * 1999-10-24 fl  created (based on existing template matcher code)
   8 * 2000-03-06 fl  first alpha, sort of
   9 * 2000-08-01 fl  fixes for 1.6b1
  10 * 2000-08-07 fl  use PyOS_CheckStack() if available
  11 * 2000-09-20 fl  added expand method
  12 * 2001-03-20 fl  lots of fixes for 2.1b2
  13 * 2001-04-15 fl  export copyright as Python attribute, not global
  14 * 2001-04-28 fl  added __copy__ methods (work in progress)
  15 * 2001-05-14 fl  fixes for 1.5.2 compatibility
  16 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
  17 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
  18 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
  19 * 2001-10-21 fl  added sub/subn primitive
  20 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
  21 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
  22 * 2002-11-09 fl  fixed empty sub/subn return type
  23 * 2003-04-18 mvl fully support 4-byte codes
  24 * 2003-10-17 gn  implemented non recursive scheme
  25 *
  26 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  27 *
  28 * This version of the SRE library can be redistributed under CNRI's
  29 * Python 1.6 license.  For any other use, please contact Secret Labs
  30 * AB (info@pythonware.com).
  31 *
  32 * Portions of this engine have been developed in cooperation with
  33 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
  34 * other compatibility work.
  35 */
  36
  37#ifndef SRE_RECURSIVE
  38
  39static char copyright[] =
  40    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
  41
  42#define PY_SSIZE_T_CLEAN
  43
  44#include "Python.h"
  45#include "structmember.h" /* offsetof */
  46
  47#include "sre.h"
  48
  49#include <ctype.h>
  50
  51/* name of this module, minus the leading underscore */
  52#if !defined(SRE_MODULE)
  53#define SRE_MODULE "sre"
  54#endif
  55
  56#define SRE_PY_MODULE "re"
  57
  58/* defining this one enables tracing */
  59#undef VERBOSE
  60
  61#if PY_VERSION_HEX >= 0x01060000
  62#if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
  63/* defining this enables unicode support (default under 1.6a1 and later) */
  64#define HAVE_UNICODE
  65#endif
  66#endif
  67
  68/* -------------------------------------------------------------------- */
  69/* optional features */
  70
  71/* enables fast searching */
  72#define USE_FAST_SEARCH
  73
  74/* enables aggressive inlining (always on for Visual C) */
  75#undef USE_INLINE
  76
  77/* enables copy/deepcopy handling (work in progress) */
  78#undef USE_BUILTIN_COPY
  79
  80#if PY_VERSION_HEX < 0x01060000
  81#define PyObject_DEL(op) PyMem_DEL((op))
  82#endif
  83
  84/* -------------------------------------------------------------------- */
  85
  86#if defined(_MSC_VER)
  87#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
  88#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
  89/* fastest possible local call under MSVC */
  90#define LOCAL(type) static __inline type __fastcall
  91#elif defined(USE_INLINE)
  92#define LOCAL(type) static inline type
  93#else
  94#define LOCAL(type) static type
  95#endif
  96
  97/* error codes */
  98#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
  99#define SRE_ERROR_STATE -2 /* illegal state */
 100#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
 101#define SRE_ERROR_MEMORY -9 /* out of memory */
 102#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
 103
 104#if defined(VERBOSE)
 105#define TRACE(v) printf v
 106#else
 107#define TRACE(v)
 108#endif
 109
 110/* -------------------------------------------------------------------- */
 111/* search engine state */
 112
 113/* default character predicates (run sre_chars.py to regenerate tables) */
 114
 115#define SRE_DIGIT_MASK 1
 116#define SRE_SPACE_MASK 2
 117#define SRE_LINEBREAK_MASK 4
 118#define SRE_ALNUM_MASK 8
 119#define SRE_WORD_MASK 16
 120
 121/* FIXME: this assumes ASCII.  create tables in init_sre() instead */
 122
 123static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
 1242, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
 1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
 12625, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 12724, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
 1280, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
 12924, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
 130
 131static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
 13210, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
 13327, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
 13444, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
 13561, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
 136108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
 137122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
 138106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
 139120, 121, 122, 123, 124, 125, 126, 127 };
 140
 141#define SRE_IS_DIGIT(ch)\
 142    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
 143#define SRE_IS_SPACE(ch)\
 144    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
 145#define SRE_IS_LINEBREAK(ch)\
 146    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
 147#define SRE_IS_ALNUM(ch)\
 148    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
 149#define SRE_IS_WORD(ch)\
 150    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
 151
 152static unsigned int sre_lower(unsigned int ch)
 153{
 154    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
 155}
 156
 157/* locale-specific character predicates */
 158/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 159 * warnings when c's type supports only numbers < N+1 */
 160#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
 161#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
 162#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
 163#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
 164#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
 165
 166static unsigned int sre_lower_locale(unsigned int ch)
 167{
 168    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
 169}
 170
 171/* unicode-specific character predicates */
 172
 173#if defined(HAVE_UNICODE)
 174
 175#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
 176#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
 177#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
 178#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
 179#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
 180
 181static unsigned int sre_lower_unicode(unsigned int ch)
 182{
 183    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
 184}
 185
 186#endif
 187
 188LOCAL(int)
 189sre_category(SRE_CODE category, unsigned int ch)
 190{
 191    switch (category) {
 192
 193    case SRE_CATEGORY_DIGIT:
 194        return SRE_IS_DIGIT(ch);
 195    case SRE_CATEGORY_NOT_DIGIT:
 196        return !SRE_IS_DIGIT(ch);
 197    case SRE_CATEGORY_SPACE:
 198        return SRE_IS_SPACE(ch);
 199    case SRE_CATEGORY_NOT_SPACE:
 200        return !SRE_IS_SPACE(ch);
 201    case SRE_CATEGORY_WORD:
 202        return SRE_IS_WORD(ch);
 203    case SRE_CATEGORY_NOT_WORD:
 204        return !SRE_IS_WORD(ch);
 205    case SRE_CATEGORY_LINEBREAK:
 206        return SRE_IS_LINEBREAK(ch);
 207    case SRE_CATEGORY_NOT_LINEBREAK:
 208        return !SRE_IS_LINEBREAK(ch);
 209
 210    case SRE_CATEGORY_LOC_WORD:
 211        return SRE_LOC_IS_WORD(ch);
 212    case SRE_CATEGORY_LOC_NOT_WORD:
 213        return !SRE_LOC_IS_WORD(ch);
 214
 215#if defined(HAVE_UNICODE)
 216    case SRE_CATEGORY_UNI_DIGIT:
 217        return SRE_UNI_IS_DIGIT(ch);
 218    case SRE_CATEGORY_UNI_NOT_DIGIT:
 219        return !SRE_UNI_IS_DIGIT(ch);
 220    case SRE_CATEGORY_UNI_SPACE:
 221        return SRE_UNI_IS_SPACE(ch);
 222    case SRE_CATEGORY_UNI_NOT_SPACE:
 223        return !SRE_UNI_IS_SPACE(ch);
 224    case SRE_CATEGORY_UNI_WORD:
 225        return SRE_UNI_IS_WORD(ch);
 226    case SRE_CATEGORY_UNI_NOT_WORD:
 227        return !SRE_UNI_IS_WORD(ch);
 228    case SRE_CATEGORY_UNI_LINEBREAK:
 229        return SRE_UNI_IS_LINEBREAK(ch);
 230    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
 231        return !SRE_UNI_IS_LINEBREAK(ch);
 232#else
 233    case SRE_CATEGORY_UNI_DIGIT:
 234        return SRE_IS_DIGIT(ch);
 235    case SRE_CATEGORY_UNI_NOT_DIGIT:
 236        return !SRE_IS_DIGIT(ch);
 237    case SRE_CATEGORY_UNI_SPACE:
 238        return SRE_IS_SPACE(ch);
 239    case SRE_CATEGORY_UNI_NOT_SPACE:
 240        return !SRE_IS_SPACE(ch);
 241    case SRE_CATEGORY_UNI_WORD:
 242        return SRE_LOC_IS_WORD(ch);
 243    case SRE_CATEGORY_UNI_NOT_WORD:
 244        return !SRE_LOC_IS_WORD(ch);
 245    case SRE_CATEGORY_UNI_LINEBREAK:
 246        return SRE_IS_LINEBREAK(ch);
 247    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
 248        return !SRE_IS_LINEBREAK(ch);
 249#endif
 250    }
 251    return 0;
 252}
 253
 254/* helpers */
 255
 256static void
 257data_stack_dealloc(SRE_STATE* state)
 258{
 259    if (state->data_stack) {
 260        PyMem_FREE(state->data_stack);
 261        state->data_stack = NULL;
 262    }
 263    state->data_stack_size = state->data_stack_base = 0;
 264}
 265
 266static int
 267data_stack_grow(SRE_STATE* state, Py_ssize_t size)
 268{
 269    Py_ssize_t minsize, cursize;
 270    minsize = state->data_stack_base+size;
 271    cursize = state->data_stack_size;
 272    if (cursize < minsize) {
 273        void* stack;
 274        cursize = minsize+minsize/4+1024;
 275        TRACE(("allocate/grow stack %d\n", cursize));
 276        stack = PyMem_REALLOC(state->data_stack, cursize);
 277        if (!stack) {
 278            data_stack_dealloc(state);
 279            return SRE_ERROR_MEMORY;
 280        }
 281        state->data_stack = (char *)stack;
 282        state->data_stack_size = cursize;
 283    }
 284    return 0;
 285}
 286
 287/* generate 8-bit version */
 288
 289#define SRE_CHAR unsigned char
 290#define SRE_AT sre_at
 291#define SRE_COUNT sre_count
 292#define SRE_CHARSET sre_charset
 293#define SRE_INFO sre_info
 294#define SRE_MATCH sre_match
 295#define SRE_MATCH_CONTEXT sre_match_context
 296#define SRE_SEARCH sre_search
 297#define SRE_LITERAL_TEMPLATE sre_literal_template
 298
 299#if defined(HAVE_UNICODE)
 300
 301#define SRE_RECURSIVE
 302#include "_sre.c"
 303#undef SRE_RECURSIVE
 304
 305#undef SRE_LITERAL_TEMPLATE
 306#undef SRE_SEARCH
 307#undef SRE_MATCH
 308#undef SRE_MATCH_CONTEXT
 309#undef SRE_INFO
 310#undef SRE_CHARSET
 311#undef SRE_COUNT
 312#undef SRE_AT
 313#undef SRE_CHAR
 314
 315/* generate 16-bit unicode version */
 316
 317#define SRE_CHAR Py_UNICODE
 318#define SRE_AT sre_uat
 319#define SRE_COUNT sre_ucount
 320#define SRE_CHARSET sre_ucharset
 321#define SRE_INFO sre_uinfo
 322#define SRE_MATCH sre_umatch
 323#define SRE_MATCH_CONTEXT sre_umatch_context
 324#define SRE_SEARCH sre_usearch
 325#define SRE_LITERAL_TEMPLATE sre_uliteral_template
 326#endif
 327
 328#endif /* SRE_RECURSIVE */
 329
 330/* -------------------------------------------------------------------- */
 331/* String matching engine */
 332
 333/* the following section is compiled twice, with different character
 334   settings */
 335
 336LOCAL(int)
 337SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
 338{
 339    /* check if pointer is at given position */
 340
 341    Py_ssize_t thisp, thatp;
 342
 343    switch (at) {
 344
 345    case SRE_AT_BEGINNING:
 346    case SRE_AT_BEGINNING_STRING:
 347        return ((void*) ptr == state->beginning);
 348
 349    case SRE_AT_BEGINNING_LINE:
 350        return ((void*) ptr == state->beginning ||
 351                SRE_IS_LINEBREAK((int) ptr[-1]));
 352
 353    case SRE_AT_END:
 354        return (((void*) (ptr+1) == state->end &&
 355                 SRE_IS_LINEBREAK((int) ptr[0])) ||
 356                ((void*) ptr == state->end));
 357
 358    case SRE_AT_END_LINE:
 359        return ((void*) ptr == state->end ||
 360                SRE_IS_LINEBREAK((int) ptr[0]));
 361
 362    case SRE_AT_END_STRING:
 363        return ((void*) ptr == state->end);
 364
 365    case SRE_AT_BOUNDARY:
 366        if (state->beginning == state->end)
 367            return 0;
 368        thatp = ((void*) ptr > state->beginning) ?
 369            SRE_IS_WORD((int) ptr[-1]) : 0;
 370        thisp = ((void*) ptr < state->end) ?
 371            SRE_IS_WORD((int) ptr[0]) : 0;
 372        return thisp != thatp;
 373
 374    case SRE_AT_NON_BOUNDARY:
 375        if (state->beginning == state->end)
 376            return 0;
 377        thatp = ((void*) ptr > state->beginning) ?
 378            SRE_IS_WORD((int) ptr[-1]) : 0;
 379        thisp = ((void*) ptr < state->end) ?
 380            SRE_IS_WORD((int) ptr[0]) : 0;
 381        return thisp == thatp;
 382
 383    case SRE_AT_LOC_BOUNDARY:
 384        if (state->beginning == state->end)
 385            return 0;
 386        thatp = ((void*) ptr > state->beginning) ?
 387            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
 388        thisp = ((void*) ptr < state->end) ?
 389            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
 390        return thisp != thatp;
 391
 392    case SRE_AT_LOC_NON_BOUNDARY:
 393        if (state->beginning == state->end)
 394            return 0;
 395        thatp = ((void*) ptr > state->beginning) ?
 396            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
 397        thisp = ((void*) ptr < state->end) ?
 398            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
 399        return thisp == thatp;
 400
 401#if defined(HAVE_UNICODE)
 402    case SRE_AT_UNI_BOUNDARY:
 403        if (state->beginning == state->end)
 404            return 0;
 405        thatp = ((void*) ptr > state->beginning) ?
 406            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
 407        thisp = ((void*) ptr < state->end) ?
 408            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
 409        return thisp != thatp;
 410
 411    case SRE_AT_UNI_NON_BOUNDARY:
 412        if (state->beginning == state->end)
 413            return 0;
 414        thatp = ((void*) ptr > state->beginning) ?
 415            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
 416        thisp = ((void*) ptr < state->end) ?
 417            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
 418        return thisp == thatp;
 419#endif
 420
 421    }
 422
 423    return 0;
 424}
 425
 426LOCAL(int)
 427SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
 428{
 429    /* check if character is a member of the given set */
 430
 431    int ok = 1;
 432
 433    for (;;) {
 434        switch (*set++) {
 435
 436        case SRE_OP_FAILURE:
 437            return !ok;
 438
 439        case SRE_OP_LITERAL:
 440            /* <LITERAL> <code> */
 441            if (ch == set[0])
 442                return ok;
 443            set++;
 444            break;
 445
 446        case SRE_OP_CATEGORY:
 447            /* <CATEGORY> <code> */
 448            if (sre_category(set[0], (int) ch))
 449                return ok;
 450            set += 1;
 451            break;
 452
 453        case SRE_OP_CHARSET:
 454            if (sizeof(SRE_CODE) == 2) {
 455                /* <CHARSET> <bitmap> (16 bits per code word) */
 456                if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
 457                    return ok;
 458                set += 16;
 459            }
 460            else {
 461                /* <CHARSET> <bitmap> (32 bits per code word) */
 462                if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
 463                    return ok;
 464                set += 8;
 465            }
 466            break;
 467
 468        case SRE_OP_RANGE:
 469            /* <RANGE> <lower> <upper> */
 470            if (set[0] <= ch && ch <= set[1])
 471                return ok;
 472            set += 2;
 473            break;
 474
 475        case SRE_OP_NEGATE:
 476            ok = !ok;
 477            break;
 478
 479        case SRE_OP_BIGCHARSET:
 480            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
 481        {
 482            Py_ssize_t count, block;
 483            count = *(set++);
 484
 485            if (sizeof(SRE_CODE) == 2) {
 486                block = ((unsigned char*)set)[ch >> 8];
 487                set += 128;
 488                if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
 489                    return ok;
 490                set += count*16;
 491            }
 492            else {
 493                /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 494                 * warnings when c's type supports only numbers < N+1 */
 495                if (!(ch & ~65535))
 496                    block = ((unsigned char*)set)[ch >> 8];
 497                else
 498                    block = -1;
 499                set += 64;
 500                if (block >=0 &&
 501                    (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
 502                    return ok;
 503                set += count*8;
 504            }
 505            break;
 506        }
 507
 508        default:
 509            /* internal error -- there's not much we can do about it
 510               here, so let's just pretend it didn't match... */
 511            return 0;
 512        }
 513    }
 514}
 515
 516LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
 517
 518LOCAL(Py_ssize_t)
 519SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
 520{
 521    SRE_CODE chr;
 522    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
 523    SRE_CHAR* end = (SRE_CHAR *)state->end;
 524    Py_ssize_t i;
 525
 526    /* adjust end */
 527    if (maxcount < end - ptr && maxcount != 65535)
 528        end = ptr + maxcount;
 529
 530    switch (pattern[0]) {
 531
 532    case SRE_OP_IN:
 533        /* repeated set */
 534        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
 535        while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
 536            ptr++;
 537        break;
 538
 539    case SRE_OP_ANY:
 540        /* repeated dot wildcard. */
 541        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
 542        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
 543            ptr++;
 544        break;
 545
 546    case SRE_OP_ANY_ALL:
 547        /* repeated dot wildcard.  skip to the end of the target
 548           string, and backtrack from there */
 549        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
 550        ptr = end;
 551        break;
 552
 553    case SRE_OP_LITERAL:
 554        /* repeated literal */
 555        chr = pattern[1];
 556        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
 557        while (ptr < end && (SRE_CODE) *ptr == chr)
 558            ptr++;
 559        break;
 560
 561    case SRE_OP_LITERAL_IGNORE:
 562        /* repeated literal */
 563        chr = pattern[1];
 564        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
 565        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
 566            ptr++;
 567        break;
 568
 569    case SRE_OP_NOT_LITERAL:
 570        /* repeated non-literal */
 571        chr = pattern[1];
 572        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
 573        while (ptr < end && (SRE_CODE) *ptr != chr)
 574            ptr++;
 575        break;
 576
 577    case SRE_OP_NOT_LITERAL_IGNORE:
 578        /* repeated non-literal */
 579        chr = pattern[1];
 580        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
 581        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
 582            ptr++;
 583        break;
 584
 585    default:
 586        /* repeated single character pattern */
 587        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
 588        while ((SRE_CHAR*) state->ptr < end) {
 589            i = SRE_MATCH(state, pattern);
 590            if (i < 0)
 591                return i;
 592            if (!i)
 593                break;
 594        }
 595        TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
 596               (SRE_CHAR*) state->ptr - ptr));
 597        return (SRE_CHAR*) state->ptr - ptr;
 598    }
 599
 600    TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
 601    return ptr - (SRE_CHAR*) state->ptr;
 602}
 603
 604#if 0 /* not used in this release */
 605LOCAL(int)
 606SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
 607{
 608    /* check if an SRE_OP_INFO block matches at the current position.
 609       returns the number of SRE_CODE objects to skip if successful, 0
 610       if no match */
 611
 612    SRE_CHAR* end = state->end;
 613    SRE_CHAR* ptr = state->ptr;
 614    Py_ssize_t i;
 615
 616    /* check minimal length */
 617    if (pattern[3] && (end - ptr) < pattern[3])
 618        return 0;
 619
 620    /* check known prefix */
 621    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
 622        /* <length> <skip> <prefix data> <overlap data> */
 623        for (i = 0; i < pattern[5]; i++)
 624            if ((SRE_CODE) ptr[i] != pattern[7 + i])
 625                return 0;
 626        return pattern[0] + 2 * pattern[6];
 627    }
 628    return pattern[0];
 629}
 630#endif
 631
 632/* The macros below should be used to protect recursive SRE_MATCH()
 633 * calls that *failed* and do *not* return immediately (IOW, those
 634 * that will backtrack). Explaining:
 635 *
 636 * - Recursive SRE_MATCH() returned true: that's usually a success
 637 *   (besides atypical cases like ASSERT_NOT), therefore there's no
 638 *   reason to restore lastmark;
 639 *
 640 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
 641 *   is returning to the caller: If the current SRE_MATCH() is the
 642 *   top function of the recursion, returning false will be a matching
 643 *   failure, and it doesn't matter where lastmark is pointing to.
 644 *   If it's *not* the top function, it will be a recursive SRE_MATCH()
 645 *   failure by itself, and the calling SRE_MATCH() will have to deal
 646 *   with the failure by the same rules explained here (it will restore
 647 *   lastmark by itself if necessary);
 648 *
 649 * - Recursive SRE_MATCH() returned false, and will continue the
 650 *   outside 'for' loop: must be protected when breaking, since the next
 651 *   OP could potentially depend on lastmark;
 652 *
 653 * - Recursive SRE_MATCH() returned false, and will be called again
 654 *   inside a local for/while loop: must be protected between each
 655 *   loop iteration, since the recursive SRE_MATCH() could do anything,
 656 *   and could potentially depend on lastmark.
 657 *
 658 * For more information, check the discussion at SF patch #712900.
 659 */
 660#define LASTMARK_SAVE()     \
 661    do { \
 662        ctx->lastmark = state->lastmark; \
 663        ctx->lastindex = state->lastindex; \
 664    } while (0)
 665#define LASTMARK_RESTORE()  \
 666    do { \
 667        state->lastmark = ctx->lastmark; \
 668        state->lastindex = ctx->lastindex; \
 669    } while (0)
 670
 671#define RETURN_ERROR(i) do { return i; } while(0)
 672#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
 673#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
 674
 675#define RETURN_ON_ERROR(i) \
 676    do { if (i < 0) RETURN_ERROR(i); } while (0)
 677#define RETURN_ON_SUCCESS(i) \
 678    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
 679#define RETURN_ON_FAILURE(i) \
 680    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
 681
 682#define SFY(x) #x
 683
 684#define DATA_STACK_ALLOC(state, type, ptr) \
 685do { \
 686    alloc_pos = state->data_stack_base; \
 687    TRACE(("allocating %s in %d (%d)\n", \
 688           SFY(type), alloc_pos, sizeof(type))); \
 689    if (state->data_stack_size < alloc_pos+sizeof(type)) { \
 690        int j = data_stack_grow(state, sizeof(type)); \
 691        if (j < 0) return j; \
 692        if (ctx_pos != -1) \
 693            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
 694    } \
 695    ptr = (type*)(state->data_stack+alloc_pos); \
 696    state->data_stack_base += sizeof(type); \
 697} while (0)
 698
 699#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
 700do { \
 701    TRACE(("looking up %s at %d\n", SFY(type), pos)); \
 702    ptr = (type*)(state->data_stack+pos); \
 703} while (0)
 704
 705#define DATA_STACK_PUSH(state, data, size) \
 706do { \
 707    TRACE(("copy data in %p to %d (%d)\n", \
 708           data, state->data_stack_base, size)); \
 709    if (state->data_stack_size < state->data_stack_base+size) { \
 710        int j = data_stack_grow(state, size); \
 711        if (j < 0) return j; \
 712        if (ctx_pos != -1) \
 713            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
 714    } \
 715    memcpy(state->data_stack+state->data_stack_base, data, size); \
 716    state->data_stack_base += size; \
 717} while (0)
 718
 719#define DATA_STACK_POP(state, data, size, discard) \
 720do { \
 721    TRACE(("copy data to %p from %d (%d)\n", \
 722           data, state->data_stack_base-size, size)); \
 723    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
 724    if (discard) \
 725        state->data_stack_base -= size; \
 726} while (0)
 727
 728#define DATA_STACK_POP_DISCARD(state, size) \
 729do { \
 730    TRACE(("discard data from %d (%d)\n", \
 731           state->data_stack_base-size, size)); \
 732    state->data_stack_base -= size; \
 733} while(0)
 734
 735#define DATA_PUSH(x) \
 736    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
 737#define DATA_POP(x) \
 738    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
 739#define DATA_POP_DISCARD(x) \
 740    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
 741#define DATA_ALLOC(t,p) \
 742    DATA_STACK_ALLOC(state, t, p)
 743#define DATA_LOOKUP_AT(t,p,pos) \
 744    DATA_STACK_LOOKUP_AT(state,t,p,pos)
 745
 746#define MARK_PUSH(lastmark) \
 747    do if (lastmark > 0) { \
 748        i = lastmark; /* ctx->lastmark may change if reallocated */ \
 749        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
 750    } while (0)
 751#define MARK_POP(lastmark) \
 752    do if (lastmark > 0) { \
 753        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
 754    } while (0)
 755#define MARK_POP_KEEP(lastmark) \
 756    do if (lastmark > 0) { \
 757        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
 758    } while (0)
 759#define MARK_POP_DISCARD(lastmark) \
 760    do if (lastmark > 0) { \
 761        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
 762    } while (0)
 763
 764#define JUMP_NONE            0
 765#define JUMP_MAX_UNTIL_1     1
 766#define JUMP_MAX_UNTIL_2     2
 767#define JUMP_MAX_UNTIL_3     3
 768#define JUMP_MIN_UNTIL_1     4
 769#define JUMP_MIN_UNTIL_2     5
 770#define JUMP_MIN_UNTIL_3     6
 771#define JUMP_REPEAT          7
 772#define JUMP_REPEAT_ONE_1    8
 773#define JUMP_REPEAT_ONE_2    9
 774#define JUMP_MIN_REPEAT_ONE  10
 775#define JUMP_BRANCH          11
 776#define JUMP_ASSERT          12
 777#define JUMP_ASSERT_NOT      13
 778
 779#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
 780    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
 781    nextctx->last_ctx_pos = ctx_pos; \
 782    nextctx->jump = jumpvalue; \
 783    nextctx->pattern = nextpattern; \
 784    ctx_pos = alloc_pos; \
 785    ctx = nextctx; \
 786    goto entrance; \
 787    jumplabel: \
 788    while (0) /* gcc doesn't like labels at end of scopes */ \
 789
 790typedef struct {
 791    Py_ssize_t last_ctx_pos;
 792    Py_ssize_t jump;
 793    SRE_CHAR* ptr;
 794    SRE_CODE* pattern;
 795    Py_ssize_t count;
 796    Py_ssize_t lastmark;
 797    Py_ssize_t lastindex;
 798    union {
 799        SRE_CODE chr;
 800        SRE_REPEAT* rep;
 801    } u;
 802} SRE_MATCH_CONTEXT;
 803
 804/* check if string matches the given pattern.  returns <0 for
 805   error, 0 for failure, and 1 for success */
 806LOCAL(Py_ssize_t)
 807SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 808{
 809    SRE_CHAR* end = (SRE_CHAR *)state->end;
 810    Py_ssize_t alloc_pos, ctx_pos = -1;
 811    Py_ssize_t i, ret = 0;
 812    Py_ssize_t jump;
 813    unsigned int sigcount=0;
 814
 815    SRE_MATCH_CONTEXT* ctx;
 816    SRE_MATCH_CONTEXT* nextctx;
 817
 818    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
 819
 820    DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
 821    ctx->last_ctx_pos = -1;
 822    ctx->jump = JUMP_NONE;
 823    ctx->pattern = pattern;
 824    ctx_pos = alloc_pos;
 825
 826entrance:
 827
 828    ctx->ptr = (SRE_CHAR *)state->ptr;
 829
 830    if (ctx->pattern[0] == SRE_OP_INFO) {
 831        /* optimization info block */
 832        /* <INFO> <1=skip> <2=flags> <3=min> ... */
 833        if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
 834            TRACE(("reject (got %d chars, need %d)\n",
 835                   (end - ctx->ptr), ctx->pattern[3]));
 836            RETURN_FAILURE;
 837        }
 838        ctx->pattern += ctx->pattern[1] + 1;
 839    }
 840
 841    for (;;) {
 842        ++sigcount;
 843        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
 844            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
 845
 846        switch (*ctx->pattern++) {
 847
 848        case SRE_OP_MARK:
 849            /* set mark */
 850            /* <MARK> <gid> */
 851            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
 852                   ctx->ptr, ctx->pattern[0]));
 853            i = ctx->pattern[0];
 854            if (i & 1)
 855                state->lastindex = i/2 + 1;
 856            if (i > state->lastmark) {
 857                /* state->lastmark is the highest valid index in the
 858                   state->mark array.  If it is increased by more than 1,
 859                   the intervening marks must be set to NULL to signal
 860                   that these marks have not been encountered. */
 861                Py_ssize_t j = state->lastmark + 1;
 862                while (j < i)
 863                    state->mark[j++] = NULL;
 864                state->lastmark = i;
 865            }
 866            state->mark[i] = ctx->ptr;
 867            ctx->pattern++;
 868            break;
 869
 870        case SRE_OP_LITERAL:
 871            /* match literal string */
 872            /* <LITERAL> <code> */
 873            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
 874                   ctx->ptr, *ctx->pattern));
 875            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
 876                RETURN_FAILURE;
 877            ctx->pattern++;
 878            ctx->ptr++;
 879            break;
 880
 881        case SRE_OP_NOT_LITERAL:
 882            /* match anything that is not literal character */
 883            /* <NOT_LITERAL> <code> */
 884            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
 885                   ctx->ptr, *ctx->pattern));
 886            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
 887                RETURN_FAILURE;
 888            ctx->pattern++;
 889            ctx->ptr++;
 890            break;
 891
 892        case SRE_OP_SUCCESS:
 893            /* end of pattern */
 894            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
 895            state->ptr = ctx->ptr;
 896            RETURN_SUCCESS;
 897
 898        case SRE_OP_AT:
 899            /* match at given position */
 900            /* <AT> <code> */
 901            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
 902            if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
 903                RETURN_FAILURE;
 904            ctx->pattern++;
 905            break;
 906
 907        case SRE_OP_CATEGORY:
 908            /* match at given category */
 909            /* <CATEGORY> <code> */
 910            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
 911                   ctx->ptr, *ctx->pattern));
 912            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
 913                RETURN_FAILURE;
 914            ctx->pattern++;
 915            ctx->ptr++;
 916            break;
 917
 918        case SRE_OP_ANY:
 919            /* match anything (except a newline) */
 920            /* <ANY> */
 921            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
 922            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
 923                RETURN_FAILURE;
 924            ctx->ptr++;
 925            break;
 926
 927        case SRE_OP_ANY_ALL:
 928            /* match anything */
 929            /* <ANY_ALL> */
 930            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
 931            if (ctx->ptr >= end)
 932                RETURN_FAILURE;
 933            ctx->ptr++;
 934            break;
 935
 936        case SRE_OP_IN:
 937            /* match set member (or non_member) */
 938            /* <IN> <skip> <set> */
 939            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
 940            if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
 941                RETURN_FAILURE;
 942            ctx->pattern += ctx->pattern[0];
 943            ctx->ptr++;
 944            break;
 945
 946        case SRE_OP_LITERAL_IGNORE:
 947            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
 948                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
 949            if (ctx->ptr >= end ||
 950                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
 951                RETURN_FAILURE;
 952            ctx->pattern++;
 953            ctx->ptr++;
 954            break;
 955
 956        case SRE_OP_NOT_LITERAL_IGNORE:
 957            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
 958                   ctx->pattern, ctx->ptr, *ctx->pattern));
 959            if (ctx->ptr >= end ||
 960                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
 961                RETURN_FAILURE;
 962            ctx->pattern++;
 963            ctx->ptr++;
 964            break;
 965
 966        case SRE_OP_IN_IGNORE:
 967            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
 968            if (ctx->ptr >= end
 969                || !SRE_CHARSET(ctx->pattern+1,
 970                                (SRE_CODE)state->lower(*ctx->ptr)))
 971                RETURN_FAILURE;
 972            ctx->pattern += ctx->pattern[0];
 973            ctx->ptr++;
 974            break;
 975
 976        case SRE_OP_JUMP:
 977        case SRE_OP_INFO:
 978            /* jump forward */
 979            /* <JUMP> <offset> */
 980            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
 981                   ctx->ptr, ctx->pattern[0]));
 982            ctx->pattern += ctx->pattern[0];
 983            break;
 984
 985        case SRE_OP_BRANCH:
 986            /* alternation */
 987            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
 988            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
 989            LASTMARK_SAVE();
 990            ctx->u.rep = state->repeat;
 991            if (ctx->u.rep)
 992                MARK_PUSH(ctx->lastmark);
 993            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
 994                if (ctx->pattern[1] == SRE_OP_LITERAL &&
 995                    (ctx->ptr >= end ||
 996                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
 997                    continue;
 998                if (ctx->pattern[1] == SRE_OP_IN &&
 999                    (ctx->ptr >= end ||
1000                     !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1001                    continue;
1002                state->ptr = ctx->ptr;
1003                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1004                if (ret) {
1005                    if (ctx->u.rep)
1006                        MARK_POP_DISCARD(ctx->lastmark);
1007                    RETURN_ON_ERROR(ret);
1008                    RETURN_SUCCESS;
1009                }
1010                if (ctx->u.rep)
1011                    MARK_POP_KEEP(ctx->lastmark);
1012                LASTMARK_RESTORE();
1013            }
1014            if (ctx->u.rep)
1015                MARK_POP_DISCARD(ctx->lastmark);
1016            RETURN_FAILURE;
1017
1018        case SRE_OP_REPEAT_ONE:
1019            /* match repeated sequence (maximizing regexp) */
1020
1021            /* this operator only works if the repeated item is
1022               exactly one character wide, and we're not already
1023               collecting backtracking points.  for other cases,
1024               use the MAX_REPEAT operator */
1025
1026            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1027
1028            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1029                   ctx->pattern[1], ctx->pattern[2]));
1030
1031            if (ctx->ptr + ctx->pattern[1] > end)
1032                RETURN_FAILURE; /* cannot match */
1033
1034            state->ptr = ctx->ptr;
1035
1036            ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1037            RETURN_ON_ERROR(ret);
1038            DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1039            ctx->count = ret;
1040            ctx->ptr += ctx->count;
1041
1042            /* when we arrive here, count contains the number of
1043               matches, and ctx->ptr points to the tail of the target
1044               string.  check if the rest of the pattern matches,
1045               and backtrack if not. */
1046
1047            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1048                RETURN_FAILURE;
1049
1050            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1051                /* tail is empty.  we're finished */
1052                state->ptr = ctx->ptr;
1053                RETURN_SUCCESS;
1054            }
1055
1056            LASTMARK_SAVE();
1057
1058            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1059                /* tail starts with a literal. skip positions where
1060                   the rest of the pattern cannot possibly match */
1061                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1062                for (;;) {
1063                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1064                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1065                        ctx->ptr--;
1066                        ctx->count--;
1067                    }
1068                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1069                        break;
1070                    state->ptr = ctx->ptr;
1071                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1072                            ctx->pattern+ctx->pattern[0]);
1073                    if (ret) {
1074                        RETURN_ON_ERROR(ret);
1075                        RETURN_SUCCESS;
1076                    }
1077
1078                    LASTMARK_RESTORE();
1079
1080                    ctx->ptr--;
1081                    ctx->count--;
1082                }
1083
1084            } else {
1085                /* general case */
1086                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1087                    state->ptr = ctx->ptr;
1088                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1089                            ctx->pattern+ctx->pattern[0]);
1090                    if (ret) {
1091                        RETURN_ON_ERROR(ret);
1092                        RETURN_SUCCESS;
1093                    }
1094                    ctx->ptr--;
1095                    ctx->count--;
1096                    LASTMARK_RESTORE();
1097                }
1098            }
1099            RETURN_FAILURE;
1100
1101        case SRE_OP_MIN_REPEAT_ONE:
1102            /* match repeated sequence (minimizing regexp) */
1103
1104            /* this operator only works if the repeated item is
1105               exactly one character wide, and we're not already
1106               collecting backtracking points.  for other cases,
1107               use the MIN_REPEAT operator */
1108
1109            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1110
1111            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1112                   ctx->pattern[1], ctx->pattern[2]));
1113
1114            if (ctx->ptr + ctx->pattern[1] > end)
1115                RETURN_FAILURE; /* cannot match */
1116
1117            state->ptr = ctx->ptr;
1118
1119            if (ctx->pattern[1] == 0)
1120                ctx->count = 0;
1121            else {
1122                /* count using pattern min as the maximum */
1123                ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1124                RETURN_ON_ERROR(ret);
1125                DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1126                if (ret < (Py_ssize_t) ctx->pattern[1])
1127                    /* didn't match minimum number of times */
1128                    RETURN_FAILURE;
1129                /* advance past minimum matches of repeat */
1130                ctx->count = ret;
1131                ctx->ptr += ctx->count;
1132            }
1133
1134            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1135                /* tail is empty.  we're finished */
1136                state->ptr = ctx->ptr;
1137                RETURN_SUCCESS;
1138
1139            } else {
1140                /* general case */
1141                LASTMARK_SAVE();
1142                while ((Py_ssize_t)ctx->pattern[2] == 65535
1143                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1144                    state->ptr = ctx->ptr;
1145                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1146                            ctx->pattern+ctx->pattern[0]);
1147                    if (ret) {
1148                        RETURN_ON_ERROR(ret);
1149                        RETURN_SUCCESS;
1150                    }
1151                    state->ptr = ctx->ptr;
1152                    ret = SRE_COUNT(state, ctx->pattern+3, 1);
1153                    RETURN_ON_ERROR(ret);
1154                    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1155                    if (ret == 0)
1156                        break;
1157                    assert(ret == 1);
1158                    ctx->ptr++;
1159                    ctx->count++;
1160                    LASTMARK_RESTORE();
1161                }
1162            }
1163            RETURN_FAILURE;
1164
1165        case SRE_OP_REPEAT:
1166            /* create repeat context.  all the hard work is done
1167               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1168            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1169            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1170                   ctx->pattern[1], ctx->pattern[2]));
1171
1172            /* install new repeat context */
1173            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1174            if (!ctx->u.rep) {
1175                PyErr_NoMemory();
1176                RETURN_FAILURE;
1177            }
1178            ctx->u.rep->count = -1;
1179            ctx->u.rep->pattern = ctx->pattern;
1180            ctx->u.rep->prev = state->repeat;
1181            ctx->u.rep->last_ptr = NULL;
1182            state->repeat = ctx->u.rep;
1183
1184            state->ptr = ctx->ptr;
1185            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1186            state->repeat = ctx->u.rep->prev;
1187            PyObject_FREE(ctx->u.rep);
1188
1189            if (ret) {
1190                RETURN_ON_ERROR(ret);
1191                RETURN_SUCCESS;
1192            }
1193            RETURN_FAILURE;
1194
1195        case SRE_OP_MAX_UNTIL:
1196            /* maximizing repeat */
1197            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1198
1199            /* FIXME: we probably need to deal with zero-width
1200               matches in here... */
1201
1202            ctx->u.rep = state->repeat;
1203            if (!ctx->u.rep)
1204                RETURN_ERROR(SRE_ERROR_STATE);
1205
1206            state->ptr = ctx->ptr;
1207
1208            ctx->count = ctx->u.rep->count+1;
1209
1210            TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1211                   ctx->ptr, ctx->count));
1212
1213            if (ctx->count < ctx->u.rep->pattern[1]) {
1214                /* not enough matches */
1215                ctx->u.rep->count = ctx->count;
1216                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1217                        ctx->u.rep->pattern+3);
1218                if (ret) {
1219                    RETURN_ON_ERROR(ret);
1220                    RETURN_SUCCESS;
1221                }
1222                ctx->u.rep->count = ctx->count-1;
1223                state->ptr = ctx->ptr;
1224                RETURN_FAILURE;
1225            }
1226
1227            if ((ctx->count < ctx->u.rep->pattern[2] ||
1228                ctx->u.rep->pattern[2] == 65535) &&
1229                state->ptr != ctx->u.rep->last_ptr) {
1230                /* we may have enough matches, but if we can
1231                   match another item, do so */
1232                ctx->u.rep->count = ctx->count;
1233                LASTMARK_SAVE();
1234                MARK_PUSH(ctx->lastmark);
1235                /* zero-width match protection */
1236                DATA_PUSH(&ctx->u.rep->last_ptr);
1237                ctx->u.rep->last_ptr = state->ptr;
1238                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1239                        ctx->u.rep->pattern+3);
1240                DATA_POP(&ctx->u.rep->last_ptr);
1241                if (ret) {
1242                    MARK_POP_DISCARD(ctx->lastmark);
1243                    RETURN_ON_ERROR(ret);
1244                    RETURN_SUCCESS;
1245                }
1246                MARK_POP(ctx->lastmark);
1247                LASTMARK_RESTORE();
1248                ctx->u.rep->count = ctx->count-1;
1249                state->ptr = ctx->ptr;
1250            }
1251
1252            /* cannot match more repeated items here.  make sure the
1253               tail matches */
1254            state->repeat = ctx->u.rep->prev;
1255            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1256            RETURN_ON_SUCCESS(ret);
1257            state->repeat = ctx->u.rep;
1258            state->ptr = ctx->ptr;
1259            RETURN_FAILURE;
1260
1261        case SRE_OP_MIN_UNTIL:
1262            /* minimizing repeat */
1263            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1264
1265            ctx->u.rep = state->repeat;
1266            if (!ctx->u.rep)
1267                RETURN_ERROR(SRE_ERROR_STATE);
1268
1269            state->ptr = ctx->ptr;
1270
1271            ctx->count = ctx->u.rep->count+1;
1272
1273            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1274                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
1275
1276            if (ctx->count < ctx->u.rep->pattern[1]) {
1277                /* not enough matches */
1278                ctx->u.rep->count = ctx->count;
1279                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1280                        ctx->u.rep->pattern+3);
1281                if (ret) {
1282                    RETURN_ON_ERROR(ret);
1283                    RETURN_SUCCESS;
1284                }
1285                ctx->u.rep->count = ctx->count-1;
1286                state->ptr = ctx->ptr;
1287                RETURN_FAILURE;
1288            }
1289
1290            LASTMARK_SAVE();
1291
1292            /* see if the tail matches */
1293            state->repeat = ctx->u.rep->prev;
1294            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1295            if (ret) {
1296                RETURN_ON_ERROR(ret);
1297                RETURN_SUCCESS;
1298            }
1299
1300            state->repeat = ctx->u.rep;
1301            state->ptr = ctx->ptr;
1302
1303            LASTMARK_RESTORE();
1304
1305            if (ctx->count >= ctx->u.rep->pattern[2]
1306                && ctx->u.rep->pattern[2] != 65535)
1307                RETURN_FAILURE;
1308
1309            ctx->u.rep->count = ctx->count;
1310            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1311                    ctx->u.rep->pattern+3);
1312            if (ret) {
1313                RETURN_ON_ERROR(ret);
1314                RETURN_SUCCESS;
1315            }
1316            ctx->u.rep->count = ctx->count-1;
1317            state->ptr = ctx->ptr;
1318            RETURN_FAILURE;
1319
1320        case SRE_OP_GROUPREF:
1321            /* match backreference */
1322            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1323                   ctx->ptr, ctx->pattern[0]));
1324            i = ctx->pattern[0];
1325            {
1326                Py_ssize_t groupref = i+i;
1327                if (groupref >= state->lastmark) {
1328                    RETURN_FAILURE;
1329                } else {
1330                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1331                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1332                    if (!p || !e || e < p)
1333                        RETURN_FAILURE;
1334                    while (p < e) {
1335                        if (ctx->ptr >= end || *ctx->ptr != *p)
1336                            RETURN_FAILURE;
1337                        p++; ctx->ptr++;
1338                    }
1339                }
1340            }
1341            ctx->pattern++;
1342            break;
1343
1344        case SRE_OP_GROUPREF_IGNORE:
1345            /* match backreference */
1346            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1347                   ctx->ptr, ctx->pattern[0]));
1348            i = ctx->pattern[0];
1349            {
1350                Py_ssize_t groupref = i+i;
1351                if (groupref >= state->lastmark) {
1352                    RETURN_FAILURE;
1353                } else {
1354                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1355                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1356                    if (!p || !e || e < p)
1357                        RETURN_FAILURE;
1358                    while (p < e) {
1359                        if (ctx->ptr >= end ||
1360                            state->lower(*ctx->ptr) != state->lower(*p))
1361                            RETURN_FAILURE;
1362                        p++; ctx->ptr++;
1363                    }
1364                }
1365            }
1366            ctx->pattern++;
1367            break;
1368
1369        case SRE_OP_GROUPREF_EXISTS:
1370            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1371                   ctx->ptr, ctx->pattern[0]));
1372            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1373            i = ctx->pattern[0];
1374            {
1375                Py_ssize_t groupref = i+i;
1376                if (groupref >= state->lastmark) {
1377                    ctx->pattern += ctx->pattern[1];
1378                    break;
1379                } else {
1380                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1381                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1382                    if (!p || !e || e < p) {
1383                        ctx->pattern += ctx->pattern[1];
1384                        break;
1385                    }
1386                }
1387            }
1388            ctx->pattern += 2;
1389            break;
1390
1391        case SRE_OP_ASSERT:
1392            /* assert subpattern */
1393            /* <ASSERT> <skip> <back> <pattern> */
1394            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1395                   ctx->ptr, ctx->pattern[1]));
1396            state->ptr = ctx->ptr - ctx->pattern[1];
1397            if (state->ptr < state->beginning)
1398                RETURN_FAILURE;
1399            DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1400            RETURN_ON_FAILURE(ret);
1401            ctx->pattern += ctx->pattern[0];
1402            break;
1403
1404        case SRE_OP_ASSERT_NOT:
1405            /* assert not subpattern */
1406            /* <ASSERT_NOT> <skip> <back> <pattern> */
1407            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1408                   ctx->ptr, ctx->pattern[1]));
1409            state->ptr = ctx->ptr - ctx->pattern[1];
1410            if (state->ptr >= state->beginning) {
1411                DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1412                if (ret) {
1413                    RETURN_ON_ERROR(ret);
1414                    RETURN_FAILURE;
1415                }
1416            }
1417            ctx->pattern += ctx->pattern[0];
1418            break;
1419
1420        case SRE_OP_FAILURE:
1421            /* immediate failure */
1422            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1423            RETURN_FAILURE;
1424
1425        default:
1426            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1427                   ctx->pattern[-1]));
1428            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1429        }
1430    }
1431
1432exit:
1433    ctx_pos = ctx->last_ctx_pos;
1434    jump = ctx->jump;
1435    DATA_POP_DISCARD(ctx);
1436    if (ctx_pos == -1)
1437        return ret;
1438    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1439
1440    switch (jump) {
1441        case JUMP_MAX_UNTIL_2:
1442            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1443            goto jump_max_until_2;
1444        case JUMP_MAX_UNTIL_3:
1445            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1446            goto jump_max_until_3;
1447        case JUMP_MIN_UNTIL_2:
1448            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1449            goto jump_min_until_2;
1450        case JUMP_MIN_UNTIL_3:
1451            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1452            goto jump_min_until_3;
1453        case JUMP_BRANCH:
1454            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1455            goto jump_branch;
1456        case JUMP_MAX_UNTIL_1:
1457            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1458            goto jump_max_until_1;
1459        case JUMP_MIN_UNTIL_1:
1460            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1461            goto jump_min_until_1;
1462        case JUMP_REPEAT:
1463            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1464            goto jump_repeat;
1465        case JUMP_REPEAT_ONE_1:
1466            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1467            goto jump_repeat_one_1;
1468        case JUMP_REPEAT_ONE_2:
1469            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1470            goto jump_repeat_one_2;
1471        case JUMP_MIN_REPEAT_ONE:
1472            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1473            goto jump_min_repeat_one;
1474        case JUMP_ASSERT:
1475            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1476            goto jump_assert;
1477        case JUMP_ASSERT_NOT:
1478            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1479            goto jump_assert_not;
1480        case JUMP_NONE:
1481            TRACE(("|%p|%…

Large files are truncated, but you can click here to view the full file