/* Source file: /vendor/pcre/pcre_printint.src
   Repository:  http://github.com/feyeleanor/RubyGoLightly */
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2008 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
 39
 40
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
 50
 51
 52/* Macro that decides whether a character should be output as a literal or in
 53hexadecimal. We don't use isprint() because that can vary from system to system
 54(even without the use of locales) and we want the output always to be the same,
 55for testing purposes. This macro is used in pcretest as well as in this file. */
 56
 57#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
 58
 59/* The table of operator names. */
 60
 61static const char *OP_names[] = { OP_NAME_LIST };
 62
 63
 64
 65/*************************************************
 66*       Print single- or multi-byte character    *
 67*************************************************/
 68
 69static int
 70print_char(FILE *f, uschar *ptr, BOOL utf8)
 71{
 72int c = *ptr;
 73
 74#ifndef SUPPORT_UTF8
 75utf8 = utf8;  /* Avoid compiler warning */
 76if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
 77return 0;
 78
 79#else
 80if (!utf8 || (c & 0xc0) != 0xc0)
 81  {
 82  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
 83  return 0;
 84  }
 85else
 86  {
 87  int i;
 88  int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
 89  int s = 6*a;
 90  c = (c & _pcre_utf8_table3[a]) << s;
 91  for (i = 1; i <= a; i++)
 92    {
 93    /* This is a check for malformed UTF-8; it should only occur if the sanity
 94    check has been turned off. Rather than swallow random bytes, just stop if
 95    we hit a bad one. Print it with \X instead of \x as an indication. */
 96
 97    if ((ptr[i] & 0xc0) != 0x80)
 98      {
 99      fprintf(f, "\\X{%x}", c);
100      return i - 1;
101      }
102
103    /* The byte is OK */
104
105    s -= 6;
106    c |= (ptr[i] & 0x3f) << s;
107    }
108  if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
109  return a;
110  }
111#endif
112}
113
114
115
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Looks up the name of a Unicode property from its type and value.

When SUPPORT_UCP is defined, the _pcre_utt table is searched from its last
entry towards its first, and the name of the first matching entry found in
that order is returned. When SUPPORT_UCP is not defined, or when nothing
matches, the string "??" is returned.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns:   pointer to the property name, or to "??" if it is not known
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* The arguments are not needed here. Casting to void silences "unused
parameter" warnings without doing arithmetic on the values, which could
overflow for large arguments. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
136
137
138
139/*************************************************
140*         Print compiled regex                   *
141*************************************************/
142
143/* Make this function work for a regex with integers either byte order.
144However, we assume that what we are passed is a compiled regex. The
145print_lengths flag controls whether offsets and lengths of items are printed.
146They can be turned off from pcretest so that automatic tests on bytecode can be
147written that do not depend on the value of LINK_SIZE. */
148
149static void
150pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
151{
152real_pcre *re = (real_pcre *)external_re;
153uschar *codestart, *code;
154BOOL utf8;
155
156unsigned int options = re->options;
157int offset = re->name_table_offset;
158int count = re->name_count;
159int size = re->name_entry_size;
160
161if (re->magic_number != MAGIC_NUMBER)
162  {
163  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
164  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
165  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
166  options = ((options << 24) & 0xff000000) |
167            ((options <<  8) & 0x00ff0000) |
168            ((options >>  8) & 0x0000ff00) |
169            ((options >> 24) & 0x000000ff);
170  }
171
172code = codestart = (uschar *)re + offset + count * size;
173utf8 = (options & PCRE_UTF8) != 0;
174
175for(;;)
176  {
177  uschar *ccode;
178  int c;
179  int extra = 0;
180
181  if (print_lengths)
182    fprintf(f, "%3d ", (int)(code - codestart));
183  else
184    fprintf(f, "    ");
185
186  switch(*code)
187    {
188    case OP_END:
189    fprintf(f, "    %s\n", OP_names[*code]);
190    fprintf(f, "------------------------------------------------------------------\n");
191    return;
192
193    case OP_OPT:
194    fprintf(f, " %.2x %s", code[1], OP_names[*code]);
195    break;
196
197    case OP_CHAR:
198    fprintf(f, "    ");
199    do
200      {
201      code++;
202      code += 1 + print_char(f, code, utf8);
203      }
204    while (*code == OP_CHAR);
205    fprintf(f, "\n");
206    continue;
207
208    case OP_CHARNC:
209    fprintf(f, " NC ");
210    do
211      {
212      code++;
213      code += 1 + print_char(f, code, utf8);
214      }
215    while (*code == OP_CHARNC);
216    fprintf(f, "\n");
217    continue;
218
219    case OP_CBRA:
220    case OP_SCBRA:
221    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
222      else fprintf(f, "    ");
223    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
224    break;
225
226    case OP_BRA:
227    case OP_SBRA:
228    case OP_KETRMAX:
229    case OP_KETRMIN:
230    case OP_ALT:
231    case OP_KET:
232    case OP_ASSERT:
233    case OP_ASSERT_NOT:
234    case OP_ASSERTBACK:
235    case OP_ASSERTBACK_NOT:
236    case OP_ONCE:
237    case OP_COND:
238    case OP_SCOND:
239    case OP_REVERSE:
240    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
241      else fprintf(f, "    ");
242    fprintf(f, "%s", OP_names[*code]);
243    break;
244
245    case OP_CREF:
246    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
247    break;
248
249    case OP_RREF:
250    c = GET2(code, 1);
251    if (c == RREF_ANY)
252      fprintf(f, "    Cond recurse any");
253    else
254      fprintf(f, "    Cond recurse %d", c);
255    break;
256
257    case OP_DEF:
258    fprintf(f, "    Cond def");
259    break;
260
261    case OP_STAR:
262    case OP_MINSTAR:
263    case OP_POSSTAR:
264    case OP_PLUS:
265    case OP_MINPLUS:
266    case OP_POSPLUS:
267    case OP_QUERY:
268    case OP_MINQUERY:
269    case OP_POSQUERY:
270    case OP_TYPESTAR:
271    case OP_TYPEMINSTAR:
272    case OP_TYPEPOSSTAR:
273    case OP_TYPEPLUS:
274    case OP_TYPEMINPLUS:
275    case OP_TYPEPOSPLUS:
276    case OP_TYPEQUERY:
277    case OP_TYPEMINQUERY:
278    case OP_TYPEPOSQUERY:
279    fprintf(f, "    ");
280    if (*code >= OP_TYPESTAR)
281      {
282      fprintf(f, "%s", OP_names[code[1]]);
283      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
284        {
285        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
286        extra = 2;
287        }
288      }
289    else extra = print_char(f, code+1, utf8);
290    fprintf(f, "%s", OP_names[*code]);
291    break;
292
293    case OP_EXACT:
294    case OP_UPTO:
295    case OP_MINUPTO:
296    case OP_POSUPTO:
297    fprintf(f, "    ");
298    extra = print_char(f, code+3, utf8);
299    fprintf(f, "{");
300    if (*code != OP_EXACT) fprintf(f, "0,");
301    fprintf(f, "%d}", GET2(code,1));
302    if (*code == OP_MINUPTO) fprintf(f, "?");
303      else if (*code == OP_POSUPTO) fprintf(f, "+");
304    break;
305
306    case OP_TYPEEXACT:
307    case OP_TYPEUPTO:
308    case OP_TYPEMINUPTO:
309    case OP_TYPEPOSUPTO:
310    fprintf(f, "    %s", OP_names[code[3]]);
311    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
312      {
313      fprintf(f, " %s ", get_ucpname(code[4], code[5]));
314      extra = 2;
315      }
316    fprintf(f, "{");
317    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
318    fprintf(f, "%d}", GET2(code,1));
319    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
320      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
321    break;
322
323    case OP_NOT:
324    c = code[1];
325    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
326      else fprintf(f, "    [^\\x%02x]", c);
327    break;
328
329    case OP_NOTSTAR:
330    case OP_NOTMINSTAR:
331    case OP_NOTPOSSTAR:
332    case OP_NOTPLUS:
333    case OP_NOTMINPLUS:
334    case OP_NOTPOSPLUS:
335    case OP_NOTQUERY:
336    case OP_NOTMINQUERY:
337    case OP_NOTPOSQUERY:
338    c = code[1];
339    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
340      else fprintf(f, "    [^\\x%02x]", c);
341    fprintf(f, "%s", OP_names[*code]);
342    break;
343
344    case OP_NOTEXACT:
345    case OP_NOTUPTO:
346    case OP_NOTMINUPTO:
347    case OP_NOTPOSUPTO:
348    c = code[3];
349    if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
350      else fprintf(f, "    [^\\x%02x]{", c);
351    if (*code != OP_NOTEXACT) fprintf(f, "0,");
352    fprintf(f, "%d}", GET2(code,1));
353    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
354      else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
355    break;
356
357    case OP_RECURSE:
358    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
359      else fprintf(f, "    ");
360    fprintf(f, "%s", OP_names[*code]);
361    break;
362
363    case OP_REF:
364    fprintf(f, "    \\%d", GET2(code,1));
365    ccode = code + _pcre_OP_lengths[*code];
366    goto CLASS_REF_REPEAT;
367
368    case OP_CALLOUT:
369    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
370      GET(code, 2 + LINK_SIZE));
371    break;
372
373    case OP_PROP:
374    case OP_NOTPROP:
375    fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
376    break;
377
378    /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
379    having this code always here, and it makes it less messy without all those
380    #ifdefs. */
381
382    case OP_CLASS:
383    case OP_NCLASS:
384    case OP_XCLASS:
385      {
386      int i, min, max;
387      BOOL printmap;
388
389      fprintf(f, "    [");
390
391      if (*code == OP_XCLASS)
392        {
393        extra = GET(code, 1);
394        ccode = code + LINK_SIZE + 1;
395        printmap = (*ccode & XCL_MAP) != 0;
396        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
397        }
398      else
399        {
400        printmap = TRUE;
401        ccode = code + 1;
402        }
403
404      /* Print a bit map */
405
406      if (printmap)
407        {
408        for (i = 0; i < 256; i++)
409          {
410          if ((ccode[i/8] & (1 << (i&7))) != 0)
411            {
412            int j;
413            for (j = i+1; j < 256; j++)
414              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
415            if (i == '-' || i == ']') fprintf(f, "\\");
416            if (PRINTABLE(i)) fprintf(f, "%c", i);
417              else fprintf(f, "\\x%02x", i);
418            if (--j > i)
419              {
420              if (j != i + 1) fprintf(f, "-");
421              if (j == '-' || j == ']') fprintf(f, "\\");
422              if (PRINTABLE(j)) fprintf(f, "%c", j);
423                else fprintf(f, "\\x%02x", j);
424              }
425            i = j;
426            }
427          }
428        ccode += 32;
429        }
430
431      /* For an XCLASS there is always some additional data */
432
433      if (*code == OP_XCLASS)
434        {
435        int ch;
436        while ((ch = *ccode++) != XCL_END)
437          {
438          if (ch == XCL_PROP)
439            {
440            int ptype = *ccode++;
441            int pvalue = *ccode++;
442            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
443            }
444          else if (ch == XCL_NOTPROP)
445            {
446            int ptype = *ccode++;
447            int pvalue = *ccode++;
448            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
449            }
450          else
451            {
452            ccode += 1 + print_char(f, ccode, TRUE);
453            if (ch == XCL_RANGE)
454              {
455              fprintf(f, "-");
456              ccode += 1 + print_char(f, ccode, TRUE);
457              }
458            }
459          }
460        }
461
462      /* Indicate a non-UTF8 class which was created by negation */
463
464      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
465
466      /* Handle repeats after a class or a back reference */
467
468      CLASS_REF_REPEAT:
469      switch(*ccode)
470        {
471        case OP_CRSTAR:
472        case OP_CRMINSTAR:
473        case OP_CRPLUS:
474        case OP_CRMINPLUS:
475        case OP_CRQUERY:
476        case OP_CRMINQUERY:
477        fprintf(f, "%s", OP_names[*ccode]);
478        extra += _pcre_OP_lengths[*ccode];
479        break;
480
481        case OP_CRRANGE:
482        case OP_CRMINRANGE:
483        min = GET2(ccode,1);
484        max = GET2(ccode,3);
485        if (max == 0) fprintf(f, "{%d,}", min);
486        else fprintf(f, "{%d,%d}", min, max);
487        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
488        extra += _pcre_OP_lengths[*ccode];
489        break;
490
491        /* Do nothing if it's not a repeat; this code stops picky compilers
492        warning about the lack of a default code path. */
493
494        default:
495        break;
496        }
497      }
498    break;
499
500    /* Anything else is just an item with no data*/
501
502    default:
503    fprintf(f, "    %s", OP_names[*code]);
504    break;
505    }
506
507  code += _pcre_OP_lengths[*code] + extra;
508  fprintf(f, "\n");
509  }
510}
511
/* End of pcre_printint.src */