PageRenderTime 174ms CodeModel.GetById 89ms app.highlight 76ms RepoModel.GetById 1ms app.codeStats 1ms

/Objects/stringlib/string_format.h

http://unladen-swallow.googlecode.com/
C++ Header | 1275 lines | 839 code | 172 blank | 264 comment | 174 complexity | 3c585a51defc041fe51620131e5655d0 MD5 | raw file
   1/*
   2    string_format.h -- implementation of string.format().
   3
   4    It uses the Objects/stringlib conventions, so that it can be
   5    compiled for both unicode and string objects.
   6*/
   7
   8
   9/* Defines for Python 2.6 compatibility */
  10#if PY_VERSION_HEX < 0x03000000
  11#define PyLong_FromSsize_t _PyLong_FromSsize_t
  12#endif
  13
  14/* Defines for more efficiently reallocating the string buffer */
  15#define INITIAL_SIZE_INCREMENT 100
  16#define SIZE_MULTIPLIER 2
  17#define MAX_SIZE_INCREMENT  3200
  18
  19
  20/************************************************************************/
  21/***********   Global data structures and forward declarations  *********/
  22/************************************************************************/
  23
  24/*
  25   A SubString consists of the characters between two string or
  26   unicode pointers.
  27*/
  28typedef struct {
  29    STRINGLIB_CHAR *ptr;
  30    STRINGLIB_CHAR *end;
  31} SubString;
  32
  33
  34/* forward declaration for recursion */
  35static PyObject *
  36build_string(SubString *input, PyObject *args, PyObject *kwargs,
  37             int recursion_depth);
  38
  39
  40
  41/************************************************************************/
  42/**************************  Utility  functions  ************************/
  43/************************************************************************/
  44
  45/* fill in a SubString from a pointer and length */
  46Py_LOCAL_INLINE(void)
  47SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
  48{
  49    str->ptr = p;
  50    if (p == NULL)
  51        str->end = NULL;
  52    else
  53        str->end = str->ptr + len;
  54}
  55
  56/* return a new string.  if str->ptr is NULL, return None */
  57Py_LOCAL_INLINE(PyObject *)
  58SubString_new_object(SubString *str)
  59{
  60    if (str->ptr == NULL) {
  61        Py_INCREF(Py_None);
  62        return Py_None;
  63    }
  64    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  65}
  66
  67/* return a new string.  if str->ptr is NULL, return None */
  68Py_LOCAL_INLINE(PyObject *)
  69SubString_new_object_or_empty(SubString *str)
  70{
  71    if (str->ptr == NULL) {
  72        return STRINGLIB_NEW(NULL, 0);
  73    }
  74    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  75}
  76
  77/************************************************************************/
  78/***********    Output string management functions       ****************/
  79/************************************************************************/
  80
  81typedef struct {
  82    STRINGLIB_CHAR *ptr;
  83    STRINGLIB_CHAR *end;
  84    PyObject *obj;
  85    Py_ssize_t size_increment;
  86} OutputString;
  87
  88/* initialize an OutputString object, reserving size characters */
  89static int
  90output_initialize(OutputString *output, Py_ssize_t size)
  91{
  92    output->obj = STRINGLIB_NEW(NULL, size);
  93    if (output->obj == NULL)
  94        return 0;
  95
  96    output->ptr = STRINGLIB_STR(output->obj);
  97    output->end = STRINGLIB_LEN(output->obj) + output->ptr;
  98    output->size_increment = INITIAL_SIZE_INCREMENT;
  99
 100    return 1;
 101}
 102
 103/*
 104    output_extend reallocates the output string buffer.
 105    It returns a status:  0 for a failed reallocation,
 106    1 for success.
 107*/
 108
 109static int
 110output_extend(OutputString *output, Py_ssize_t count)
 111{
 112    STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
 113    Py_ssize_t curlen = output->ptr - startptr;
 114    Py_ssize_t maxlen = curlen + count + output->size_increment;
 115
 116    if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
 117        return 0;
 118    startptr = STRINGLIB_STR(output->obj);
 119    output->ptr = startptr + curlen;
 120    output->end = startptr + maxlen;
 121    if (output->size_increment < MAX_SIZE_INCREMENT)
 122        output->size_increment *= SIZE_MULTIPLIER;
 123    return 1;
 124}
 125
 126/*
 127    output_data dumps characters into our output string
 128    buffer.
 129
 130    In some cases, it has to reallocate the string.
 131
 132    It returns a status:  0 for a failed reallocation,
 133    1 for success.
 134*/
 135static int
 136output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
 137{
 138    if ((count > output->end - output->ptr) && !output_extend(output, count))
 139        return 0;
 140    memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
 141    output->ptr += count;
 142    return 1;
 143}
 144
 145/************************************************************************/
 146/***********  Format string parsing -- integers and identifiers *********/
 147/************************************************************************/
 148
 149static Py_ssize_t
 150get_integer(const SubString *str)
 151{
 152    Py_ssize_t accumulator = 0;
 153    Py_ssize_t digitval;
 154    Py_ssize_t oldaccumulator;
 155    STRINGLIB_CHAR *p;
 156
 157    /* empty string is an error */
 158    if (str->ptr >= str->end)
 159        return -1;
 160
 161    for (p = str->ptr; p < str->end; p++) {
 162        digitval = STRINGLIB_TODECIMAL(*p);
 163        if (digitval < 0)
 164            return -1;
 165        /*
 166           This trick was copied from old Unicode format code.  It's cute,
 167           but would really suck on an old machine with a slow divide
 168           implementation.  Fortunately, in the normal case we do not
 169           expect too many digits.
 170        */
 171        oldaccumulator = accumulator;
 172        accumulator *= 10;
 173        if ((accumulator+10)/10 != oldaccumulator+1) {
 174            PyErr_Format(PyExc_ValueError,
 175                         "Too many decimal digits in format string");
 176            return -1;
 177        }
 178        accumulator += digitval;
 179    }
 180    return accumulator;
 181}
 182
 183/************************************************************************/
 184/******** Functions to get field objects and specification strings ******/
 185/************************************************************************/
 186
 187/* do the equivalent of obj.name */
 188static PyObject *
 189getattr(PyObject *obj, SubString *name)
 190{
 191    PyObject *newobj;
 192    PyObject *str = SubString_new_object(name);
 193    if (str == NULL)
 194        return NULL;
 195    newobj = PyObject_GetAttr(obj, str);
 196    Py_DECREF(str);
 197    return newobj;
 198}
 199
 200/* do the equivalent of obj[idx], where obj is a sequence */
 201static PyObject *
 202getitem_sequence(PyObject *obj, Py_ssize_t idx)
 203{
 204    return PySequence_GetItem(obj, idx);
 205}
 206
 207/* do the equivalent of obj[idx], where obj is not a sequence */
 208static PyObject *
 209getitem_idx(PyObject *obj, Py_ssize_t idx)
 210{
 211    PyObject *newobj;
 212    PyObject *idx_obj = PyLong_FromSsize_t(idx);
 213    if (idx_obj == NULL)
 214        return NULL;
 215    newobj = PyObject_GetItem(obj, idx_obj);
 216    Py_DECREF(idx_obj);
 217    return newobj;
 218}
 219
 220/* do the equivalent of obj[name] */
 221static PyObject *
 222getitem_str(PyObject *obj, SubString *name)
 223{
 224    PyObject *newobj;
 225    PyObject *str = SubString_new_object(name);
 226    if (str == NULL)
 227        return NULL;
 228    newobj = PyObject_GetItem(obj, str);
 229    Py_DECREF(str);
 230    return newobj;
 231}
 232
 233typedef struct {
 234    /* the entire string we're parsing.  we assume that someone else
 235       is managing its lifetime, and that it will exist for the
 236       lifetime of the iterator.  can be empty */
 237    SubString str;
 238
 239    /* pointer to where we are inside field_name */
 240    STRINGLIB_CHAR *ptr;
 241} FieldNameIterator;
 242
 243
 244static int
 245FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
 246                       Py_ssize_t len)
 247{
 248    SubString_init(&self->str, ptr, len);
 249    self->ptr = self->str.ptr;
 250    return 1;
 251}
 252
 253static int
 254_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
 255{
 256    STRINGLIB_CHAR c;
 257
 258    name->ptr = self->ptr;
 259
 260    /* return everything until '.' or '[' */
 261    while (self->ptr < self->str.end) {
 262        switch (c = *self->ptr++) {
 263        case '[':
 264        case '.':
 265            /* backup so that we this character will be seen next time */
 266            self->ptr--;
 267            break;
 268        default:
 269            continue;
 270        }
 271        break;
 272    }
 273    /* end of string is okay */
 274    name->end = self->ptr;
 275    return 1;
 276}
 277
 278static int
 279_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
 280{
 281    int bracket_seen = 0;
 282    STRINGLIB_CHAR c;
 283
 284    name->ptr = self->ptr;
 285
 286    /* return everything until ']' */
 287    while (self->ptr < self->str.end) {
 288        switch (c = *self->ptr++) {
 289        case ']':
 290            bracket_seen = 1;
 291            break;
 292        default:
 293            continue;
 294        }
 295        break;
 296    }
 297    /* make sure we ended with a ']' */
 298    if (!bracket_seen) {
 299        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
 300        return 0;
 301    }
 302
 303    /* end of string is okay */
 304    /* don't include the ']' */
 305    name->end = self->ptr-1;
 306    return 1;
 307}
 308
 309/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
 310static int
 311FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
 312                       Py_ssize_t *name_idx, SubString *name)
 313{
 314    /* check at end of input */
 315    if (self->ptr >= self->str.end)
 316        return 1;
 317
 318    switch (*self->ptr++) {
 319    case '.':
 320        *is_attribute = 1;
 321        if (_FieldNameIterator_attr(self, name) == 0)
 322            return 0;
 323        *name_idx = -1;
 324        break;
 325    case '[':
 326        *is_attribute = 0;
 327        if (_FieldNameIterator_item(self, name) == 0)
 328            return 0;
 329        *name_idx = get_integer(name);
 330        break;
 331    default:
 332        /* Invalid character follows ']' */
 333        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
 334                        "follow ']' in format field specifier");
 335        return 0;
 336    }
 337
 338    /* empty string is an error */
 339    if (name->ptr == name->end) {
 340        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
 341        return 0;
 342    }
 343
 344    return 2;
 345}
 346
 347
 348/* input: field_name
 349   output: 'first' points to the part before the first '[' or '.'
 350           'first_idx' is -1 if 'first' is not an integer, otherwise
 351                       it's the value of first converted to an integer
 352           'rest' is an iterator to return the rest
 353*/
 354static int
 355field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
 356                 Py_ssize_t *first_idx, FieldNameIterator *rest)
 357{
 358    STRINGLIB_CHAR c;
 359    STRINGLIB_CHAR *p = ptr;
 360    STRINGLIB_CHAR *end = ptr + len;
 361
 362    /* find the part up until the first '.' or '[' */
 363    while (p < end) {
 364        switch (c = *p++) {
 365        case '[':
 366        case '.':
 367            /* backup so that we this character is available to the
 368               "rest" iterator */
 369            p--;
 370            break;
 371        default:
 372            continue;
 373        }
 374        break;
 375    }
 376
 377    /* set up the return values */
 378    SubString_init(first, ptr, p - ptr);
 379    FieldNameIterator_init(rest, p, end - p);
 380
 381    /* see if "first" is an integer, in which case it's used as an index */
 382    *first_idx = get_integer(first);
 383
 384    /* zero length string is an error */
 385    if (first->ptr >= first->end) {
 386        PyErr_SetString(PyExc_ValueError, "empty field name");
 387        goto error;
 388    }
 389
 390    return 1;
 391error:
 392    return 0;
 393}
 394
 395
 396/*
 397    get_field_object returns the object inside {}, before the
 398    format_spec.  It handles getindex and getattr lookups and consumes
 399    the entire input string.
 400*/
 401static PyObject *
 402get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
 403{
 404    PyObject *obj = NULL;
 405    int ok;
 406    int is_attribute;
 407    SubString name;
 408    SubString first;
 409    Py_ssize_t index;
 410    FieldNameIterator rest;
 411
 412    if (!field_name_split(input->ptr, input->end - input->ptr, &first,
 413                          &index, &rest)) {
 414        goto error;
 415    }
 416
 417    if (index == -1) {
 418        /* look up in kwargs */
 419        PyObject *key = SubString_new_object(&first);
 420        if (key == NULL)
 421            goto error;
 422        if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
 423            PyErr_SetObject(PyExc_KeyError, key);
 424            Py_DECREF(key);
 425            goto error;
 426        }
 427        Py_DECREF(key);
 428        Py_INCREF(obj);
 429    }
 430    else {
 431        /* look up in args */
 432        obj = PySequence_GetItem(args, index);
 433        if (obj == NULL)
 434            goto error;
 435    }
 436
 437    /* iterate over the rest of the field_name */
 438    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
 439                                        &name)) == 2) {
 440        PyObject *tmp;
 441
 442        if (is_attribute)
 443            /* getattr lookup "." */
 444            tmp = getattr(obj, &name);
 445        else
 446            /* getitem lookup "[]" */
 447            if (index == -1)
 448                tmp = getitem_str(obj, &name);
 449            else
 450                if (PySequence_Check(obj))
 451                    tmp = getitem_sequence(obj, index);
 452                else
 453                    /* not a sequence */
 454                    tmp = getitem_idx(obj, index);
 455        if (tmp == NULL)
 456            goto error;
 457
 458        /* assign to obj */
 459        Py_DECREF(obj);
 460        obj = tmp;
 461    }
 462    /* end of iterator, this is the non-error case */
 463    if (ok == 1)
 464        return obj;
 465error:
 466    Py_XDECREF(obj);
 467    return NULL;
 468}
 469
 470/************************************************************************/
 471/*****************  Field rendering functions  **************************/
 472/************************************************************************/
 473
 474/*
 475    render_field() is the main function in this section.  It takes the
 476    field object and field specification string generated by
 477    get_field_and_spec, and renders the field into the output string.
 478
 479    render_field calls fieldobj.__format__(format_spec) method, and
 480    appends to the output.
 481*/
 482static int
 483render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
 484{
 485    int ok = 0;
 486    PyObject *result = NULL;
 487    PyObject *format_spec_object = NULL;
 488    PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
 489    STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
 490	    format_spec->ptr : NULL;
 491    Py_ssize_t format_spec_len = format_spec->ptr ?
 492	    format_spec->end - format_spec->ptr : 0;
 493
 494    /* If we know the type exactly, skip the lookup of __format__ and just
 495       call the formatter directly. */
 496#if STRINGLIB_IS_UNICODE
 497    if (PyUnicode_CheckExact(fieldobj))
 498	formatter = _PyUnicode_FormatAdvanced;
 499    /* Unfortunately, there's a problem with checking for int, long,
 500       and float here.  If we're being included as unicode, their
 501       formatters expect string format_spec args.  For now, just skip
 502       this optimization for unicode.  This could be fixed, but it's a
 503       hassle. */
 504#else
 505    if (PyString_CheckExact(fieldobj))
 506	formatter = _PyBytes_FormatAdvanced;
 507    else if (PyInt_CheckExact(fieldobj))
 508	formatter =_PyInt_FormatAdvanced;
 509    else if (PyLong_CheckExact(fieldobj))
 510	formatter =_PyLong_FormatAdvanced;
 511    else if (PyFloat_CheckExact(fieldobj))
 512	formatter = _PyFloat_FormatAdvanced;
 513#endif
 514
 515    if (formatter) {
 516	/* we know exactly which formatter will be called when __format__ is
 517	   looked up, so call it directly, instead. */
 518	result = formatter(fieldobj, format_spec_start, format_spec_len);
 519    }
 520    else {
 521	/* We need to create an object out of the pointers we have, because
 522	   __format__ takes a string/unicode object for format_spec. */
 523	format_spec_object = STRINGLIB_NEW(format_spec_start,
 524					   format_spec_len);
 525	if (format_spec_object == NULL)
 526	    goto done;
 527
 528	result = PyObject_Format(fieldobj, format_spec_object);
 529    }
 530    if (result == NULL)
 531        goto done;
 532
 533#if PY_VERSION_HEX >= 0x03000000
 534    assert(PyUnicode_Check(result));
 535#else
 536    assert(PyString_Check(result) || PyUnicode_Check(result));
 537
 538    /* Convert result to our type.  We could be str, and result could
 539       be unicode */
 540    {
 541	PyObject *tmp = STRINGLIB_TOSTR(result);
 542	if (tmp == NULL)
 543	    goto done;
 544	Py_DECREF(result);
 545	result = tmp;
 546    }
 547#endif
 548
 549    ok = output_data(output,
 550                     STRINGLIB_STR(result), STRINGLIB_LEN(result));
 551done:
 552    Py_XDECREF(format_spec_object);
 553    Py_XDECREF(result);
 554    return ok;
 555}
 556
 557static int
 558parse_field(SubString *str, SubString *field_name, SubString *format_spec,
 559            STRINGLIB_CHAR *conversion)
 560{
 561    STRINGLIB_CHAR c = 0;
 562
 563    /* initialize these, as they may be empty */
 564    *conversion = '\0';
 565    SubString_init(format_spec, NULL, 0);
 566
 567    /* search for the field name.  it's terminated by the end of the
 568       string, or a ':' or '!' */
 569    field_name->ptr = str->ptr;
 570    while (str->ptr < str->end) {
 571        switch (c = *(str->ptr++)) {
 572        case ':':
 573        case '!':
 574            break;
 575        default:
 576            continue;
 577        }
 578        break;
 579    }
 580
 581    if (c == '!' || c == ':') {
 582        /* we have a format specifier and/or a conversion */
 583        /* don't include the last character */
 584        field_name->end = str->ptr-1;
 585
 586        /* the format specifier is the rest of the string */
 587        format_spec->ptr = str->ptr;
 588        format_spec->end = str->end;
 589
 590        /* see if there's a conversion specifier */
 591        if (c == '!') {
 592            /* there must be another character present */
 593            if (format_spec->ptr >= format_spec->end) {
 594                PyErr_SetString(PyExc_ValueError,
 595                                "end of format while looking for conversion "
 596                                "specifier");
 597                return 0;
 598            }
 599            *conversion = *(format_spec->ptr++);
 600
 601            /* if there is another character, it must be a colon */
 602            if (format_spec->ptr < format_spec->end) {
 603                c = *(format_spec->ptr++);
 604                if (c != ':') {
 605                    PyErr_SetString(PyExc_ValueError,
 606                                    "expected ':' after format specifier");
 607                    return 0;
 608                }
 609            }
 610        }
 611
 612        return 1;
 613
 614    }
 615    else {
 616        /* end of string, there's no format_spec or conversion */
 617        field_name->end = str->ptr;
 618        return 1;
 619    }
 620}
 621
 622/************************************************************************/
 623/******* Output string allocation and escape-to-markup processing  ******/
 624/************************************************************************/
 625
 626/* MarkupIterator breaks the string into pieces of either literal
 627   text, or things inside {} that need to be marked up.  it is
 628   designed to make it easy to wrap a Python iterator around it, for
 629   use with the Formatter class */
 630
 631typedef struct {
 632    SubString str;
 633} MarkupIterator;
 634
 635static int
 636MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
 637{
 638    SubString_init(&self->str, ptr, len);
 639    return 1;
 640}
 641
 642/* returns 0 on error, 1 on non-error termination, and 2 if it got a
 643   string (or something to be expanded) */
 644static int
 645MarkupIterator_next(MarkupIterator *self, SubString *literal,
 646                    SubString *field_name, SubString *format_spec,
 647                    STRINGLIB_CHAR *conversion,
 648                    int *format_spec_needs_expanding)
 649{
 650    int at_end;
 651    STRINGLIB_CHAR c = 0;
 652    STRINGLIB_CHAR *start;
 653    int count;
 654    Py_ssize_t len;
 655    int markup_follows = 0;
 656
 657    /* initialize all of the output variables */
 658    SubString_init(literal, NULL, 0);
 659    SubString_init(field_name, NULL, 0);
 660    SubString_init(format_spec, NULL, 0);
 661    *conversion = '\0';
 662    *format_spec_needs_expanding = 0;
 663
 664    /* No more input, end of iterator.  This is the normal exit
 665       path. */
 666    if (self->str.ptr >= self->str.end)
 667        return 1;
 668
 669    start = self->str.ptr;
 670
 671    /* First read any literal text. Read until the end of string, an
 672       escaped '{' or '}', or an unescaped '{'.  In order to never
 673       allocate memory and so I can just pass pointers around, if
 674       there's an escaped '{' or '}' then we'll return the literal
 675       including the brace, but no format object.  The next time
 676       through, we'll return the rest of the literal, skipping past
 677       the second consecutive brace. */
 678    while (self->str.ptr < self->str.end) {
 679        switch (c = *(self->str.ptr++)) {
 680        case '{':
 681        case '}':
 682            markup_follows = 1;
 683            break;
 684        default:
 685            continue;
 686        }
 687        break;
 688    }
 689
 690    at_end = self->str.ptr >= self->str.end;
 691    len = self->str.ptr - start;
 692
 693    if ((c == '}') && (at_end || (c != *self->str.ptr))) {
 694        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
 695                        "in format string");
 696        return 0;
 697    }
 698    if (at_end && c == '{') {
 699        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
 700                        "in format string");
 701        return 0;
 702    }
 703    if (!at_end) {
 704        if (c == *self->str.ptr) {
 705            /* escaped } or {, skip it in the input.  there is no
 706               markup object following us, just this literal text */
 707            self->str.ptr++;
 708            markup_follows = 0;
 709        }
 710        else
 711            len--;
 712    }
 713
 714    /* record the literal text */
 715    literal->ptr = start;
 716    literal->end = start + len;
 717
 718    if (!markup_follows)
 719        return 2;
 720
 721    /* this is markup, find the end of the string by counting nested
 722       braces.  note that this prohibits escaped braces, so that
 723       format_specs cannot have braces in them. */
 724    count = 1;
 725
 726    start = self->str.ptr;
 727
 728    /* we know we can't have a zero length string, so don't worry
 729       about that case */
 730    while (self->str.ptr < self->str.end) {
 731        switch (c = *(self->str.ptr++)) {
 732        case '{':
 733            /* the format spec needs to be recursively expanded.
 734               this is an optimization, and not strictly needed */
 735            *format_spec_needs_expanding = 1;
 736            count++;
 737            break;
 738        case '}':
 739            count--;
 740            if (count <= 0) {
 741                /* we're done.  parse and get out */
 742                SubString s;
 743
 744                SubString_init(&s, start, self->str.ptr - 1 - start);
 745                if (parse_field(&s, field_name, format_spec, conversion) == 0)
 746                    return 0;
 747
 748                /* a zero length field_name is an error */
 749                if (field_name->ptr == field_name->end) {
 750                    PyErr_SetString(PyExc_ValueError, "zero length field name "
 751                                    "in format");
 752                    return 0;
 753                }
 754
 755                /* success */
 756                return 2;
 757            }
 758            break;
 759        }
 760    }
 761
 762    /* end of string while searching for matching '}' */
 763    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
 764    return 0;
 765}
 766
 767
 768/* do the !r or !s conversion on obj */
 769static PyObject *
 770do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
 771{
 772    /* XXX in pre-3.0, do we need to convert this to unicode, since it
 773       might have returned a string? */
 774    switch (conversion) {
 775    case 'r':
 776        return PyObject_Repr(obj);
 777    case 's':
 778        return STRINGLIB_TOSTR(obj);
 779    default:
 780	if (conversion > 32 && conversion < 127) {
 781		/* It's the ASCII subrange; casting to char is safe
 782		   (assuming the execution character set is an ASCII
 783		   superset). */
 784        	PyErr_Format(PyExc_ValueError,
 785                     "Unknown conversion specifier %c",
 786                     (char)conversion);
 787	} else
 788		PyErr_Format(PyExc_ValueError,
 789		     "Unknown conversion specifier \\x%x",
 790		     (unsigned int)conversion);
 791        return NULL;
 792    }
 793}
 794
 795/* given:
 796
 797   {field_name!conversion:format_spec}
 798
 799   compute the result and write it to output.
 800   format_spec_needs_expanding is an optimization.  if it's false,
 801   just output the string directly, otherwise recursively expand the
 802   format_spec string. */
 803
 804static int
 805output_markup(SubString *field_name, SubString *format_spec,
 806              int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
 807              OutputString *output, PyObject *args, PyObject *kwargs,
 808              int recursion_depth)
 809{
 810    PyObject *tmp = NULL;
 811    PyObject *fieldobj = NULL;
 812    SubString expanded_format_spec;
 813    SubString *actual_format_spec;
 814    int result = 0;
 815
 816    /* convert field_name to an object */
 817    fieldobj = get_field_object(field_name, args, kwargs);
 818    if (fieldobj == NULL)
 819        goto done;
 820
 821    if (conversion != '\0') {
 822        tmp = do_conversion(fieldobj, conversion);
 823        if (tmp == NULL)
 824            goto done;
 825
 826        /* do the assignment, transferring ownership: fieldobj = tmp */
 827        Py_DECREF(fieldobj);
 828        fieldobj = tmp;
 829        tmp = NULL;
 830    }
 831
 832    /* if needed, recurively compute the format_spec */
 833    if (format_spec_needs_expanding) {
 834        tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
 835        if (tmp == NULL)
 836            goto done;
 837
 838        /* note that in the case we're expanding the format string,
 839           tmp must be kept around until after the call to
 840           render_field. */
 841        SubString_init(&expanded_format_spec,
 842                       STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
 843        actual_format_spec = &expanded_format_spec;
 844    }
 845    else
 846        actual_format_spec = format_spec;
 847
 848    if (render_field(fieldobj, actual_format_spec, output) == 0)
 849        goto done;
 850
 851    result = 1;
 852
 853done:
 854    Py_XDECREF(fieldobj);
 855    Py_XDECREF(tmp);
 856
 857    return result;
 858}
 859
 860/*
 861    do_markup is the top-level loop for the format() method.  It
 862    searches through the format string for escapes to markup codes, and
 863    calls other functions to move non-markup text to the output,
 864    and to perform the markup to the output.
 865*/
 866static int
 867do_markup(SubString *input, PyObject *args, PyObject *kwargs,
 868          OutputString *output, int recursion_depth)
 869{
 870    MarkupIterator iter;
 871    int format_spec_needs_expanding;
 872    int result;
 873    SubString literal;
 874    SubString field_name;
 875    SubString format_spec;
 876    STRINGLIB_CHAR conversion;
 877
 878    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
 879    while ((result = MarkupIterator_next(&iter, &literal, &field_name,
 880                                         &format_spec, &conversion,
 881                                         &format_spec_needs_expanding)) == 2) {
 882        if (!output_data(output, literal.ptr, literal.end - literal.ptr))
 883            return 0;
 884        if (field_name.ptr != field_name.end)
 885            if (!output_markup(&field_name, &format_spec,
 886                               format_spec_needs_expanding, conversion, output,
 887                               args, kwargs, recursion_depth))
 888                return 0;
 889    }
 890    return result;
 891}
 892
 893
 894/*
 895    build_string allocates the output string and then
 896    calls do_markup to do the heavy lifting.
 897*/
 898static PyObject *
 899build_string(SubString *input, PyObject *args, PyObject *kwargs,
 900             int recursion_depth)
 901{
 902    OutputString output;
 903    PyObject *result = NULL;
 904    Py_ssize_t count;
 905
 906    output.obj = NULL; /* needed so cleanup code always works */
 907
 908    /* check the recursion level */
 909    if (recursion_depth <= 0) {
 910        PyErr_SetString(PyExc_ValueError,
 911                        "Max string recursion exceeded");
 912        goto done;
 913    }
 914
 915    /* initial size is the length of the format string, plus the size
 916       increment.  seems like a reasonable default */
 917    if (!output_initialize(&output,
 918                           input->end - input->ptr +
 919                           INITIAL_SIZE_INCREMENT))
 920        goto done;
 921
 922    if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
 923        goto done;
 924    }
 925
 926    count = output.ptr - STRINGLIB_STR(output.obj);
 927    if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
 928        goto done;
 929    }
 930
 931    /* transfer ownership to result */
 932    result = output.obj;
 933    output.obj = NULL;
 934
 935done:
 936    Py_XDECREF(output.obj);
 937    return result;
 938}
 939
 940/************************************************************************/
 941/*********** main routine ***********************************************/
 942/************************************************************************/
 943
 944/* this is the main entry point */
 945static PyObject *
 946do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
 947{
 948    SubString input;
 949
 950    /* PEP 3101 says only 2 levels, so that
 951       "{0:{1}}".format('abc', 's')            # works
 952       "{0:{1:{2}}}".format('abc', 's', '')    # fails
 953    */
 954    int recursion_depth = 2;
 955
 956    SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
 957    return build_string(&input, args, kwargs, recursion_depth);
 958}
 959
 960
 961
 962/************************************************************************/
 963/*********** formatteriterator ******************************************/
 964/************************************************************************/
 965
/* This is used to implement string.Formatter.parse().  It exists so
   Formatter can share code with the built-in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */
 970
/* Instance layout of the "formatteriterator" type: a Python object that
   pairs a string with the MarkupIterator that walks over it. */
typedef struct {
    PyObject_HEAD

    /* the string being parsed; formatter_parser stores a reference it
       has INCREF'ed here and formatteriter_dealloc releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state; formatter_parser initializes it from the
       characters and length of str */
    MarkupIterator it_markup;
} formatteriterobject;
 978
 979static void
 980formatteriter_dealloc(formatteriterobject *it)
 981{
 982    Py_XDECREF(it->str);
 983    PyObject_FREE(it);
 984}
 985
 986/* returns a tuple:
 987   (literal, field_name, format_spec, conversion)
 988
 989   literal is any literal text to output.  might be zero length
 990   field_name is the string before the ':'.  might be None
 991   format_spec is the string after the ':'.  mibht be None
 992   conversion is either None, or the string after the '!'
 993*/
 994static PyObject *
 995formatteriter_next(formatteriterobject *it)
 996{
 997    SubString literal;
 998    SubString field_name;
 999    SubString format_spec;
1000    STRINGLIB_CHAR conversion;
1001    int format_spec_needs_expanding;
1002    int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
1003                                     &format_spec, &conversion,
1004                                     &format_spec_needs_expanding);
1005
1006    /* all of the SubString objects point into it->str, so no
1007       memory management needs to be done on them */
1008    assert(0 <= result && result <= 2);
1009    if (result == 0 || result == 1)
1010        /* if 0, error has already been set, if 1, iterator is empty */
1011        return NULL;
1012    else {
1013        PyObject *literal_str = NULL;
1014        PyObject *field_name_str = NULL;
1015        PyObject *format_spec_str = NULL;
1016        PyObject *conversion_str = NULL;
1017        PyObject *tuple = NULL;
1018        int has_field = field_name.ptr != field_name.end;
1019
1020        literal_str = SubString_new_object(&literal);
1021        if (literal_str == NULL)
1022            goto done;
1023
1024        field_name_str = SubString_new_object(&field_name);
1025        if (field_name_str == NULL)
1026            goto done;
1027
1028        /* if field_name is non-zero length, return a string for
1029           format_spec (even if zero length), else return None */
1030        format_spec_str = (has_field ?
1031                           SubString_new_object_or_empty :
1032                           SubString_new_object)(&format_spec);
1033        if (format_spec_str == NULL)
1034            goto done;
1035
1036        /* if the conversion is not specified, return a None,
1037           otherwise create a one length string with the conversion
1038           character */
1039        if (conversion == '\0') {
1040            conversion_str = Py_None;
1041            Py_INCREF(conversion_str);
1042        }
1043        else
1044	    conversion_str = STRINGLIB_NEW(&conversion, 1);
1045        if (conversion_str == NULL)
1046            goto done;
1047
1048        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1049                             conversion_str);
1050    done:
1051        Py_XDECREF(literal_str);
1052        Py_XDECREF(field_name_str);
1053        Py_XDECREF(format_spec_str);
1054        Py_XDECREF(conversion_str);
1055        return tuple;
1056    }
1057}
1058
/* Method table referenced by PyFormatterIter_Type.tp_methods.  It holds
   nothing but the terminating sentinel entry, i.e. no methods are
   defined through this table. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,		NULL}		/* sentinel */
};
1062
/* Type object for "formatteriterator" instances (formatteriterobject).
   Apart from the name and basic size, the slots filled in are
   tp_dealloc (formatteriter_dealloc), tp_getattro, tp_flags, tp_iter
   (PyObject_SelfIter), tp_iternext (formatteriter_next) and tp_methods;
   every other slot listed here is 0.  The initializer is positional, so
   the order of the entries must not be changed. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",		/* tp_name */
    sizeof(formatteriterobject),	/* tp_basicsize */
    0,					/* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,	/* tp_dealloc */
    0,					/* tp_print */
    0,					/* tp_getattr */
    0,					/* tp_setattr */
    0,					/* tp_compare */
    0,					/* tp_repr */
    0,					/* tp_as_number */
    0,					/* tp_as_sequence */
    0,					/* tp_as_mapping */
    0,					/* tp_hash */
    0,					/* tp_call */
    0,					/* tp_str */
    PyObject_GenericGetAttr,		/* tp_getattro */
    0,					/* tp_setattro */
    0,					/* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,			/* tp_flags */
    0,					/* tp_doc */
    0,					/* tp_traverse */
    0,					/* tp_clear */
    0,					/* tp_richcompare */
    0,					/* tp_weaklistoffset */
    PyObject_SelfIter,			/* tp_iter */
    (iternextfunc)formatteriter_next,	/* tp_iternext */
    formatteriter_methods,		/* tp_methods */
    /* slot following tp_methods (tp_members in the PyTypeObject
       layout -- confirm against the targeted Python version) */
    0,
};
1095
1096/* unicode_formatter_parser is used to implement
1097   string.Formatter.vformat.  it parses a string and returns tuples
1098   describing the parsed elements.  It's a wrapper around
1099   stringlib/string_format.h's MarkupIterator */
1100static PyObject *
1101formatter_parser(STRINGLIB_OBJECT *self)
1102{
1103    formatteriterobject *it;
1104
1105    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1106    if (it == NULL)
1107        return NULL;
1108
1109    /* take ownership, give the object to the iterator */
1110    Py_INCREF(self);
1111    it->str = self;
1112
1113    /* initialize the contained MarkupIterator */
1114    MarkupIterator_init(&it->it_markup,
1115                        STRINGLIB_STR(self),
1116                        STRINGLIB_LEN(self));
1117
1118    return (PyObject *)it;
1119}
1120
1121
1122/************************************************************************/
1123/*********** fieldnameiterator ******************************************/
1124/************************************************************************/
1125
1126
/* This is used to implement string.Formatter.get_field().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator */
1130
/* Instance layout of the "fieldnameiterator" type: a Python object that
   pairs a field-name string with the FieldNameIterator that walks it. */
typedef struct {
    PyObject_HEAD

    /* the field-name string; formatter_field_name_split stores a
       reference it has INCREF'ed here just to keep the string alive, and
       fieldnameiter_dealloc releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state, filled in by field_name_split */
    FieldNameIterator it_field;
} fieldnameiterobject;
1138
1139static void
1140fieldnameiter_dealloc(fieldnameiterobject *it)
1141{
1142    Py_XDECREF(it->str);
1143    PyObject_FREE(it);
1144}
1145
1146/* returns a tuple:
1147   (is_attr, value)
1148   is_attr is true if we used attribute syntax (e.g., '.foo')
1149              false if we used index syntax (e.g., '[foo]')
1150   value is an integer or string
1151*/
1152static PyObject *
1153fieldnameiter_next(fieldnameiterobject *it)
1154{
1155    int result;
1156    int is_attr;
1157    Py_ssize_t idx;
1158    SubString name;
1159
1160    result = FieldNameIterator_next(&it->it_field, &is_attr,
1161                                    &idx, &name);
1162    if (result == 0 || result == 1)
1163        /* if 0, error has already been set, if 1, iterator is empty */
1164        return NULL;
1165    else {
1166        PyObject* result = NULL;
1167        PyObject* is_attr_obj = NULL;
1168        PyObject* obj = NULL;
1169
1170        is_attr_obj = PyBool_FromLong(is_attr);
1171        if (is_attr_obj == NULL)
1172            goto done;
1173
1174        /* either an integer or a string */
1175        if (idx != -1)
1176            obj = PyLong_FromSsize_t(idx);
1177        else
1178            obj = SubString_new_object(&name);
1179        if (obj == NULL)
1180            goto done;
1181
1182        /* return a tuple of values */
1183        result = PyTuple_Pack(2, is_attr_obj, obj);
1184
1185    done:
1186        Py_XDECREF(is_attr_obj);
1187        Py_XDECREF(obj);
1188        return result;
1189    }
1190}
1191
/* Method table referenced by PyFieldNameIter_Type.tp_methods.  It holds
   nothing but the terminating sentinel entry, i.e. no methods are
   defined through this table. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,		NULL}		/* sentinel */
};
1195
/* Type object for "fieldnameiterator" instances (fieldnameiterobject).
   Apart from the name and basic size, the slots filled in are
   tp_dealloc (fieldnameiter_dealloc), tp_getattro, tp_flags, tp_iter
   (PyObject_SelfIter), tp_iternext (fieldnameiter_next) and tp_methods;
   every other slot listed here is 0.  The initializer is positional, so
   the order of the entries must not be changed. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",		/* tp_name */
    sizeof(fieldnameiterobject),	/* tp_basicsize */
    0,					/* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,	/* tp_dealloc */
    0,					/* tp_print */
    0,					/* tp_getattr */
    0,					/* tp_setattr */
    0,					/* tp_compare */
    0,					/* tp_repr */
    0,					/* tp_as_number */
    0,					/* tp_as_sequence */
    0,					/* tp_as_mapping */
    0,					/* tp_hash */
    0,					/* tp_call */
    0,					/* tp_str */
    PyObject_GenericGetAttr,		/* tp_getattro */
    0,					/* tp_setattro */
    0,					/* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,			/* tp_flags */
    0,					/* tp_doc */
    0,					/* tp_traverse */
    0,					/* tp_clear */
    0,					/* tp_richcompare */
    0,					/* tp_weaklistoffset */
    PyObject_SelfIter,			/* tp_iter */
    (iternextfunc)fieldnameiter_next,	/* tp_iternext */
    fieldnameiter_methods,		/* tp_methods */
    /* slot following tp_methods (tp_members in the PyTypeObject
       layout -- confirm against the targeted Python version) */
    0};
1227
1228/* unicode_formatter_field_name_split is used to implement
1229   string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1230   returns a tuple of (first, rest): "first", the part before the
1231   first '.' or '['; and "rest", an iterator for the rest of the field
1232   name.  it's a wrapper around stringlib/string_format.h's
1233   field_name_split.  The iterator it returns is a
1234   FieldNameIterator */
1235static PyObject *
1236formatter_field_name_split(STRINGLIB_OBJECT *self)
1237{
1238    SubString first;
1239    Py_ssize_t first_idx;
1240    fieldnameiterobject *it;
1241
1242    PyObject *first_obj = NULL;
1243    PyObject *result = NULL;
1244
1245    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1246    if (it == NULL)
1247        return NULL;
1248
1249    /* take ownership, give the object to the iterator.  this is
1250       just to keep the field_name alive */
1251    Py_INCREF(self);
1252    it->str = self;
1253
1254    if (!field_name_split(STRINGLIB_STR(self),
1255                          STRINGLIB_LEN(self),
1256                          &first, &first_idx, &it->it_field))
1257        goto done;
1258
1259    /* first becomes an integer, if possible; else a string */
1260    if (first_idx != -1)
1261        first_obj = PyLong_FromSsize_t(first_idx);
1262    else
1263        /* convert "first" into a string object */
1264        first_obj = SubString_new_object(&first);
1265    if (first_obj == NULL)
1266        goto done;
1267
1268    /* return a tuple of values */
1269    result = PyTuple_Pack(2, first_obj, it);
1270
1271done:
1272    Py_XDECREF(it);
1273    Py_XDECREF(first_obj);
1274    return result;
1275}