/Objects/stringlib/string_format.h
C++ Header | 1275 lines | 839 code | 172 blank | 264 comment | 174 complexity | 3c585a51defc041fe51620131e5655d0 MD5 | raw file
1/* 2 string_format.h -- implementation of string.format(). 3 4 It uses the Objects/stringlib conventions, so that it can be 5 compiled for both unicode and string objects. 6*/ 7 8 9/* Defines for Python 2.6 compatability */ 10#if PY_VERSION_HEX < 0x03000000 11#define PyLong_FromSsize_t _PyLong_FromSsize_t 12#endif 13 14/* Defines for more efficiently reallocating the string buffer */ 15#define INITIAL_SIZE_INCREMENT 100 16#define SIZE_MULTIPLIER 2 17#define MAX_SIZE_INCREMENT 3200 18 19 20/************************************************************************/ 21/*********** Global data structures and forward declarations *********/ 22/************************************************************************/ 23 24/* 25 A SubString consists of the characters between two string or 26 unicode pointers. 27*/ 28typedef struct { 29 STRINGLIB_CHAR *ptr; 30 STRINGLIB_CHAR *end; 31} SubString; 32 33 34/* forward declaration for recursion */ 35static PyObject * 36build_string(SubString *input, PyObject *args, PyObject *kwargs, 37 int recursion_depth); 38 39 40 41/************************************************************************/ 42/************************** Utility functions ************************/ 43/************************************************************************/ 44 45/* fill in a SubString from a pointer and length */ 46Py_LOCAL_INLINE(void) 47SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) 48{ 49 str->ptr = p; 50 if (p == NULL) 51 str->end = NULL; 52 else 53 str->end = str->ptr + len; 54} 55 56/* return a new string. if str->ptr is NULL, return None */ 57Py_LOCAL_INLINE(PyObject *) 58SubString_new_object(SubString *str) 59{ 60 if (str->ptr == NULL) { 61 Py_INCREF(Py_None); 62 return Py_None; 63 } 64 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 65} 66 67/* return a new string. if str->ptr is NULL, return None */ 68Py_LOCAL_INLINE(PyObject *) 69SubString_new_object_or_empty(SubString *str) 70{ 71 if (str->ptr == NULL) { 72 return STRINGLIB_NEW(NULL, 0); 73 } 74 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 75} 76 77/************************************************************************/ 78/*********** Output string management functions ****************/ 79/************************************************************************/ 80 81typedef struct { 82 STRINGLIB_CHAR *ptr; 83 STRINGLIB_CHAR *end; 84 PyObject *obj; 85 Py_ssize_t size_increment; 86} OutputString; 87 88/* initialize an OutputString object, reserving size characters */ 89static int 90output_initialize(OutputString *output, Py_ssize_t size) 91{ 92 output->obj = STRINGLIB_NEW(NULL, size); 93 if (output->obj == NULL) 94 return 0; 95 96 output->ptr = STRINGLIB_STR(output->obj); 97 output->end = STRINGLIB_LEN(output->obj) + output->ptr; 98 output->size_increment = INITIAL_SIZE_INCREMENT; 99 100 return 1; 101} 102 103/* 104 output_extend reallocates the output string buffer. 105 It returns a status: 0 for a failed reallocation, 106 1 for success. 107*/ 108 109static int 110output_extend(OutputString *output, Py_ssize_t count) 111{ 112 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); 113 Py_ssize_t curlen = output->ptr - startptr; 114 Py_ssize_t maxlen = curlen + count + output->size_increment; 115 116 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) 117 return 0; 118 startptr = STRINGLIB_STR(output->obj); 119 output->ptr = startptr + curlen; 120 output->end = startptr + maxlen; 121 if (output->size_increment < MAX_SIZE_INCREMENT) 122 output->size_increment *= SIZE_MULTIPLIER; 123 return 1; 124} 125 126/* 127 output_data dumps characters into our output string 128 buffer. 129 130 In some cases, it has to reallocate the string. 131 132 It returns a status: 0 for a failed reallocation, 133 1 for success. 134*/ 135static int 136output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) 137{ 138 if ((count > output->end - output->ptr) && !output_extend(output, count)) 139 return 0; 140 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); 141 output->ptr += count; 142 return 1; 143} 144 145/************************************************************************/ 146/*********** Format string parsing -- integers and identifiers *********/ 147/************************************************************************/ 148 149static Py_ssize_t 150get_integer(const SubString *str) 151{ 152 Py_ssize_t accumulator = 0; 153 Py_ssize_t digitval; 154 Py_ssize_t oldaccumulator; 155 STRINGLIB_CHAR *p; 156 157 /* empty string is an error */ 158 if (str->ptr >= str->end) 159 return -1; 160 161 for (p = str->ptr; p < str->end; p++) { 162 digitval = STRINGLIB_TODECIMAL(*p); 163 if (digitval < 0) 164 return -1; 165 /* 166 This trick was copied from old Unicode format code. It's cute, 167 but would really suck on an old machine with a slow divide 168 implementation. Fortunately, in the normal case we do not 169 expect too many digits. 170 */ 171 oldaccumulator = accumulator; 172 accumulator *= 10; 173 if ((accumulator+10)/10 != oldaccumulator+1) { 174 PyErr_Format(PyExc_ValueError, 175 "Too many decimal digits in format string"); 176 return -1; 177 } 178 accumulator += digitval; 179 } 180 return accumulator; 181} 182 183/************************************************************************/ 184/******** Functions to get field objects and specification strings ******/ 185/************************************************************************/ 186 187/* do the equivalent of obj.name */ 188static PyObject * 189getattr(PyObject *obj, SubString *name) 190{ 191 PyObject *newobj; 192 PyObject *str = SubString_new_object(name); 193 if (str == NULL) 194 return NULL; 195 newobj = PyObject_GetAttr(obj, str); 196 Py_DECREF(str); 197 return newobj; 198} 199 200/* do the equivalent of obj[idx], where obj is a sequence */ 201static PyObject * 202getitem_sequence(PyObject *obj, Py_ssize_t idx) 203{ 204 return PySequence_GetItem(obj, idx); 205} 206 207/* do the equivalent of obj[idx], where obj is not a sequence */ 208static PyObject * 209getitem_idx(PyObject *obj, Py_ssize_t idx) 210{ 211 PyObject *newobj; 212 PyObject *idx_obj = PyLong_FromSsize_t(idx); 213 if (idx_obj == NULL) 214 return NULL; 215 newobj = PyObject_GetItem(obj, idx_obj); 216 Py_DECREF(idx_obj); 217 return newobj; 218} 219 220/* do the equivalent of obj[name] */ 221static PyObject * 222getitem_str(PyObject *obj, SubString *name) 223{ 224 PyObject *newobj; 225 PyObject *str = SubString_new_object(name); 226 if (str == NULL) 227 return NULL; 228 newobj = PyObject_GetItem(obj, str); 229 Py_DECREF(str); 230 return newobj; 231} 232 233typedef struct { 234 /* the entire string we're parsing. we assume that someone else 235 is managing its lifetime, and that it will exist for the 236 lifetime of the iterator. can be empty */ 237 SubString str; 238 239 /* pointer to where we are inside field_name */ 240 STRINGLIB_CHAR *ptr; 241} FieldNameIterator; 242 243 244static int 245FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, 246 Py_ssize_t len) 247{ 248 SubString_init(&self->str, ptr, len); 249 self->ptr = self->str.ptr; 250 return 1; 251} 252 253static int 254_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) 255{ 256 STRINGLIB_CHAR c; 257 258 name->ptr = self->ptr; 259 260 /* return everything until '.' or '[' */ 261 while (self->ptr < self->str.end) { 262 switch (c = *self->ptr++) { 263 case '[': 264 case '.': 265 /* backup so that we this character will be seen next time */ 266 self->ptr--; 267 break; 268 default: 269 continue; 270 } 271 break; 272 } 273 /* end of string is okay */ 274 name->end = self->ptr; 275 return 1; 276} 277 278static int 279_FieldNameIterator_item(FieldNameIterator *self, SubString *name) 280{ 281 int bracket_seen = 0; 282 STRINGLIB_CHAR c; 283 284 name->ptr = self->ptr; 285 286 /* return everything until ']' */ 287 while (self->ptr < self->str.end) { 288 switch (c = *self->ptr++) { 289 case ']': 290 bracket_seen = 1; 291 break; 292 default: 293 continue; 294 } 295 break; 296 } 297 /* make sure we ended with a ']' */ 298 if (!bracket_seen) { 299 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); 300 return 0; 301 } 302 303 /* end of string is okay */ 304 /* don't include the ']' */ 305 name->end = self->ptr-1; 306 return 1; 307} 308 309/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ 310static int 311FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, 312 Py_ssize_t *name_idx, SubString *name) 313{ 314 /* check at end of input */ 315 if (self->ptr >= self->str.end) 316 return 1; 317 318 switch (*self->ptr++) { 319 case '.': 320 *is_attribute = 1; 321 if (_FieldNameIterator_attr(self, name) == 0) 322 return 0; 323 *name_idx = -1; 324 break; 325 case '[': 326 *is_attribute = 0; 327 if (_FieldNameIterator_item(self, name) == 0) 328 return 0; 329 *name_idx = get_integer(name); 330 break; 331 default: 332 /* Invalid character follows ']' */ 333 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " 334 "follow ']' in format field specifier"); 335 return 0; 336 } 337 338 /* empty string is an error */ 339 if (name->ptr == name->end) { 340 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); 341 return 0; 342 } 343 344 return 2; 345} 346 347 348/* input: field_name 349 output: 'first' points to the part before the first '[' or '.' 350 'first_idx' is -1 if 'first' is not an integer, otherwise 351 it's the value of first converted to an integer 352 'rest' is an iterator to return the rest 353*/ 354static int 355field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, 356 Py_ssize_t *first_idx, FieldNameIterator *rest) 357{ 358 STRINGLIB_CHAR c; 359 STRINGLIB_CHAR *p = ptr; 360 STRINGLIB_CHAR *end = ptr + len; 361 362 /* find the part up until the first '.' or '[' */ 363 while (p < end) { 364 switch (c = *p++) { 365 case '[': 366 case '.': 367 /* backup so that we this character is available to the 368 "rest" iterator */ 369 p--; 370 break; 371 default: 372 continue; 373 } 374 break; 375 } 376 377 /* set up the return values */ 378 SubString_init(first, ptr, p - ptr); 379 FieldNameIterator_init(rest, p, end - p); 380 381 /* see if "first" is an integer, in which case it's used as an index */ 382 *first_idx = get_integer(first); 383 384 /* zero length string is an error */ 385 if (first->ptr >= first->end) { 386 PyErr_SetString(PyExc_ValueError, "empty field name"); 387 goto error; 388 } 389 390 return 1; 391error: 392 return 0; 393} 394 395 396/* 397 get_field_object returns the object inside {}, before the 398 format_spec. It handles getindex and getattr lookups and consumes 399 the entire input string. 400*/ 401static PyObject * 402get_field_object(SubString *input, PyObject *args, PyObject *kwargs) 403{ 404 PyObject *obj = NULL; 405 int ok; 406 int is_attribute; 407 SubString name; 408 SubString first; 409 Py_ssize_t index; 410 FieldNameIterator rest; 411 412 if (!field_name_split(input->ptr, input->end - input->ptr, &first, 413 &index, &rest)) { 414 goto error; 415 } 416 417 if (index == -1) { 418 /* look up in kwargs */ 419 PyObject *key = SubString_new_object(&first); 420 if (key == NULL) 421 goto error; 422 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { 423 PyErr_SetObject(PyExc_KeyError, key); 424 Py_DECREF(key); 425 goto error; 426 } 427 Py_DECREF(key); 428 Py_INCREF(obj); 429 } 430 else { 431 /* look up in args */ 432 obj = PySequence_GetItem(args, index); 433 if (obj == NULL) 434 goto error; 435 } 436 437 /* iterate over the rest of the field_name */ 438 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, 439 &name)) == 2) { 440 PyObject *tmp; 441 442 if (is_attribute) 443 /* getattr lookup "." */ 444 tmp = getattr(obj, &name); 445 else 446 /* getitem lookup "[]" */ 447 if (index == -1) 448 tmp = getitem_str(obj, &name); 449 else 450 if (PySequence_Check(obj)) 451 tmp = getitem_sequence(obj, index); 452 else 453 /* not a sequence */ 454 tmp = getitem_idx(obj, index); 455 if (tmp == NULL) 456 goto error; 457 458 /* assign to obj */ 459 Py_DECREF(obj); 460 obj = tmp; 461 } 462 /* end of iterator, this is the non-error case */ 463 if (ok == 1) 464 return obj; 465error: 466 Py_XDECREF(obj); 467 return NULL; 468} 469 470/************************************************************************/ 471/***************** Field rendering functions **************************/ 472/************************************************************************/ 473 474/* 475 render_field() is the main function in this section. It takes the 476 field object and field specification string generated by 477 get_field_and_spec, and renders the field into the output string. 478 479 render_field calls fieldobj.__format__(format_spec) method, and 480 appends to the output. 481*/ 482static int 483render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) 484{ 485 int ok = 0; 486 PyObject *result = NULL; 487 PyObject *format_spec_object = NULL; 488 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL; 489 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ? 490 format_spec->ptr : NULL; 491 Py_ssize_t format_spec_len = format_spec->ptr ? 492 format_spec->end - format_spec->ptr : 0; 493 494 /* If we know the type exactly, skip the lookup of __format__ and just 495 call the formatter directly. */ 496#if STRINGLIB_IS_UNICODE 497 if (PyUnicode_CheckExact(fieldobj)) 498 formatter = _PyUnicode_FormatAdvanced; 499 /* Unfortunately, there's a problem with checking for int, long, 500 and float here. If we're being included as unicode, their 501 formatters expect string format_spec args. For now, just skip 502 this optimization for unicode. This could be fixed, but it's a 503 hassle. */ 504#else 505 if (PyString_CheckExact(fieldobj)) 506 formatter = _PyBytes_FormatAdvanced; 507 else if (PyInt_CheckExact(fieldobj)) 508 formatter =_PyInt_FormatAdvanced; 509 else if (PyLong_CheckExact(fieldobj)) 510 formatter =_PyLong_FormatAdvanced; 511 else if (PyFloat_CheckExact(fieldobj)) 512 formatter = _PyFloat_FormatAdvanced; 513#endif 514 515 if (formatter) { 516 /* we know exactly which formatter will be called when __format__ is 517 looked up, so call it directly, instead. */ 518 result = formatter(fieldobj, format_spec_start, format_spec_len); 519 } 520 else { 521 /* We need to create an object out of the pointers we have, because 522 __format__ takes a string/unicode object for format_spec. */ 523 format_spec_object = STRINGLIB_NEW(format_spec_start, 524 format_spec_len); 525 if (format_spec_object == NULL) 526 goto done; 527 528 result = PyObject_Format(fieldobj, format_spec_object); 529 } 530 if (result == NULL) 531 goto done; 532 533#if PY_VERSION_HEX >= 0x03000000 534 assert(PyUnicode_Check(result)); 535#else 536 assert(PyString_Check(result) || PyUnicode_Check(result)); 537 538 /* Convert result to our type. We could be str, and result could 539 be unicode */ 540 { 541 PyObject *tmp = STRINGLIB_TOSTR(result); 542 if (tmp == NULL) 543 goto done; 544 Py_DECREF(result); 545 result = tmp; 546 } 547#endif 548 549 ok = output_data(output, 550 STRINGLIB_STR(result), STRINGLIB_LEN(result)); 551done: 552 Py_XDECREF(format_spec_object); 553 Py_XDECREF(result); 554 return ok; 555} 556 557static int 558parse_field(SubString *str, SubString *field_name, SubString *format_spec, 559 STRINGLIB_CHAR *conversion) 560{ 561 STRINGLIB_CHAR c = 0; 562 563 /* initialize these, as they may be empty */ 564 *conversion = '\0'; 565 SubString_init(format_spec, NULL, 0); 566 567 /* search for the field name. it's terminated by the end of the 568 string, or a ':' or '!' */ 569 field_name->ptr = str->ptr; 570 while (str->ptr < str->end) { 571 switch (c = *(str->ptr++)) { 572 case ':': 573 case '!': 574 break; 575 default: 576 continue; 577 } 578 break; 579 } 580 581 if (c == '!' || c == ':') { 582 /* we have a format specifier and/or a conversion */ 583 /* don't include the last character */ 584 field_name->end = str->ptr-1; 585 586 /* the format specifier is the rest of the string */ 587 format_spec->ptr = str->ptr; 588 format_spec->end = str->end; 589 590 /* see if there's a conversion specifier */ 591 if (c == '!') { 592 /* there must be another character present */ 593 if (format_spec->ptr >= format_spec->end) { 594 PyErr_SetString(PyExc_ValueError, 595 "end of format while looking for conversion " 596 "specifier"); 597 return 0; 598 } 599 *conversion = *(format_spec->ptr++); 600 601 /* if there is another character, it must be a colon */ 602 if (format_spec->ptr < format_spec->end) { 603 c = *(format_spec->ptr++); 604 if (c != ':') { 605 PyErr_SetString(PyExc_ValueError, 606 "expected ':' after format specifier"); 607 return 0; 608 } 609 } 610 } 611 612 return 1; 613 614 } 615 else { 616 /* end of string, there's no format_spec or conversion */ 617 field_name->end = str->ptr; 618 return 1; 619 } 620} 621 622/************************************************************************/ 623/******* Output string allocation and escape-to-markup processing ******/ 624/************************************************************************/ 625 626/* MarkupIterator breaks the string into pieces of either literal 627 text, or things inside {} that need to be marked up. it is 628 designed to make it easy to wrap a Python iterator around it, for 629 use with the Formatter class */ 630 631typedef struct { 632 SubString str; 633} MarkupIterator; 634 635static int 636MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) 637{ 638 SubString_init(&self->str, ptr, len); 639 return 1; 640} 641 642/* returns 0 on error, 1 on non-error termination, and 2 if it got a 643 string (or something to be expanded) */ 644static int 645MarkupIterator_next(MarkupIterator *self, SubString *literal, 646 SubString *field_name, SubString *format_spec, 647 STRINGLIB_CHAR *conversion, 648 int *format_spec_needs_expanding) 649{ 650 int at_end; 651 STRINGLIB_CHAR c = 0; 652 STRINGLIB_CHAR *start; 653 int count; 654 Py_ssize_t len; 655 int markup_follows = 0; 656 657 /* initialize all of the output variables */ 658 SubString_init(literal, NULL, 0); 659 SubString_init(field_name, NULL, 0); 660 SubString_init(format_spec, NULL, 0); 661 *conversion = '\0'; 662 *format_spec_needs_expanding = 0; 663 664 /* No more input, end of iterator. This is the normal exit 665 path. */ 666 if (self->str.ptr >= self->str.end) 667 return 1; 668 669 start = self->str.ptr; 670 671 /* First read any literal text. Read until the end of string, an 672 escaped '{' or '}', or an unescaped '{'. In order to never 673 allocate memory and so I can just pass pointers around, if 674 there's an escaped '{' or '}' then we'll return the literal 675 including the brace, but no format object. The next time 676 through, we'll return the rest of the literal, skipping past 677 the second consecutive brace. */ 678 while (self->str.ptr < self->str.end) { 679 switch (c = *(self->str.ptr++)) { 680 case '{': 681 case '}': 682 markup_follows = 1; 683 break; 684 default: 685 continue; 686 } 687 break; 688 } 689 690 at_end = self->str.ptr >= self->str.end; 691 len = self->str.ptr - start; 692 693 if ((c == '}') && (at_end || (c != *self->str.ptr))) { 694 PyErr_SetString(PyExc_ValueError, "Single '}' encountered " 695 "in format string"); 696 return 0; 697 } 698 if (at_end && c == '{') { 699 PyErr_SetString(PyExc_ValueError, "Single '{' encountered " 700 "in format string"); 701 return 0; 702 } 703 if (!at_end) { 704 if (c == *self->str.ptr) { 705 /* escaped } or {, skip it in the input. there is no 706 markup object following us, just this literal text */ 707 self->str.ptr++; 708 markup_follows = 0; 709 } 710 else 711 len--; 712 } 713 714 /* record the literal text */ 715 literal->ptr = start; 716 literal->end = start + len; 717 718 if (!markup_follows) 719 return 2; 720 721 /* this is markup, find the end of the string by counting nested 722 braces. note that this prohibits escaped braces, so that 723 format_specs cannot have braces in them. */ 724 count = 1; 725 726 start = self->str.ptr; 727 728 /* we know we can't have a zero length string, so don't worry 729 about that case */ 730 while (self->str.ptr < self->str.end) { 731 switch (c = *(self->str.ptr++)) { 732 case '{': 733 /* the format spec needs to be recursively expanded. 734 this is an optimization, and not strictly needed */ 735 *format_spec_needs_expanding = 1; 736 count++; 737 break; 738 case '}': 739 count--; 740 if (count <= 0) { 741 /* we're done. parse and get out */ 742 SubString s; 743 744 SubString_init(&s, start, self->str.ptr - 1 - start); 745 if (parse_field(&s, field_name, format_spec, conversion) == 0) 746 return 0; 747 748 /* a zero length field_name is an error */ 749 if (field_name->ptr == field_name->end) { 750 PyErr_SetString(PyExc_ValueError, "zero length field name " 751 "in format"); 752 return 0; 753 } 754 755 /* success */ 756 return 2; 757 } 758 break; 759 } 760 } 761 762 /* end of string while searching for matching '}' */ 763 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); 764 return 0; 765} 766 767 768/* do the !r or !s conversion on obj */ 769static PyObject * 770do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) 771{ 772 /* XXX in pre-3.0, do we need to convert this to unicode, since it 773 might have returned a string? */ 774 switch (conversion) { 775 case 'r': 776 return PyObject_Repr(obj); 777 case 's': 778 return STRINGLIB_TOSTR(obj); 779 default: 780 if (conversion > 32 && conversion < 127) { 781 /* It's the ASCII subrange; casting to char is safe 782 (assuming the execution character set is an ASCII 783 superset). */ 784 PyErr_Format(PyExc_ValueError, 785 "Unknown conversion specifier %c", 786 (char)conversion); 787 } else 788 PyErr_Format(PyExc_ValueError, 789 "Unknown conversion specifier \\x%x", 790 (unsigned int)conversion); 791 return NULL; 792 } 793} 794 795/* given: 796 797 {field_name!conversion:format_spec} 798 799 compute the result and write it to output. 800 format_spec_needs_expanding is an optimization. if it's false, 801 just output the string directly, otherwise recursively expand the 802 format_spec string. */ 803 804static int 805output_markup(SubString *field_name, SubString *format_spec, 806 int format_spec_needs_expanding, STRINGLIB_CHAR conversion, 807 OutputString *output, PyObject *args, PyObject *kwargs, 808 int recursion_depth) 809{ 810 PyObject *tmp = NULL; 811 PyObject *fieldobj = NULL; 812 SubString expanded_format_spec; 813 SubString *actual_format_spec; 814 int result = 0; 815 816 /* convert field_name to an object */ 817 fieldobj = get_field_object(field_name, args, kwargs); 818 if (fieldobj == NULL) 819 goto done; 820 821 if (conversion != '\0') { 822 tmp = do_conversion(fieldobj, conversion); 823 if (tmp == NULL) 824 goto done; 825 826 /* do the assignment, transferring ownership: fieldobj = tmp */ 827 Py_DECREF(fieldobj); 828 fieldobj = tmp; 829 tmp = NULL; 830 } 831 832 /* if needed, recurively compute the format_spec */ 833 if (format_spec_needs_expanding) { 834 tmp = build_string(format_spec, args, kwargs, recursion_depth-1); 835 if (tmp == NULL) 836 goto done; 837 838 /* note that in the case we're expanding the format string, 839 tmp must be kept around until after the call to 840 render_field. */ 841 SubString_init(&expanded_format_spec, 842 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); 843 actual_format_spec = &expanded_format_spec; 844 } 845 else 846 actual_format_spec = format_spec; 847 848 if (render_field(fieldobj, actual_format_spec, output) == 0) 849 goto done; 850 851 result = 1; 852 853done: 854 Py_XDECREF(fieldobj); 855 Py_XDECREF(tmp); 856 857 return result; 858} 859 860/* 861 do_markup is the top-level loop for the format() method. It 862 searches through the format string for escapes to markup codes, and 863 calls other functions to move non-markup text to the output, 864 and to perform the markup to the output. 865*/ 866static int 867do_markup(SubString *input, PyObject *args, PyObject *kwargs, 868 OutputString *output, int recursion_depth) 869{ 870 MarkupIterator iter; 871 int format_spec_needs_expanding; 872 int result; 873 SubString literal; 874 SubString field_name; 875 SubString format_spec; 876 STRINGLIB_CHAR conversion; 877 878 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); 879 while ((result = MarkupIterator_next(&iter, &literal, &field_name, 880 &format_spec, &conversion, 881 &format_spec_needs_expanding)) == 2) { 882 if (!output_data(output, literal.ptr, literal.end - literal.ptr)) 883 return 0; 884 if (field_name.ptr != field_name.end) 885 if (!output_markup(&field_name, &format_spec, 886 format_spec_needs_expanding, conversion, output, 887 args, kwargs, recursion_depth)) 888 return 0; 889 } 890 return result; 891} 892 893 894/* 895 build_string allocates the output string and then 896 calls do_markup to do the heavy lifting. 897*/ 898static PyObject * 899build_string(SubString *input, PyObject *args, PyObject *kwargs, 900 int recursion_depth) 901{ 902 OutputString output; 903 PyObject *result = NULL; 904 Py_ssize_t count; 905 906 output.obj = NULL; /* needed so cleanup code always works */ 907 908 /* check the recursion level */ 909 if (recursion_depth <= 0) { 910 PyErr_SetString(PyExc_ValueError, 911 "Max string recursion exceeded"); 912 goto done; 913 } 914 915 /* initial size is the length of the format string, plus the size 916 increment. seems like a reasonable default */ 917 if (!output_initialize(&output, 918 input->end - input->ptr + 919 INITIAL_SIZE_INCREMENT)) 920 goto done; 921 922 if (!do_markup(input, args, kwargs, &output, recursion_depth)) { 923 goto done; 924 } 925 926 count = output.ptr - STRINGLIB_STR(output.obj); 927 if (STRINGLIB_RESIZE(&output.obj, count) < 0) { 928 goto done; 929 } 930 931 /* transfer ownership to result */ 932 result = output.obj; 933 output.obj = NULL; 934 935done: 936 Py_XDECREF(output.obj); 937 return result; 938} 939 940/************************************************************************/ 941/*********** main routine ***********************************************/ 942/************************************************************************/ 943 944/* this is the main entry point */ 945static PyObject * 946do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) 947{ 948 SubString input; 949 950 /* PEP 3101 says only 2 levels, so that 951 "{0:{1}}".format('abc', 's') # works 952 "{0:{1:{2}}}".format('abc', 's', '') # fails 953 */ 954 int recursion_depth = 2; 955 956 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); 957 return build_string(&input, args, kwargs, recursion_depth); 958} 959 960 961 962/************************************************************************/ 963/*********** formatteriterator ******************************************/ 964/************************************************************************/ 965 966/* This is used to implement string.Formatter.vparse(). It exists so 967 Formatter can share code with the built in unicode.format() method. 968 It's really just a wrapper around MarkupIterator that is callable 969 from Python. */ 970 971typedef struct { 972 PyObject_HEAD 973 974 STRINGLIB_OBJECT *str; 975 976 MarkupIterator it_markup; 977} formatteriterobject; 978 979static void 980formatteriter_dealloc(formatteriterobject *it) 981{ 982 Py_XDECREF(it->str); 983 PyObject_FREE(it); 984} 985 986/* returns a tuple: 987 (literal, field_name, format_spec, conversion) 988 989 literal is any literal text to output. might be zero length 990 field_name is the string before the ':'. might be None 991 format_spec is the string after the ':'. mibht be None 992 conversion is either None, or the string after the '!' 993*/ 994static PyObject * 995formatteriter_next(formatteriterobject *it) 996{ 997 SubString literal; 998 SubString field_name; 999 SubString format_spec; 1000 STRINGLIB_CHAR conversion; 1001 int format_spec_needs_expanding; 1002 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name, 1003 &format_spec, &conversion, 1004 &format_spec_needs_expanding); 1005 1006 /* all of the SubString objects point into it->str, so no 1007 memory management needs to be done on them */ 1008 assert(0 <= result && result <= 2); 1009 if (result == 0 || result == 1) 1010 /* if 0, error has already been set, if 1, iterator is empty */ 1011 return NULL; 1012 else { 1013 PyObject *literal_str = NULL; 1014 PyObject *field_name_str = NULL; 1015 PyObject *format_spec_str = NULL; 1016 PyObject *conversion_str = NULL; 1017 PyObject *tuple = NULL; 1018 int has_field = field_name.ptr != field_name.end; 1019 1020 literal_str = SubString_new_object(&literal); 1021 if (literal_str == NULL) 1022 goto done; 1023 1024 field_name_str = SubString_new_object(&field_name); 1025 if (field_name_str == NULL) 1026 goto done; 1027 1028 /* if field_name is non-zero length, return a string for 1029 format_spec (even if zero length), else return None */ 1030 format_spec_str = (has_field ? 1031 SubString_new_object_or_empty : 1032 SubString_new_object)(&format_spec); 1033 if (format_spec_str == NULL) 1034 goto done; 1035 1036 /* if the conversion is not specified, return a None, 1037 otherwise create a one length string with the conversion 1038 character */ 1039 if (conversion == '\0') { 1040 conversion_str = Py_None; 1041 Py_INCREF(conversion_str); 1042 } 1043 else 1044 conversion_str = STRINGLIB_NEW(&conversion, 1); 1045 if (conversion_str == NULL) 1046 goto done; 1047 1048 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, 1049 conversion_str); 1050 done: 1051 Py_XDECREF(literal_str); 1052 Py_XDECREF(field_name_str); 1053 Py_XDECREF(format_spec_str); 1054 Py_XDECREF(conversion_str); 1055 return tuple; 1056 } 1057} 1058 1059static PyMethodDef formatteriter_methods[] = { 1060 {NULL, NULL} /* sentinel */ 1061}; 1062 1063static PyTypeObject PyFormatterIter_Type = { 1064 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1065 "formatteriterator", /* tp_name */ 1066 sizeof(formatteriterobject), /* tp_basicsize */ 1067 0, /* tp_itemsize */ 1068 /* methods */ 1069 (destructor)formatteriter_dealloc, /* tp_dealloc */ 1070 0, /* tp_print */ 1071 0, /* tp_getattr */ 1072 0, /* tp_setattr */ 1073 0, /* tp_compare */ 1074 0, /* tp_repr */ 1075 0, /* tp_as_number */ 1076 0, /* tp_as_sequence */ 1077 0, /* tp_as_mapping */ 1078 0, /* tp_hash */ 1079 0, /* tp_call */ 1080 0, /* tp_str */ 1081 PyObject_GenericGetAttr, /* tp_getattro */ 1082 0, /* tp_setattro */ 1083 0, /* tp_as_buffer */ 1084 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1085 0, /* tp_doc */ 1086 0, /* tp_traverse */ 1087 0, /* tp_clear */ 1088 0, /* tp_richcompare */ 1089 0, /* tp_weaklistoffset */ 1090 PyObject_SelfIter, /* tp_iter */ 1091 (iternextfunc)formatteriter_next, /* tp_iternext */ 1092 formatteriter_methods, /* tp_methods */ 1093 0, 1094}; 1095 1096/* unicode_formatter_parser is used to implement 1097 string.Formatter.vformat. it parses a string and returns tuples 1098 describing the parsed elements. It's a wrapper around 1099 stringlib/string_format.h's MarkupIterator */ 1100static PyObject * 1101formatter_parser(STRINGLIB_OBJECT *self) 1102{ 1103 formatteriterobject *it; 1104 1105 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); 1106 if (it == NULL) 1107 return NULL; 1108 1109 /* take ownership, give the object to the iterator */ 1110 Py_INCREF(self); 1111 it->str = self; 1112 1113 /* initialize the contained MarkupIterator */ 1114 MarkupIterator_init(&it->it_markup, 1115 STRINGLIB_STR(self), 1116 STRINGLIB_LEN(self)); 1117 1118 return (PyObject *)it; 1119} 1120 1121 1122/************************************************************************/ 1123/*********** fieldnameiterator ******************************************/ 1124/************************************************************************/ 1125 1126 1127/* This is used to implement string.Formatter.vparse(). It parses the 1128 field name into attribute and item values. It's a Python-callable 1129 wrapper around FieldNameIterator */ 1130 1131typedef struct { 1132 PyObject_HEAD 1133 1134 STRINGLIB_OBJECT *str; 1135 1136 FieldNameIterator it_field; 1137} fieldnameiterobject; 1138 1139static void 1140fieldnameiter_dealloc(fieldnameiterobject *it) 1141{ 1142 Py_XDECREF(it->str); 1143 PyObject_FREE(it); 1144} 1145 1146/* returns a tuple: 1147 (is_attr, value) 1148 is_attr is true if we used attribute syntax (e.g., '.foo') 1149 false if we used index syntax (e.g., '[foo]') 1150 value is an integer or string 1151*/ 1152static PyObject * 1153fieldnameiter_next(fieldnameiterobject *it) 1154{ 1155 int result; 1156 int is_attr; 1157 Py_ssize_t idx; 1158 SubString name; 1159 1160 result = FieldNameIterator_next(&it->it_field, &is_attr, 1161 &idx, &name); 1162 if (result == 0 || result == 1) 1163 /* if 0, error has already been set, if 1, iterator is empty */ 1164 return NULL; 1165 else { 1166 PyObject* result = NULL; 1167 PyObject* is_attr_obj = NULL; 1168 PyObject* obj = NULL; 1169 1170 is_attr_obj = PyBool_FromLong(is_attr); 1171 if (is_attr_obj == NULL) 1172 goto done; 1173 1174 /* either an integer or a string */ 1175 if (idx != -1) 1176 obj = PyLong_FromSsize_t(idx); 1177 else 1178 obj = SubString_new_object(&name); 1179 if (obj == NULL) 1180 goto done; 1181 1182 /* return a tuple of values */ 1183 result = PyTuple_Pack(2, is_attr_obj, obj); 1184 1185 done: 1186 Py_XDECREF(is_attr_obj); 1187 Py_XDECREF(obj); 1188 return result; 1189 } 1190} 1191 1192static PyMethodDef fieldnameiter_methods[] = { 1193 {NULL, NULL} /* sentinel */ 1194}; 1195 1196static PyTypeObject PyFieldNameIter_Type = { 1197 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1198 "fieldnameiterator", /* tp_name */ 1199 sizeof(fieldnameiterobject), /* tp_basicsize */ 1200 0, /* tp_itemsize */ 1201 /* methods */ 1202 (destructor)fieldnameiter_dealloc, /* tp_dealloc */ 1203 0, /* tp_print */ 1204 0, /* tp_getattr */ 1205 0, /* tp_setattr */ 1206 0, /* tp_compare */ 1207 0, /* tp_repr */ 1208 0, /* tp_as_number */ 1209 0, /* tp_as_sequence */ 1210 0, /* tp_as_mapping */ 1211 0, /* tp_hash */ 1212 0, /* tp_call */ 1213 0, /* tp_str */ 1214 PyObject_GenericGetAttr, /* tp_getattro */ 1215 0, /* tp_setattro */ 1216 0, /* tp_as_buffer */ 1217 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1218 0, /* tp_doc */ 1219 0, /* tp_traverse */ 1220 0, /* tp_clear */ 1221 0, /* tp_richcompare */ 1222 0, /* tp_weaklistoffset */ 1223 PyObject_SelfIter, /* tp_iter */ 1224 (iternextfunc)fieldnameiter_next, /* tp_iternext */ 1225 fieldnameiter_methods, /* tp_methods */ 1226 0}; 1227 1228/* unicode_formatter_field_name_split is used to implement 1229 string.Formatter.vformat. it takes an PEP 3101 "field name", and 1230 returns a tuple of (first, rest): "first", the part before the 1231 first '.' or '['; and "rest", an iterator for the rest of the field 1232 name. it's a wrapper around stringlib/string_format.h's 1233 field_name_split. The iterator it returns is a 1234 FieldNameIterator */ 1235static PyObject * 1236formatter_field_name_split(STRINGLIB_OBJECT *self) 1237{ 1238 SubString first; 1239 Py_ssize_t first_idx; 1240 fieldnameiterobject *it; 1241 1242 PyObject *first_obj = NULL; 1243 PyObject *result = NULL; 1244 1245 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); 1246 if (it == NULL) 1247 return NULL; 1248 1249 /* take ownership, give the object to the iterator. this is 1250 just to keep the field_name alive */ 1251 Py_INCREF(self); 1252 it->str = self; 1253 1254 if (!field_name_split(STRINGLIB_STR(self), 1255 STRINGLIB_LEN(self), 1256 &first, &first_idx, &it->it_field)) 1257 goto done; 1258 1259 /* first becomes an integer, if possible; else a string */ 1260 if (first_idx != -1) 1261 first_obj = PyLong_FromSsize_t(first_idx); 1262 else 1263 /* convert "first" into a string object */ 1264 first_obj = SubString_new_object(&first); 1265 if (first_obj == NULL) 1266 goto done; 1267 1268 /* return a tuple of values */ 1269 result = PyTuple_Pack(2, first_obj, it); 1270 1271done: 1272 Py_XDECREF(it); 1273 Py_XDECREF(first_obj); 1274 return result; 1275}