stringobject.c - This C code is part of the Python interpre…

/Objects/stringobject.c

http://unladen-swallow.googlecode.com/ · C · 5242 lines · 4329 code · 538 blank · 375 comment · 1221 complexity · a1c18aba068e8f25c3e3da8e22507f9f MD5 · raw file
Large files are truncated click here to view the full file

/* String (str/bytes) object implementation */

#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include <ctype.h>

#ifdef COUNT_ALLOCS
int null_strings, one_strings;
#endif

static PyStringObject *characters[UCHAR_MAX + 1];
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is that to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;

/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
PyObject *
PyString_FromStringAndSize(const char *str, Py_ssize_t size)
{
	register PyStringObject *op;
	if (size < 0) {
		PyErr_SetString(PyExc_SystemError,
		    "Negative size passed to PyString_FromStringAndSize");
		return NULL;
	}
	if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
		null_strings++;
#endif
		Py_INCREF(op);
		return (PyObject *)op;
	}
	if (size == 1 && str != NULL &&
	    (op = characters[*str & UCHAR_MAX]) != NULL)
	{
#ifdef COUNT_ALLOCS
		one_strings++;
#endif
		Py_INCREF(op);
		return (PyObject *)op;
	}

	if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
		PyErr_SetString(PyExc_OverflowError, "string is too large");
		return NULL;
	}

	/* Inline PyObject_NewVar */
	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
	if (op == NULL)
		return PyErr_NoMemory();
	PyObject_INIT_VAR(op, &PyString_Type, size);
	op->ob_shash = -1;
	op->ob_sstate = SSTATE_NOT_INTERNED;
	if (str != NULL)
		Py_MEMCPY(op->ob_sval, str, size);
	op->ob_sval[size] = '\0';
	/* share short strings */
	if (size == 0) {
		PyObject *t = (PyObject *)op;
		PyString_InternInPlace(&t);
		op = (PyStringObject *)t;
		nullstring = op;
		Py_INCREF(op);
	} else if (size == 1 && str != NULL) {
		PyObject *t = (PyObject *)op;
		PyString_InternInPlace(&t);
		op = (PyStringObject *)t;
		characters[*str & UCHAR_MAX] = op;
		Py_INCREF(op);
	}
	return (PyObject *) op;
}

PyObject *
PyString_FromString(const char *str)
{
	register size_t size;
	register PyStringObject *op;

	assert(str != NULL);
	size = strlen(str);
	if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
		PyErr_SetString(PyExc_OverflowError,
			"string is too long for a Python string");
		return NULL;
	}
	if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
		null_strings++;
#endif
		Py_INCREF(op);
		return (PyObject *)op;
	}
	if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
#ifdef COUNT_ALLOCS
		one_strings++;
#endif
		Py_INCREF(op);
		return (PyObject *)op;
	}

	/* Inline PyObject_NewVar */
	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
	if (op == NULL)
		return PyErr_NoMemory();
	PyObject_INIT_VAR(op, &PyString_Type, size);
	op->ob_shash = -1;
	op->ob_sstate = SSTATE_NOT_INTERNED;
	Py_MEMCPY(op->ob_sval, str, size+1);
	/* share short strings */
	if (size == 0) {
		PyObject *t = (PyObject *)op;
		PyString_InternInPlace(&t);
		op = (PyStringObject *)t;
		nullstring = op;
		Py_INCREF(op);
	} else if (size == 1) {
		PyObject *t = (PyObject *)op;
		PyString_InternInPlace(&t);
		op = (PyStringObject *)t;
		characters[*str & UCHAR_MAX] = op;
		Py_INCREF(op);
	}
	return (PyObject *) op;
}

PyObject *
PyString_FromFormatV(const char *format, va_list vargs)
{
	va_list count;
	Py_ssize_t n = 0;
	const char* f;
	char *s;
	PyObject* string;

#ifdef VA_LIST_IS_ARRAY
	Py_MEMCPY(count, vargs, sizeof(va_list));
#else
#ifdef  __va_copy
	__va_copy(count, vargs);
#else
	count = vargs;
#endif
#endif
	/* step 1: figure out how large a buffer we need */
	for (f = format; *f; f++) {
		if (*f == '%') {
			const char* p = f;
			while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
				;

			/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
			 * they don't affect the amount of space we reserve.
			 */
			if ((*f == 'l' || *f == 'z') &&
					(f[1] == 'd' || f[1] == 'u'))
				++f;

			switch (*f) {
			case 'c':
				(void)va_arg(count, int);
				/* fall through... */
			case '%':
				n++;
				break;
			case 'd': case 'u': case 'i': case 'x':
				(void) va_arg(count, int);
				/* 20 bytes is enough to hold a 64-bit
				   integer.  Decimal takes the most space.
				   This isn't enough for octal. */
				n += 20;
				break;
			case 's':
				s = va_arg(count, char*);
				n += strlen(s);
				break;
			case 'p':
				(void) va_arg(count, int);
				/* maximum 64-bit pointer representation:
				 * 0xffffffffffffffff
				 * so 19 characters is enough.
				 * XXX I count 18 -- what's the extra for?
				 */
				n += 19;
				break;
			default:
				/* if we stumble upon an unknown
				   formatting code, copy the rest of
				   the format string to the output
				   string. (we cannot just skip the
				   code, since there's no way to know
				   what's in the argument list) */
				n += strlen(p);
				goto expand;
			}
		} else
			n++;
	}
 expand:
	/* step 2: fill the buffer */
	/* Since we've analyzed how much space we need for the worst case,
	   use sprintf directly instead of the slower PyOS_snprintf. */
	string = PyString_FromStringAndSize(NULL, n);
	if (!string)
		return NULL;

	s = PyString_AsString(string);

	for (f = format; *f; f++) {
		if (*f == '%') {
			const char* p = f++;
			Py_ssize_t i;
			int longflag = 0;
			int size_tflag = 0;
			/* parse the width.precision part (we're only
			   interested in the precision value, if any) */
			n = 0;
			while (isdigit(Py_CHARMASK(*f)))
				n = (n*10) + *f++ - '0';
			if (*f == '.') {
				f++;
				n = 0;
				while (isdigit(Py_CHARMASK(*f)))
					n = (n*10) + *f++ - '0';
			}
			while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
				f++;
			/* handle the long flag, but only for %ld and %lu.
			   others can be added when necessary. */
			if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
				longflag = 1;
				++f;
			}
			/* handle the size_t flag. */
			if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
				size_tflag = 1;
				++f;
			}

			switch (*f) {
			case 'c':
				*s++ = va_arg(vargs, int);
				break;
			case 'd':
				if (longflag)
					sprintf(s, "%ld", va_arg(vargs, long));
				else if (size_tflag)
					sprintf(s, "%" PY_FORMAT_SIZE_T "d",
					        va_arg(vargs, Py_ssize_t));
				else
					sprintf(s, "%d", va_arg(vargs, int));
				s += strlen(s);
				break;
			case 'u':
				if (longflag)
					sprintf(s, "%lu",
						va_arg(vargs, unsigned long));
				else if (size_tflag)
					sprintf(s, "%" PY_FORMAT_SIZE_T "u",
					        va_arg(vargs, size_t));
				else
					sprintf(s, "%u",
						va_arg(vargs, unsigned int));
				s += strlen(s);
				break;
			case 'i':
				sprintf(s, "%i", va_arg(vargs, int));
				s += strlen(s);
				break;
			case 'x':
				sprintf(s, "%x", va_arg(vargs, int));
				s += strlen(s);
				break;
			case 's':
				p = va_arg(vargs, char*);
				i = strlen(p);
				if (n > 0 && i > n)
					i = n;
				Py_MEMCPY(s, p, i);
				s += i;
				break;
			case 'p':
				sprintf(s, "%p", va_arg(vargs, void*));
				/* %p is ill-defined:  ensure leading 0x. */
				if (s[1] == 'X')
					s[1] = 'x';
				else if (s[1] != 'x') {
					memmove(s+2, s, strlen(s)+1);
					s[0] = '0';
					s[1] = 'x';
				}
				s += strlen(s);
				break;
			case '%':
				*s++ = '%';
				break;
			default:
				strcpy(s, p);
				s += strlen(s);
				goto end;
			}
		} else
			*s++ = *f;
	}

 end:
	_PyString_Resize(&string, s - PyString_AS_STRING(string));
	return string;
}

PyObject *
PyString_FromFormat(const char *format, ...)
{
	PyObject* ret;
	va_list vargs;

#ifdef HAVE_STDARG_PROTOTYPES
	va_start(vargs, format);
#else
	va_start(vargs);
#endif
	ret = PyString_FromFormatV(format, vargs);
	va_end(vargs);
	return ret;
}


PyObject *PyString_Decode(const char *s,
			  Py_ssize_t size,
			  const char *encoding,
			  const char *errors)
{
    PyObject *v, *str;

    str = PyString_FromStringAndSize(s, size);
    if (str == NULL)
	return NULL;
    v = PyString_AsDecodedString(str, encoding, errors);
    Py_DECREF(str);
    return v;
}

PyObject *PyString_AsDecodedObject(PyObject *str,
				   const char *encoding,
				   const char *errors)
{
    PyObject *v;

    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
	encoding = PyUnicode_GetDefaultEncoding();
#else
	PyErr_SetString(PyExc_ValueError, "no encoding specified");
	goto onError;
#endif
    }

    /* Decode via the codec registry */
    v = PyCodec_Decode(str, encoding, errors);
    if (v == NULL)
        goto onError;

    return v;

 onError:
    return NULL;
}

PyObject *PyString_AsDecodedString(PyObject *str,
				   const char *encoding,
				   const char *errors)
{
    PyObject *v;

    v = PyString_AsDecodedObject(str, encoding, errors);
    if (v == NULL)
        goto onError;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(v)) {
	PyObject *temp = v;
	v = PyUnicode_AsEncodedString(v, NULL, NULL);
	Py_DECREF(temp);
	if (v == NULL)
	    goto onError;
    }
#endif
    if (!PyString_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return a string object (type=%.400s)",
                     Py_TYPE(v)->tp_name);
        Py_DECREF(v);
        goto onError;
    }

    return v;

 onError:
    return NULL;
}

PyObject *PyString_Encode(const char *s,
			  Py_ssize_t size,
			  const char *encoding,
			  const char *errors)
{
    PyObject *v, *str;

    str = PyString_FromStringAndSize(s, size);
    if (str == NULL)
	return NULL;
    v = PyString_AsEncodedString(str, encoding, errors);
    Py_DECREF(str);
    return v;
}

PyObject *PyString_AsEncodedObject(PyObject *str,
				   const char *encoding,
				   const char *errors)
{
    PyObject *v;

    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
	encoding = PyUnicode_GetDefaultEncoding();
#else
	PyErr_SetString(PyExc_ValueError, "no encoding specified");
	goto onError;
#endif
    }

    /* Encode via the codec registry */
    v = PyCodec_Encode(str, encoding, errors);
    if (v == NULL)
        goto onError;

    return v;

 onError:
    return NULL;
}

PyObject *PyString_AsEncodedString(PyObject *str,
				   const char *encoding,
				   const char *errors)
{
    PyObject *v;

    v = PyString_AsEncodedObject(str, encoding, errors);
    if (v == NULL)
        goto onError;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(v)) {
	PyObject *temp = v;
	v = PyUnicode_AsEncodedString(v, NULL, NULL);
	Py_DECREF(temp);
	if (v == NULL)
	    goto onError;
    }
#endif
    if (!PyString_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder did not return a string object (type=%.400s)",
                     Py_TYPE(v)->tp_name);
        Py_DECREF(v);
        goto onError;
    }

    return v;

 onError:
    return NULL;
}

static void
string_dealloc(PyObject *op)
{
	switch (PyString_CHECK_INTERNED(op)) {
		case SSTATE_NOT_INTERNED:
			break;

		case SSTATE_INTERNED_MORTAL:
			/* revive dead object temporarily for DelItem */
			Py_REFCNT(op) = 3;
			if (PyDict_DelItem(interned, op) != 0)
				Py_FatalError(
					"deletion of interned string failed");
			break;

		case SSTATE_INTERNED_IMMORTAL:
			Py_FatalError("Immortal interned string died.");

		default:
			Py_FatalError("Inconsistent interned string state.");
	}
	Py_TYPE(op)->tp_free(op);
}

/* Unescape a backslash-escaped string. If unicode is non-zero,
   the string is a u-literal. If recode_encoding is non-zero,
   the string is UTF-8 encoded and should be re-encoded in the
   specified encoding.  */

PyObject *PyString_DecodeEscape(const char *s,
				Py_ssize_t len,
				const char *errors,
				Py_ssize_t unicode,
				const char *recode_encoding)
{
	int c;
	char *p, *buf;
	const char *end;
	PyObject *v;
	Py_ssize_t newlen = recode_encoding ? 4*len:len;
	v = PyString_FromStringAndSize((char *)NULL, newlen);
	if (v == NULL)
		return NULL;
	p = buf = PyString_AsString(v);
	end = s + len;
	while (s < end) {
		if (*s != '\\') {
		  non_esc:
#ifdef Py_USING_UNICODE
			if (recode_encoding && (*s & 0x80)) {
				PyObject *u, *w;
				char *r;
				const char* t;
				Py_ssize_t rn;
				t = s;
				/* Decode non-ASCII bytes as UTF-8. */
				while (t < end && (*t & 0x80)) t++;
				u = PyUnicode_DecodeUTF8(s, t - s, errors);
				if(!u) goto failed;

				/* Recode them in target encoding. */
				w = PyUnicode_AsEncodedString(
					u, recode_encoding, errors);
				Py_DECREF(u);
				if (!w)	goto failed;

				/* Append bytes to output buffer. */
				assert(PyString_Check(w));
				r = PyString_AS_STRING(w);
				rn = PyString_GET_SIZE(w);
				Py_MEMCPY(p, r, rn);
				p += rn;
				Py_DECREF(w);
				s = t;
			} else {
				*p++ = *s++;
			}
#else
			*p++ = *s++;
#endif
			continue;
		}
		s++;
                if (s==end) {
			PyErr_SetString(PyExc_ValueError,
					"Trailing \\ in string");
			goto failed;
		}
		switch (*s++) {
		/* XXX This assumes ASCII! */
		case '\n': break;
		case '\\': *p++ = '\\'; break;
		case '\'': *p++ = '\''; break;
		case '\"': *p++ = '\"'; break;
		case 'b': *p++ = '\b'; break;
		case 'f': *p++ = '\014'; break; /* FF */
		case 't': *p++ = '\t'; break;
		case 'n': *p++ = '\n'; break;
		case 'r': *p++ = '\r'; break;
		case 'v': *p++ = '\013'; break; /* VT */
		case 'a': *p++ = '\007'; break; /* BEL, not classic C */
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			c = s[-1] - '0';
			if (s < end && '0' <= *s && *s <= '7') {
				c = (c<<3) + *s++ - '0';
				if (s < end && '0' <= *s && *s <= '7')
					c = (c<<3) + *s++ - '0';
			}
			*p++ = c;
			break;
		case 'x':
			if (s+1 < end &&
                            isxdigit(Py_CHARMASK(s[0])) &&
			    isxdigit(Py_CHARMASK(s[1])))
                        {
				unsigned int x = 0;
				c = Py_CHARMASK(*s);
				s++;
				if (isdigit(c))
					x = c - '0';
				else if (islower(c))
					x = 10 + c - 'a';
				else
					x = 10 + c - 'A';
				x = x << 4;
				c = Py_CHARMASK(*s);
				s++;
				if (isdigit(c))
					x += c - '0';
				else if (islower(c))
					x += 10 + c - 'a';
				else
					x += 10 + c - 'A';
				*p++ = x;
				break;
			}
			if (!errors || strcmp(errors, "strict") == 0) {
				PyErr_SetString(PyExc_ValueError,
						"invalid \\x escape");
				goto failed;
			}
			if (strcmp(errors, "replace") == 0) {
				*p++ = '?';
			} else if (strcmp(errors, "ignore") == 0)
				/* do nothing */;
			else {
				PyErr_Format(PyExc_ValueError,
					     "decoding error; "
					     "unknown error handling code: %.400s",
					     errors);
				goto failed;
			}
#ifndef Py_USING_UNICODE
		case 'u':
		case 'U':
		case 'N':
			if (unicode) {
				PyErr_SetString(PyExc_ValueError,
					  "Unicode escapes not legal "
					  "when Unicode disabled");
				goto failed;
			}
#endif
		default:
			*p++ = '\\';
			s--;
			goto non_esc; /* an arbitry number of unescaped
					 UTF-8 bytes may follow. */
		}
	}
	if (p-buf < newlen)
		_PyString_Resize(&v, p - buf);
	return v;
  failed:
	Py_DECREF(v);
	return NULL;
}

/* -------------------------------------------------------------------- */
/* object api */

static Py_ssize_t
string_getsize(register PyObject *op)
{
    	char *s;
    	Py_ssize_t len;
	if (PyString_AsStringAndSize(op, &s, &len))
		return -1;
	return len;
}

static /*const*/ char *
string_getbuffer(register PyObject *op)
{
    	char *s;
    	Py_ssize_t len;
	if (PyString_AsStringAndSize(op, &s, &len))
		return NULL;
	return s;
}

Py_ssize_t
PyString_Size(register PyObject *op)
{
	if (!PyString_Check(op))
		return string_getsize(op);
	return Py_SIZE(op);
}

/*const*/ char *
PyString_AsString(register PyObject *op)
{
	if (!PyString_Check(op))
		return string_getbuffer(op);
	return ((PyStringObject *)op) -> ob_sval;
}

int
PyString_AsStringAndSize(register PyObject *obj,
			 register char **s,
			 register Py_ssize_t *len)
{
	if (s == NULL) {
		PyErr_BadInternalCall();
		return -1;
	}

	if (!PyString_Check(obj)) {
#ifdef Py_USING_UNICODE
		if (PyUnicode_Check(obj)) {
			obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
			if (obj == NULL)
				return -1;
		}
		else
#endif
		{
			PyErr_Format(PyExc_TypeError,
				     "expected string or Unicode object, "
				     "%.200s found", Py_TYPE(obj)->tp_name);
			return -1;
		}
	}

	*s = PyString_AS_STRING(obj);
	if (len != NULL)
		*len = PyString_GET_SIZE(obj);
	else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
		PyErr_SetString(PyExc_TypeError,
				"expected string without null bytes");
		return -1;
	}
	return 0;
}

/* -------------------------------------------------------------------- */
/* Methods */

#include "stringlib/stringdefs.h"
#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"

#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h"



static int
string_print(PyStringObject *op, FILE *fp, int flags)
{
	Py_ssize_t i, str_len;
	char c;
	int quote;

	/* XXX Ought to check for interrupts when writing long strings */
	if (! PyString_CheckExact(op)) {
		int ret;
		/* A str subclass may have its own __str__ method. */
		op = (PyStringObject *) PyObject_Str((PyObject *)op);
		if (op == NULL)
			return -1;
		ret = string_print(op, fp, flags);
		Py_DECREF(op);
		return ret;
	}
	if (flags & Py_PRINT_RAW) {
		char *data = op->ob_sval;
		Py_ssize_t size = Py_SIZE(op);
		Py_BEGIN_ALLOW_THREADS
		while (size > INT_MAX) {
			/* Very long strings cannot be written atomically.
			 * But don't write exactly INT_MAX bytes at a time
			 * to avoid memory aligment issues.
			 */
			const int chunk_size = INT_MAX & ~0x3FFF;
			fwrite(data, 1, chunk_size, fp);
			data += chunk_size;
			size -= chunk_size;
		}
#ifdef __VMS
                if (size) fwrite(data, (int)size, 1, fp);
#else
                fwrite(data, 1, (int)size, fp);
#endif
		Py_END_ALLOW_THREADS
		return 0;
	}

	/* figure out which quote to use; single is preferred */
	quote = '\'';
	if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
	    !memchr(op->ob_sval, '"', Py_SIZE(op)))
		quote = '"';

	str_len = Py_SIZE(op);
	Py_BEGIN_ALLOW_THREADS
	fputc(quote, fp);
	for (i = 0; i < str_len; i++) {
		/* Since strings are immutable and the caller should have a
		reference, accessing the interal buffer should not be an issue
		with the GIL released. */
		c = op->ob_sval[i];
		if (c == quote || c == '\\')
			fprintf(fp, "\\%c", c);
                else if (c == '\t')
                        fprintf(fp, "\\t");
                else if (c == '\n')
                        fprintf(fp, "\\n");
                else if (c == '\r')
                        fprintf(fp, "\\r");
		else if (c < ' ' || c >= 0x7f)
			fprintf(fp, "\\x%02x", c & 0xff);
		else
			fputc(c, fp);
	}
	fputc(quote, fp);
	Py_END_ALLOW_THREADS
	return 0;
}

PyObject *
PyString_Repr(PyObject *obj, int smartquotes)
{
	register PyStringObject* op = (PyStringObject*) obj;
	size_t newsize = 2 + 4 * Py_SIZE(op);
	PyObject *v;
	if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
		PyErr_SetString(PyExc_OverflowError,
			"string is too large to make repr");
                return NULL;
	}
	v = PyString_FromStringAndSize((char *)NULL, newsize);
	if (v == NULL) {
		return NULL;
	}
	else {
		register Py_ssize_t i;
		register char c;
		register char *p;
		int quote;

		/* figure out which quote to use; single is preferred */
		quote = '\'';
		if (smartquotes &&
		    memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
		    !memchr(op->ob_sval, '"', Py_SIZE(op)))
			quote = '"';

		p = PyString_AS_STRING(v);
		*p++ = quote;
		for (i = 0; i < Py_SIZE(op); i++) {
			/* There's at least enough room for a hex escape
			   and a closing quote. */
			assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
			c = op->ob_sval[i];
			if (c == quote || c == '\\')
				*p++ = '\\', *p++ = c;
			else if (c == '\t')
				*p++ = '\\', *p++ = 't';
			else if (c == '\n')
				*p++ = '\\', *p++ = 'n';
			else if (c == '\r')
				*p++ = '\\', *p++ = 'r';
			else if (c < ' ' || c >= 0x7f) {
				/* For performance, we don't want to call
				   PyOS_snprintf here (extra layers of
				   function call). */
				sprintf(p, "\\x%02x", c & 0xff);
                                p += 4;
			}
			else
				*p++ = c;
		}
		assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
		*p++ = quote;
		*p = '\0';
		_PyString_Resize(
			&v, (p - PyString_AS_STRING(v)));
		return v;
	}
}

static PyObject *
string_repr(PyObject *op)
{
	return PyString_Repr(op, 1);
}

static PyObject *
string_str(PyObject *s)
{
	assert(PyString_Check(s));
	if (PyString_CheckExact(s)) {
		Py_INCREF(s);
		return s;
	}
	else {
		/* Subtype -- return genuine string with the same value. */
		PyStringObject *t = (PyStringObject *) s;
		return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
	}
}

static Py_ssize_t
string_length(PyStringObject *a)
{
	return Py_SIZE(a);
}

static PyObject *
string_concat(register PyStringObject *a, register PyObject *bb)
{
	register Py_ssize_t size;
	register PyStringObject *op;
	if (!PyString_Check(bb)) {
#ifdef Py_USING_UNICODE
		if (PyUnicode_Check(bb))
		    return PyUnicode_Concat((PyObject *)a, bb);
#endif
		if (PyByteArray_Check(bb))
		    return PyByteArray_Concat((PyObject *)a, bb);
		PyErr_Format(PyExc_TypeError,
			     "cannot concatenate 'str' and '%.200s' objects",
			     Py_TYPE(bb)->tp_name);
		return NULL;
	}
#define b ((PyStringObject *)bb)
	/* Optimize cases with empty left or right operand */
	if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
	    PyString_CheckExact(a) && PyString_CheckExact(b)) {
		if (Py_SIZE(a) == 0) {
			Py_INCREF(bb);
			return bb;
		}
		Py_INCREF(a);
		return (PyObject *)a;
	}
	size = Py_SIZE(a) + Py_SIZE(b);
	/* Check that string sizes are not negative, to prevent an
	   overflow in cases where we are passed incorrectly-created
	   strings with negative lengths (due to a bug in other code).
        */
	if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
	    Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
		PyErr_SetString(PyExc_OverflowError,
				"strings are too large to concat");
		return NULL;
	}
	  
	/* Inline PyObject_NewVar */
	if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
		PyErr_SetString(PyExc_OverflowError,
				"strings are too large to concat");
		return NULL;
	}
	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
	if (op == NULL)
		return PyErr_NoMemory();
	PyObject_INIT_VAR(op, &PyString_Type, size);
	op->ob_shash = -1;
	op->ob_sstate = SSTATE_NOT_INTERNED;
	Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
	Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
	op->ob_sval[size] = '\0';
	return (PyObject *) op;
#undef b
}

static PyObject *
string_repeat(register PyStringObject *a, register Py_ssize_t n)
{
	register Py_ssize_t i;
	register Py_ssize_t j;
	register Py_ssize_t size;
	register PyStringObject *op;
	size_t nbytes;
	if (n < 0)
		n = 0;
	/* watch out for overflows:  the size can overflow int,
	 * and the # of bytes needed can overflow size_t
	 */
	size = Py_SIZE(a) * n;
	if (n && size / n != Py_SIZE(a)) {
		PyErr_SetString(PyExc_OverflowError,
			"repeated string is too long");
		return NULL;
	}
	if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
		Py_INCREF(a);
		return (PyObject *)a;
	}
	nbytes = (size_t)size;
	if (nbytes + sizeof(PyStringObject) <= nbytes) {
		PyErr_SetString(PyExc_OverflowError,
			"repeated string is too long");
		return NULL;
	}
	op = (PyStringObject *)
		PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
	if (op == NULL)
		return PyErr_NoMemory();
	PyObject_INIT_VAR(op, &PyString_Type, size);
	op->ob_shash = -1;
	op->ob_sstate = SSTATE_NOT_INTERNED;
	op->ob_sval[size] = '\0';
	if (Py_SIZE(a) == 1 && n > 0) {
		memset(op->ob_sval, a->ob_sval[0] , n);
		return (PyObject *) op;
	}
	i = 0;
	if (i < size) {
		Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
		i = Py_SIZE(a);
	}
	while (i < size) {
		j = (i <= size-i)  ?  i  :  size-i;
		Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
		i += j;
	}
	return (PyObject *) op;
}

/* String slice a[i:j] consists of characters a[i] ... a[j-1] */

static PyObject *
string_slice(register PyStringObject *a, register Py_ssize_t i,
	     register Py_ssize_t j)
     /* j -- may be negative! */
{
	if (i < 0)
		i = 0;
	if (j < 0)
		j = 0; /* Avoid signed/unsigned bug in next line */
	if (j > Py_SIZE(a))
		j = Py_SIZE(a);
	if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
		/* It's the same as a */
		Py_INCREF(a);
		return (PyObject *)a;
	}
	if (j < i)
		j = i;
	return PyString_FromStringAndSize(a->ob_sval + i, j-i);
}

static int
string_contains(PyObject *str_obj, PyObject *sub_obj)
{
	if (!PyString_CheckExact(sub_obj)) {
#ifdef Py_USING_UNICODE
		if (PyUnicode_Check(sub_obj))
			return PyUnicode_Contains(str_obj, sub_obj);
#endif
		if (!PyString_Check(sub_obj)) {
			PyErr_Format(PyExc_TypeError,
			    "'in <string>' requires string as left operand, "
			    "not %.200s", Py_TYPE(sub_obj)->tp_name);
			return -1;
		}
	}

	return stringlib_contains_obj(str_obj, sub_obj);
}

static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i)
{
	char pchar;
	PyObject *v;
	if (i < 0 || i >= Py_SIZE(a)) {
		PyErr_SetString(PyExc_IndexError, "string index out of range");
		return NULL;
	}
	pchar = a->ob_sval[i];
	v = (PyObject *)characters[pchar & UCHAR_MAX];
	if (v == NULL)
		v = PyString_FromStringAndSize(&pchar, 1);
	else {
#ifdef COUNT_ALLOCS
		one_strings++;
#endif
		Py_INCREF(v);
	}
	return v;
}

static PyObject*
string_richcompare(PyStringObject *a, PyStringObject *b, int op)
{
	int c;
	Py_ssize_t len_a, len_b;
	Py_ssize_t min_len;
	PyObject *result;

	/* Make sure both arguments are strings. */
	if (!(PyString_Check(a) && PyString_Check(b))) {
		result = Py_NotImplemented;
		goto out;
	}
	if (a == b) {
		switch (op) {
		case Py_EQ:case Py_LE:case Py_GE:
			result = Py_True;
			goto out;
		case Py_NE:case Py_LT:case Py_GT:
			result = Py_False;
			goto out;
		}
	}
	if (op == Py_EQ) {
		/* Supporting Py_NE here as well does not save
		   much time, since Py_NE is rarely used.  */
		if (Py_SIZE(a) == Py_SIZE(b)
		    && (a->ob_sval[0] == b->ob_sval[0]
			&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
			result = Py_True;
		} else {
			result = Py_False;
		}
		goto out;
	}
	len_a = Py_SIZE(a); len_b = Py_SIZE(b);
	min_len = (len_a < len_b) ? len_a : len_b;
	if (min_len > 0) {
		c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
		if (c==0)
			c = memcmp(a->ob_sval, b->ob_sval, min_len);
	} else
		c = 0;
	if (c == 0)
		c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
	switch (op) {
	case Py_LT: c = c <  0; break;
	case Py_LE: c = c <= 0; break;
	case Py_EQ: assert(0);  break; /* unreachable */
	case Py_NE: c = c != 0; break;
	case Py_GT: c = c >  0; break;
	case Py_GE: c = c >= 0; break;
	default:
		result = Py_NotImplemented;
		goto out;
	}
	result = c ? Py_True : Py_False;
  out:
	Py_INCREF(result);
	return result;
}

int
_PyString_Eq(PyObject *o1, PyObject *o2)
{
	PyStringObject *a = (PyStringObject*) o1;
	PyStringObject *b = (PyStringObject*) o2;
        return Py_SIZE(a) == Py_SIZE(b)
          && *a->ob_sval == *b->ob_sval
          && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
}

static long
string_hash(PyStringObject *a)
{
	register Py_ssize_t len;
	register unsigned char *p;
	register long x;

	if (a->ob_shash != -1)
		return a->ob_shash;
	len = Py_SIZE(a);
	p = (unsigned char *) a->ob_sval;
	x = *p << 7;
	while (--len >= 0)
		x = (1000003*x) ^ *p++;
	x ^= Py_SIZE(a);
	if (x == -1)
		x = -2;
	a->ob_shash = x;
	return x;
}

static PyObject*
string_subscript(PyStringObject* self, PyObject* item)
{
	if (PyIndex_Check(item)) {
		Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
		if (i == -1 && PyErr_Occurred())
			return NULL;
		if (i < 0)
			i += PyString_GET_SIZE(self);
		return string_item(self, i);
	}
	else if (PySlice_Check(item)) {
		Py_ssize_t start, stop, step, slicelength, cur, i;
		char* source_buf;
		char* result_buf;
		PyObject* result;

		if (PySlice_GetIndicesEx((PySliceObject*)item,
				 PyString_GET_SIZE(self),
				 &start, &stop, &step, &slicelength) < 0) {
			return NULL;
		}

		if (slicelength <= 0) {
			return PyString_FromStringAndSize("", 0);
		}
		else if (start == 0 && step == 1 &&
			 slicelength == PyString_GET_SIZE(self) &&
			 PyString_CheckExact(self)) {
			Py_INCREF(self);
			return (PyObject *)self;
		}
		else if (step == 1) {
			return PyString_FromStringAndSize(
				PyString_AS_STRING(self) + start,
				slicelength);
		}
		else {
			source_buf = PyString_AsString((PyObject*)self);
			result_buf = (char *)PyMem_Malloc(slicelength);
			if (result_buf == NULL)
				return PyErr_NoMemory();

			for (cur = start, i = 0; i < slicelength;
			     cur += step, i++) {
				result_buf[i] = source_buf[cur];
			}

			result = PyString_FromStringAndSize(result_buf,
							    slicelength);
			PyMem_Free(result_buf);
			return result;
		}
	}
	else {
		PyErr_Format(PyExc_TypeError,
			     "string indices must be integers, not %.200s",
			     Py_TYPE(item)->tp_name);
		return NULL;
	}
}

static Py_ssize_t
string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
{
	if ( index != 0 ) {
		PyErr_SetString(PyExc_SystemError,
				"accessing non-existent string segment");
		return -1;
	}
	*ptr = (void *)self->ob_sval;
	return Py_SIZE(self);
}

static Py_ssize_t
string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
{
	PyErr_SetString(PyExc_TypeError,
			"Cannot use string as modifiable buffer");
	return -1;
}

static Py_ssize_t
string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
{
	if ( lenp )
		*lenp = Py_SIZE(self);
	return 1;
}

static Py_ssize_t
string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
{
	if ( index != 0 ) {
		PyErr_SetString(PyExc_SystemError,
				"accessing non-existent string segment");
		return -1;
	}
	*ptr = self->ob_sval;
	return Py_SIZE(self);
}

static int
string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
{
	return PyBuffer_FillInfo(view, (PyObject*)self,
				 (void *)self->ob_sval, Py_SIZE(self),
				 1, flags);
}

static PySequenceMethods string_as_sequence = {
	(lenfunc)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(ssizeargfunc)string_repeat, /*sq_repeat*/
	(ssizeargfunc)string_item, /*sq_item*/
	(ssizessizeargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};

static PyMappingMethods string_as_mapping = {
	(lenfunc)string_length,
	(binaryfunc)string_subscript,
	0,
};

static PyBufferProcs string_as_buffer = {
	(readbufferproc)string_buffer_getreadbuf,
	(writebufferproc)string_buffer_getwritebuf,
	(segcountproc)string_buffer_getsegcount,
	(charbufferproc)string_buffer_getcharbuf,
	(getbufferproc)string_buffer_getbuffer,
	0, /* XXX */
};



#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

#define STRIPNAME(i) (stripformat[i]+3)


/* Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)	\
  (target[offset] == pattern[0] &&				\
   target[offset+length-1] == pattern[length-1] &&		\
   !memcmp(target+offset+1, pattern+1, length-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
	(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)

#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	else							\
		Py_DECREF(str);

#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

#define SKIP_SPACE(s, i, len)    { while (i<len &&  isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0  &&  isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0  && !isspace(Py_CHARMASK(s[i]))) i--; }

Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
	const char *s = PyString_AS_STRING(self);
	Py_ssize_t i, j, count=0;
	PyObject *str;
	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));

	if (list == NULL)
		return NULL;

	i = j = 0;

	while (maxsplit-- > 0) {
		SKIP_SPACE(s, i, len);
		if (i==len) break;
		j = i; i++;
		SKIP_NONSPACE(s, i, len);
		if (j == 0 && i == len && PyString_CheckExact(self)) {
			/* No whitespace in self, so just use it as list[0] */
			Py_INCREF(self);
			PyList_SET_ITEM(list, 0, (PyObject *)self);
			count++;
			break;
		}
		SPLIT_ADD(s, j, i);
	}

	if (i < len) {
		/* Only occurs when maxsplit was reached */
		/* Skip any remaining whitespace and copy to end of string */
		SKIP_SPACE(s, i, len);
		if (i != len)
			SPLIT_ADD(s, i, len);
	}
	FIX_PREALLOC_SIZE(list);
	return list;
  onError:
	Py_DECREF(list);
	return NULL;
}

Py_LOCAL_INLINE(PyObject *)
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
	const char *s = PyString_AS_STRING(self);
	register Py_ssize_t i, j, count=0;
	PyObject *str;
	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

	if (list == NULL)
		return NULL;

	i = j = 0;
	while ((j < len) && (maxcount-- > 0)) {
		for(; j<len; j++) {
			/* I found that using memchr makes no difference */
			if (s[j] == ch) {
				SPLIT_ADD(s, i, j);
				i = j = j + 1;
				break;
			}
		}
	}
	if (i == 0 && count == 0 && PyString_CheckExact(self)) {
		/* ch not in self, so just use self as list[0] */
		Py_INCREF(self);
		PyList_SET_ITEM(list, 0, (PyObject *)self);
		count++;
	}
	else if (i <= len) {
		SPLIT_ADD(s, i, len);
	}
	FIX_PREALLOC_SIZE(list);
	return list;

  onError:
	Py_DECREF(list);
	return NULL;
}

PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string.  If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified or is None, any\n\
whitespace string is a separator and empty strings are removed\n\
from the result.");

static PyObject *
string_split(PyStringObject *self, PyObject *args)
{
	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
	Py_ssize_t maxsplit = -1, count=0;
	const char *s = PyString_AS_STRING(self), *sub;
	PyObject *list, *str, *subobj = Py_None;
#ifdef USE_FAST
	Py_ssize_t pos;
#endif

	if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
		return NULL;
	if (maxsplit < 0)
		maxsplit = PY_SSIZE_T_MAX;
	if (subobj == Py_None)
		return split_whitespace(self, len, maxsplit);
	if (PyString_Check(subobj)) {
		sub = PyString_AS_STRING(subobj);
		n = PyString_GET_SIZE(subobj);
	}
#ifdef Py_USING_UNICODE
	else if (PyUnicode_Check(subobj))
		return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
#endif
	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
		return NULL;

	if (n == 0) {
		PyErr_SetString(PyExc_ValueError, "empty separator");
		return NULL;
	}
	else if (n == 1)
		return split_char(self, len, sub[0], maxsplit);

	list = PyList_New(PREALLOC_SIZE(maxsplit));
	if (list == NULL)
		return NULL;

#ifdef USE_FAST
	i = j = 0;
	while (maxsplit-- > 0) {
		pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
		if (pos < 0)
			break;
		j = i+pos;
		SPLIT_ADD(s, i, j);
		i = j + n;
	}
#else
	i = j = 0;
	while ((j+n <= len) && (maxsplit-- > 0)) {
		for (; j+n <= len; j++) {
			if (Py_STRING_MATCH(s, j, sub, n)) {
				SPLIT_ADD(s, i, j);
				i = j = j + n;
				break;
			}
		}
	}
#endif
	SPLIT_ADD(s, i, len);
	FIX_PREALLOC_SIZE(list);
	return list;

 onError:
	Py_DECREF(list);
	return NULL;
}

PyDoc_STRVAR(partition__doc__,
"S.partition(sep) -> (head, sep, tail)\n\
\n\
Search for the separator sep in S, and return the part before it,\n\
the separator itself, and the part after it.  If the separator is not\n\
found, return S and two empty strings.");

static PyObject *
string_partition(PyStringObject *self, PyObject *sep_obj)
{
	const char *sep;
	Py_ssize_t sep_len;

	if (PyString_Check(sep_obj)) {
		sep = PyString_AS_STRING(sep_obj);
		sep_len = PyString_GET_SIZE(sep_obj);
	}
#ifdef Py_USING_UNICODE
	else if (PyUnicode_Check(sep_obj))
		return PyUnicode_Partition((PyObject *) self, sep_obj);
#endif
	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
		return NULL;

	return stringlib_partition(
		(PyObject*) self,
		PyString_AS_STRING(self), PyString_GET_SIZE(self),
		sep_obj, sep, sep_len
		);
}

PyDoc_STRVAR(rpartition__doc__,
"S.rpartition(sep) -> (tail, sep, head)\n\
\n\
Search for the separator sep in S, starting at the end of S, and return\n\
the part before it, the separator itself, and the part after it.  If the\n\
separator is not found, return two empty strings and S.");

static PyObject *
string_rpartition(PyStringObject *self, PyObject *sep_obj)
{
	const char *sep;
	Py_ssize_t sep_len;

	if (PyString_Check(sep_obj)) {
		sep = PyString_AS_STRING(sep_obj);
		sep_len = PyString_GET_SIZE(sep_obj);
	}
#ifdef Py_USING_UNICODE
	else if (PyUnicode_Check(sep_obj))
		return PyUnicode_RPartition((PyObject *) self, sep_obj);
#endif
	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
		return NULL;

	return stringlib_rpartition(
		(PyObject*) self,
		PyString_AS_STRING(self), PyString_GET_SIZE(self),
		sep_obj, sep, sep_len
		);
}

Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
	const char *s = PyString_AS_STRING(self);
	Py_ssize_t i, j, count=0;
	PyObject *str;
	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));

	if (list == NULL)
		return NULL;

	i = j = len-1;

	while (maxsplit-- > 0) {
		RSKIP_SPACE(s, i);
		if (i<0) break;
		j = i; i--;
		RSKIP_NONSPACE(s, i);
		if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
			/* No whitespace in self, so just use it as list[0] */
			Py_INCREF(self);
			PyList_SET_ITEM(list, 0, (PyObject *)self);
			count++;
			break;
		}
		SPLIT_ADD(s, i + 1, j + 1);
	}
	if (i >= 0) {
		/* Only occurs when maxsplit was reached */
		/* Skip any remaining whitespace and copy to beginning of string */
		RSKIP_SPACE(s, i);
		if (i >= 0)
			SPLIT_ADD(s, 0, i + 1);

	}
	FIX_PREALLOC_SIZE(list);
	if (PyList_Reverse(list) < 0)
		goto onError;
	return list;
  onError:
	Py_DECREF(list);
	return NULL;
}

Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
	const char *s = PyString_AS_STRING(self);
	register Py_ssize_t i, j, count=0;
	PyObject *str;
	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

	if (list == NULL)
		return NULL;

	i = j = len - 1;
	while ((i >= 0) && (maxcount-- > 0)) {
		for (; i >= 0; i--) {
			if (s[i] == ch) {
				SPLIT_ADD(s, i + 1, j + 1);
				j = i = i - 1;
				break;
			}
		}
	}
	if (i < 0 && count == 0 && PyString_CheckExact(self)) {
		/* ch not in self, so just use self as list[0] */
		Py_INCREF(self);
		PyList_SET_ITEM(list, 0, (PyObject *)self);
		count++;
	}
	else if (j >= -1) {
		SPLIT_ADD(s, 0, j + 1);
	}
	FIX_PREALLOC_SIZE(list);
	if (PyList_Reverse(list) < 0)
		goto onError;
	return list;

 onError:
	Py_DECREF(list);
	return NULL;
}

PyDoc_STRVAR(rsplit__doc__,
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string, starting at the end of the string and working\n\
to the front.  If maxsplit is given, at most maxsplit splits are\n\
done. If sep is not specified or is None, any whitespace string\n\
is a separator.");

static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
{
	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
	Py_ssize_t maxsplit = -1, count=0;
	const char *s, *sub;
	PyObject *list, *str, *subobj = Py_None;

	if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
		return NULL;
	if (maxsplit < 0)
		maxsplit = PY_SSIZE_T_MAX;
	if (subobj == Py_None)
		return rsplit_whitespace(self, len, maxsplit);
	if (PyString_Check(subobj)) {
		sub = PyString_AS_STRING(subobj);
		n = PyString_GET_SIZE(subobj);
	}
#ifdef Py_USING_UNICODE
	else if (PyUnicode_Check(subobj))
		return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
#endif
	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
		return NULL;

	if (n == 0) {
		PyErr_SetString(PyExc_ValueError, "empty separator");
		return NULL;
	}
	else if (n == 1)
		return rsplit_char(self, len, sub[0], maxsplit);

	list = PyList_New(PREALLOC_SIZE(maxsplit));
	if (list == NULL)
		return NULL;

	j = len;
	i = j - n;

	s = PyString_AS_STRING(self);
	while ( (i >= 0) && (maxsplit-- > 0) ) {
		for (; i>=0; i--) {
			if (Py_STRING_MATCH(s, i, sub, n)) {
				SPLIT_ADD(s, i + n, j);
				j = i;
				i -= n;
				break;
			}
		}
	}
	SPLIT_ADD(s, 0, j);
	FIX_PREALLOC_SIZE(list);
	if (PyList_Reverse(list) < 0)
		goto onError;
	return list;

onError:
	Py_DECREF(list);
	return NULL;
}


PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence.  The separator between elements is S.");

static PyObject *
string_join(PyStringObject *self, PyObject *orig)
{
	char *sep = PyString_AS_STRING(self);
	const Py_ssize_t seplen = PyString_GET_SIZE(self);
	PyObject *res = NULL;
	char *p;
	Py_ssize_t seqlen = 0;
	size_t sz = 0;
	Py_ssize_t i;
	PyObject *seq, *item;

	seq = PySequence_Fast(orig, "");
	if (seq == NULL) {
		return NULL;
	}

	seqlen = PySequence_Size(seq);
	if (seqlen == 0) {
		Py_DECREF(seq);
		return PyString_FromString("");
	}
	if (seqlen == 1) {
		item = PySequence_Fast_GET_ITEM(seq, 0);
		if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
			Py_INCREF(item);
			Py_DECREF(seq);
			return item;
		}
	}

	/* There are at least two things to join, or else we have a subclass
	 * of the builtin types in the sequence.
	 * Do a pre-pass to figure out the total amount of space we'll
	 * need (sz), see whether any argument is absurd, and defer to
	 * the Unicode join if appropriate.
	 */
	for (i = 0; i < seqlen; i++) {
		const size_t old_sz = sz;
		item = PySequence_Fast_GET_ITEM(seq, i);
		if (!PyString_Check(item)){
#ifdef Py_USING_UNICODE
			if (PyUnicode_Check(item)) {
				/* Defer to Unicode join.
				 * CAUTION:  There's no gurantee that the
				 * original sequence can be iterated over
				 * again, so we must pass seq here.
				 */
				PyObject *result;
				result = PyUnicode_Join((PyObject *)self, seq);
				Py_DECREF(seq);
				return result;
			}
#endif
			PyErr_Format(PyExc_TypeError,
				     "sequence item %zd: expected string,"
				     " %.80s found",
				     i, Py_TYPE(item)->tp_name);
			Py_DECREF(seq);
			return NULL;
		}
		sz += PyString_GET_SIZE(item);
		if (i != 0)
			sz += seplen;
		if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
			PyErr_SetString(PyExc_OverflowError,
				"join() result is too long for a Python string");
			Py_DECREF(seq);
			return NULL;
		}
	}

	/* Allocate result space. */
	res = PyString_FromStringAndSize((char*)NULL, sz);
	if (res == NULL) {
		Py_DECREF(seq);
		return NULL;
	}

	/* Catenate everything. */
	p = PyString_AS_STRING(res);
	for (i = 0; i < seqlen; ++i) {
		size_t n;
		item = PySequence_Fast_GET_ITEM(seq, i);
		n = PyString_GET_SIZE(item);
		Py_MEMCPY(p, PyString_AS_STRING(item), n);
		p += n;
		if (i < seqlen - 1) {
			Py_MEMCPY(p, sep, seplen);
			p += seplen;
		}
	}

	Py_DECREF(seq);
	return res;
}

PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
	assert(sep != NULL && PyString_Check(sep));
	assert(x != NULL);
	return string_join((PyStringObject *)sep, x);
}

Py_LOCAL_INLINE(void)
string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
{
	if (*end > len)
		*end = len;
	else if (*end < 0)
		*end += len;
	if (*end < 0)
		*end = 0;
	if (*start < 0)
		*start += len;
	if (*start < 0)
		*start = 0;
}

Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject *self, PyObject *args, int dir)
{
	PyObject *subobj;
	const char *sub;
	Py_ssize_t sub_len;
	Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
	PyObject *obj_start=Py_None, *obj_end=Py_None;

	if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
		&obj_start, &obj_end))
		return -2;
	/* To support None in "start" and "end" arguments, meaning
	   the same as if they were not passed.
	*/
	if (obj_start != Py_None)
		if (!_PyEval_SliceIndex(obj_start, &start))
	        return -2;
	if (obj_end != Py_None)
		if (!_PyEval_SliceIndex(obj_end, &end))
	        return -2;

	if (PyString_Check(subobj)) {
		sub = PyString_AS_STRING(subobj);
		sub_len = PyString_GET_SIZE(subobj);
	}
#ifdef Py_USING_UNICODE
	else if (PyUnicode_Check(subobj))
		return PyUnicode_Find(
			(PyObject *)self, subobj, start, end, dir);
#endif
	else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
		/* XXX - the "expected a character buffer object" is pretty
		   confusing for a non-expert.  remap to something else ? */
		return -2;

	if (dir > 0)
		return stringlib_find_slice(
			PyString_AS_STRING(self), PyString_GET_SIZE(self),
			sub, sub_len, start, end);
	else
		return stringlib_rfind_slice(
			PyString_AS_STRING(self), PyString_GET_SIZE(self),
			sub, sub_len, start, end);
}


PyDoc_STRVAR(find__doc__,
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within s[start:end].  Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.");

static PyObject *
string_find(PyStringObject *self, PyObject *args)
{
	Py_ssize_t result = string_find_internal(self, args, +1);
	if (result == -2)
		return NULL;
	return PyInt_FromSsize_t(result);
}


PyDoc_STRVAR(index__doc__,
"S.index(sub [,start [,end]]) -> int\n\
\n\
Like S.find() but raise ValueError when the substring is not found.");

static PyObject *
string_index(PyStringObject *self, PyObject *args)
{
	Py_ssize_t result = string_find_internal(self, args, +1);
	if (result == -2)
		return NULL;
	if (result == -1) {
		PyErr_SetString(PyExc_ValueError,
				"substring not found");
		return NULL;
	}
	return PyInt_FromSsize_t(result);
}


PyDoc_STRVAR(rfind__doc__,
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within s[start:end].  Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.");

static PyObject *
string_rfind(PyStringObject *self, PyObject *args)
{
	Py_ssize_t result = string_find_internal(self, args, -1);
	if (result == -2)
		return NULL;
	return PyInt_FromSsize_t(result);
}


PyDoc_STRVAR(rindex__doc__,
"S.rindex(sub [,start [,end]]) -> int\n\
\n\
Like S.rfind() but raise ValueError when the substring is not found.");

static PyObject *
string_rindex(PyStringObject *self, PyObject *args)
{
	Py_ssize_t result = string_find_internal(self, args, -1);
	if (result == -2)
		return NULL;
	if (result == -1) {
		PyErr_SetString(PyExc_ValueError,
				"substring not found");
		return NULL;
	}
	return PyInt_FromSsize_t(result);
}


Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
	char *s = PyString_AS_STRING(self);
	Py_ssize_…
Summary ✨

This C code is part of the Python interpreter, specifically related to string management. It handles the interned strings dictionary, which stores unique strings and their references. The code updates the dictionary, releases references back to the strings, and clears the dictionary to prevent memory leaks. It also checks for inconsistent string states and reports errors if necessary.
Alerts (4)

Complexity hotspot; lines 1426 to 1429 (total complexity: 8)
1426 1427 1428 1429