blob: 3aee28eb654b20f549dd452edcd4bb88a4b1c7eb [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
Christian Heimes90aa7642007-12-19 02:45:37 +000015 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossum98297ee2007-11-06 21:34:58 +000016
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000021 Py_TYPE(obj)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +000022 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 For PyString_FromStringAndSize(), the parameter the parameter `str' is
46 either NULL or else points to a string containing at least `size' bytes.
47 For PyString_FromStringAndSize(), the string in the `str' parameter does
48 not have to be null-terminated. (Therefore it is safe to construct a
49 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
50 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
51 bytes (setting the last byte to the null terminating character) and you can
52 fill in the data yourself. If `str' is non-NULL then the resulting
53 PyString object must be treated as immutable and you must not fill in nor
54 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000056 The PyObject member `op->ob_size', which denotes the number of "extra
57 items" in a variable-size object, will contain the number of bytes
58 allocated for string data, not counting the null terminating character. It
59 is therefore equal to the equal to the `size' parameter (for
60 PyString_FromStringAndSize()) or the length of the string in the `str'
61 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Christian Heimes33fe8092008-04-13 13:53:33 +000067 if (size < 0) {
68 PyErr_SetString(PyExc_SystemError,
69 "Negative size passed to PyString_FromStringAndSize");
70 return NULL;
71 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 if (size == 0 && (op = nullstring) != NULL) {
73#ifdef COUNT_ALLOCS
74 null_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 if (size == 1 && str != NULL &&
80 (op = characters[*str & UCHAR_MAX]) != NULL)
81 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082#ifdef COUNT_ALLOCS
83 one_strings++;
84#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 Py_INCREF(op);
86 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000088
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000089 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000090 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000093 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000095 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000096 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000098 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0) {
100 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000106 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107}
108
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000109PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000110PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111{
Tim Peters62de65b2001-12-06 20:29:32 +0000112 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000113 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000114
115 assert(str != NULL);
116 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000117 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 if (size == 0 && (op = nullstring) != NULL) {
123#ifdef COUNT_ALLOCS
124 null_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
134 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000136
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000137 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000140 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000141 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000143 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000144 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 if (size == 0) {
146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000165 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000177 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000178 ;
179
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000246 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000251 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000252 n = (n*10) + *f++ - '0';
253 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000254 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000255 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000275 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000307 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
Guido van Rossum234f9421993-06-17 12:35:49 +0000355static void
Fred Drakeba096332000-07-09 07:04:36 +0000356string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000357{
Christian Heimes90aa7642007-12-19 02:45:37 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000359}
360
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000367 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000368 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000369 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000370 const char *recode_encoding)
371{
372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000377 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000378 if (v == NULL)
379 return NULL;
380 p = buf = PyString_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000384 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000389 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
395
396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
401
402 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000403 assert(PyString_Check(w));
404 r = PyString_AS_STRING(w);
405 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000406 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000413 continue;
414 }
415 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000416 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000437 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000438 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000439 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000445 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000449 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000450 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000451 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000458 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000459 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000460 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000468 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000469 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000470 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000478 "decoding error; unknown "
479 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000480 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000481 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000482 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000483 default:
484 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000485 s--;
486 goto non_esc; /* an arbitry number of unescaped
487 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 }
489 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000490 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000491 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000492 return v;
493 failed:
494 Py_DECREF(v);
495 return NULL;
496}
497
Thomas Wouters477c8d52006-05-27 19:21:47 +0000498/* -------------------------------------------------------------------- */
499/* object api */
500
Martin v. Löwis18e16552006-02-15 17:27:45 +0000501Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000502PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503{
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000504 if (!PyString_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000509 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510}
511
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000512char *
Fred Drakeba096332000-07-09 07:04:36 +0000513PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000515 if (!PyString_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000518 return NULL;
519 }
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000520 return ((PyStringObject *)op)->ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521}
522
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523int
524PyString_AsStringAndSize(register PyObject *obj,
525 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000526 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527{
528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
532
533 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000534 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000536 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000537 }
538
539 *s = PyString_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000542 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000544 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000545 return -1;
546 }
547 return 0;
548}
549
Thomas Wouters477c8d52006-05-27 19:21:47 +0000550/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000551/* Methods */
552
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553#define STRINGLIB_CHAR char
554
555#define STRINGLIB_CMP memcmp
556#define STRINGLIB_LEN PyString_GET_SIZE
557#define STRINGLIB_NEW PyString_FromStringAndSize
558#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000559/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000560
561#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000562#define STRINGLIB_CHECK_EXACT PyString_CheckExact
563#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000564
565#include "stringlib/fastsearch.h"
566
567#include "stringlib/count.h"
568#include "stringlib/find.h"
569#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000570#include "stringlib/ctype.h"
571#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000572
Eric Smith5807c412008-05-11 21:00:57 +0000573#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
574#include "stringlib/localeutil.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000575
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000576PyObject *
577PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000578{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000579 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000580 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes90aa7642007-12-19 02:45:37 +0000581 Py_ssize_t length = Py_SIZE(op);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000582 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000583 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000584 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000585 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000586 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000587 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000588 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000589 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000591 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000592 }
593 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000594 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000595 register Py_UNICODE c;
596 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000597 int quote;
598
Guido van Rossum98297ee2007-11-06 21:34:58 +0000599 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000600 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000601 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000602 char *test, *start;
603 start = PyString_AS_STRING(op);
604 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000605 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000606 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000607 goto decided;
608 }
609 else if (*test == '\'')
610 quote = '"';
611 }
612 decided:
613 ;
614 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615
Guido van Rossum98297ee2007-11-06 21:34:58 +0000616 *p++ = 'b', *p++ = quote;
617 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000618 /* There's at least enough room for a hex escape
619 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000620 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000622 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000624 else if (c == '\t')
625 *p++ = '\\', *p++ = 't';
626 else if (c == '\n')
627 *p++ = '\\', *p++ = 'n';
628 else if (c == '\r')
629 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000630 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000631 *p++ = '\\';
632 *p++ = 'x';
633 *p++ = hexdigits[(c & 0xf0) >> 4];
634 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000635 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000636 else
637 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000638 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000639 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000640 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000641 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000642 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
643 Py_DECREF(v);
644 return NULL;
645 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000646 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000647 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648}
649
Guido van Rossum189f1df2001-05-01 16:51:53 +0000650static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000651string_repr(PyObject *op)
652{
653 return PyString_Repr(op, 1);
654}
655
656static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000657string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000658{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000659 if (Py_BytesWarningFlag) {
660 if (PyErr_WarnEx(PyExc_BytesWarning,
661 "str() on a bytes instance", 1))
662 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000663 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000664 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000665}
666
Martin v. Löwis18e16552006-02-15 17:27:45 +0000667static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000668string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669{
Christian Heimes90aa7642007-12-19 02:45:37 +0000670 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000671}
672
Guido van Rossum98297ee2007-11-06 21:34:58 +0000673/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000675string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000677 Py_ssize_t size;
678 Py_buffer va, vb;
679 PyObject *result = NULL;
680
681 va.len = -1;
682 vb.len = -1;
683 if (_getbuffer(a, &va) < 0 ||
684 _getbuffer(b, &vb) < 0) {
685 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000686 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000687 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000688 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000689
Guido van Rossum98297ee2007-11-06 21:34:58 +0000690 /* Optimize end cases */
691 if (va.len == 0 && PyString_CheckExact(b)) {
692 result = b;
693 Py_INCREF(result);
694 goto done;
695 }
696 if (vb.len == 0 && PyString_CheckExact(a)) {
697 result = a;
698 Py_INCREF(result);
699 goto done;
700 }
701
702 size = va.len + vb.len;
703 if (size < 0) {
704 PyErr_NoMemory();
705 goto done;
706 }
707
708 result = PyString_FromStringAndSize(NULL, size);
709 if (result != NULL) {
710 memcpy(PyString_AS_STRING(result), va.buf, va.len);
711 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
712 }
713
714 done:
715 if (va.len != -1)
716 PyObject_ReleaseBuffer(a, &va);
717 if (vb.len != -1)
718 PyObject_ReleaseBuffer(b, &vb);
719 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000720}
721
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000723string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000725 register Py_ssize_t i;
726 register Py_ssize_t j;
727 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000729 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730 if (n < 0)
731 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000732 /* watch out for overflows: the size can overflow int,
733 * and the # of bytes needed can overflow size_t
734 */
Christian Heimes90aa7642007-12-19 02:45:37 +0000735 size = Py_SIZE(a) * n;
736 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000737 PyErr_SetString(PyExc_OverflowError,
738 "repeated string is too long");
739 return NULL;
740 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000741 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000742 Py_INCREF(a);
743 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744 }
Tim Peterse7c05322004-06-27 17:24:49 +0000745 nbytes = (size_t)size;
746 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000747 PyErr_SetString(PyExc_OverflowError,
748 "repeated string is too long");
749 return NULL;
750 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000751 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000752 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000753 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000755 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000756 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000757 op->ob_sval[size] = '\0';
Christian Heimes90aa7642007-12-19 02:45:37 +0000758 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000759 memset(op->ob_sval, a->ob_sval[0] , n);
760 return (PyObject *) op;
761 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000762 i = 0;
763 if (i < size) {
Christian Heimes90aa7642007-12-19 02:45:37 +0000764 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
765 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000766 }
767 while (i < size) {
768 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000769 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000770 i += j;
771 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000772 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773}
774
Guido van Rossum9284a572000-03-07 15:53:43 +0000775static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000777{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000778 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
779 if (ival == -1 && PyErr_Occurred()) {
780 Py_buffer varg;
781 int pos;
782 PyErr_Clear();
783 if (_getbuffer(arg, &varg) < 0)
784 return -1;
Christian Heimes90aa7642007-12-19 02:45:37 +0000785 pos = stringlib_find(PyString_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 varg.buf, varg.len, 0);
787 PyObject_ReleaseBuffer(arg, &varg);
788 return pos >= 0;
789 }
790 if (ival < 0 || ival >= 256) {
791 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
792 return -1;
793 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000794
Christian Heimes90aa7642007-12-19 02:45:37 +0000795 return memchr(PyString_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796}
797
798static PyObject *
799string_item(PyStringObject *a, register Py_ssize_t i)
800{
Christian Heimes90aa7642007-12-19 02:45:37 +0000801 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802 PyErr_SetString(PyExc_IndexError, "string index out of range");
803 return NULL;
804 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000805 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000806}
807
Martin v. Löwiscd353062001-05-24 16:56:35 +0000808static PyObject*
809string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000811 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000812 Py_ssize_t len_a, len_b;
813 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000814 PyObject *result;
815
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000816 /* Make sure both arguments are strings. */
817 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000818 if (Py_BytesWarningFlag && (op == Py_EQ) &&
819 (PyObject_IsInstance((PyObject*)a,
820 (PyObject*)&PyUnicode_Type) ||
821 PyObject_IsInstance((PyObject*)b,
822 (PyObject*)&PyUnicode_Type))) {
823 if (PyErr_WarnEx(PyExc_BytesWarning,
824 "Comparsion between bytes and string", 1))
825 return NULL;
826 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000827 result = Py_NotImplemented;
828 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000829 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000830 if (a == b) {
831 switch (op) {
832 case Py_EQ:case Py_LE:case Py_GE:
833 result = Py_True;
834 goto out;
835 case Py_NE:case Py_LT:case Py_GT:
836 result = Py_False;
837 goto out;
838 }
839 }
840 if (op == Py_EQ) {
841 /* Supporting Py_NE here as well does not save
842 much time, since Py_NE is rarely used. */
Christian Heimes90aa7642007-12-19 02:45:37 +0000843 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000844 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimes90aa7642007-12-19 02:45:37 +0000845 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000846 result = Py_True;
847 } else {
848 result = Py_False;
849 }
850 goto out;
851 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000852 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 min_len = (len_a < len_b) ? len_a : len_b;
854 if (min_len > 0) {
855 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
856 if (c==0)
857 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000858 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000859 c = 0;
860 if (c == 0)
861 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
862 switch (op) {
863 case Py_LT: c = c < 0; break;
864 case Py_LE: c = c <= 0; break;
865 case Py_EQ: assert(0); break; /* unreachable */
866 case Py_NE: c = c != 0; break;
867 case Py_GT: c = c > 0; break;
868 case Py_GE: c = c >= 0; break;
869 default:
870 result = Py_NotImplemented;
871 goto out;
872 }
873 result = c ? Py_True : Py_False;
874 out:
875 Py_INCREF(result);
876 return result;
877}
878
Guido van Rossum9bfef441993-03-29 10:43:31 +0000879static long
Fred Drakeba096332000-07-09 07:04:36 +0000880string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000881{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000882 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000883 register unsigned char *p;
884 register long x;
885
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000886 if (a->ob_shash != -1)
887 return a->ob_shash;
Christian Heimes90aa7642007-12-19 02:45:37 +0000888 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000889 p = (unsigned char *) a->ob_sval;
890 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000891 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000892 x = (1000003*x) ^ *p++;
Christian Heimes90aa7642007-12-19 02:45:37 +0000893 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000894 if (x == -1)
895 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000896 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000897 return x;
898}
899
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000900static PyObject*
901string_subscript(PyStringObject* self, PyObject* item)
902{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000903 if (PyIndex_Check(item)) {
904 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000905 if (i == -1 && PyErr_Occurred())
906 return NULL;
907 if (i < 0)
908 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000909 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000910 PyErr_SetString(PyExc_IndexError,
911 "string index out of range");
912 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000913 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000914 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000915 }
916 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000917 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000918 char* source_buf;
919 char* result_buf;
920 PyObject* result;
921
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000922 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000923 PyString_GET_SIZE(self),
924 &start, &stop, &step, &slicelength) < 0) {
925 return NULL;
926 }
927
928 if (slicelength <= 0) {
929 return PyString_FromStringAndSize("", 0);
930 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000931 else if (start == 0 && step == 1 &&
932 slicelength == PyString_GET_SIZE(self) &&
933 PyString_CheckExact(self)) {
934 Py_INCREF(self);
935 return (PyObject *)self;
936 }
937 else if (step == 1) {
938 return PyString_FromStringAndSize(
939 PyString_AS_STRING(self) + start,
940 slicelength);
941 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000942 else {
943 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000944 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000945 if (result_buf == NULL)
946 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000947
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000948 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000949 cur += step, i++) {
950 result_buf[i] = source_buf[cur];
951 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000952
953 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000954 slicelength);
955 PyMem_Free(result_buf);
956 return result;
957 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000958 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000959 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000960 PyErr_Format(PyExc_TypeError,
961 "string indices must be integers, not %.200s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000962 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000963 return NULL;
964 }
965}
966
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000967static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000968string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000969{
Christian Heimes90aa7642007-12-19 02:45:37 +0000970 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000971 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000972}
973
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000975 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000976 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000978 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000979 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000980 0, /*sq_ass_item*/
981 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000982 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983};
984
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000985static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000986 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000987 (binaryfunc)string_subscript,
988 0,
989};
990
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000991static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000992 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000993 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000994};
995
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000996
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001005
Thomas Wouters477c8d52006-05-27 19:21:47 +00001006
/* Non-zero when the `length` bytes of `target` starting at `offset`
   equal the first `length` bytes of `pattern`.  The first and last
   bytes are compared directly before memcmp() checks the rest.
   Don't call if length < 2 (the memcmp() length would go negative).
   Every argument is parenthesized so that compound expressions expand
   correctly; arguments are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                   \
    ((target)[(offset)] == (pattern)[0] &&                                 \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&             \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1012
1013
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The result is capped at MAX_PREALLOC;
   the comparison is done first so that maxsplit+1 is only evaluated
   for values below the cap.  The argument is parenthesized so that a
   compound expression expands correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1026
/* Append the bytes data[left:right] to the result list as a new string
   object.

   Relies on names from the enclosing function: `str` (scratch
   PyObject *), `list` (the result list), `count` (number of items
   stored so far) and an `onError` label that is jumped to on failure.
   The first MAX_PREALLOC items are stored directly into the
   preallocated slots; later ones are appended, after which the local
   reference is dropped. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        int split_add_failed_ = PyList_Append(list, str);       \
        Py_DECREF(str);                                         \
        if (split_add_failed_)                                  \
            goto onError;                                       \
    }                                                           \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043
/* Always force the list to the expected size: the list's size field is
   overwritten with the enclosing function's `count`. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001046
/* Cursor helpers for whitespace splitting.  Each one moves index `i`
   over buffer `s` while the ISSPACE() test holds (or fails to hold):
   the SKIP_* forms advance forward and stop at `len`, the RSKIP_*
   forms step backward and stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len) \
    { while (i<len && ISSPACE(s[i])) i++; }
#define SKIP_NONSPACE(s, i, len) \
    { while (i<len && !ISSPACE(s[i])) i++; }
#define RSKIP_SPACE(s, i) \
    { while (i>=0 && ISSPACE(s[i])) i--; }
#define RSKIP_NONSPACE(s, i) \
    { while (i>=0 && !ISSPACE(s[i])) i--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001051
1052Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001053split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054{
Christian Heimes895627f2007-12-08 17:28:33 +00001055 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001056 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001057 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001058 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001059
1060 if (list == NULL)
1061 return NULL;
1062
Thomas Wouters477c8d52006-05-27 19:21:47 +00001063 i = j = 0;
1064
1065 while (maxsplit-- > 0) {
1066 SKIP_SPACE(s, i, len);
1067 if (i==len) break;
1068 j = i; i++;
1069 SKIP_NONSPACE(s, i, len);
Christian Heimes895627f2007-12-08 17:28:33 +00001070 if (j == 0 && i == len && PyString_CheckExact(self)) {
1071 /* No whitespace in self, so just use it as list[0] */
1072 Py_INCREF(self);
1073 PyList_SET_ITEM(list, 0, (PyObject *)self);
1074 count++;
1075 break;
1076 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001077 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001079
1080 if (i < len) {
1081 /* Only occurs when maxsplit was reached */
1082 /* Skip any remaining whitespace and copy to end of string */
1083 SKIP_SPACE(s, i, len);
1084 if (i != len)
1085 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001086 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001087 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001089 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090 Py_DECREF(list);
1091 return NULL;
1092}
1093
Thomas Wouters477c8d52006-05-27 19:21:47 +00001094Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001095split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001096{
Christian Heimes895627f2007-12-08 17:28:33 +00001097 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001099 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001100 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001101
1102 if (list == NULL)
1103 return NULL;
1104
Thomas Wouters477c8d52006-05-27 19:21:47 +00001105 i = j = 0;
1106 while ((j < len) && (maxcount-- > 0)) {
1107 for(; j<len; j++) {
1108 /* I found that using memchr makes no difference */
1109 if (s[j] == ch) {
1110 SPLIT_ADD(s, i, j);
1111 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001112 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001113 }
1114 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001115 }
Christian Heimes895627f2007-12-08 17:28:33 +00001116 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1117 /* ch not in self, so just use self as list[0] */
1118 Py_INCREF(self);
1119 PyList_SET_ITEM(list, 0, (PyObject *)self);
1120 count++;
1121 }
1122 else if (i <= len) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001124 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001125 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001126 return list;
1127
1128 onError:
1129 Py_DECREF(list);
1130 return NULL;
1131}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001133PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001134"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001136Return a list of the sections in B, using sep as the delimiter.\n\
1137If sep is not given, B is split on ASCII whitespace characters\n\
1138(space, tab, return, newline, formfeed, vertical tab).\n\
1139If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140
1141static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001142string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001144 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001145 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001146 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001147 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001148 PyObject *list, *str, *subobj = Py_None;
1149#ifdef USE_FAST
1150 Py_ssize_t pos;
1151#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001153 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001155 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001156 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001157 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001158 return split_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001159 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001160 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001161 sub = vsub.buf;
1162 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001163
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001164 if (n == 0) {
1165 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001166 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167 return NULL;
1168 }
Christian Heimes895627f2007-12-08 17:28:33 +00001169 else if (n == 1)
1170 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001171
Thomas Wouters477c8d52006-05-27 19:21:47 +00001172 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001173 if (list == NULL) {
1174 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001175 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001176 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177
Thomas Wouters477c8d52006-05-27 19:21:47 +00001178#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001179 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001180 while (maxsplit-- > 0) {
1181 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1182 if (pos < 0)
1183 break;
1184 j = i+pos;
1185 SPLIT_ADD(s, i, j);
1186 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001187 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001188#else
1189 i = j = 0;
1190 while ((j+n <= len) && (maxsplit-- > 0)) {
1191 for (; j+n <= len; j++) {
1192 if (Py_STRING_MATCH(s, j, sub, n)) {
1193 SPLIT_ADD(s, i, j);
1194 i = j = j + n;
1195 break;
1196 }
1197 }
1198 }
1199#endif
1200 SPLIT_ADD(s, i, len);
1201 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001202 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001203 return list;
1204
Thomas Wouters477c8d52006-05-27 19:21:47 +00001205 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001207 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001208 return NULL;
1209}
1210
Thomas Wouters477c8d52006-05-27 19:21:47 +00001211PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001212"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001213\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001214Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001215the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001216found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001217
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001218static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001219string_partition(PyStringObject *self, PyObject *sep_obj)
1220{
1221 const char *sep;
1222 Py_ssize_t sep_len;
1223
1224 if (PyString_Check(sep_obj)) {
1225 sep = PyString_AS_STRING(sep_obj);
1226 sep_len = PyString_GET_SIZE(sep_obj);
1227 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001228 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1229 return NULL;
1230
1231 return stringlib_partition(
1232 (PyObject*) self,
1233 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1234 sep_obj, sep, sep_len
1235 );
1236}
1237
1238PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001239"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001240\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001241Searches for the separator sep in B, starting at the end of B,\n\
1242and returns the part before it, the separator itself, and the\n\
1243part after it. If the separator is not found, returns two empty\n\
1244bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001245
1246static PyObject *
1247string_rpartition(PyStringObject *self, PyObject *sep_obj)
1248{
1249 const char *sep;
1250 Py_ssize_t sep_len;
1251
1252 if (PyString_Check(sep_obj)) {
1253 sep = PyString_AS_STRING(sep_obj);
1254 sep_len = PyString_GET_SIZE(sep_obj);
1255 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001256 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1257 return NULL;
1258
1259 return stringlib_rpartition(
1260 (PyObject*) self,
1261 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1262 sep_obj, sep, sep_len
1263 );
1264}
1265
1266Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001267rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001268{
Christian Heimes895627f2007-12-08 17:28:33 +00001269 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001270 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001271 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001272 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001273
1274 if (list == NULL)
1275 return NULL;
1276
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001278
Thomas Wouters477c8d52006-05-27 19:21:47 +00001279 while (maxsplit-- > 0) {
1280 RSKIP_SPACE(s, i);
1281 if (i<0) break;
1282 j = i; i--;
1283 RSKIP_NONSPACE(s, i);
Christian Heimes895627f2007-12-08 17:28:33 +00001284 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1285 /* No whitespace in self, so just use it as list[0] */
1286 Py_INCREF(self);
1287 PyList_SET_ITEM(list, 0, (PyObject *)self);
1288 count++;
1289 break;
1290 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001292 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001293 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001294 /* Only occurs when maxsplit was reached. Skip any remaining
1295 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001296 RSKIP_SPACE(s, i);
1297 if (i >= 0)
1298 SPLIT_ADD(s, 0, i + 1);
1299
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001300 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001301 FIX_PREALLOC_SIZE(list);
1302 if (PyList_Reverse(list) < 0)
1303 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001304 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001305 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001306 Py_DECREF(list);
1307 return NULL;
1308}
1309
Thomas Wouters477c8d52006-05-27 19:21:47 +00001310Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001311rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001312{
Christian Heimes895627f2007-12-08 17:28:33 +00001313 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001315 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001316 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001317
1318 if (list == NULL)
1319 return NULL;
1320
Thomas Wouters477c8d52006-05-27 19:21:47 +00001321 i = j = len - 1;
1322 while ((i >= 0) && (maxcount-- > 0)) {
1323 for (; i >= 0; i--) {
1324 if (s[i] == ch) {
1325 SPLIT_ADD(s, i + 1, j + 1);
1326 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001327 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001328 }
1329 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001330 }
Christian Heimes895627f2007-12-08 17:28:33 +00001331 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1332 /* ch not in self, so just use self as list[0] */
1333 Py_INCREF(self);
1334 PyList_SET_ITEM(list, 0, (PyObject *)self);
1335 count++;
1336 }
1337 else if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001338 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001340 FIX_PREALLOC_SIZE(list);
1341 if (PyList_Reverse(list) < 0)
1342 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001343 return list;
1344
1345 onError:
1346 Py_DECREF(list);
1347 return NULL;
1348}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001349
1350PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001351"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001352\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001353Return a list of the sections in B, using sep as the delimiter,\n\
1354starting at the end of B and working to the front.\n\
1355If sep is not given, B is split on ASCII whitespace characters\n\
1356(space, tab, return, newline, formfeed, vertical tab).\n\
1357If maxsplit is given, at most maxsplit splits are done.");
1358
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001359
1360static PyObject *
1361string_rsplit(PyStringObject *self, PyObject *args)
1362{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001363 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001364 Py_ssize_t maxsplit = -1, count=0;
Christian Heimes895627f2007-12-08 17:28:33 +00001365 const char *s, *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001367 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001368
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001369 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001370 return NULL;
1371 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001373 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001374 return rsplit_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001375 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001376 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001377 sub = vsub.buf;
1378 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001379
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001380 if (n == 0) {
1381 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001382 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001383 return NULL;
1384 }
Christian Heimes895627f2007-12-08 17:28:33 +00001385 else if (n == 1)
1386 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387
Thomas Wouters477c8d52006-05-27 19:21:47 +00001388 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001389 if (list == NULL) {
1390 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001391 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001392 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001393
1394 j = len;
1395 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001396
Christian Heimes895627f2007-12-08 17:28:33 +00001397 s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001398 while ( (i >= 0) && (maxsplit-- > 0) ) {
1399 for (; i>=0; i--) {
1400 if (Py_STRING_MATCH(s, i, sub, n)) {
1401 SPLIT_ADD(s, i + n, j);
1402 j = i;
1403 i -= n;
1404 break;
1405 }
1406 }
1407 }
1408 SPLIT_ADD(s, 0, j);
1409 FIX_PREALLOC_SIZE(list);
1410 if (PyList_Reverse(list) < 0)
1411 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001412 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001413 return list;
1414
Thomas Wouters477c8d52006-05-27 19:21:47 +00001415onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001416 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001417 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001418 return NULL;
1419}
1420
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001421#undef SPLIT_ADD
1422#undef MAX_PREALLOC
1423#undef PREALLOC_SIZE
1424
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001426PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001427"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001429Concatenates any number of bytes objects, with B in between each pair.\n\
1430Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431
1432static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001433string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434{
1435 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001436 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001439 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001440 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001441 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001442 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443
Tim Peters19fe14e2001-01-19 03:03:47 +00001444 seq = PySequence_Fast(orig, "");
1445 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001446 return NULL;
1447 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001448
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001449 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001450 if (seqlen == 0) {
1451 Py_DECREF(seq);
1452 return PyString_FromString("");
1453 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001455 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001456 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001457 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001459 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001460 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462
Raymond Hettinger674f2412004-08-23 23:23:54 +00001463 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001464 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001465 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001466 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001467 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001468 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001469 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001470 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001471 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001472 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001473 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001474 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001475 " %.80s found",
Christian Heimes90aa7642007-12-19 02:45:37 +00001476 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001477 Py_DECREF(seq);
1478 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001479 }
Christian Heimes90aa7642007-12-19 02:45:37 +00001480 sz += Py_SIZE(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001481 if (i != 0)
1482 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001483 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001484 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001485 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 Py_DECREF(seq);
1487 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001489 }
1490
1491 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001492 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001493 if (res == NULL) {
1494 Py_DECREF(seq);
1495 return NULL;
1496 }
1497
1498 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001499 /* I'm not worried about a PyBytes item growing because there's
1500 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001501 p = PyString_AS_STRING(res);
1502 for (i = 0; i < seqlen; ++i) {
1503 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001504 char *q;
1505 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001506 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001507 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001508 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001509 item = PySequence_Fast_GET_ITEM(seq, i);
Christian Heimes90aa7642007-12-19 02:45:37 +00001510 n = Py_SIZE(item);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001511 if (PyString_Check(item))
1512 q = PyString_AS_STRING(item);
1513 else
1514 q = PyBytes_AS_STRING(item);
1515 Py_MEMCPY(p, q, n);
1516 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001517 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001518
Jeremy Hylton49048292000-07-11 03:28:17 +00001519 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521}
1522
Tim Peters52e155e2001-06-16 05:42:57 +00001523PyObject *
1524_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001525{
Tim Petersa7259592001-06-16 05:11:17 +00001526 assert(sep != NULL && PyString_Check(sep));
1527 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001528 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001529}
1530
Thomas Wouters477c8d52006-05-27 19:21:47 +00001531Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001532string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001533{
1534 if (*end > len)
1535 *end = len;
1536 else if (*end < 0)
1537 *end += len;
1538 if (*end < 0)
1539 *end = 0;
1540 if (*start < 0)
1541 *start += len;
1542 if (*start < 0)
1543 *start = 0;
1544}
1545
Thomas Wouters477c8d52006-05-27 19:21:47 +00001546Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001547string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001550 const char *sub;
1551 Py_ssize_t sub_len;
1552 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001553 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001554
Christian Heimes9cd17752007-11-18 19:35:23 +00001555 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1556 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001558 /* To support None in "start" and "end" arguments, meaning
1559 the same as if they were not passed.
1560 */
1561 if (obj_start != Py_None)
1562 if (!_PyEval_SliceIndex(obj_start, &start))
1563 return -2;
1564 if (obj_end != Py_None)
1565 if (!_PyEval_SliceIndex(obj_end, &end))
1566 return -2;
1567
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 if (PyString_Check(subobj)) {
1569 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001570 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001572 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001573 /* XXX - the "expected a character buffer object" is pretty
1574 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 return -2;
1576
Thomas Wouters477c8d52006-05-27 19:21:47 +00001577 if (dir > 0)
1578 return stringlib_find_slice(
1579 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1580 sub, sub_len, start, end);
1581 else
1582 return stringlib_rfind_slice(
1583 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1584 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585}
1586
1587
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001588PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001589"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590\n\
1591Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001592such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593arguments start and end are interpreted as in slice notation.\n\
1594\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596
1597static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001598string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001600 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 if (result == -2)
1602 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001603 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604}
1605
1606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001608"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001610Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611
1612static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001613string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616 if (result == -2)
1617 return NULL;
1618 if (result == -1) {
1619 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001620 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621 return NULL;
1622 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001623 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624}
1625
1626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001627PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001628"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001630Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001631such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632arguments start and end are interpreted as in slice notation.\n\
1633\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001634Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
1636static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001637string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001639 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 if (result == -2)
1641 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001642 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643}
1644
1645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001646PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001647"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001649Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650
1651static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001652string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001654 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 if (result == -2)
1656 return NULL;
1657 if (result == -1) {
1658 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001659 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001660 return NULL;
1661 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001662 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663}
1664
1665
Thomas Wouters477c8d52006-05-27 19:21:47 +00001666Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001667do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1668{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001669 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001670 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001671 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001672 char *sep;
1673 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001674 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001675
Guido van Rossum98297ee2007-11-06 21:34:58 +00001676 if (_getbuffer(sepobj, &vsep) < 0)
1677 return NULL;
1678 sep = vsep.buf;
1679 seplen = vsep.len;
1680
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001681 i = 0;
1682 if (striptype != RIGHTSTRIP) {
1683 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1684 i++;
1685 }
1686 }
1687
1688 j = len;
1689 if (striptype != LEFTSTRIP) {
1690 do {
1691 j--;
1692 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1693 j++;
1694 }
1695
Guido van Rossum98297ee2007-11-06 21:34:58 +00001696 PyObject_ReleaseBuffer(sepobj, &vsep);
1697
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001698 if (i == 0 && j == len && PyString_CheckExact(self)) {
1699 Py_INCREF(self);
1700 return (PyObject*)self;
1701 }
1702 else
1703 return PyString_FromStringAndSize(s+i, j-i);
1704}
1705
1706
Thomas Wouters477c8d52006-05-27 19:21:47 +00001707Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001708do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709{
1710 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001711 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001712
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713 i = 0;
1714 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001715 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716 i++;
1717 }
1718 }
1719
1720 j = len;
1721 if (striptype != LEFTSTRIP) {
1722 do {
1723 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001724 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 j++;
1726 }
1727
Tim Peters8fa5dd02001-09-12 02:18:30 +00001728 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729 Py_INCREF(self);
1730 return (PyObject*)self;
1731 }
1732 else
1733 return PyString_FromStringAndSize(s+i, j-i);
1734}
1735
1736
Thomas Wouters477c8d52006-05-27 19:21:47 +00001737Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001738do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1739{
1740 PyObject *sep = NULL;
1741
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001742 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001743 return NULL;
1744
1745 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001746 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001747 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001748 return do_strip(self, striptype);
1749}
1750
1751
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001752PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001753"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001755Strip leading and trailing bytes contained in the argument.\n\
1756If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001758string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001760 if (PyTuple_GET_SIZE(args) == 0)
1761 return do_strip(self, BOTHSTRIP); /* Common case */
1762 else
1763 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764}
1765
1766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001767PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001768"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001770Strip leading bytes contained in the argument.\n\
1771If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001773string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001775 if (PyTuple_GET_SIZE(args) == 0)
1776 return do_strip(self, LEFTSTRIP); /* Common case */
1777 else
1778 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779}
1780
1781
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001782PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001783"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001785Strip trailing bytes contained in the argument.\n\
1786If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001788string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001790 if (PyTuple_GET_SIZE(args) == 0)
1791 return do_strip(self, RIGHTSTRIP); /* Common case */
1792 else
1793 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794}
1795
1796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001797PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001798"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001800Return the number of non-overlapping occurrences of substring sub in\n\
1801string S[start:end]. Optional arguments start and end are interpreted\n\
1802as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803
1804static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001805string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001807 PyObject *sub_obj;
1808 const char *str = PyString_AS_STRING(self), *sub;
1809 Py_ssize_t sub_len;
1810 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
Thomas Wouters477c8d52006-05-27 19:21:47 +00001812 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1813 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001815
Thomas Wouters477c8d52006-05-27 19:21:47 +00001816 if (PyString_Check(sub_obj)) {
1817 sub = PyString_AS_STRING(sub_obj);
1818 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001819 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001820 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001821 return NULL;
1822
Thomas Wouters477c8d52006-05-27 19:21:47 +00001823 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001824
Christian Heimes217cfd12007-12-02 14:31:20 +00001825 return PyLong_FromSsize_t(
Thomas Wouters477c8d52006-05-27 19:21:47 +00001826 stringlib_count(str + start, end - start, sub, sub_len)
1827 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828}
1829
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001831PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001832"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001834Return a copy of B, where all characters occurring in the\n\
1835optional argument deletechars are removed, and the remaining\n\
1836characters have been mapped through the given translation\n\
1837table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838
1839static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001840string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001842 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001843 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001844 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001846 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001847 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848 PyObject *result;
1849 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001850 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001852 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001853 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001855
1856 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001857 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 tablen = PyString_GET_SIZE(tableobj);
1859 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001860 else if (tableobj == Py_None) {
1861 table = NULL;
1862 tablen = 256;
1863 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001864 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866
Martin v. Löwis00b61272002-12-12 20:03:19 +00001867 if (tablen != 256) {
1868 PyErr_SetString(PyExc_ValueError,
1869 "translation table must be 256 characters long");
1870 return NULL;
1871 }
1872
Guido van Rossum4c08d552000-03-10 22:55:18 +00001873 if (delobj != NULL) {
1874 if (PyString_Check(delobj)) {
1875 del_table = PyString_AS_STRING(delobj);
1876 dellen = PyString_GET_SIZE(delobj);
1877 }
1878 else if (PyUnicode_Check(delobj)) {
1879 PyErr_SetString(PyExc_TypeError,
1880 "deletions are implemented differently for unicode");
1881 return NULL;
1882 }
1883 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1884 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 }
1886 else {
1887 del_table = NULL;
1888 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889 }
1890
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001891 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 result = PyString_FromStringAndSize((char *)NULL, inlen);
1893 if (result == NULL)
1894 return NULL;
1895 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001896 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897
Guido van Rossumd8faa362007-04-27 19:54:29 +00001898 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899 /* If no deletions are required, use faster code */
1900 for (i = inlen; --i >= 0; ) {
1901 c = Py_CHARMASK(*input++);
1902 if (Py_CHARMASK((*output++ = table[c])) != c)
1903 changed = 1;
1904 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001905 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906 return result;
1907 Py_DECREF(result);
1908 Py_INCREF(input_obj);
1909 return input_obj;
1910 }
1911
Guido van Rossumd8faa362007-04-27 19:54:29 +00001912 if (table == NULL) {
1913 for (i = 0; i < 256; i++)
1914 trans_table[i] = Py_CHARMASK(i);
1915 } else {
1916 for (i = 0; i < 256; i++)
1917 trans_table[i] = Py_CHARMASK(table[i]);
1918 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919
1920 for (i = 0; i < dellen; i++)
1921 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1922
1923 for (i = inlen; --i >= 0; ) {
1924 c = Py_CHARMASK(*input++);
1925 if (trans_table[c] != -1)
1926 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1927 continue;
1928 changed = 1;
1929 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001930 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931 Py_DECREF(result);
1932 Py_INCREF(input_obj);
1933 return input_obj;
1934 }
1935 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001936 if (inlen > 0)
1937 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938 return result;
1939}
1940
1941
/* Search directions passed as the `direction` argument of the helpers
   below.  REVERSE is parenthesized so the expansion stays a single
   operand wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* findchar(target, target_len, c): pointer to the first occurrence of c
   in the first target_len bytes of target, or NULL if there is none.
   Thin wrapper around memchr(); every argument is parenthesized so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949
Thomas Wouters477c8d52006-05-27 19:21:47 +00001950/* String ops must return a string. */
1951/* If the object is subclass of string, create a copy */
1952Py_LOCAL(PyStringObject *)
1953return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001955 if (PyString_CheckExact(self)) {
1956 Py_INCREF(self);
1957 return self;
1958 }
1959 return (PyStringObject *)PyString_FromStringAndSize(
1960 PyString_AS_STRING(self),
1961 PyString_GET_SIZE(self));
1962}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
Thomas Wouters477c8d52006-05-27 19:21:47 +00001964Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001965countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001966{
1967 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001968 const char *start=target;
1969 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970
Thomas Wouters477c8d52006-05-27 19:21:47 +00001971 while ( (start=findchar(start, end-start, c)) != NULL ) {
1972 count++;
1973 if (count >= maxcount)
1974 break;
1975 start += 1;
1976 }
1977 return count;
1978}
1979
1980Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001981findstring(const char *target, Py_ssize_t target_len,
1982 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001983 Py_ssize_t start,
1984 Py_ssize_t end,
1985 int direction)
1986{
1987 if (start < 0) {
1988 start += target_len;
1989 if (start < 0)
1990 start = 0;
1991 }
1992 if (end > target_len) {
1993 end = target_len;
1994 } else if (end < 0) {
1995 end += target_len;
1996 if (end < 0)
1997 end = 0;
1998 }
1999
2000 /* zero-length substrings always match at the first attempt */
2001 if (pattern_len == 0)
2002 return (direction > 0) ? start : end;
2003
2004 end -= pattern_len;
2005
2006 if (direction < 0) {
2007 for (; end >= start; end--)
2008 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2009 return end;
2010 } else {
2011 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002012 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002013 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 }
2015 return -1;
2016}
2017
Thomas Wouters477c8d52006-05-27 19:21:47 +00002018Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002019countstring(const char *target, Py_ssize_t target_len,
2020 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002021 Py_ssize_t start,
2022 Py_ssize_t end,
2023 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002025 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026
Thomas Wouters477c8d52006-05-27 19:21:47 +00002027 if (start < 0) {
2028 start += target_len;
2029 if (start < 0)
2030 start = 0;
2031 }
2032 if (end > target_len) {
2033 end = target_len;
2034 } else if (end < 0) {
2035 end += target_len;
2036 if (end < 0)
2037 end = 0;
2038 }
2039
2040 /* zero-length substrings match everywhere */
2041 if (pattern_len == 0 || maxcount == 0) {
2042 if (target_len+1 < maxcount)
2043 return target_len+1;
2044 return maxcount;
2045 }
2046
2047 end -= pattern_len;
2048 if (direction < 0) {
2049 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002050 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002051 count++;
2052 if (--maxcount <= 0) break;
2053 end -= pattern_len-1;
2054 }
2055 } else {
2056 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002057 if (Py_STRING_MATCH(target, start,
2058 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002059 count++;
2060 if (--maxcount <= 0)
2061 break;
2062 start += pattern_len-1;
2063 }
2064 }
2065 return count;
2066}
2067
2068
2069/* Algorithms for different cases of string replacement */
2070
2071/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2072Py_LOCAL(PyStringObject *)
2073replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002074 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002075 Py_ssize_t maxcount)
2076{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002077 char *self_s, *result_s;
2078 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002079 Py_ssize_t count, i, product;
2080 PyStringObject *result;
2081
2082 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002083
Thomas Wouters477c8d52006-05-27 19:21:47 +00002084 /* 1 at the end plus 1 after every character */
2085 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002086 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002087 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002088
Thomas Wouters477c8d52006-05-27 19:21:47 +00002089 /* Check for overflow */
2090 /* result_len = count * to_len + self_len; */
2091 product = count * to_len;
2092 if (product / to_len != count) {
2093 PyErr_SetString(PyExc_OverflowError,
2094 "replace string is too long");
2095 return NULL;
2096 }
2097 result_len = product + self_len;
2098 if (result_len < 0) {
2099 PyErr_SetString(PyExc_OverflowError,
2100 "replace string is too long");
2101 return NULL;
2102 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002103
Thomas Wouters477c8d52006-05-27 19:21:47 +00002104 if (! (result = (PyStringObject *)
2105 PyString_FromStringAndSize(NULL, result_len)) )
2106 return NULL;
2107
2108 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002109 result_s = PyString_AS_STRING(result);
2110
2111 /* TODO: special case single character, which doesn't need memcpy */
2112
2113 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002114 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115 result_s += to_len;
2116 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002117
Thomas Wouters477c8d52006-05-27 19:21:47 +00002118 for (i=0; i<count; i++) {
2119 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002120 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002121 result_s += to_len;
2122 }
2123
2124 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002125 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126
2127 return result;
2128}
2129
2130/* Special case for deleting a single character */
2131/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2132Py_LOCAL(PyStringObject *)
2133replace_delete_single_character(PyStringObject *self,
2134 char from_c, Py_ssize_t maxcount)
2135{
2136 char *self_s, *result_s;
2137 char *start, *next, *end;
2138 Py_ssize_t self_len, result_len;
2139 Py_ssize_t count;
2140 PyStringObject *result;
2141
2142 self_len = PyString_GET_SIZE(self);
2143 self_s = PyString_AS_STRING(self);
2144
2145 count = countchar(self_s, self_len, from_c, maxcount);
2146 if (count == 0) {
2147 return return_self(self);
2148 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002149
Thomas Wouters477c8d52006-05-27 19:21:47 +00002150 result_len = self_len - count; /* from_len == 1 */
2151 assert(result_len>=0);
2152
2153 if ( (result = (PyStringObject *)
2154 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2155 return NULL;
2156 result_s = PyString_AS_STRING(result);
2157
2158 start = self_s;
2159 end = self_s + self_len;
2160 while (count-- > 0) {
2161 next = findchar(start, end-start, from_c);
2162 if (next == NULL)
2163 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002164 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002165 result_s += (next-start);
2166 start = next+1;
2167 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002168 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002169
Thomas Wouters477c8d52006-05-27 19:21:47 +00002170 return result;
2171}
2172
2173/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2174
2175Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002176replace_delete_substring(PyStringObject *self,
2177 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002178 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002179 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002180 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002181 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002182 Py_ssize_t count, offset;
2183 PyStringObject *result;
2184
2185 self_len = PyString_GET_SIZE(self);
2186 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002187
2188 count = countstring(self_s, self_len,
2189 from_s, from_len,
2190 0, self_len, 1,
2191 maxcount);
2192
2193 if (count == 0) {
2194 /* no matches */
2195 return return_self(self);
2196 }
2197
2198 result_len = self_len - (count * from_len);
2199 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002200
Thomas Wouters477c8d52006-05-27 19:21:47 +00002201 if ( (result = (PyStringObject *)
2202 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2203 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002204
Thomas Wouters477c8d52006-05-27 19:21:47 +00002205 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002206
Thomas Wouters477c8d52006-05-27 19:21:47 +00002207 start = self_s;
2208 end = self_s + self_len;
2209 while (count-- > 0) {
2210 offset = findstring(start, end-start,
2211 from_s, from_len,
2212 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213 if (offset == -1)
2214 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002215 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002216
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002217 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002218
Thomas Wouters477c8d52006-05-27 19:21:47 +00002219 result_s += (next-start);
2220 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002222 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002223 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224}
2225
Thomas Wouters477c8d52006-05-27 19:21:47 +00002226/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2227Py_LOCAL(PyStringObject *)
2228replace_single_character_in_place(PyStringObject *self,
2229 char from_c, char to_c,
2230 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002232 char *self_s, *result_s, *start, *end, *next;
2233 Py_ssize_t self_len;
2234 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002235
Thomas Wouters477c8d52006-05-27 19:21:47 +00002236 /* The result string will be the same size */
2237 self_s = PyString_AS_STRING(self);
2238 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002239
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002241
Thomas Wouters477c8d52006-05-27 19:21:47 +00002242 if (next == NULL) {
2243 /* No matches; return the original string */
2244 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002246
Thomas Wouters477c8d52006-05-27 19:21:47 +00002247 /* Need to make a new string */
2248 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2249 if (result == NULL)
2250 return NULL;
2251 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002252 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002253
Thomas Wouters477c8d52006-05-27 19:21:47 +00002254 /* change everything in-place, starting with this one */
2255 start = result_s + (next-self_s);
2256 *start = to_c;
2257 start++;
2258 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002259
Thomas Wouters477c8d52006-05-27 19:21:47 +00002260 while (--maxcount > 0) {
2261 next = findchar(start, end-start, from_c);
2262 if (next == NULL)
2263 break;
2264 *next = to_c;
2265 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002266 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002267
Thomas Wouters477c8d52006-05-27 19:21:47 +00002268 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269}
2270
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2272Py_LOCAL(PyStringObject *)
2273replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002274 const char *from_s, Py_ssize_t from_len,
2275 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002276 Py_ssize_t maxcount)
2277{
2278 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002279 char *self_s;
2280 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002281 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002282
Thomas Wouters477c8d52006-05-27 19:21:47 +00002283 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002284
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 self_s = PyString_AS_STRING(self);
2286 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002287
Thomas Wouters477c8d52006-05-27 19:21:47 +00002288 offset = findstring(self_s, self_len,
2289 from_s, from_len,
2290 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002291 if (offset == -1) {
2292 /* No matches; return the original string */
2293 return return_self(self);
2294 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002295
Thomas Wouters477c8d52006-05-27 19:21:47 +00002296 /* Need to make a new string */
2297 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2298 if (result == NULL)
2299 return NULL;
2300 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002301 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002302
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303 /* change everything in-place, starting with this one */
2304 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002305 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002306 start += from_len;
2307 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002308
Thomas Wouters477c8d52006-05-27 19:21:47 +00002309 while ( --maxcount > 0) {
2310 offset = findstring(start, end-start,
2311 from_s, from_len,
2312 0, end-start, FORWARD);
2313 if (offset==-1)
2314 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002315 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002316 start += offset+from_len;
2317 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002318
Thomas Wouters477c8d52006-05-27 19:21:47 +00002319 return result;
2320}
2321
2322/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2323Py_LOCAL(PyStringObject *)
2324replace_single_character(PyStringObject *self,
2325 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002326 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002327 Py_ssize_t maxcount)
2328{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002330 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002331 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002332 Py_ssize_t count, product;
2333 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002334
Thomas Wouters477c8d52006-05-27 19:21:47 +00002335 self_s = PyString_AS_STRING(self);
2336 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002337
Thomas Wouters477c8d52006-05-27 19:21:47 +00002338 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002339 if (count == 0) {
2340 /* no matches, return unchanged */
2341 return return_self(self);
2342 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002343
Thomas Wouters477c8d52006-05-27 19:21:47 +00002344 /* use the difference between current and new, hence the "-1" */
2345 /* result_len = self_len + count * (to_len-1) */
2346 product = count * (to_len-1);
2347 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002348 PyErr_SetString(PyExc_OverflowError,
2349 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002350 return NULL;
2351 }
2352 result_len = self_len + product;
2353 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002354 PyErr_SetString(PyExc_OverflowError,
2355 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002356 return NULL;
2357 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002358
Thomas Wouters477c8d52006-05-27 19:21:47 +00002359 if ( (result = (PyStringObject *)
2360 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2361 return NULL;
2362 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002363
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364 start = self_s;
2365 end = self_s + self_len;
2366 while (count-- > 0) {
2367 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002368 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002369 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002370
Thomas Wouters477c8d52006-05-27 19:21:47 +00002371 if (next == start) {
2372 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002373 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 result_s += to_len;
2375 start += 1;
2376 } else {
2377 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002378 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002379 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002380 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002381 result_s += to_len;
2382 start = next+1;
2383 }
2384 }
2385 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002386 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002387
Thomas Wouters477c8d52006-05-27 19:21:47 +00002388 return result;
2389}
2390
2391/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2392Py_LOCAL(PyStringObject *)
2393replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002394 const char *from_s, Py_ssize_t from_len,
2395 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002396 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002397 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002398 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002399 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400 Py_ssize_t count, offset, product;
2401 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002402
Thomas Wouters477c8d52006-05-27 19:21:47 +00002403 self_s = PyString_AS_STRING(self);
2404 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002405
Thomas Wouters477c8d52006-05-27 19:21:47 +00002406 count = countstring(self_s, self_len,
2407 from_s, from_len,
2408 0, self_len, FORWARD, maxcount);
2409 if (count == 0) {
2410 /* no matches, return unchanged */
2411 return return_self(self);
2412 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002413
Thomas Wouters477c8d52006-05-27 19:21:47 +00002414 /* Check for overflow */
2415 /* result_len = self_len + count * (to_len-from_len) */
2416 product = count * (to_len-from_len);
2417 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002418 PyErr_SetString(PyExc_OverflowError,
2419 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002420 return NULL;
2421 }
2422 result_len = self_len + product;
2423 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002424 PyErr_SetString(PyExc_OverflowError,
2425 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002426 return NULL;
2427 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002428
Thomas Wouters477c8d52006-05-27 19:21:47 +00002429 if ( (result = (PyStringObject *)
2430 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2431 return NULL;
2432 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002433
Thomas Wouters477c8d52006-05-27 19:21:47 +00002434 start = self_s;
2435 end = self_s + self_len;
2436 while (count-- > 0) {
2437 offset = findstring(start, end-start,
2438 from_s, from_len,
2439 0, end-start, FORWARD);
2440 if (offset == -1)
2441 break;
2442 next = start+offset;
2443 if (next == start) {
2444 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002445 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002446 result_s += to_len;
2447 start += from_len;
2448 } else {
2449 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002450 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002451 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002452 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002453 result_s += to_len;
2454 start = next+from_len;
2455 }
2456 }
2457 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002458 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459
Thomas Wouters477c8d52006-05-27 19:21:47 +00002460 return result;
2461}
2462
2463
2464Py_LOCAL(PyStringObject *)
2465replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002466 const char *from_s, Py_ssize_t from_len,
2467 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002468 Py_ssize_t maxcount)
2469{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002470 if (maxcount < 0) {
2471 maxcount = PY_SSIZE_T_MAX;
2472 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2473 /* nothing to do; return the original string */
2474 return return_self(self);
2475 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002476
Thomas Wouters477c8d52006-05-27 19:21:47 +00002477 if (maxcount == 0 ||
2478 (from_len == 0 && to_len == 0)) {
2479 /* nothing to do; return the original string */
2480 return return_self(self);
2481 }
2482
2483 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002484
Thomas Wouters477c8d52006-05-27 19:21:47 +00002485 if (from_len == 0) {
2486 /* insert the 'to' string everywhere. */
2487 /* >>> "Python".replace("", ".") */
2488 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002489 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002490 }
2491
2492 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2493 /* point for an empty self string to generate a non-empty string */
2494 /* Special case so the remaining code always gets a non-empty string */
2495 if (PyString_GET_SIZE(self) == 0) {
2496 return return_self(self);
2497 }
2498
2499 if (to_len == 0) {
2500 /* delete all occurances of 'from' string */
2501 if (from_len == 1) {
2502 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002503 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002505 return replace_delete_substring(self, from_s,
2506 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002507 }
2508 }
2509
2510 /* Handle special case where both strings have the same length */
2511
2512 if (from_len == to_len) {
2513 if (from_len == 1) {
2514 return replace_single_character_in_place(
2515 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002516 from_s[0],
2517 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002518 maxcount);
2519 } else {
2520 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002521 self, from_s, from_len, to_s, to_len,
2522 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002523 }
2524 }
2525
2526 /* Otherwise use the more generic algorithms */
2527 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002528 return replace_single_character(self, from_s[0],
2529 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002530 } else {
2531 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002532 return replace_substring(self, from_s, from_len, to_s, to_len,
2533 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 }
2535}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002536
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002537PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002538"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002540Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002541old replaced by new. If the optional argument count is\n\
2542given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543
2544static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002545string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002547 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002548 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002549 const char *from_s, *to_s;
2550 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551
Thomas Wouters477c8d52006-05-27 19:21:47 +00002552 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002553 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554
Thomas Wouters477c8d52006-05-27 19:21:47 +00002555 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002556 from_s = PyString_AS_STRING(from);
2557 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002559 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002560 return NULL;
2561
Thomas Wouters477c8d52006-05-27 19:21:47 +00002562 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002563 to_s = PyString_AS_STRING(to);
2564 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002566 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 return NULL;
2568
Thomas Wouters477c8d52006-05-27 19:21:47 +00002569 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002570 from_s, from_len,
2571 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572}
2573
Thomas Wouters477c8d52006-05-27 19:21:47 +00002574/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002575
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002576/* Matches the end (direction >= 0) or start (direction < 0) of self
2577 * against substr, using the start and end arguments. Returns
2578 * -1 on error, 0 if not found and 1 if found.
2579 */
2580Py_LOCAL(int)
2581_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2582 Py_ssize_t end, int direction)
2583{
2584 Py_ssize_t len = PyString_GET_SIZE(self);
2585 Py_ssize_t slen;
2586 const char* sub;
2587 const char* str;
2588
2589 if (PyString_Check(substr)) {
2590 sub = PyString_AS_STRING(substr);
2591 slen = PyString_GET_SIZE(substr);
2592 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002593 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2594 return -1;
2595 str = PyString_AS_STRING(self);
2596
2597 string_adjust_indices(&start, &end, len);
2598
2599 if (direction < 0) {
2600 /* startswith */
2601 if (start+slen > len)
2602 return 0;
2603 } else {
2604 /* endswith */
2605 if (end-start < slen || start > len)
2606 return 0;
2607
2608 if (end-slen > start)
2609 start = end - slen;
2610 }
2611 if (end-start >= slen)
2612 return ! memcmp(str+start, sub, slen);
2613 return 0;
2614}
2615
2616
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002617PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002618"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002620Return True if B starts with the specified prefix, False otherwise.\n\
2621With optional start, test B beginning at that position.\n\
2622With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002623prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002624
2625static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002626string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002627{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002628 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002629 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002630 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002631 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002632
Guido van Rossumc6821402000-05-08 14:08:05 +00002633 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2634 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002636 if (PyTuple_Check(subobj)) {
2637 Py_ssize_t i;
2638 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2639 result = _string_tailmatch(self,
2640 PyTuple_GET_ITEM(subobj, i),
2641 start, end, -1);
2642 if (result == -1)
2643 return NULL;
2644 else if (result) {
2645 Py_RETURN_TRUE;
2646 }
2647 }
2648 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002649 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002650 result = _string_tailmatch(self, subobj, start, end, -1);
2651 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002652 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002653 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002654 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002655}
2656
2657
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002658PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002659"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002661Return True if B ends with the specified suffix, False otherwise.\n\
2662With optional start, test B beginning at that position.\n\
2663With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002664suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002665
2666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002667string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002668{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002669 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002670 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002672 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673
Guido van Rossumc6821402000-05-08 14:08:05 +00002674 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2675 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002676 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677 if (PyTuple_Check(subobj)) {
2678 Py_ssize_t i;
2679 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2680 result = _string_tailmatch(self,
2681 PyTuple_GET_ITEM(subobj, i),
2682 start, end, +1);
2683 if (result == -1)
2684 return NULL;
2685 else if (result) {
2686 Py_RETURN_TRUE;
2687 }
2688 }
2689 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002690 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002691 result = _string_tailmatch(self, subobj, start, end, +1);
2692 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002693 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002694 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002695 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002696}
2697
2698
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002699PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002700"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002701\n\
2702Decodes S using the codec registered for encoding. encoding defaults\n\
2703to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002704handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2705a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002706as well as any other name registerd with codecs.register_error that is\n\
2707able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002708
2709static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002710string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002711{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002712 const char *encoding = NULL;
2713 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002714
Guido van Rossum98297ee2007-11-06 21:34:58 +00002715 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2716 return NULL;
2717 if (encoding == NULL)
2718 encoding = PyUnicode_GetDefaultEncoding();
2719 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002720}
2721
2722
Guido van Rossumae404e22007-10-26 21:46:44 +00002723PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002724"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002725\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002726Create a bytes object from a string of hexadecimal numbers.\n\
2727Spaces between two numbers are accepted.\n\
2728Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002729
2730static int
2731hex_digit_to_int(Py_UNICODE c)
2732{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002733 if (c >= 128)
2734 return -1;
2735 if (ISDIGIT(c))
2736 return c - '0';
2737 else {
2738 if (ISUPPER(c))
2739 c = TOLOWER(c);
2740 if (c >= 'a' && c <= 'f')
2741 return c - 'a' + 10;
2742 }
2743 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002744}
2745
2746static PyObject *
2747string_fromhex(PyObject *cls, PyObject *args)
2748{
2749 PyObject *newstring, *hexobj;
2750 char *buf;
2751 Py_UNICODE *hex;
2752 Py_ssize_t hexlen, byteslen, i, j;
2753 int top, bot;
2754
2755 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2756 return NULL;
2757 assert(PyUnicode_Check(hexobj));
2758 hexlen = PyUnicode_GET_SIZE(hexobj);
2759 hex = PyUnicode_AS_UNICODE(hexobj);
2760 byteslen = hexlen/2; /* This overestimates if there are spaces */
2761 newstring = PyString_FromStringAndSize(NULL, byteslen);
2762 if (!newstring)
2763 return NULL;
2764 buf = PyString_AS_STRING(newstring);
2765 for (i = j = 0; i < hexlen; i += 2) {
2766 /* skip over spaces in the input */
2767 while (hex[i] == ' ')
2768 i++;
2769 if (i >= hexlen)
2770 break;
2771 top = hex_digit_to_int(hex[i]);
2772 bot = hex_digit_to_int(hex[i+1]);
2773 if (top == -1 || bot == -1) {
2774 PyErr_Format(PyExc_ValueError,
2775 "non-hexadecimal number found in "
2776 "fromhex() arg at position %zd", i);
2777 goto error;
2778 }
2779 buf[j++] = (top << 4) + bot;
2780 }
Christian Heimes2c4a0722008-01-30 11:28:29 +00002781 if (j != byteslen && _PyString_Resize(&newstring, j) < 0)
Guido van Rossumae404e22007-10-26 21:46:44 +00002782 goto error;
2783 return newstring;
2784
2785 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002786 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002787 return NULL;
2788}
2789
2790
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002791static PyObject *
2792string_getnewargs(PyStringObject *v)
2793{
Christian Heimes90aa7642007-12-19 02:45:37 +00002794 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002795}
2796
Christian Heimes2c4a0722008-01-30 11:28:29 +00002797
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002798static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002799string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002800 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002801 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2802 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002803 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002804 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002805 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002806 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002807 endswith__doc__},
2808 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2809 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002810 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002811 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2812 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002813 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002814 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2815 _Py_isalnum__doc__},
2816 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2817 _Py_isalpha__doc__},
2818 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2819 _Py_isdigit__doc__},
2820 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2821 _Py_islower__doc__},
2822 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2823 _Py_isspace__doc__},
2824 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2825 _Py_istitle__doc__},
2826 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2827 _Py_isupper__doc__},
2828 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2829 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2830 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002831 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002832 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002833 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2834 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2835 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002836 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002837 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2838 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002839 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2840 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2841 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2842 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2843 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002844 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002845 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002846 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002847 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2848 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002849 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002850 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2851 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002852 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002853 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002854 {NULL, NULL} /* sentinel */
2855};
2856
Jeremy Hylton938ace62002-07-17 16:30:39 +00002857static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002858str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2859
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002860static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002861string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002862{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002863 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002864 const char *encoding = NULL;
2865 const char *errors = NULL;
2866 PyObject *new = NULL;
2867 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002868 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002869
Guido van Rossumae960af2001-08-30 03:11:59 +00002870 if (type != &PyString_Type)
2871 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002872 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002873 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002874 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002875 if (x == NULL) {
2876 if (encoding != NULL || errors != NULL) {
2877 PyErr_SetString(PyExc_TypeError,
2878 "encoding or errors without sequence "
2879 "argument");
2880 return NULL;
2881 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002882 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002883 }
2884
2885 if (PyUnicode_Check(x)) {
2886 /* Encode via the codec registry */
2887 if (encoding == NULL) {
2888 PyErr_SetString(PyExc_TypeError,
2889 "string argument without an encoding");
2890 return NULL;
2891 }
2892 new = PyCodec_Encode(x, encoding, errors);
2893 if (new == NULL)
2894 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002895 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002896 return new;
2897 }
2898
2899 /* If it's not unicode, there can't be encoding or errors */
2900 if (encoding != NULL || errors != NULL) {
2901 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002902 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002903 return NULL;
2904 }
2905
Guido van Rossum98297ee2007-11-06 21:34:58 +00002906 /* Is it an int? */
2907 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2908 if (size == -1 && PyErr_Occurred()) {
2909 PyErr_Clear();
2910 }
2911 else {
2912 if (size < 0) {
2913 PyErr_SetString(PyExc_ValueError, "negative count");
2914 return NULL;
2915 }
2916 new = PyString_FromStringAndSize(NULL, size);
2917 if (new == NULL) {
2918 return NULL;
2919 }
2920 if (size > 0) {
2921 memset(((PyStringObject*)new)->ob_sval, 0, size);
2922 }
2923 return new;
2924 }
2925
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002926 /* Use the modern buffer interface */
2927 if (PyObject_CheckBuffer(x)) {
2928 Py_buffer view;
2929 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2930 return NULL;
2931 new = PyString_FromStringAndSize(NULL, view.len);
2932 if (!new)
2933 goto fail;
2934 // XXX(brett.cannon): Better way to get to internal buffer?
2935 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2936 &view, view.len, 'C') < 0)
2937 goto fail;
2938 PyObject_ReleaseBuffer(x, &view);
2939 return new;
2940 fail:
2941 Py_XDECREF(new);
2942 PyObject_ReleaseBuffer(x, &view);
2943 return NULL;
2944 }
2945
Guido van Rossum98297ee2007-11-06 21:34:58 +00002946 /* For iterator version, create a string object and resize as needed */
2947 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2948 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2949 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002950 size = 64;
2951 new = PyString_FromStringAndSize(NULL, size);
2952 if (new == NULL)
2953 return NULL;
2954
2955 /* XXX Optimize this if the arguments is a list, tuple */
2956
2957 /* Get the iterator */
2958 it = PyObject_GetIter(x);
2959 if (it == NULL)
2960 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002961
2962 /* Run the iterator to exhaustion */
2963 for (i = 0; ; i++) {
2964 PyObject *item;
2965 Py_ssize_t value;
2966
2967 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002968 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002969 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002970 if (PyErr_Occurred())
2971 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002972 break;
2973 }
2974
2975 /* Interpret it as an int (__index__) */
2976 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2977 Py_DECREF(item);
2978 if (value == -1 && PyErr_Occurred())
2979 goto error;
2980
2981 /* Range check */
2982 if (value < 0 || value >= 256) {
2983 PyErr_SetString(PyExc_ValueError,
2984 "bytes must be in range(0, 256)");
2985 goto error;
2986 }
2987
2988 /* Append the byte */
2989 if (i >= size) {
2990 size *= 2;
2991 if (_PyString_Resize(&new, size) < 0)
2992 goto error;
2993 }
2994 ((PyStringObject *)new)->ob_sval[i] = value;
2995 }
2996 _PyString_Resize(&new, i);
2997
2998 /* Clean up and return success */
2999 Py_DECREF(it);
3000 return new;
3001
3002 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003003 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003004 Py_XDECREF(it);
3005 Py_DECREF(new);
3006 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003007}
3008
Guido van Rossumae960af2001-08-30 03:11:59 +00003009static PyObject *
3010str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3011{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003012 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003013 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003014
3015 assert(PyType_IsSubtype(type, &PyString_Type));
3016 tmp = string_new(&PyString_Type, args, kwds);
3017 if (tmp == NULL)
3018 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003019 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003020 n = PyString_GET_SIZE(tmp);
3021 pnew = type->tp_alloc(type, n);
3022 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003023 Py_MEMCPY(PyString_AS_STRING(pnew),
3024 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003025 ((PyStringObject *)pnew)->ob_shash =
3026 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003027 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003028 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003029 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003030}
3031
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003032PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003033"bytes(iterable_of_ints) -> bytes.\n\
3034bytes(string, encoding[, errors]) -> bytes\n\
3035bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3036bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003037\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003038Construct an immutable array of bytes from:\n\
3039 - an iterable yielding integers in range(256)\n\
3040 - a text string encoded using the specified encoding\n\
3041 - a bytes or a buffer object\n\
3042 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003043
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003044static PyObject *str_iter(PyObject *seq);
3045
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003046PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003047 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003048 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003049 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003050 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003051 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003052 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003053 0, /* tp_getattr */
3054 0, /* tp_setattr */
3055 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003056 (reprfunc)string_repr, /* tp_repr */
3057 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003058 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003059 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003060 (hashfunc)string_hash, /* tp_hash */
3061 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003062 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003063 PyObject_GenericGetAttr, /* tp_getattro */
3064 0, /* tp_setattro */
3065 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003066 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3067 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003068 string_doc, /* tp_doc */
3069 0, /* tp_traverse */
3070 0, /* tp_clear */
3071 (richcmpfunc)string_richcompare, /* tp_richcompare */
3072 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003073 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003074 0, /* tp_iternext */
3075 string_methods, /* tp_methods */
3076 0, /* tp_members */
3077 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003078 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003079 0, /* tp_dict */
3080 0, /* tp_descr_get */
3081 0, /* tp_descr_set */
3082 0, /* tp_dictoffset */
3083 0, /* tp_init */
3084 0, /* tp_alloc */
3085 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003086 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003087};
3088
3089void
Fred Drakeba096332000-07-09 07:04:36 +00003090PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003091{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003092 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003093 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003094 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003095 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003096 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003097 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003098 *pv = NULL;
3099 return;
3100 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003101 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003103 *pv = v;
3104}
3105
Guido van Rossum013142a1994-08-30 08:19:36 +00003106void
Fred Drakeba096332000-07-09 07:04:36 +00003107PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003108{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003109 PyString_Concat(pv, w);
3110 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003111}
3112
3113
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003114/* The following function breaks the notion that strings are immutable:
3115 it changes the size of a string. We get away with this only if there
3116 is only one module referencing the object. You can also think of it
3117 as creating a new string object and destroying the old one, only
3118 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003119 already be known to some other part of the code...
3120 Note that if there's not enough memory to resize the string, the original
3121 string object at *pv is deallocated, *pv is set to NULL, an "out of
3122 memory" exception is set, and -1 is returned. Else (on success) 0 is
3123 returned, and the value in *pv may or may not be the same as on input.
3124 As always, an extra byte is allocated for a trailing \0 byte (newsize
3125 does *not* include that), and a trailing \0 byte is stored.
3126*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003127
3128int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003129_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003130{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003131 register PyObject *v;
3132 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003133 v = *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003134 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003135 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136 Py_DECREF(v);
3137 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003138 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003139 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003140 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003141 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003142 _Py_ForgetReference(v);
3143 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003144 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003145 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003146 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003147 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003148 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 _Py_NewReference(*pv);
3151 sv = (PyStringObject *) *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003152 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003153 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003154 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003155 return 0;
3156}
Guido van Rossume5372401993-03-16 12:15:04 +00003157
Tim Peters38fd5b62000-09-21 05:43:11 +00003158/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3159 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3160 * Python's regular ints.
3161 * Return value: a new PyString*, or NULL if error.
3162 * . *pbuf is set to point into it,
3163 * *plen set to the # of chars following that.
3164 * Caller must decref it when done using pbuf.
3165 * The string starting at *pbuf is of the form
3166 * "-"? ("0x" | "0X")? digit+
3167 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003168 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003169 * There will be at least prec digits, zero-filled on the left if
3170 * necessary to get that many.
3171 * val object to be converted
3172 * flags bitmask of format flags; only F_ALT is looked at
3173 * prec minimum number of digits; 0-fill on left if needed
3174 * type a character in [duoxX]; u acts the same as d
3175 *
3176 * CAUTION: o, x and X conversions on regular ints can never
3177 * produce a '-' sign, but can for Python's unbounded ints.
3178 */
3179PyObject*
3180_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3181 char **pbuf, int *plen)
3182{
3183 PyObject *result = NULL;
3184 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003185 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003186 int sign; /* 1 if '-', else 0 */
3187 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003188 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003189 int numdigits; /* len == numnondigits + numdigits */
3190 int numnondigits = 0;
3191
Guido van Rossumddefaf32007-01-14 03:31:43 +00003192 /* Avoid exceeding SSIZE_T_MAX */
3193 if (prec > PY_SSIZE_T_MAX-3) {
3194 PyErr_SetString(PyExc_OverflowError,
3195 "precision too large");
3196 return NULL;
3197 }
3198
Tim Peters38fd5b62000-09-21 05:43:11 +00003199 switch (type) {
3200 case 'd':
3201 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003202 /* Special-case boolean: we want 0/1 */
3203 if (PyBool_Check(val))
3204 result = PyNumber_ToBase(val, 10);
3205 else
Christian Heimes90aa7642007-12-19 02:45:37 +00003206 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003207 break;
3208 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003209 numnondigits = 2;
3210 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003211 break;
3212 case 'x':
3213 case 'X':
3214 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003215 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003216 break;
3217 default:
3218 assert(!"'type' not in [duoxX]");
3219 }
3220 if (!result)
3221 return NULL;
3222
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003223 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003224 if (!buf) {
3225 Py_DECREF(result);
3226 return NULL;
3227 }
3228
Tim Peters38fd5b62000-09-21 05:43:11 +00003229 /* To modify the string in-place, there can only be one reference. */
Christian Heimes90aa7642007-12-19 02:45:37 +00003230 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003231 PyErr_BadInternalCall();
3232 return NULL;
3233 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003234 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003235 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003236 PyErr_SetString(PyExc_ValueError,
3237 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003238 return NULL;
3239 }
3240 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003241 if (buf[len-1] == 'L') {
3242 --len;
3243 buf[len] = '\0';
3244 }
3245 sign = buf[0] == '-';
3246 numnondigits += sign;
3247 numdigits = len - numnondigits;
3248 assert(numdigits > 0);
3249
Tim Petersfff53252001-04-12 18:38:48 +00003250 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003251 if (((flags & F_ALT) == 0 &&
3252 (type == 'o' || type == 'x' || type == 'X'))) {
3253 assert(buf[sign] == '0');
3254 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003255 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003256 numnondigits -= 2;
3257 buf += 2;
3258 len -= 2;
3259 if (sign)
3260 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003261 assert(len == numnondigits + numdigits);
3262 assert(numdigits > 0);
3263 }
3264
3265 /* Fill with leading zeroes to meet minimum width. */
3266 if (prec > numdigits) {
3267 PyObject *r1 = PyString_FromStringAndSize(NULL,
3268 numnondigits + prec);
3269 char *b1;
3270 if (!r1) {
3271 Py_DECREF(result);
3272 return NULL;
3273 }
3274 b1 = PyString_AS_STRING(r1);
3275 for (i = 0; i < numnondigits; ++i)
3276 *b1++ = *buf++;
3277 for (i = 0; i < prec - numdigits; i++)
3278 *b1++ = '0';
3279 for (i = 0; i < numdigits; i++)
3280 *b1++ = *buf++;
3281 *b1 = '\0';
3282 Py_DECREF(result);
3283 result = r1;
3284 buf = PyString_AS_STRING(result);
3285 len = numnondigits + prec;
3286 }
3287
3288 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003289 if (type == 'X') {
3290 /* Need to convert all lower case letters to upper case.
3291 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003292 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003293 if (buf[i] >= 'a' && buf[i] <= 'x')
3294 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003295 }
3296 *pbuf = buf;
3297 *plen = len;
3298 return result;
3299}
3300
Guido van Rossum8cf04761997-08-02 02:57:45 +00003301void
Fred Drakeba096332000-07-09 07:04:36 +00003302PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003303{
3304 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003305 for (i = 0; i < UCHAR_MAX + 1; i++) {
3306 Py_XDECREF(characters[i]);
3307 characters[i] = NULL;
3308 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003309 Py_XDECREF(nullstring);
3310 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003311}
Barry Warsawa903ad982001-02-23 16:40:48 +00003312
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003313/*********************** Str Iterator ****************************/
3314
3315typedef struct {
3316 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003317 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003318 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3319} striterobject;
3320
3321static void
3322striter_dealloc(striterobject *it)
3323{
3324 _PyObject_GC_UNTRACK(it);
3325 Py_XDECREF(it->it_seq);
3326 PyObject_GC_Del(it);
3327}
3328
3329static int
3330striter_traverse(striterobject *it, visitproc visit, void *arg)
3331{
3332 Py_VISIT(it->it_seq);
3333 return 0;
3334}
3335
3336static PyObject *
3337striter_next(striterobject *it)
3338{
3339 PyStringObject *seq;
3340 PyObject *item;
3341
3342 assert(it != NULL);
3343 seq = it->it_seq;
3344 if (seq == NULL)
3345 return NULL;
3346 assert(PyString_Check(seq));
3347
3348 if (it->it_index < PyString_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003349 item = PyLong_FromLong(
Guido van Rossum75a902d2007-10-19 22:06:24 +00003350 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003351 if (item != NULL)
3352 ++it->it_index;
3353 return item;
3354 }
3355
3356 Py_DECREF(seq);
3357 it->it_seq = NULL;
3358 return NULL;
3359}
3360
3361static PyObject *
3362striter_len(striterobject *it)
3363{
3364 Py_ssize_t len = 0;
3365 if (it->it_seq)
3366 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003367 return PyLong_FromSsize_t(len);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003368}
3369
Guido van Rossum49d6b072006-08-17 21:11:47 +00003370PyDoc_STRVAR(length_hint_doc,
3371 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003372
3373static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003374 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3375 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003376 {NULL, NULL} /* sentinel */
3377};
3378
3379PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003380 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003381 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003382 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003383 0, /* tp_itemsize */
3384 /* methods */
3385 (destructor)striter_dealloc, /* tp_dealloc */
3386 0, /* tp_print */
3387 0, /* tp_getattr */
3388 0, /* tp_setattr */
3389 0, /* tp_compare */
3390 0, /* tp_repr */
3391 0, /* tp_as_number */
3392 0, /* tp_as_sequence */
3393 0, /* tp_as_mapping */
3394 0, /* tp_hash */
3395 0, /* tp_call */
3396 0, /* tp_str */
3397 PyObject_GenericGetAttr, /* tp_getattro */
3398 0, /* tp_setattro */
3399 0, /* tp_as_buffer */
3400 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3401 0, /* tp_doc */
3402 (traverseproc)striter_traverse, /* tp_traverse */
3403 0, /* tp_clear */
3404 0, /* tp_richcompare */
3405 0, /* tp_weaklistoffset */
3406 PyObject_SelfIter, /* tp_iter */
3407 (iternextfunc)striter_next, /* tp_iternext */
3408 striter_methods, /* tp_methods */
3409 0,
3410};
3411
3412static PyObject *
3413str_iter(PyObject *seq)
3414{
3415 striterobject *it;
3416
3417 if (!PyString_Check(seq)) {
3418 PyErr_BadInternalCall();
3419 return NULL;
3420 }
3421 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3422 if (it == NULL)
3423 return NULL;
3424 it->it_index = 0;
3425 Py_INCREF(seq);
3426 it->it_seq = (PyStringObject *)seq;
3427 _PyObject_GC_TRACK(it);
3428 return (PyObject *)it;
3429}