blob: c33332ee22b4960710f3d73e90a588d0dccbf0cd [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
   For PyString_FromStringAndSize(), the parameter `str' is
46 either NULL or else points to a string containing at least `size' bytes.
47 For PyString_FromStringAndSize(), the string in the `str' parameter does
48 not have to be null-terminated. (Therefore it is safe to construct a
49 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
50 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
51 bytes (setting the last byte to the null terminating character) and you can
52 fill in the data yourself. If `str' is non-NULL then the resulting
53 PyString object must be treated as immutable and you must not fill in nor
54 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000056 The PyObject member `op->ob_size', which denotes the number of "extra
57 items" in a variable-size object, will contain the number of bytes
58 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
60 PyString_FromStringAndSize()) or the length of the string in the `str'
61 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
507static /*const*/ char *
508string_getbuffer(register PyObject *op)
509{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000510 char *s;
511 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000512 if (PyString_AsStringAndSize(op, &s, &len))
513 return NULL;
514 return s;
515}
516
Martin v. Löwis18e16552006-02-15 17:27:45 +0000517Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000518PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000520 if (!PyString_Check(op))
521 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000522 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000523}
524
525/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000526PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000527{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000528 if (!PyString_Check(op))
529 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000530 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000531}
532
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000533int
534PyString_AsStringAndSize(register PyObject *obj,
535 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000536 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000537{
538 if (s == NULL) {
539 PyErr_BadInternalCall();
540 return -1;
541 }
542
543 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string, %.200s found", Py_Type(obj)->tp_name);
546 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000547 }
548
549 *s = PyString_AS_STRING(obj);
550 if (len != NULL)
551 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000552 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000553 PyErr_SetString(PyExc_TypeError,
554 "expected string without null bytes");
555 return -1;
556 }
557 return 0;
558}
559
Thomas Wouters477c8d52006-05-27 19:21:47 +0000560/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561/* Methods */
562
Thomas Wouters477c8d52006-05-27 19:21:47 +0000563#define STRINGLIB_CHAR char
564
565#define STRINGLIB_CMP memcmp
566#define STRINGLIB_LEN PyString_GET_SIZE
567#define STRINGLIB_NEW PyString_FromStringAndSize
568#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000569/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000570
571#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000572#define STRINGLIB_CHECK_EXACT PyString_CheckExact
573#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000574
575#include "stringlib/fastsearch.h"
576
577#include "stringlib/count.h"
578#include "stringlib/find.h"
579#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000580#include "stringlib/ctype.h"
581#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000582
583
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584PyObject *
585PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000586{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000587 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000588 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000589 Py_ssize_t length = Py_Size(op);
590 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000591 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000592 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000593 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000594 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000595 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000596 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000597 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000598 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000599 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000600 }
601 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000602 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000603 register Py_UNICODE c;
604 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000605 int quote;
606
Guido van Rossum98297ee2007-11-06 21:34:58 +0000607 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000608 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000609 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000610 char *test, *start;
611 start = PyString_AS_STRING(op);
612 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000613 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000614 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000615 goto decided;
616 }
617 else if (*test == '\'')
618 quote = '"';
619 }
620 decided:
621 ;
622 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623
Guido van Rossum98297ee2007-11-06 21:34:58 +0000624 *p++ = 'b', *p++ = quote;
625 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000626 /* There's at least enough room for a hex escape
627 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000628 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000632 else if (c == '\t')
633 *p++ = '\\', *p++ = 't';
634 else if (c == '\n')
635 *p++ = '\\', *p++ = 'n';
636 else if (c == '\r')
637 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000638 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000639 *p++ = '\\';
640 *p++ = 'x';
641 *p++ = hexdigits[(c & 0xf0) >> 4];
642 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000643 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000644 else
645 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000647 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000648 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000650 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
651 Py_DECREF(v);
652 return NULL;
653 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000654 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000656}
657
Guido van Rossum189f1df2001-05-01 16:51:53 +0000658static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659string_repr(PyObject *op)
660{
661 return PyString_Repr(op, 1);
662}
663
664static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000665string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000666{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000667 if (Py_BytesWarningFlag) {
668 if (PyErr_WarnEx(PyExc_BytesWarning,
669 "str() on a bytes instance", 1))
670 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000671 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000672 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000673}
674
Martin v. Löwis18e16552006-02-15 17:27:45 +0000675static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000676string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000678 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000679}
680
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000682static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000683string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000685 Py_ssize_t size;
686 Py_buffer va, vb;
687 PyObject *result = NULL;
688
689 va.len = -1;
690 vb.len = -1;
691 if (_getbuffer(a, &va) < 0 ||
692 _getbuffer(b, &vb) < 0) {
693 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
694 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
695 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000696 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000697
Guido van Rossum98297ee2007-11-06 21:34:58 +0000698 /* Optimize end cases */
699 if (va.len == 0 && PyString_CheckExact(b)) {
700 result = b;
701 Py_INCREF(result);
702 goto done;
703 }
704 if (vb.len == 0 && PyString_CheckExact(a)) {
705 result = a;
706 Py_INCREF(result);
707 goto done;
708 }
709
710 size = va.len + vb.len;
711 if (size < 0) {
712 PyErr_NoMemory();
713 goto done;
714 }
715
716 result = PyString_FromStringAndSize(NULL, size);
717 if (result != NULL) {
718 memcpy(PyString_AS_STRING(result), va.buf, va.len);
719 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
720 }
721
722 done:
723 if (va.len != -1)
724 PyObject_ReleaseBuffer(a, &va);
725 if (vb.len != -1)
726 PyObject_ReleaseBuffer(b, &vb);
727 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728}
729
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000733 register Py_ssize_t i;
734 register Py_ssize_t j;
735 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000737 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738 if (n < 0)
739 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000740 /* watch out for overflows: the size can overflow int,
741 * and the # of bytes needed can overflow size_t
742 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 size = Py_Size(a) * n;
744 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000745 PyErr_SetString(PyExc_OverflowError,
746 "repeated string is too long");
747 return NULL;
748 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000749 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000750 Py_INCREF(a);
751 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000752 }
Tim Peterse7c05322004-06-27 17:24:49 +0000753 nbytes = (size_t)size;
754 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000755 PyErr_SetString(PyExc_OverflowError,
756 "repeated string is too long");
757 return NULL;
758 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000759 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000760 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000761 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000763 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000764 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000765 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000766 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000767 memset(op->ob_sval, a->ob_sval[0] , n);
768 return (PyObject *) op;
769 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000770 i = 0;
771 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000772 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
773 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000774 }
775 while (i < size) {
776 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000777 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000778 i += j;
779 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000780 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781}
782
Guido van Rossum9284a572000-03-07 15:53:43 +0000783static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000785{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
787 if (ival == -1 && PyErr_Occurred()) {
788 Py_buffer varg;
789 int pos;
790 PyErr_Clear();
791 if (_getbuffer(arg, &varg) < 0)
792 return -1;
793 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
794 varg.buf, varg.len, 0);
795 PyObject_ReleaseBuffer(arg, &varg);
796 return pos >= 0;
797 }
798 if (ival < 0 || ival >= 256) {
799 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
800 return -1;
801 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000802
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
804}
805
806static PyObject *
807string_item(PyStringObject *a, register Py_ssize_t i)
808{
809 if (i < 0 || i >= Py_Size(a)) {
810 PyErr_SetString(PyExc_IndexError, "string index out of range");
811 return NULL;
812 }
813 return PyInt_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000814}
815
Martin v. Löwiscd353062001-05-24 16:56:35 +0000816static PyObject*
817string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000818{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000819 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000820 Py_ssize_t len_a, len_b;
821 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000822 PyObject *result;
823
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000824 /* Make sure both arguments are strings. */
825 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000826 if (Py_BytesWarningFlag && (op == Py_EQ) &&
827 (PyObject_IsInstance((PyObject*)a,
828 (PyObject*)&PyUnicode_Type) ||
829 PyObject_IsInstance((PyObject*)b,
830 (PyObject*)&PyUnicode_Type))) {
831 if (PyErr_WarnEx(PyExc_BytesWarning,
832 "Comparsion between bytes and string", 1))
833 return NULL;
834 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000835 result = Py_NotImplemented;
836 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000837 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000838 if (a == b) {
839 switch (op) {
840 case Py_EQ:case Py_LE:case Py_GE:
841 result = Py_True;
842 goto out;
843 case Py_NE:case Py_LT:case Py_GT:
844 result = Py_False;
845 goto out;
846 }
847 }
848 if (op == Py_EQ) {
849 /* Supporting Py_NE here as well does not save
850 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000851 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000852 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000853 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 result = Py_True;
855 } else {
856 result = Py_False;
857 }
858 goto out;
859 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000860 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000861 min_len = (len_a < len_b) ? len_a : len_b;
862 if (min_len > 0) {
863 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
864 if (c==0)
865 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000866 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000867 c = 0;
868 if (c == 0)
869 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
870 switch (op) {
871 case Py_LT: c = c < 0; break;
872 case Py_LE: c = c <= 0; break;
873 case Py_EQ: assert(0); break; /* unreachable */
874 case Py_NE: c = c != 0; break;
875 case Py_GT: c = c > 0; break;
876 case Py_GE: c = c >= 0; break;
877 default:
878 result = Py_NotImplemented;
879 goto out;
880 }
881 result = c ? Py_True : Py_False;
882 out:
883 Py_INCREF(result);
884 return result;
885}
886
887int
888_PyString_Eq(PyObject *o1, PyObject *o2)
889{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000890 PyStringObject *a = (PyStringObject*) o1;
891 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000892 return Py_Size(a) == Py_Size(b)
893 && *a->ob_sval == *b->ob_sval
894 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895}
896
Guido van Rossum9bfef441993-03-29 10:43:31 +0000897static long
Fred Drakeba096332000-07-09 07:04:36 +0000898string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000899{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000900 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000901 register unsigned char *p;
902 register long x;
903
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000904 if (a->ob_shash != -1)
905 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000906 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000907 p = (unsigned char *) a->ob_sval;
908 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000909 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000910 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000911 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000912 if (x == -1)
913 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000914 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000915 return x;
916}
917
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000918static PyObject*
919string_subscript(PyStringObject* self, PyObject* item)
920{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000921 if (PyIndex_Check(item)) {
922 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000923 if (i == -1 && PyErr_Occurred())
924 return NULL;
925 if (i < 0)
926 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000927 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000928 PyErr_SetString(PyExc_IndexError,
929 "string index out of range");
930 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000931 }
932 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000933 }
934 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000936 char* source_buf;
937 char* result_buf;
938 PyObject* result;
939
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000940 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000941 PyString_GET_SIZE(self),
942 &start, &stop, &step, &slicelength) < 0) {
943 return NULL;
944 }
945
946 if (slicelength <= 0) {
947 return PyString_FromStringAndSize("", 0);
948 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000949 else if (start == 0 && step == 1 &&
950 slicelength == PyString_GET_SIZE(self) &&
951 PyString_CheckExact(self)) {
952 Py_INCREF(self);
953 return (PyObject *)self;
954 }
955 else if (step == 1) {
956 return PyString_FromStringAndSize(
957 PyString_AS_STRING(self) + start,
958 slicelength);
959 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 else {
961 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000962 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000963 if (result_buf == NULL)
964 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000965
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000966 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000967 cur += step, i++) {
968 result_buf[i] = source_buf[cur];
969 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000970
971 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000972 slicelength);
973 PyMem_Free(result_buf);
974 return result;
975 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000976 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000977 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000978 PyErr_Format(PyExc_TypeError,
979 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000980 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000981 return NULL;
982 }
983}
984
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000985static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000986string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000987{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000988 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
989 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000990}
991
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000993 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000994 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000995 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000996 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000997 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000998 0, /*sq_ass_item*/
999 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001000 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001};
1002
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001003static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001004 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001005 (binaryfunc)string_subscript,
1006 0,
1007};
1008
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001009static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001010 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001011 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001012};
1013
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001014
/* Selectors for the three strip variants.  Each value is also the index of
   the matching entry in stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for selector i: the format string with its three
   leading characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001023
Thomas Wouters477c8d52006-05-27 19:21:47 +00001024
/* Nonzero when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly; memcmp covers the length-2 bytes in between.

   Don't call if length < 2 (the memcmp count is length-2).
   Every argument is expanded more than once, so pass expressions without
   side effects.  All arguments are parenthesized so compound expressions
   (e.g. `p + 1`, `c ? x : y`) expand with the intended grouping. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1030
1031
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the piece data[left:right] to the result list.
   This macro uses names from the function it is expanded in: the locals
   `str`, `list` and `count`, and the label `onError`, which is jumped to
   when creating or appending the piece fails.
   The first MAX_PREALLOC pieces are stored directly into the preallocated
   slots (the list takes over the reference); later pieces are appended,
   after which our own reference is dropped. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Scanning helpers: advance (or, for the R variants, retreat) index `i`
   over s while the byte there is / is not whitespace per ISSPACE.  `i`
   must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[(i)])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001069
1070Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001071split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001073 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001074 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076
1077 if (list == NULL)
1078 return NULL;
1079
Thomas Wouters477c8d52006-05-27 19:21:47 +00001080 i = j = 0;
1081
1082 while (maxsplit-- > 0) {
1083 SKIP_SPACE(s, i, len);
1084 if (i==len) break;
1085 j = i; i++;
1086 SKIP_NONSPACE(s, i, len);
1087 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001089
1090 if (i < len) {
1091 /* Only occurs when maxsplit was reached */
1092 /* Skip any remaining whitespace and copy to end of string */
1093 SKIP_SPACE(s, i, len);
1094 if (i != len)
1095 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001096 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001097 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001098 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001099 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001100 Py_DECREF(list);
1101 return NULL;
1102}
1103
Thomas Wouters477c8d52006-05-27 19:21:47 +00001104Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001105split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001106{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001107 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001108 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001109 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001110
1111 if (list == NULL)
1112 return NULL;
1113
Thomas Wouters477c8d52006-05-27 19:21:47 +00001114 i = j = 0;
1115 while ((j < len) && (maxcount-- > 0)) {
1116 for(; j<len; j++) {
1117 /* I found that using memchr makes no difference */
1118 if (s[j] == ch) {
1119 SPLIT_ADD(s, i, j);
1120 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001121 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001122 }
1123 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001124 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001125 if (i <= len) {
1126 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001127 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001128 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001129 return list;
1130
1131 onError:
1132 Py_DECREF(list);
1133 return NULL;
1134}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001136PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001137"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001139Return a list of the sections in B, using sep as the delimiter.\n\
1140If sep is not given, B is split on ASCII whitespace characters\n\
1141(space, tab, return, newline, formfeed, vertical tab).\n\
1142If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143
1144static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001145string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001147 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001148 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001149 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001150 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001151 PyObject *list, *str, *subobj = Py_None;
1152#ifdef USE_FAST
1153 Py_ssize_t pos;
1154#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001155
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001156 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001159 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001160 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001161 return split_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001162 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001163 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001164 sub = vsub.buf;
1165 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167 if (n == 0) {
1168 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001169 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170 return NULL;
1171 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001172 else if (n == 1) {
1173 char ch = sub[0];
1174 PyObject_ReleaseBuffer(subobj, &vsub);
1175 return split_char(s, len, ch, maxsplit);
1176 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177
Thomas Wouters477c8d52006-05-27 19:21:47 +00001178 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001179 if (list == NULL) {
1180 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001181 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001182 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001183
Thomas Wouters477c8d52006-05-27 19:21:47 +00001184#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001186 while (maxsplit-- > 0) {
1187 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1188 if (pos < 0)
1189 break;
1190 j = i+pos;
1191 SPLIT_ADD(s, i, j);
1192 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001193 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001194#else
1195 i = j = 0;
1196 while ((j+n <= len) && (maxsplit-- > 0)) {
1197 for (; j+n <= len; j++) {
1198 if (Py_STRING_MATCH(s, j, sub, n)) {
1199 SPLIT_ADD(s, i, j);
1200 i = j = j + n;
1201 break;
1202 }
1203 }
1204 }
1205#endif
1206 SPLIT_ADD(s, i, len);
1207 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001208 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209 return list;
1210
Thomas Wouters477c8d52006-05-27 19:21:47 +00001211 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001212 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001213 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001214 return NULL;
1215}
1216
Thomas Wouters477c8d52006-05-27 19:21:47 +00001217PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001218"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001219\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001220Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001221the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001222found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001223
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001224static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001225string_partition(PyStringObject *self, PyObject *sep_obj)
1226{
1227 const char *sep;
1228 Py_ssize_t sep_len;
1229
1230 if (PyString_Check(sep_obj)) {
1231 sep = PyString_AS_STRING(sep_obj);
1232 sep_len = PyString_GET_SIZE(sep_obj);
1233 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001234 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1235 return NULL;
1236
1237 return stringlib_partition(
1238 (PyObject*) self,
1239 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1240 sep_obj, sep, sep_len
1241 );
1242}
1243
1244PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001245"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001246\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001247Searches for the separator sep in B, starting at the end of B,\n\
1248and returns the part before it, the separator itself, and the\n\
1249part after it. If the separator is not found, returns two empty\n\
1250bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001251
1252static PyObject *
1253string_rpartition(PyStringObject *self, PyObject *sep_obj)
1254{
1255 const char *sep;
1256 Py_ssize_t sep_len;
1257
1258 if (PyString_Check(sep_obj)) {
1259 sep = PyString_AS_STRING(sep_obj);
1260 sep_len = PyString_GET_SIZE(sep_obj);
1261 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001262 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1263 return NULL;
1264
1265 return stringlib_rpartition(
1266 (PyObject*) self,
1267 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1268 sep_obj, sep, sep_len
1269 );
1270}
1271
1272Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001273rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001274{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001275 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001276 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001278
1279 if (list == NULL)
1280 return NULL;
1281
Thomas Wouters477c8d52006-05-27 19:21:47 +00001282 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001283
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284 while (maxsplit-- > 0) {
1285 RSKIP_SPACE(s, i);
1286 if (i<0) break;
1287 j = i; i--;
1288 RSKIP_NONSPACE(s, i);
1289 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001290 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001292 /* Only occurs when maxsplit was reached. Skip any remaining
1293 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001294 RSKIP_SPACE(s, i);
1295 if (i >= 0)
1296 SPLIT_ADD(s, 0, i + 1);
1297
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001298 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001299 FIX_PREALLOC_SIZE(list);
1300 if (PyList_Reverse(list) < 0)
1301 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001302 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001303 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001304 Py_DECREF(list);
1305 return NULL;
1306}
1307
Thomas Wouters477c8d52006-05-27 19:21:47 +00001308Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001309rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001310{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001311 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001312 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001313 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001314
1315 if (list == NULL)
1316 return NULL;
1317
Thomas Wouters477c8d52006-05-27 19:21:47 +00001318 i = j = len - 1;
1319 while ((i >= 0) && (maxcount-- > 0)) {
1320 for (; i >= 0; i--) {
1321 if (s[i] == ch) {
1322 SPLIT_ADD(s, i + 1, j + 1);
1323 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001325 }
1326 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001327 }
1328 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001329 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001330 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001331 FIX_PREALLOC_SIZE(list);
1332 if (PyList_Reverse(list) < 0)
1333 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 return list;
1335
1336 onError:
1337 Py_DECREF(list);
1338 return NULL;
1339}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001340
1341PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001342"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001343\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001344Return a list of the sections in B, using sep as the delimiter,\n\
1345starting at the end of B and working to the front.\n\
1346If sep is not given, B is split on ASCII whitespace characters\n\
1347(space, tab, return, newline, formfeed, vertical tab).\n\
1348If maxsplit is given, at most maxsplit splits are done.");
1349
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001350
1351static PyObject *
1352string_rsplit(PyStringObject *self, PyObject *args)
1353{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001354 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001355 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001356 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001357 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001358 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001359
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001360 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001361 return NULL;
1362 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001363 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001364 if (subobj == Py_None)
1365 return rsplit_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001367 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001368 sub = vsub.buf;
1369 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001370
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001371 if (n == 0) {
1372 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001373 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374 return NULL;
1375 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001376 else if (n == 1) {
1377 char ch = sub[0];
1378 PyObject_ReleaseBuffer(subobj, &vsub);
1379 return rsplit_char(s, len, ch, maxsplit);
1380 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001381
Thomas Wouters477c8d52006-05-27 19:21:47 +00001382 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 if (list == NULL) {
1384 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001385 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387
1388 j = len;
1389 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001390
Thomas Wouters477c8d52006-05-27 19:21:47 +00001391 while ( (i >= 0) && (maxsplit-- > 0) ) {
1392 for (; i>=0; i--) {
1393 if (Py_STRING_MATCH(s, i, sub, n)) {
1394 SPLIT_ADD(s, i + n, j);
1395 j = i;
1396 i -= n;
1397 break;
1398 }
1399 }
1400 }
1401 SPLIT_ADD(s, 0, j);
1402 FIX_PREALLOC_SIZE(list);
1403 if (PyList_Reverse(list) < 0)
1404 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001405 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001406 return list;
1407
Thomas Wouters477c8d52006-05-27 19:21:47 +00001408onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001409 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001410 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001411 return NULL;
1412}
1413
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001414#undef SPLIT_ADD
1415#undef MAX_PREALLOC
1416#undef PREALLOC_SIZE
1417
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001419PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001420"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001422Concatenates any number of bytes objects, with B in between each pair.\n\
1423Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424
1425static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001426string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427{
1428 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001429 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001432 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001433 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001434 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001435 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
Tim Peters19fe14e2001-01-19 03:03:47 +00001437 seq = PySequence_Fast(orig, "");
1438 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001439 return NULL;
1440 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001441
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001442 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001443 if (seqlen == 0) {
1444 Py_DECREF(seq);
1445 return PyString_FromString("");
1446 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001448 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001449 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001450 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001451 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001452 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001453 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001455
Raymond Hettinger674f2412004-08-23 23:23:54 +00001456 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001458 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001459 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001460 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001461 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001465 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001466 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001467 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001468 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001469 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001470 Py_DECREF(seq);
1471 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001472 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001473 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001474 if (i != 0)
1475 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001476 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001477 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001478 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001479 Py_DECREF(seq);
1480 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 }
1483
1484 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001485 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 if (res == NULL) {
1487 Py_DECREF(seq);
1488 return NULL;
1489 }
1490
1491 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001492 /* I'm not worried about a PyBytes item growing because there's
1493 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001494 p = PyString_AS_STRING(res);
1495 for (i = 0; i < seqlen; ++i) {
1496 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001497 char *q;
1498 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001499 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001500 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001501 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001502 item = PySequence_Fast_GET_ITEM(seq, i);
1503 n = Py_Size(item);
1504 if (PyString_Check(item))
1505 q = PyString_AS_STRING(item);
1506 else
1507 q = PyBytes_AS_STRING(item);
1508 Py_MEMCPY(p, q, n);
1509 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001511
Jeremy Hylton49048292000-07-11 03:28:17 +00001512 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514}
1515
Tim Peters52e155e2001-06-16 05:42:57 +00001516PyObject *
1517_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001518{
Tim Petersa7259592001-06-16 05:11:17 +00001519 assert(sep != NULL && PyString_Check(sep));
1520 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001521 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001522}
1523
Thomas Wouters477c8d52006-05-27 19:21:47 +00001524Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001525string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001526{
1527 if (*end > len)
1528 *end = len;
1529 else if (*end < 0)
1530 *end += len;
1531 if (*end < 0)
1532 *end = 0;
1533 if (*start < 0)
1534 *start += len;
1535 if (*start < 0)
1536 *start = 0;
1537}
1538
Thomas Wouters477c8d52006-05-27 19:21:47 +00001539Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001540string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001543 const char *sub;
1544 Py_ssize_t sub_len;
1545 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001546 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547
Christian Heimes9cd17752007-11-18 19:35:23 +00001548 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1549 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001550 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001551 /* To support None in "start" and "end" arguments, meaning
1552 the same as if they were not passed.
1553 */
1554 if (obj_start != Py_None)
1555 if (!_PyEval_SliceIndex(obj_start, &start))
1556 return -2;
1557 if (obj_end != Py_None)
1558 if (!_PyEval_SliceIndex(obj_end, &end))
1559 return -2;
1560
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001563 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001565 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001566 /* XXX - the "expected a character buffer object" is pretty
1567 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 return -2;
1569
Thomas Wouters477c8d52006-05-27 19:21:47 +00001570 if (dir > 0)
1571 return stringlib_find_slice(
1572 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573 sub, sub_len, start, end);
1574 else
1575 return stringlib_rfind_slice(
1576 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1577 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001578}
1579
1580
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001581PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001582"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583\n\
1584Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001585such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586arguments start and end are interpreted as in slice notation.\n\
1587\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001588Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589
1590static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001591string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001593 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594 if (result == -2)
1595 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001596 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597}
1598
1599
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001600PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001601"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001603Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
1605static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001606string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001608 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 if (result == -2)
1610 return NULL;
1611 if (result == -1) {
1612 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001613 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 return NULL;
1615 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001616 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617}
1618
1619
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001620PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001621"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001623Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001624such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625arguments start and end are interpreted as in slice notation.\n\
1626\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001627Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628
1629static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001630string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001632 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 if (result == -2)
1634 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001635 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636}
1637
1638
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001639PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001640"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001642Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643
1644static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001645string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001647 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648 if (result == -2)
1649 return NULL;
1650 if (result == -1) {
1651 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001652 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653 return NULL;
1654 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001655 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656}
1657
1658
Thomas Wouters477c8d52006-05-27 19:21:47 +00001659Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001660do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1661{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001662 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001663 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001664 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001665 char *sep;
1666 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001667 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001668
Guido van Rossum98297ee2007-11-06 21:34:58 +00001669 if (_getbuffer(sepobj, &vsep) < 0)
1670 return NULL;
1671 sep = vsep.buf;
1672 seplen = vsep.len;
1673
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001674 i = 0;
1675 if (striptype != RIGHTSTRIP) {
1676 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1677 i++;
1678 }
1679 }
1680
1681 j = len;
1682 if (striptype != LEFTSTRIP) {
1683 do {
1684 j--;
1685 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1686 j++;
1687 }
1688
Guido van Rossum98297ee2007-11-06 21:34:58 +00001689 PyObject_ReleaseBuffer(sepobj, &vsep);
1690
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001691 if (i == 0 && j == len && PyString_CheckExact(self)) {
1692 Py_INCREF(self);
1693 return (PyObject*)self;
1694 }
1695 else
1696 return PyString_FromStringAndSize(s+i, j-i);
1697}
1698
1699
Thomas Wouters477c8d52006-05-27 19:21:47 +00001700Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001701do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702{
1703 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001704 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706 i = 0;
1707 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001708 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709 i++;
1710 }
1711 }
1712
1713 j = len;
1714 if (striptype != LEFTSTRIP) {
1715 do {
1716 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001717 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718 j++;
1719 }
1720
Tim Peters8fa5dd02001-09-12 02:18:30 +00001721 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 Py_INCREF(self);
1723 return (PyObject*)self;
1724 }
1725 else
1726 return PyString_FromStringAndSize(s+i, j-i);
1727}
1728
1729
Thomas Wouters477c8d52006-05-27 19:21:47 +00001730Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001731do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1732{
1733 PyObject *sep = NULL;
1734
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001735 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001736 return NULL;
1737
1738 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001739 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001740 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001741 return do_strip(self, striptype);
1742}
1743
1744
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001745PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001746"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001748Strip leading and trailing bytes contained in the argument.\n\
1749If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001751string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001753 if (PyTuple_GET_SIZE(args) == 0)
1754 return do_strip(self, BOTHSTRIP); /* Common case */
1755 else
1756 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757}
1758
1759
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001760PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001761"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001763Strip leading bytes contained in the argument.\n\
1764If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001766string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001768 if (PyTuple_GET_SIZE(args) == 0)
1769 return do_strip(self, LEFTSTRIP); /* Common case */
1770 else
1771 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772}
1773
1774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001776"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001778Strip trailing bytes contained in the argument.\n\
1779If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, RIGHTSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001791"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001793Return the number of non-overlapping occurrences of substring sub in\n\
1794string S[start:end]. Optional arguments start and end are interpreted\n\
1795as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001798string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001800 PyObject *sub_obj;
1801 const char *str = PyString_AS_STRING(self), *sub;
1802 Py_ssize_t sub_len;
1803 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804
Thomas Wouters477c8d52006-05-27 19:21:47 +00001805 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1806 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001808
Thomas Wouters477c8d52006-05-27 19:21:47 +00001809 if (PyString_Check(sub_obj)) {
1810 sub = PyString_AS_STRING(sub_obj);
1811 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001812 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001813 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001814 return NULL;
1815
Thomas Wouters477c8d52006-05-27 19:21:47 +00001816 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001817
Thomas Wouters477c8d52006-05-27 19:21:47 +00001818 return PyInt_FromSsize_t(
1819 stringlib_count(str + start, end - start, sub, sub_len)
1820 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001825"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001827Return a copy of B, where all characters occurring in the\n\
1828optional argument deletechars are removed, and the remaining\n\
1829characters have been mapped through the given translation\n\
1830table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831
1832static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001833string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001836 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001837 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001839 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841 PyObject *result;
1842 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001845 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001846 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848
1849 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001850 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001851 tablen = PyString_GET_SIZE(tableobj);
1852 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001853 else if (tableobj == Py_None) {
1854 table = NULL;
1855 tablen = 256;
1856 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001857 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859
Martin v. Löwis00b61272002-12-12 20:03:19 +00001860 if (tablen != 256) {
1861 PyErr_SetString(PyExc_ValueError,
1862 "translation table must be 256 characters long");
1863 return NULL;
1864 }
1865
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 if (delobj != NULL) {
1867 if (PyString_Check(delobj)) {
1868 del_table = PyString_AS_STRING(delobj);
1869 dellen = PyString_GET_SIZE(delobj);
1870 }
1871 else if (PyUnicode_Check(delobj)) {
1872 PyErr_SetString(PyExc_TypeError,
1873 "deletions are implemented differently for unicode");
1874 return NULL;
1875 }
1876 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1877 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001878 }
1879 else {
1880 del_table = NULL;
1881 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882 }
1883
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001884 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885 result = PyString_FromStringAndSize((char *)NULL, inlen);
1886 if (result == NULL)
1887 return NULL;
1888 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001889 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890
Guido van Rossumd8faa362007-04-27 19:54:29 +00001891 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 /* If no deletions are required, use faster code */
1893 for (i = inlen; --i >= 0; ) {
1894 c = Py_CHARMASK(*input++);
1895 if (Py_CHARMASK((*output++ = table[c])) != c)
1896 changed = 1;
1897 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001898 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001899 return result;
1900 Py_DECREF(result);
1901 Py_INCREF(input_obj);
1902 return input_obj;
1903 }
1904
Guido van Rossumd8faa362007-04-27 19:54:29 +00001905 if (table == NULL) {
1906 for (i = 0; i < 256; i++)
1907 trans_table[i] = Py_CHARMASK(i);
1908 } else {
1909 for (i = 0; i < 256; i++)
1910 trans_table[i] = Py_CHARMASK(table[i]);
1911 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912
1913 for (i = 0; i < dellen; i++)
1914 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1915
1916 for (i = inlen; --i >= 0; ) {
1917 c = Py_CHARMASK(*input++);
1918 if (trans_table[c] != -1)
1919 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1920 continue;
1921 changed = 1;
1922 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001923 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 Py_DECREF(result);
1925 Py_INCREF(input_obj);
1926 return input_obj;
1927 }
1928 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001929 if (inlen > 0)
1930 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931 return result;
1932}
1933
1934
Thomas Wouters477c8d52006-05-27 19:21:47 +00001935#define FORWARD 1
1936#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937
Thomas Wouters477c8d52006-05-27 19:21:47 +00001938/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
Thomas Wouters477c8d52006-05-27 19:21:47 +00001940#define findchar(target, target_len, c) \
1941 ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942
Thomas Wouters477c8d52006-05-27 19:21:47 +00001943/* String ops must return a string. */
1944/* If the object is subclass of string, create a copy */
1945Py_LOCAL(PyStringObject *)
1946return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001948 if (PyString_CheckExact(self)) {
1949 Py_INCREF(self);
1950 return self;
1951 }
1952 return (PyStringObject *)PyString_FromStringAndSize(
1953 PyString_AS_STRING(self),
1954 PyString_GET_SIZE(self));
1955}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956
Thomas Wouters477c8d52006-05-27 19:21:47 +00001957Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001958countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001959{
1960 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001961 const char *start=target;
1962 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
Thomas Wouters477c8d52006-05-27 19:21:47 +00001964 while ( (start=findchar(start, end-start, c)) != NULL ) {
1965 count++;
1966 if (count >= maxcount)
1967 break;
1968 start += 1;
1969 }
1970 return count;
1971}
1972
1973Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001974findstring(const char *target, Py_ssize_t target_len,
1975 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001976 Py_ssize_t start,
1977 Py_ssize_t end,
1978 int direction)
1979{
1980 if (start < 0) {
1981 start += target_len;
1982 if (start < 0)
1983 start = 0;
1984 }
1985 if (end > target_len) {
1986 end = target_len;
1987 } else if (end < 0) {
1988 end += target_len;
1989 if (end < 0)
1990 end = 0;
1991 }
1992
1993 /* zero-length substrings always match at the first attempt */
1994 if (pattern_len == 0)
1995 return (direction > 0) ? start : end;
1996
1997 end -= pattern_len;
1998
1999 if (direction < 0) {
2000 for (; end >= start; end--)
2001 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2002 return end;
2003 } else {
2004 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002005 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002006 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 }
2008 return -1;
2009}
2010
Thomas Wouters477c8d52006-05-27 19:21:47 +00002011Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002012countstring(const char *target, Py_ssize_t target_len,
2013 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002014 Py_ssize_t start,
2015 Py_ssize_t end,
2016 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002018 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019
Thomas Wouters477c8d52006-05-27 19:21:47 +00002020 if (start < 0) {
2021 start += target_len;
2022 if (start < 0)
2023 start = 0;
2024 }
2025 if (end > target_len) {
2026 end = target_len;
2027 } else if (end < 0) {
2028 end += target_len;
2029 if (end < 0)
2030 end = 0;
2031 }
2032
2033 /* zero-length substrings match everywhere */
2034 if (pattern_len == 0 || maxcount == 0) {
2035 if (target_len+1 < maxcount)
2036 return target_len+1;
2037 return maxcount;
2038 }
2039
2040 end -= pattern_len;
2041 if (direction < 0) {
2042 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002043 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002044 count++;
2045 if (--maxcount <= 0) break;
2046 end -= pattern_len-1;
2047 }
2048 } else {
2049 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002050 if (Py_STRING_MATCH(target, start,
2051 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002052 count++;
2053 if (--maxcount <= 0)
2054 break;
2055 start += pattern_len-1;
2056 }
2057 }
2058 return count;
2059}
2060
2061
2062/* Algorithms for different cases of string replacement */
2063
2064/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2065Py_LOCAL(PyStringObject *)
2066replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002067 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002068 Py_ssize_t maxcount)
2069{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002070 char *self_s, *result_s;
2071 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002072 Py_ssize_t count, i, product;
2073 PyStringObject *result;
2074
2075 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002076
Thomas Wouters477c8d52006-05-27 19:21:47 +00002077 /* 1 at the end plus 1 after every character */
2078 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002079 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002080 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002081
Thomas Wouters477c8d52006-05-27 19:21:47 +00002082 /* Check for overflow */
2083 /* result_len = count * to_len + self_len; */
2084 product = count * to_len;
2085 if (product / to_len != count) {
2086 PyErr_SetString(PyExc_OverflowError,
2087 "replace string is too long");
2088 return NULL;
2089 }
2090 result_len = product + self_len;
2091 if (result_len < 0) {
2092 PyErr_SetString(PyExc_OverflowError,
2093 "replace string is too long");
2094 return NULL;
2095 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002096
Thomas Wouters477c8d52006-05-27 19:21:47 +00002097 if (! (result = (PyStringObject *)
2098 PyString_FromStringAndSize(NULL, result_len)) )
2099 return NULL;
2100
2101 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002102 result_s = PyString_AS_STRING(result);
2103
2104 /* TODO: special case single character, which doesn't need memcpy */
2105
2106 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002107 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002108 result_s += to_len;
2109 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002110
Thomas Wouters477c8d52006-05-27 19:21:47 +00002111 for (i=0; i<count; i++) {
2112 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002113 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002114 result_s += to_len;
2115 }
2116
2117 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002118 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002119
2120 return result;
2121}
2122
2123/* Special case for deleting a single character */
2124/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2125Py_LOCAL(PyStringObject *)
2126replace_delete_single_character(PyStringObject *self,
2127 char from_c, Py_ssize_t maxcount)
2128{
2129 char *self_s, *result_s;
2130 char *start, *next, *end;
2131 Py_ssize_t self_len, result_len;
2132 Py_ssize_t count;
2133 PyStringObject *result;
2134
2135 self_len = PyString_GET_SIZE(self);
2136 self_s = PyString_AS_STRING(self);
2137
2138 count = countchar(self_s, self_len, from_c, maxcount);
2139 if (count == 0) {
2140 return return_self(self);
2141 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002142
Thomas Wouters477c8d52006-05-27 19:21:47 +00002143 result_len = self_len - count; /* from_len == 1 */
2144 assert(result_len>=0);
2145
2146 if ( (result = (PyStringObject *)
2147 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2148 return NULL;
2149 result_s = PyString_AS_STRING(result);
2150
2151 start = self_s;
2152 end = self_s + self_len;
2153 while (count-- > 0) {
2154 next = findchar(start, end-start, from_c);
2155 if (next == NULL)
2156 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002157 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002158 result_s += (next-start);
2159 start = next+1;
2160 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002161 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002162
Thomas Wouters477c8d52006-05-27 19:21:47 +00002163 return result;
2164}
2165
2166/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2167
2168Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002169replace_delete_substring(PyStringObject *self,
2170 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002171 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002172 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002173 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002174 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002175 Py_ssize_t count, offset;
2176 PyStringObject *result;
2177
2178 self_len = PyString_GET_SIZE(self);
2179 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002180
2181 count = countstring(self_s, self_len,
2182 from_s, from_len,
2183 0, self_len, 1,
2184 maxcount);
2185
2186 if (count == 0) {
2187 /* no matches */
2188 return return_self(self);
2189 }
2190
2191 result_len = self_len - (count * from_len);
2192 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002193
Thomas Wouters477c8d52006-05-27 19:21:47 +00002194 if ( (result = (PyStringObject *)
2195 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2196 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002197
Thomas Wouters477c8d52006-05-27 19:21:47 +00002198 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002199
Thomas Wouters477c8d52006-05-27 19:21:47 +00002200 start = self_s;
2201 end = self_s + self_len;
2202 while (count-- > 0) {
2203 offset = findstring(start, end-start,
2204 from_s, from_len,
2205 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206 if (offset == -1)
2207 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002208 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002209
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002210 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002211
Thomas Wouters477c8d52006-05-27 19:21:47 +00002212 result_s += (next-start);
2213 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002215 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002216 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217}
2218
Thomas Wouters477c8d52006-05-27 19:21:47 +00002219/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2220Py_LOCAL(PyStringObject *)
2221replace_single_character_in_place(PyStringObject *self,
2222 char from_c, char to_c,
2223 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002225 char *self_s, *result_s, *start, *end, *next;
2226 Py_ssize_t self_len;
2227 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002228
Thomas Wouters477c8d52006-05-27 19:21:47 +00002229 /* The result string will be the same size */
2230 self_s = PyString_AS_STRING(self);
2231 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002232
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002234
Thomas Wouters477c8d52006-05-27 19:21:47 +00002235 if (next == NULL) {
2236 /* No matches; return the original string */
2237 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002239
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240 /* Need to make a new string */
2241 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2242 if (result == NULL)
2243 return NULL;
2244 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002245 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002246
Thomas Wouters477c8d52006-05-27 19:21:47 +00002247 /* change everything in-place, starting with this one */
2248 start = result_s + (next-self_s);
2249 *start = to_c;
2250 start++;
2251 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002252
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253 while (--maxcount > 0) {
2254 next = findchar(start, end-start, from_c);
2255 if (next == NULL)
2256 break;
2257 *next = to_c;
2258 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002259 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002260
Thomas Wouters477c8d52006-05-27 19:21:47 +00002261 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262}
2263
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2265Py_LOCAL(PyStringObject *)
2266replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002267 const char *from_s, Py_ssize_t from_len,
2268 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 Py_ssize_t maxcount)
2270{
2271 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272 char *self_s;
2273 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002275
Thomas Wouters477c8d52006-05-27 19:21:47 +00002276 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002277
Thomas Wouters477c8d52006-05-27 19:21:47 +00002278 self_s = PyString_AS_STRING(self);
2279 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280
Thomas Wouters477c8d52006-05-27 19:21:47 +00002281 offset = findstring(self_s, self_len,
2282 from_s, from_len,
2283 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002284 if (offset == -1) {
2285 /* No matches; return the original string */
2286 return return_self(self);
2287 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002288
Thomas Wouters477c8d52006-05-27 19:21:47 +00002289 /* Need to make a new string */
2290 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2291 if (result == NULL)
2292 return NULL;
2293 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002294 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002295
Thomas Wouters477c8d52006-05-27 19:21:47 +00002296 /* change everything in-place, starting with this one */
2297 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002298 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002299 start += from_len;
2300 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002301
Thomas Wouters477c8d52006-05-27 19:21:47 +00002302 while ( --maxcount > 0) {
2303 offset = findstring(start, end-start,
2304 from_s, from_len,
2305 0, end-start, FORWARD);
2306 if (offset==-1)
2307 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002308 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002309 start += offset+from_len;
2310 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002311
Thomas Wouters477c8d52006-05-27 19:21:47 +00002312 return result;
2313}
2314
2315/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2316Py_LOCAL(PyStringObject *)
2317replace_single_character(PyStringObject *self,
2318 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002319 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002320 Py_ssize_t maxcount)
2321{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002323 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002324 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002325 Py_ssize_t count, product;
2326 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002327
Thomas Wouters477c8d52006-05-27 19:21:47 +00002328 self_s = PyString_AS_STRING(self);
2329 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330
Thomas Wouters477c8d52006-05-27 19:21:47 +00002331 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002332 if (count == 0) {
2333 /* no matches, return unchanged */
2334 return return_self(self);
2335 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002336
Thomas Wouters477c8d52006-05-27 19:21:47 +00002337 /* use the difference between current and new, hence the "-1" */
2338 /* result_len = self_len + count * (to_len-1) */
2339 product = count * (to_len-1);
2340 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002341 PyErr_SetString(PyExc_OverflowError,
2342 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002343 return NULL;
2344 }
2345 result_len = self_len + product;
2346 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002347 PyErr_SetString(PyExc_OverflowError,
2348 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002349 return NULL;
2350 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002351
Thomas Wouters477c8d52006-05-27 19:21:47 +00002352 if ( (result = (PyStringObject *)
2353 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2354 return NULL;
2355 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 start = self_s;
2358 end = self_s + self_len;
2359 while (count-- > 0) {
2360 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002361 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002362 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002363
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364 if (next == start) {
2365 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002366 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002367 result_s += to_len;
2368 start += 1;
2369 } else {
2370 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002371 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002372 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002373 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 result_s += to_len;
2375 start = next+1;
2376 }
2377 }
2378 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002379 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002380
Thomas Wouters477c8d52006-05-27 19:21:47 +00002381 return result;
2382}
2383
2384/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2385Py_LOCAL(PyStringObject *)
2386replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002387 const char *from_s, Py_ssize_t from_len,
2388 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002389 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002390 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002391 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002392 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002393 Py_ssize_t count, offset, product;
2394 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002395
Thomas Wouters477c8d52006-05-27 19:21:47 +00002396 self_s = PyString_AS_STRING(self);
2397 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002398
Thomas Wouters477c8d52006-05-27 19:21:47 +00002399 count = countstring(self_s, self_len,
2400 from_s, from_len,
2401 0, self_len, FORWARD, maxcount);
2402 if (count == 0) {
2403 /* no matches, return unchanged */
2404 return return_self(self);
2405 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002406
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 /* Check for overflow */
2408 /* result_len = self_len + count * (to_len-from_len) */
2409 product = count * (to_len-from_len);
2410 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002411 PyErr_SetString(PyExc_OverflowError,
2412 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002413 return NULL;
2414 }
2415 result_len = self_len + product;
2416 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002417 PyErr_SetString(PyExc_OverflowError,
2418 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002419 return NULL;
2420 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002421
Thomas Wouters477c8d52006-05-27 19:21:47 +00002422 if ( (result = (PyStringObject *)
2423 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2424 return NULL;
2425 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002426
Thomas Wouters477c8d52006-05-27 19:21:47 +00002427 start = self_s;
2428 end = self_s + self_len;
2429 while (count-- > 0) {
2430 offset = findstring(start, end-start,
2431 from_s, from_len,
2432 0, end-start, FORWARD);
2433 if (offset == -1)
2434 break;
2435 next = start+offset;
2436 if (next == start) {
2437 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002438 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002439 result_s += to_len;
2440 start += from_len;
2441 } else {
2442 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002443 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002445 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002446 result_s += to_len;
2447 start = next+from_len;
2448 }
2449 }
2450 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002451 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002452
Thomas Wouters477c8d52006-05-27 19:21:47 +00002453 return result;
2454}
2455
2456
2457Py_LOCAL(PyStringObject *)
2458replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459 const char *from_s, Py_ssize_t from_len,
2460 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002461 Py_ssize_t maxcount)
2462{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002463 if (maxcount < 0) {
2464 maxcount = PY_SSIZE_T_MAX;
2465 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2466 /* nothing to do; return the original string */
2467 return return_self(self);
2468 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469
Thomas Wouters477c8d52006-05-27 19:21:47 +00002470 if (maxcount == 0 ||
2471 (from_len == 0 && to_len == 0)) {
2472 /* nothing to do; return the original string */
2473 return return_self(self);
2474 }
2475
2476 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002477
Thomas Wouters477c8d52006-05-27 19:21:47 +00002478 if (from_len == 0) {
2479 /* insert the 'to' string everywhere. */
2480 /* >>> "Python".replace("", ".") */
2481 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002482 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002483 }
2484
2485 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2486 /* point for an empty self string to generate a non-empty string */
2487 /* Special case so the remaining code always gets a non-empty string */
2488 if (PyString_GET_SIZE(self) == 0) {
2489 return return_self(self);
2490 }
2491
2492 if (to_len == 0) {
2493 /* delete all occurances of 'from' string */
2494 if (from_len == 1) {
2495 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002496 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002497 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002498 return replace_delete_substring(self, from_s,
2499 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002500 }
2501 }
2502
2503 /* Handle special case where both strings have the same length */
2504
2505 if (from_len == to_len) {
2506 if (from_len == 1) {
2507 return replace_single_character_in_place(
2508 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002509 from_s[0],
2510 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002511 maxcount);
2512 } else {
2513 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002514 self, from_s, from_len, to_s, to_len,
2515 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002516 }
2517 }
2518
2519 /* Otherwise use the more generic algorithms */
2520 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002521 return replace_single_character(self, from_s[0],
2522 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002523 } else {
2524 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002525 return replace_substring(self, from_s, from_len, to_s, to_len,
2526 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002527 }
2528}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002530PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002531"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002533Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002534old replaced by new. If the optional argument count is\n\
2535given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002536
2537static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002538string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002540 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002541 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002542 const char *from_s, *to_s;
2543 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544
Thomas Wouters477c8d52006-05-27 19:21:47 +00002545 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547
Thomas Wouters477c8d52006-05-27 19:21:47 +00002548 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002549 from_s = PyString_AS_STRING(from);
2550 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002552 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 return NULL;
2554
Thomas Wouters477c8d52006-05-27 19:21:47 +00002555 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002556 to_s = PyString_AS_STRING(to);
2557 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002559 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002560 return NULL;
2561
Thomas Wouters477c8d52006-05-27 19:21:47 +00002562 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002563 from_s, from_len,
2564 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565}
2566
Thomas Wouters477c8d52006-05-27 19:21:47 +00002567/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002568
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002569/* Matches the end (direction >= 0) or start (direction < 0) of self
2570 * against substr, using the start and end arguments. Returns
2571 * -1 on error, 0 if not found and 1 if found.
2572 */
2573Py_LOCAL(int)
2574_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2575 Py_ssize_t end, int direction)
2576{
2577 Py_ssize_t len = PyString_GET_SIZE(self);
2578 Py_ssize_t slen;
2579 const char* sub;
2580 const char* str;
2581
2582 if (PyString_Check(substr)) {
2583 sub = PyString_AS_STRING(substr);
2584 slen = PyString_GET_SIZE(substr);
2585 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002586 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2587 return -1;
2588 str = PyString_AS_STRING(self);
2589
2590 string_adjust_indices(&start, &end, len);
2591
2592 if (direction < 0) {
2593 /* startswith */
2594 if (start+slen > len)
2595 return 0;
2596 } else {
2597 /* endswith */
2598 if (end-start < slen || start > len)
2599 return 0;
2600
2601 if (end-slen > start)
2602 start = end - slen;
2603 }
2604 if (end-start >= slen)
2605 return ! memcmp(str+start, sub, slen);
2606 return 0;
2607}
2608
2609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002610PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002611"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002612\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002613Return True if B starts with the specified prefix, False otherwise.\n\
2614With optional start, test B beginning at that position.\n\
2615With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002616prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617
2618static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002619string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002620{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002621 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002622 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002623 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002624 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625
Guido van Rossumc6821402000-05-08 14:08:05 +00002626 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2627 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002629 if (PyTuple_Check(subobj)) {
2630 Py_ssize_t i;
2631 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2632 result = _string_tailmatch(self,
2633 PyTuple_GET_ITEM(subobj, i),
2634 start, end, -1);
2635 if (result == -1)
2636 return NULL;
2637 else if (result) {
2638 Py_RETURN_TRUE;
2639 }
2640 }
2641 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002643 result = _string_tailmatch(self, subobj, start, end, -1);
2644 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002645 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002646 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002647 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002648}
2649
2650
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002651PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002652"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002653\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002654Return True if B ends with the specified suffix, False otherwise.\n\
2655With optional start, test B beginning at that position.\n\
2656With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002657suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658
2659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002660string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002661{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002662 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002663 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002664 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666
Guido van Rossumc6821402000-05-08 14:08:05 +00002667 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2668 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002670 if (PyTuple_Check(subobj)) {
2671 Py_ssize_t i;
2672 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2673 result = _string_tailmatch(self,
2674 PyTuple_GET_ITEM(subobj, i),
2675 start, end, +1);
2676 if (result == -1)
2677 return NULL;
2678 else if (result) {
2679 Py_RETURN_TRUE;
2680 }
2681 }
2682 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002683 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002684 result = _string_tailmatch(self, subobj, start, end, +1);
2685 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002686 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002687 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002689}
2690
2691
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002692PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002693"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002694\n\
2695Decodes S using the codec registered for encoding. encoding defaults\n\
2696to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002697handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2698a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002699as well as any other name registerd with codecs.register_error that is\n\
2700able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002701
2702static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002703string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002704{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002705 const char *encoding = NULL;
2706 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002707
Guido van Rossum98297ee2007-11-06 21:34:58 +00002708 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2709 return NULL;
2710 if (encoding == NULL)
2711 encoding = PyUnicode_GetDefaultEncoding();
2712 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002713}
2714
2715
Guido van Rossumae404e22007-10-26 21:46:44 +00002716PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002717"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002718\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002719Create a bytes object from a string of hexadecimal numbers.\n\
2720Spaces between two numbers are accepted.\n\
2721Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002722
2723static int
2724hex_digit_to_int(Py_UNICODE c)
2725{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002726 if (c >= 128)
2727 return -1;
2728 if (ISDIGIT(c))
2729 return c - '0';
2730 else {
2731 if (ISUPPER(c))
2732 c = TOLOWER(c);
2733 if (c >= 'a' && c <= 'f')
2734 return c - 'a' + 10;
2735 }
2736 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002737}
2738
2739static PyObject *
2740string_fromhex(PyObject *cls, PyObject *args)
2741{
2742 PyObject *newstring, *hexobj;
2743 char *buf;
2744 Py_UNICODE *hex;
2745 Py_ssize_t hexlen, byteslen, i, j;
2746 int top, bot;
2747
2748 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2749 return NULL;
2750 assert(PyUnicode_Check(hexobj));
2751 hexlen = PyUnicode_GET_SIZE(hexobj);
2752 hex = PyUnicode_AS_UNICODE(hexobj);
2753 byteslen = hexlen/2; /* This overestimates if there are spaces */
2754 newstring = PyString_FromStringAndSize(NULL, byteslen);
2755 if (!newstring)
2756 return NULL;
2757 buf = PyString_AS_STRING(newstring);
2758 for (i = j = 0; i < hexlen; i += 2) {
2759 /* skip over spaces in the input */
2760 while (hex[i] == ' ')
2761 i++;
2762 if (i >= hexlen)
2763 break;
2764 top = hex_digit_to_int(hex[i]);
2765 bot = hex_digit_to_int(hex[i+1]);
2766 if (top == -1 || bot == -1) {
2767 PyErr_Format(PyExc_ValueError,
2768 "non-hexadecimal number found in "
2769 "fromhex() arg at position %zd", i);
2770 goto error;
2771 }
2772 buf[j++] = (top << 4) + bot;
2773 }
2774 if (_PyString_Resize(&newstring, j) < 0)
2775 goto error;
2776 return newstring;
2777
2778 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002779 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002780 return NULL;
2781}
2782
2783
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002784static PyObject *
2785string_getnewargs(PyStringObject *v)
2786{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002787 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002788}
2789
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002790
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002791static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002792string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002793 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002794 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2795 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002796 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002797 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002798 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002799 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002800 endswith__doc__},
2801 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2802 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002803 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002804 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2805 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002806 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002807 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2808 _Py_isalnum__doc__},
2809 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2810 _Py_isalpha__doc__},
2811 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2812 _Py_isdigit__doc__},
2813 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2814 _Py_islower__doc__},
2815 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2816 _Py_isspace__doc__},
2817 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2818 _Py_istitle__doc__},
2819 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2820 _Py_isupper__doc__},
2821 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2822 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2823 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002824 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002825 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002826 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2827 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2828 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002829 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002830 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2831 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002832 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2833 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2834 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2835 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2836 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002837 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002838 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002839 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002840 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2841 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002842 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002843 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2844 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002845 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002846 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002847 {NULL, NULL} /* sentinel */
2848};
2849
Jeremy Hylton938ace62002-07-17 16:30:39 +00002850static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002851str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2852
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002853static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002854string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002855{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002856 PyObject *x = NULL, *it;
2857 PyObject *(*iternext)(PyObject *);
2858 const char *encoding = NULL;
2859 const char *errors = NULL;
2860 PyObject *new = NULL;
2861 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002862 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002863
Guido van Rossumae960af2001-08-30 03:11:59 +00002864 if (type != &PyString_Type)
2865 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002866 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002867 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002868 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002869 if (x == NULL) {
2870 if (encoding != NULL || errors != NULL) {
2871 PyErr_SetString(PyExc_TypeError,
2872 "encoding or errors without sequence "
2873 "argument");
2874 return NULL;
2875 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002876 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002877 }
2878
2879 if (PyUnicode_Check(x)) {
2880 /* Encode via the codec registry */
2881 if (encoding == NULL) {
2882 PyErr_SetString(PyExc_TypeError,
2883 "string argument without an encoding");
2884 return NULL;
2885 }
2886 new = PyCodec_Encode(x, encoding, errors);
2887 if (new == NULL)
2888 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002889 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002890 return new;
2891 }
2892
2893 /* If it's not unicode, there can't be encoding or errors */
2894 if (encoding != NULL || errors != NULL) {
2895 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002896 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002897 return NULL;
2898 }
2899
Guido van Rossum98297ee2007-11-06 21:34:58 +00002900 /* Is it an int? */
2901 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2902 if (size == -1 && PyErr_Occurred()) {
2903 PyErr_Clear();
2904 }
2905 else {
2906 if (size < 0) {
2907 PyErr_SetString(PyExc_ValueError, "negative count");
2908 return NULL;
2909 }
2910 new = PyString_FromStringAndSize(NULL, size);
2911 if (new == NULL) {
2912 return NULL;
2913 }
2914 if (size > 0) {
2915 memset(((PyStringObject*)new)->ob_sval, 0, size);
2916 }
2917 return new;
2918 }
2919
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002920 /* Use the modern buffer interface */
2921 if (PyObject_CheckBuffer(x)) {
2922 Py_buffer view;
2923 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2924 return NULL;
2925 new = PyString_FromStringAndSize(NULL, view.len);
2926 if (!new)
2927 goto fail;
2928 // XXX(brett.cannon): Better way to get to internal buffer?
2929 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2930 &view, view.len, 'C') < 0)
2931 goto fail;
2932 PyObject_ReleaseBuffer(x, &view);
2933 return new;
2934 fail:
2935 Py_XDECREF(new);
2936 PyObject_ReleaseBuffer(x, &view);
2937 return NULL;
2938 }
2939
Guido van Rossum98297ee2007-11-06 21:34:58 +00002940 /* For iterator version, create a string object and resize as needed */
2941 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2942 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2943 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002944 size = 64;
2945 new = PyString_FromStringAndSize(NULL, size);
2946 if (new == NULL)
2947 return NULL;
2948
2949 /* XXX Optimize this if the arguments is a list, tuple */
2950
2951 /* Get the iterator */
2952 it = PyObject_GetIter(x);
2953 if (it == NULL)
2954 goto error;
2955 // XXX(brett.cannon): No API for this?
2956 iternext = *Py_Type(it)->tp_iternext;
2957
2958 /* Run the iterator to exhaustion */
2959 for (i = 0; ; i++) {
2960 PyObject *item;
2961 Py_ssize_t value;
2962
2963 /* Get the next item */
2964 item = iternext(it);
2965 if (item == NULL) {
2966 if (PyErr_Occurred()) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002967 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
2968 goto error;
2969 PyErr_Clear();
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002970 }
2971 break;
2972 }
2973
2974 /* Interpret it as an int (__index__) */
2975 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2976 Py_DECREF(item);
2977 if (value == -1 && PyErr_Occurred())
2978 goto error;
2979
2980 /* Range check */
2981 if (value < 0 || value >= 256) {
2982 PyErr_SetString(PyExc_ValueError,
2983 "bytes must be in range(0, 256)");
2984 goto error;
2985 }
2986
2987 /* Append the byte */
2988 if (i >= size) {
2989 size *= 2;
2990 if (_PyString_Resize(&new, size) < 0)
2991 goto error;
2992 }
2993 ((PyStringObject *)new)->ob_sval[i] = value;
2994 }
2995 _PyString_Resize(&new, i);
2996
2997 /* Clean up and return success */
2998 Py_DECREF(it);
2999 return new;
3000
3001 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003002 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003003 Py_XDECREF(it);
3004 Py_DECREF(new);
3005 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003006}
3007
Guido van Rossumae960af2001-08-30 03:11:59 +00003008static PyObject *
3009str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3010{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003011 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003012 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003013
3014 assert(PyType_IsSubtype(type, &PyString_Type));
3015 tmp = string_new(&PyString_Type, args, kwds);
3016 if (tmp == NULL)
3017 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003018 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003019 n = PyString_GET_SIZE(tmp);
3020 pnew = type->tp_alloc(type, n);
3021 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003022 Py_MEMCPY(PyString_AS_STRING(pnew),
3023 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003024 ((PyStringObject *)pnew)->ob_shash =
3025 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003026 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003027 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003028 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003029}
3030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003031PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003032"bytes(iterable_of_ints) -> bytes.\n\
3033bytes(string, encoding[, errors]) -> bytes\n\
3034bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3035bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003036\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003037Construct an immutable array of bytes from:\n\
3038 - an iterable yielding integers in range(256)\n\
3039 - a text string encoded using the specified encoding\n\
3040 - a bytes or a buffer object\n\
3041 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003042
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003043static PyObject *str_iter(PyObject *seq);
3044
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003045PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003046 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003047 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003048 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003049 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003050 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003051 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003052 0, /* tp_getattr */
3053 0, /* tp_setattr */
3054 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003055 (reprfunc)string_repr, /* tp_repr */
3056 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003057 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003058 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003059 (hashfunc)string_hash, /* tp_hash */
3060 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003061 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003062 PyObject_GenericGetAttr, /* tp_getattro */
3063 0, /* tp_setattro */
3064 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003065 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3066 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003067 string_doc, /* tp_doc */
3068 0, /* tp_traverse */
3069 0, /* tp_clear */
3070 (richcmpfunc)string_richcompare, /* tp_richcompare */
3071 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003072 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003073 0, /* tp_iternext */
3074 string_methods, /* tp_methods */
3075 0, /* tp_members */
3076 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003077 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003078 0, /* tp_dict */
3079 0, /* tp_descr_get */
3080 0, /* tp_descr_set */
3081 0, /* tp_dictoffset */
3082 0, /* tp_init */
3083 0, /* tp_alloc */
3084 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003085 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003086};
3087
3088void
Fred Drakeba096332000-07-09 07:04:36 +00003089PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003090{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003092 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003093 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003094 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003095 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003096 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003097 *pv = NULL;
3098 return;
3099 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003100 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003101 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003102 *pv = v;
3103}
3104
Guido van Rossum013142a1994-08-30 08:19:36 +00003105void
Fred Drakeba096332000-07-09 07:04:36 +00003106PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003107{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003108 PyString_Concat(pv, w);
3109 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003110}
3111
3112
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003113/* The following function breaks the notion that strings are immutable:
3114 it changes the size of a string. We get away with this only if there
3115 is only one module referencing the object. You can also think of it
3116 as creating a new string object and destroying the old one, only
3117 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003118 already be known to some other part of the code...
3119 Note that if there's not enough memory to resize the string, the original
3120 string object at *pv is deallocated, *pv is set to NULL, an "out of
3121 memory" exception is set, and -1 is returned. Else (on success) 0 is
3122 returned, and the value in *pv may or may not be the same as on input.
3123 As always, an extra byte is allocated for a trailing \0 byte (newsize
3124 does *not* include that), and a trailing \0 byte is stored.
3125*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003126
3127int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003128_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003129{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003130 register PyObject *v;
3131 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003132 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003133 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003134 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003135 Py_DECREF(v);
3136 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003137 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003138 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003139 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003140 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003141 _Py_ForgetReference(v);
3142 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003143 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003144 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003145 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003146 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003147 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003148 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003149 _Py_NewReference(*pv);
3150 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003151 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003152 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003153 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003154 return 0;
3155}
Guido van Rossume5372401993-03-16 12:15:04 +00003156
Tim Peters38fd5b62000-09-21 05:43:11 +00003157/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3158 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3159 * Python's regular ints.
3160 * Return value: a new PyString*, or NULL if error.
3161 * . *pbuf is set to point into it,
3162 * *plen set to the # of chars following that.
3163 * Caller must decref it when done using pbuf.
3164 * The string starting at *pbuf is of the form
3165 * "-"? ("0x" | "0X")? digit+
3166 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003167 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003168 * There will be at least prec digits, zero-filled on the left if
3169 * necessary to get that many.
3170 * val object to be converted
3171 * flags bitmask of format flags; only F_ALT is looked at
3172 * prec minimum number of digits; 0-fill on left if needed
3173 * type a character in [duoxX]; u acts the same as d
3174 *
3175 * CAUTION: o, x and X conversions on regular ints can never
3176 * produce a '-' sign, but can for Python's unbounded ints.
3177 */
3178PyObject*
3179_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3180 char **pbuf, int *plen)
3181{
3182 PyObject *result = NULL;
3183 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003184 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003185 int sign; /* 1 if '-', else 0 */
3186 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003187 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003188 int numdigits; /* len == numnondigits + numdigits */
3189 int numnondigits = 0;
3190
Guido van Rossumddefaf32007-01-14 03:31:43 +00003191 /* Avoid exceeding SSIZE_T_MAX */
3192 if (prec > PY_SSIZE_T_MAX-3) {
3193 PyErr_SetString(PyExc_OverflowError,
3194 "precision too large");
3195 return NULL;
3196 }
3197
Tim Peters38fd5b62000-09-21 05:43:11 +00003198 switch (type) {
3199 case 'd':
3200 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003201 /* Special-case boolean: we want 0/1 */
3202 if (PyBool_Check(val))
3203 result = PyNumber_ToBase(val, 10);
3204 else
3205 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003206 break;
3207 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003208 numnondigits = 2;
3209 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003210 break;
3211 case 'x':
3212 case 'X':
3213 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003214 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003215 break;
3216 default:
3217 assert(!"'type' not in [duoxX]");
3218 }
3219 if (!result)
3220 return NULL;
3221
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003222 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003223 if (!buf) {
3224 Py_DECREF(result);
3225 return NULL;
3226 }
3227
Tim Peters38fd5b62000-09-21 05:43:11 +00003228 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003229 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003230 PyErr_BadInternalCall();
3231 return NULL;
3232 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003233 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003234 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003235 PyErr_SetString(PyExc_ValueError,
3236 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003237 return NULL;
3238 }
3239 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003240 if (buf[len-1] == 'L') {
3241 --len;
3242 buf[len] = '\0';
3243 }
3244 sign = buf[0] == '-';
3245 numnondigits += sign;
3246 numdigits = len - numnondigits;
3247 assert(numdigits > 0);
3248
Tim Petersfff53252001-04-12 18:38:48 +00003249 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003250 if (((flags & F_ALT) == 0 &&
3251 (type == 'o' || type == 'x' || type == 'X'))) {
3252 assert(buf[sign] == '0');
3253 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003254 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003255 numnondigits -= 2;
3256 buf += 2;
3257 len -= 2;
3258 if (sign)
3259 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003260 assert(len == numnondigits + numdigits);
3261 assert(numdigits > 0);
3262 }
3263
3264 /* Fill with leading zeroes to meet minimum width. */
3265 if (prec > numdigits) {
3266 PyObject *r1 = PyString_FromStringAndSize(NULL,
3267 numnondigits + prec);
3268 char *b1;
3269 if (!r1) {
3270 Py_DECREF(result);
3271 return NULL;
3272 }
3273 b1 = PyString_AS_STRING(r1);
3274 for (i = 0; i < numnondigits; ++i)
3275 *b1++ = *buf++;
3276 for (i = 0; i < prec - numdigits; i++)
3277 *b1++ = '0';
3278 for (i = 0; i < numdigits; i++)
3279 *b1++ = *buf++;
3280 *b1 = '\0';
3281 Py_DECREF(result);
3282 result = r1;
3283 buf = PyString_AS_STRING(result);
3284 len = numnondigits + prec;
3285 }
3286
3287 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003288 if (type == 'X') {
3289 /* Need to convert all lower case letters to upper case.
3290 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003291 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003292 if (buf[i] >= 'a' && buf[i] <= 'x')
3293 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003294 }
3295 *pbuf = buf;
3296 *plen = len;
3297 return result;
3298}
3299
Guido van Rossum8cf04761997-08-02 02:57:45 +00003300void
Fred Drakeba096332000-07-09 07:04:36 +00003301PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003302{
3303 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003304 for (i = 0; i < UCHAR_MAX + 1; i++) {
3305 Py_XDECREF(characters[i]);
3306 characters[i] = NULL;
3307 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003308 Py_XDECREF(nullstring);
3309 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003310}
Barry Warsawa903ad982001-02-23 16:40:48 +00003311
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003312/*********************** Str Iterator ****************************/
3313
3314typedef struct {
3315 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003316 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003317 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3318} striterobject;
3319
3320static void
3321striter_dealloc(striterobject *it)
3322{
3323 _PyObject_GC_UNTRACK(it);
3324 Py_XDECREF(it->it_seq);
3325 PyObject_GC_Del(it);
3326}
3327
3328static int
3329striter_traverse(striterobject *it, visitproc visit, void *arg)
3330{
3331 Py_VISIT(it->it_seq);
3332 return 0;
3333}
3334
3335static PyObject *
3336striter_next(striterobject *it)
3337{
3338 PyStringObject *seq;
3339 PyObject *item;
3340
3341 assert(it != NULL);
3342 seq = it->it_seq;
3343 if (seq == NULL)
3344 return NULL;
3345 assert(PyString_Check(seq));
3346
3347 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00003348 item = PyInt_FromLong(
3349 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003350 if (item != NULL)
3351 ++it->it_index;
3352 return item;
3353 }
3354
3355 Py_DECREF(seq);
3356 it->it_seq = NULL;
3357 return NULL;
3358}
3359
3360static PyObject *
3361striter_len(striterobject *it)
3362{
3363 Py_ssize_t len = 0;
3364 if (it->it_seq)
3365 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
3366 return PyInt_FromSsize_t(len);
3367}
3368
Guido van Rossum49d6b072006-08-17 21:11:47 +00003369PyDoc_STRVAR(length_hint_doc,
3370 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003371
3372static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003373 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3374 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003375 {NULL, NULL} /* sentinel */
3376};
3377
3378PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003379 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003380 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003381 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003382 0, /* tp_itemsize */
3383 /* methods */
3384 (destructor)striter_dealloc, /* tp_dealloc */
3385 0, /* tp_print */
3386 0, /* tp_getattr */
3387 0, /* tp_setattr */
3388 0, /* tp_compare */
3389 0, /* tp_repr */
3390 0, /* tp_as_number */
3391 0, /* tp_as_sequence */
3392 0, /* tp_as_mapping */
3393 0, /* tp_hash */
3394 0, /* tp_call */
3395 0, /* tp_str */
3396 PyObject_GenericGetAttr, /* tp_getattro */
3397 0, /* tp_setattro */
3398 0, /* tp_as_buffer */
3399 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3400 0, /* tp_doc */
3401 (traverseproc)striter_traverse, /* tp_traverse */
3402 0, /* tp_clear */
3403 0, /* tp_richcompare */
3404 0, /* tp_weaklistoffset */
3405 PyObject_SelfIter, /* tp_iter */
3406 (iternextfunc)striter_next, /* tp_iternext */
3407 striter_methods, /* tp_methods */
3408 0,
3409};
3410
3411static PyObject *
3412str_iter(PyObject *seq)
3413{
3414 striterobject *it;
3415
3416 if (!PyString_Check(seq)) {
3417 PyErr_BadInternalCall();
3418 return NULL;
3419 }
3420 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3421 if (it == NULL)
3422 return NULL;
3423 it->it_index = 0;
3424 Py_INCREF(seq);
3425 it->it_seq = (PyStringObject *)seq;
3426 _PyObject_GC_TRACK(it);
3427 return (PyObject *)it;
3428}