blob: 8761477eec63d38347c7598e60023a856a86a7ce [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 For PyString_FromStringAndSize(), the parameter the parameter `str' is
46 either NULL or else points to a string containing at least `size' bytes.
47 For PyString_FromStringAndSize(), the string in the `str' parameter does
48 not have to be null-terminated. (Therefore it is safe to construct a
49 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
50 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
51 bytes (setting the last byte to the null terminating character) and you can
52 fill in the data yourself. If `str' is non-NULL then the resulting
53 PyString object must be treated as immutable and you must not fill in nor
54 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000056 The PyObject member `op->ob_size', which denotes the number of "extra
57 items" in a variable-size object, will contain the number of bytes
58 allocated for string data, not counting the null terminating character. It
59 is therefore equal to the equal to the `size' parameter (for
60 PyString_FromStringAndSize()) or the length of the string in the `str'
61 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
507static /*const*/ char *
508string_getbuffer(register PyObject *op)
509{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000510 char *s;
511 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000512 if (PyString_AsStringAndSize(op, &s, &len))
513 return NULL;
514 return s;
515}
516
Martin v. Löwis18e16552006-02-15 17:27:45 +0000517Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000518PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000520 if (PyUnicode_Check(op)) {
521 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
522 if (!op)
523 return -1;
524 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000525 if (!PyString_Check(op))
526 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000527 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528}
529
530/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000531PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000532{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000533 if (PyUnicode_Check(op)) {
534 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
535 if (!op)
536 return NULL;
537 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 if (!PyString_Check(op))
539 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000540 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000541}
542
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543int
544PyString_AsStringAndSize(register PyObject *obj,
545 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000547{
548 if (s == NULL) {
549 PyErr_BadInternalCall();
550 return -1;
551 }
552
553 if (!PyString_Check(obj)) {
554 if (PyUnicode_Check(obj)) {
555 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
556 if (obj == NULL)
557 return -1;
558 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000559 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000560 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000561 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000562 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000563 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000564 return -1;
565 }
566 }
567
568 *s = PyString_AS_STRING(obj);
569 if (len != NULL)
570 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000571 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000572 PyErr_SetString(PyExc_TypeError,
573 "expected string without null bytes");
574 return -1;
575 }
576 return 0;
577}
578
Thomas Wouters477c8d52006-05-27 19:21:47 +0000579/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000580/* Methods */
581
Thomas Wouters477c8d52006-05-27 19:21:47 +0000582#define STRINGLIB_CHAR char
583
584#define STRINGLIB_CMP memcmp
585#define STRINGLIB_LEN PyString_GET_SIZE
586#define STRINGLIB_NEW PyString_FromStringAndSize
587#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000588/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000589
590#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000591#define STRINGLIB_CHECK_EXACT PyString_CheckExact
592#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000593
594#include "stringlib/fastsearch.h"
595
596#include "stringlib/count.h"
597#include "stringlib/find.h"
598#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000599#include "stringlib/ctype.h"
600#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000601
602
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603PyObject *
604PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000607 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000608 Py_ssize_t length = Py_Size(op);
609 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000610 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000611 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000612 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000613 "bytes object is too large to make repr");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000614 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000615 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000617 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 }
619 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000620 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000621 register Py_UNICODE c;
622 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 int quote;
624
Guido van Rossum98297ee2007-11-06 21:34:58 +0000625 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000626 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000627 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000628 char *test, *start;
629 start = PyString_AS_STRING(op);
630 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000631 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000632 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000633 goto decided;
634 }
635 else if (*test == '\'')
636 quote = '"';
637 }
638 decided:
639 ;
640 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000641
Guido van Rossum98297ee2007-11-06 21:34:58 +0000642 *p++ = 'b', *p++ = quote;
643 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000644 /* There's at least enough room for a hex escape
645 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000646 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000647 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000648 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000650 else if (c == '\t')
651 *p++ = '\\', *p++ = 't';
652 else if (c == '\n')
653 *p++ = '\\', *p++ = 'n';
654 else if (c == '\r')
655 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000656 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000657 *p++ = '\\';
658 *p++ = 'x';
659 *p++ = hexdigits[(c & 0xf0) >> 4];
660 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000661 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000662 else
663 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000664 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000665 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000666 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000668 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
669 Py_DECREF(v);
670 return NULL;
671 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000672 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000673 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674}
675
Guido van Rossum189f1df2001-05-01 16:51:53 +0000676static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677string_repr(PyObject *op)
678{
679 return PyString_Repr(op, 1);
680}
681
682static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000683string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000684{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000685 if (Py_BytesWarningFlag) {
686 if (PyErr_WarnEx(PyExc_BytesWarning,
687 "str() on a bytes instance", 1))
688 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000689 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000690 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000691}
692
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000694string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000696 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697}
698
Guido van Rossum98297ee2007-11-06 21:34:58 +0000699/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000701string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000703 Py_ssize_t size;
704 Py_buffer va, vb;
705 PyObject *result = NULL;
706
707 va.len = -1;
708 vb.len = -1;
709 if (_getbuffer(a, &va) < 0 ||
710 _getbuffer(b, &vb) < 0) {
711 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
712 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
713 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000714 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000715
Guido van Rossum98297ee2007-11-06 21:34:58 +0000716 /* Optimize end cases */
717 if (va.len == 0 && PyString_CheckExact(b)) {
718 result = b;
719 Py_INCREF(result);
720 goto done;
721 }
722 if (vb.len == 0 && PyString_CheckExact(a)) {
723 result = a;
724 Py_INCREF(result);
725 goto done;
726 }
727
728 size = va.len + vb.len;
729 if (size < 0) {
730 PyErr_NoMemory();
731 goto done;
732 }
733
734 result = PyString_FromStringAndSize(NULL, size);
735 if (result != NULL) {
736 memcpy(PyString_AS_STRING(result), va.buf, va.len);
737 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
738 }
739
740 done:
741 if (va.len != -1)
742 PyObject_ReleaseBuffer(a, &va);
743 if (vb.len != -1)
744 PyObject_ReleaseBuffer(b, &vb);
745 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746}
747
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000749string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000751 register Py_ssize_t i;
752 register Py_ssize_t j;
753 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000755 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 if (n < 0)
757 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000758 /* watch out for overflows: the size can overflow int,
759 * and the # of bytes needed can overflow size_t
760 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000761 size = Py_Size(a) * n;
762 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000763 PyErr_SetString(PyExc_OverflowError,
764 "repeated string is too long");
765 return NULL;
766 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000767 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000768 Py_INCREF(a);
769 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 }
Tim Peterse7c05322004-06-27 17:24:49 +0000771 nbytes = (size_t)size;
772 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000773 PyErr_SetString(PyExc_OverflowError,
774 "repeated string is too long");
775 return NULL;
776 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000778 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000779 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000780 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000781 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000782 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000783 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000784 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000785 memset(op->ob_sval, a->ob_sval[0] , n);
786 return (PyObject *) op;
787 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000788 i = 0;
789 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000790 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
791 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000792 }
793 while (i < size) {
794 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000795 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000796 i += j;
797 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000798 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799}
800
Guido van Rossum9284a572000-03-07 15:53:43 +0000801static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000803{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000804 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
805 if (ival == -1 && PyErr_Occurred()) {
806 Py_buffer varg;
807 int pos;
808 PyErr_Clear();
809 if (_getbuffer(arg, &varg) < 0)
810 return -1;
811 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
812 varg.buf, varg.len, 0);
813 PyObject_ReleaseBuffer(arg, &varg);
814 return pos >= 0;
815 }
816 if (ival < 0 || ival >= 256) {
817 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
818 return -1;
819 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000820
Guido van Rossum98297ee2007-11-06 21:34:58 +0000821 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
822}
823
824static PyObject *
825string_item(PyStringObject *a, register Py_ssize_t i)
826{
827 if (i < 0 || i >= Py_Size(a)) {
828 PyErr_SetString(PyExc_IndexError, "string index out of range");
829 return NULL;
830 }
831 return PyInt_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000832}
833
Martin v. Löwiscd353062001-05-24 16:56:35 +0000834static PyObject*
835string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000837 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000838 Py_ssize_t len_a, len_b;
839 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000840 PyObject *result;
841
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000842 /* Make sure both arguments are strings. */
843 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000844 if (Py_BytesWarningFlag && (op == Py_EQ) &&
845 (PyObject_IsInstance((PyObject*)a,
846 (PyObject*)&PyUnicode_Type) ||
847 PyObject_IsInstance((PyObject*)b,
848 (PyObject*)&PyUnicode_Type))) {
849 if (PyErr_WarnEx(PyExc_BytesWarning,
850 "Comparsion between bytes and string", 1))
851 return NULL;
852 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 result = Py_NotImplemented;
854 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000855 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000856 if (a == b) {
857 switch (op) {
858 case Py_EQ:case Py_LE:case Py_GE:
859 result = Py_True;
860 goto out;
861 case Py_NE:case Py_LT:case Py_GT:
862 result = Py_False;
863 goto out;
864 }
865 }
866 if (op == Py_EQ) {
867 /* Supporting Py_NE here as well does not save
868 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000870 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000871 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000872 result = Py_True;
873 } else {
874 result = Py_False;
875 }
876 goto out;
877 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000878 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000879 min_len = (len_a < len_b) ? len_a : len_b;
880 if (min_len > 0) {
881 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
882 if (c==0)
883 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000884 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000885 c = 0;
886 if (c == 0)
887 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
888 switch (op) {
889 case Py_LT: c = c < 0; break;
890 case Py_LE: c = c <= 0; break;
891 case Py_EQ: assert(0); break; /* unreachable */
892 case Py_NE: c = c != 0; break;
893 case Py_GT: c = c > 0; break;
894 case Py_GE: c = c >= 0; break;
895 default:
896 result = Py_NotImplemented;
897 goto out;
898 }
899 result = c ? Py_True : Py_False;
900 out:
901 Py_INCREF(result);
902 return result;
903}
904
905int
906_PyString_Eq(PyObject *o1, PyObject *o2)
907{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000908 PyStringObject *a = (PyStringObject*) o1;
909 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000910 return Py_Size(a) == Py_Size(b)
911 && *a->ob_sval == *b->ob_sval
912 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913}
914
Guido van Rossum9bfef441993-03-29 10:43:31 +0000915static long
Fred Drakeba096332000-07-09 07:04:36 +0000916string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000917{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000918 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919 register unsigned char *p;
920 register long x;
921
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 if (a->ob_shash != -1)
923 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000924 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000925 p = (unsigned char *) a->ob_sval;
926 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000927 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000928 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000929 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000930 if (x == -1)
931 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000932 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000933 return x;
934}
935
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000936static PyObject*
937string_subscript(PyStringObject* self, PyObject* item)
938{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000939 if (PyIndex_Check(item)) {
940 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000941 if (i == -1 && PyErr_Occurred())
942 return NULL;
943 if (i < 0)
944 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000945 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000946 PyErr_SetString(PyExc_IndexError,
947 "string index out of range");
948 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000949 }
950 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000951 }
952 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000953 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000954 char* source_buf;
955 char* result_buf;
956 PyObject* result;
957
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000958 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000959 PyString_GET_SIZE(self),
960 &start, &stop, &step, &slicelength) < 0) {
961 return NULL;
962 }
963
964 if (slicelength <= 0) {
965 return PyString_FromStringAndSize("", 0);
966 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000967 else if (start == 0 && step == 1 &&
968 slicelength == PyString_GET_SIZE(self) &&
969 PyString_CheckExact(self)) {
970 Py_INCREF(self);
971 return (PyObject *)self;
972 }
973 else if (step == 1) {
974 return PyString_FromStringAndSize(
975 PyString_AS_STRING(self) + start,
976 slicelength);
977 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000978 else {
979 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000980 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000981 if (result_buf == NULL)
982 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000983
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000984 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000985 cur += step, i++) {
986 result_buf[i] = source_buf[cur];
987 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000988
989 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000990 slicelength);
991 PyMem_Free(result_buf);
992 return result;
993 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000994 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000995 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000996 PyErr_Format(PyExc_TypeError,
997 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000998 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000999 return NULL;
1000 }
1001}
1002
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001003static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001004string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001005{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001006 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
1007 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001008}
1009
/* Sequence-protocol table for the type.  Slots set to 0 are not
   provided: there is no slice slot and no item/slice assignment. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1020
/* Mapping-protocol table for the type: length and subscript lookup are
   provided; the third (assignment) slot is left empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,         /*mp_length*/
    (binaryfunc)string_subscript,   /*mp_subscript*/
    0,                              /*mp_ass_subscript*/
};
1026
/* Buffer-protocol table for the type: only a getbuffer handler is
   installed; the second slot is left NULL. */
static PyBufferProcs string_as_buffer = {
    (getbufferproc)string_buffer_getbuffer,   /*bf_getbuffer*/
    NULL,
};
1031
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032
/* Indices into stripformat[] below, one per strip variant. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument-parsing format whose text after the ':' is
   the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for variant i: skips the 3-character "|O:" prefix of the
   corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001041
Thomas Wouters477c8d52006-05-27 19:21:47 +00001042
/* Don't call if length < 2 */
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before memcmp() checks the rest, which is why
   length must be at least 2.  Every argument is parenthesized so that
   expression arguments (e.g. `p + 1`) expand correctly; arguments are
   still evaluated more than once, so they must be free of side
   effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&        \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1048
1049
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* Initial list length for a split limited to `maxsplit` splits:
   maxsplit+1, capped at MAX_PREALLOC.  The argument is parenthesized so
   that expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1062
/* Append the slice data[left:right] to the result list being built.

   Relies on names in the expanding function: locals `str`, `list` and
   `count`, and an `onError` label that is jumped to on failure.  The
   first MAX_PREALLOC items are stored directly into the preallocated
   slots; later items go through PyList_Append(), which takes its own
   reference, so the local one is dropped either way.

   Wrapped in do { } while (0) so the macro behaves as one statement and
   is safe in an unbraced if/else. */
#define SPLIT_ADD(data, left, right) do {                       \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            else                                                \
                Py_DECREF(str);                                 \
        }                                                       \
        count++;                                                \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001079
/* Always force the list to the expected size. */
/* Stores the expanding function's local `count` as the size of `list`,
   so the list reports only the items actually added. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Cursor helpers for whitespace splitting.  Each one moves the index
   variable `i` in place: the SKIP_ forms advance while i < len, the
   RSKIP_ forms retreat while i >= 0, stopping at the first byte for
   which the ISSPACE() test no longer holds (SPACE forms) or first holds
   (NONSPACE forms). */
#define SKIP_SPACE(s, i, len)    { while (i<len && ISSPACE(s[i])) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !ISSPACE(s[i])) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && ISSPACE(s[i])) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !ISSPACE(s[i])) i--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001087
1088Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001089split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001091 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001092 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001093 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001094
1095 if (list == NULL)
1096 return NULL;
1097
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 i = j = 0;
1099
1100 while (maxsplit-- > 0) {
1101 SKIP_SPACE(s, i, len);
1102 if (i==len) break;
1103 j = i; i++;
1104 SKIP_NONSPACE(s, i, len);
1105 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001107
1108 if (i < len) {
1109 /* Only occurs when maxsplit was reached */
1110 /* Skip any remaining whitespace and copy to end of string */
1111 SKIP_SPACE(s, i, len);
1112 if (i != len)
1113 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001114 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001115 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001117 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001118 Py_DECREF(list);
1119 return NULL;
1120}
1121
Thomas Wouters477c8d52006-05-27 19:21:47 +00001122Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001123split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001124{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001125 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001126 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001127 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001128
1129 if (list == NULL)
1130 return NULL;
1131
Thomas Wouters477c8d52006-05-27 19:21:47 +00001132 i = j = 0;
1133 while ((j < len) && (maxcount-- > 0)) {
1134 for(; j<len; j++) {
1135 /* I found that using memchr makes no difference */
1136 if (s[j] == ch) {
1137 SPLIT_ADD(s, i, j);
1138 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001139 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001140 }
1141 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001142 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001143 if (i <= len) {
1144 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001145 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001146 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001147 return list;
1148
1149 onError:
1150 Py_DECREF(list);
1151 return NULL;
1152}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001153
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001154PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001155"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001157Return a list of the sections in B, using sep as the delimiter.\n\
1158If sep is not given, B is split on ASCII whitespace characters\n\
1159(space, tab, return, newline, formfeed, vertical tab).\n\
1160If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001161
/* Implementation of the split method.

   Arguments (both optional): a separator object and a maximum number of
   splits.  A negative or omitted maxsplit is replaced by
   PY_SSIZE_T_MAX.  With the separator omitted or None the work is
   handed to split_whitespace(); a one-byte separator is handed to
   split_char(); longer separators are searched for here.  An empty
   separator raises ValueError.

   Returns a new list, or NULL with an exception set.  The separator's
   buffer view is released on every path once it has been acquired. */
static PyObject *
string_split(PyStringObject *self, PyObject *args)
{
    Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
    Py_ssize_t maxsplit = -1, count=0;
    const char *s = PyString_AS_STRING(self), *sub;
    Py_buffer vsub;
    PyObject *list, *str, *subobj = Py_None;
#ifdef USE_FAST
    Py_ssize_t pos;
#endif

    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
        return NULL;
    if (maxsplit < 0)
        maxsplit = PY_SSIZE_T_MAX;
    if (subobj == Py_None)
        return split_whitespace(s, len, maxsplit);
    if (_getbuffer(subobj, &vsub) < 0)
        return NULL;
    sub = vsub.buf;
    n = vsub.len;

    if (n == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        PyObject_ReleaseBuffer(subobj, &vsub);
        return NULL;
    }
    else if (n == 1) {
        /* Copy the single byte out before releasing the view. */
        char ch = sub[0];
        PyObject_ReleaseBuffer(subobj, &vsub);
        return split_char(s, len, ch, maxsplit);
    }

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL) {
        PyObject_ReleaseBuffer(subobj, &vsub);
        return NULL;
    }

#ifdef USE_FAST
    /* i is the start of the current piece; each hit found by
       fastsearch() is relative to s+i. */
    i = j = 0;
    while (maxsplit-- > 0) {
        pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
        if (pos < 0)
            break;
        j = i+pos;
        SPLIT_ADD(s, i, j);
        i = j + n;
    }
#else
    /* i is the start of the current piece, j the scan position; the
       separator can only match where j+n still fits inside the data. */
    i = j = 0;
    while ((j+n <= len) && (maxsplit-- > 0)) {
        for (; j+n <= len; j++) {
            if (Py_STRING_MATCH(s, j, sub, n)) {
                SPLIT_ADD(s, i, j);
                i = j = j + n;
                break;
            }
        }
    }
#endif
    /* Whatever follows the last separator is the final item. */
    SPLIT_ADD(s, i, len);
    FIX_PREALLOC_SIZE(list);
    PyObject_ReleaseBuffer(subobj, &vsub);
    return list;

 onError:
    Py_DECREF(list);
    PyObject_ReleaseBuffer(subobj, &vsub);
    return NULL;
}
1234
Thomas Wouters477c8d52006-05-27 19:21:47 +00001235PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001236"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001237\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001238Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001239the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001240found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001241
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001242static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001243string_partition(PyStringObject *self, PyObject *sep_obj)
1244{
1245 const char *sep;
1246 Py_ssize_t sep_len;
1247
1248 if (PyString_Check(sep_obj)) {
1249 sep = PyString_AS_STRING(sep_obj);
1250 sep_len = PyString_GET_SIZE(sep_obj);
1251 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001252 else if (PyUnicode_Check(sep_obj))
1253 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001254 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1255 return NULL;
1256
1257 return stringlib_partition(
1258 (PyObject*) self,
1259 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1260 sep_obj, sep, sep_len
1261 );
1262}
1263
1264PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001265"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001266\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001267Searches for the separator sep in B, starting at the end of B,\n\
1268and returns the part before it, the separator itself, and the\n\
1269part after it. If the separator is not found, returns two empty\n\
1270bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001271
1272static PyObject *
1273string_rpartition(PyStringObject *self, PyObject *sep_obj)
1274{
1275 const char *sep;
1276 Py_ssize_t sep_len;
1277
1278 if (PyString_Check(sep_obj)) {
1279 sep = PyString_AS_STRING(sep_obj);
1280 sep_len = PyString_GET_SIZE(sep_obj);
1281 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001282 else if (PyUnicode_Check(sep_obj))
1283 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1285 return NULL;
1286
1287 return stringlib_rpartition(
1288 (PyObject*) self,
1289 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1290 sep_obj, sep, sep_len
1291 );
1292}
1293
1294Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001295rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001296{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001297 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001298 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001299 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001300
1301 if (list == NULL)
1302 return NULL;
1303
Thomas Wouters477c8d52006-05-27 19:21:47 +00001304 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001305
Thomas Wouters477c8d52006-05-27 19:21:47 +00001306 while (maxsplit-- > 0) {
1307 RSKIP_SPACE(s, i);
1308 if (i<0) break;
1309 j = i; i--;
1310 RSKIP_NONSPACE(s, i);
1311 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001312 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001313 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001314 /* Only occurs when maxsplit was reached. Skip any remaining
1315 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001316 RSKIP_SPACE(s, i);
1317 if (i >= 0)
1318 SPLIT_ADD(s, 0, i + 1);
1319
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001320 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001321 FIX_PREALLOC_SIZE(list);
1322 if (PyList_Reverse(list) < 0)
1323 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001324 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001325 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001326 Py_DECREF(list);
1327 return NULL;
1328}
1329
Thomas Wouters477c8d52006-05-27 19:21:47 +00001330Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001331rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001332{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001333 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001334 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001335 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001336
1337 if (list == NULL)
1338 return NULL;
1339
Thomas Wouters477c8d52006-05-27 19:21:47 +00001340 i = j = len - 1;
1341 while ((i >= 0) && (maxcount-- > 0)) {
1342 for (; i >= 0; i--) {
1343 if (s[i] == ch) {
1344 SPLIT_ADD(s, i + 1, j + 1);
1345 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001346 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001347 }
1348 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001349 }
1350 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001351 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001352 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001353 FIX_PREALLOC_SIZE(list);
1354 if (PyList_Reverse(list) < 0)
1355 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001356 return list;
1357
1358 onError:
1359 Py_DECREF(list);
1360 return NULL;
1361}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001362
1363PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001365\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366Return a list of the sections in B, using sep as the delimiter,\n\
1367starting at the end of B and working to the front.\n\
1368If sep is not given, B is split on ASCII whitespace characters\n\
1369(space, tab, return, newline, formfeed, vertical tab).\n\
1370If maxsplit is given, at most maxsplit splits are done.");
1371
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001372
/* Implementation of the rsplit method.

   Arguments (both optional): a separator object and a maximum number of
   splits.  A negative or omitted maxsplit is replaced by
   PY_SSIZE_T_MAX.  With the separator omitted or None the work is
   handed to rsplit_whitespace(); a one-byte separator is handed to
   rsplit_char(); longer separators are searched for here, from the end
   of the data towards the front.  An empty separator raises ValueError.

   Returns a new list, or NULL with an exception set.  The separator's
   buffer view is released on every path once it has been acquired. */
static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
{
    Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
    Py_ssize_t maxsplit = -1, count=0;
    const char *s = PyString_AS_STRING(self), *sub;
    Py_buffer vsub;
    PyObject *list, *str, *subobj = Py_None;

    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
        return NULL;
    if (maxsplit < 0)
        maxsplit = PY_SSIZE_T_MAX;
    if (subobj == Py_None)
        return rsplit_whitespace(s, len, maxsplit);
    if (_getbuffer(subobj, &vsub) < 0)
        return NULL;
    sub = vsub.buf;
    n = vsub.len;

    if (n == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        PyObject_ReleaseBuffer(subobj, &vsub);
        return NULL;
    }
    else if (n == 1) {
        /* Copy the single byte out before releasing the view. */
        char ch = sub[0];
        PyObject_ReleaseBuffer(subobj, &vsub);
        return rsplit_char(s, len, ch, maxsplit);
    }

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL) {
        PyObject_ReleaseBuffer(subobj, &vsub);
        return NULL;
    }

    /* j is the end (exclusive) of the current piece; i is the last
       position at which a separator of length n could still start. */
    j = len;
    i = j - n;

    while ( (i >= 0) && (maxsplit-- > 0) ) {
        for (; i>=0; i--) {
            if (Py_STRING_MATCH(s, i, sub, n)) {
                SPLIT_ADD(s, i + n, j);
                j = i;
                i -= n;
                break;
            }
        }
    }
    /* Whatever precedes the first separator found is the final item;
       items were collected back-to-front, so reverse the list. */
    SPLIT_ADD(s, 0, j);
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    PyObject_ReleaseBuffer(subobj, &vsub);
    return list;

onError:
    Py_DECREF(list);
    PyObject_ReleaseBuffer(subobj, &vsub);
    return NULL;
}
1435
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001436#undef SPLIT_ADD
1437#undef MAX_PREALLOC
1438#undef PREALLOC_SIZE
1439
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001441PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001442"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001444Concatenates any number of bytes objects, with B in between each pair.\n\
1445Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001446
1447static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001448string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449{
1450 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001451 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001454 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001455 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001456 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001457 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
Tim Peters19fe14e2001-01-19 03:03:47 +00001459 seq = PySequence_Fast(orig, "");
1460 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001461 return NULL;
1462 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001463
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001464 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001465 if (seqlen == 0) {
1466 Py_DECREF(seq);
1467 return PyString_FromString("");
1468 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001470 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001471 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001472 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001473 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001474 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001477
Raymond Hettinger674f2412004-08-23 23:23:54 +00001478 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001479 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001480 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001481 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001483 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001484 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001485 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001486 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001488 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001490 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001491 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001492 Py_DECREF(seq);
1493 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001494 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001495 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001496 if (i != 0)
1497 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001498 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001499 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001500 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001501 Py_DECREF(seq);
1502 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001504 }
1505
1506 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001507 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001508 if (res == NULL) {
1509 Py_DECREF(seq);
1510 return NULL;
1511 }
1512
1513 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001514 /* I'm not worried about a PyBytes item growing because there's
1515 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001516 p = PyString_AS_STRING(res);
1517 for (i = 0; i < seqlen; ++i) {
1518 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001519 char *q;
1520 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001521 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001522 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001523 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001524 item = PySequence_Fast_GET_ITEM(seq, i);
1525 n = Py_Size(item);
1526 if (PyString_Check(item))
1527 q = PyString_AS_STRING(item);
1528 else
1529 q = PyBytes_AS_STRING(item);
1530 Py_MEMCPY(p, q, n);
1531 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001533
Jeremy Hylton49048292000-07-11 03:28:17 +00001534 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536}
1537
Tim Peters52e155e2001-06-16 05:42:57 +00001538PyObject *
1539_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001540{
Tim Petersa7259592001-06-16 05:11:17 +00001541 assert(sep != NULL && PyString_Check(sep));
1542 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001543 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001544}
1545
Thomas Wouters477c8d52006-05-27 19:21:47 +00001546Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001547string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001548{
1549 if (*end > len)
1550 *end = len;
1551 else if (*end < 0)
1552 *end += len;
1553 if (*end < 0)
1554 *end = 0;
1555 if (*start < 0)
1556 *start += len;
1557 if (*start < 0)
1558 *start = 0;
1559}
1560
Thomas Wouters477c8d52006-05-27 19:21:47 +00001561Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001562string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001563{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001565 const char *sub;
1566 Py_ssize_t sub_len;
1567 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Thomas Wouters477c8d52006-05-27 19:21:47 +00001569 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1570 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 return -2;
1572 if (PyString_Check(subobj)) {
1573 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001574 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 }
1576 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001577 return PyUnicode_Find(
1578 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001579 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001580 /* XXX - the "expected a character buffer object" is pretty
1581 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 return -2;
1583
Thomas Wouters477c8d52006-05-27 19:21:47 +00001584 if (dir > 0)
1585 return stringlib_find_slice(
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sub, sub_len, start, end);
1588 else
1589 return stringlib_rfind_slice(
1590 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1591 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592}
1593
1594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001596"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597\n\
1598Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001599such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600arguments start and end are interpreted as in slice notation.\n\
1601\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001602Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603
1604static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001605string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001607 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608 if (result == -2)
1609 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001610 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611}
1612
1613
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001614PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001615"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001617Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
1619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001620string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001622 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 if (result == -2)
1624 return NULL;
1625 if (result == -1) {
1626 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001627 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628 return NULL;
1629 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001630 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631}
1632
1633
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001634PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001635"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001637Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001638such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639arguments start and end are interpreted as in slice notation.\n\
1640\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001641Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642
1643static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001644string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001646 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 if (result == -2)
1648 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001649 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650}
1651
1652
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001653PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001654"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001656Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657
1658static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001659string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001660{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001661 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662 if (result == -2)
1663 return NULL;
1664 if (result == -1) {
1665 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001666 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667 return NULL;
1668 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001669 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670}
1671
1672
Thomas Wouters477c8d52006-05-27 19:21:47 +00001673Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001674do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1675{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001676 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001677 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001678 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001679 char *sep;
1680 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001682
Guido van Rossum98297ee2007-11-06 21:34:58 +00001683 if (_getbuffer(sepobj, &vsep) < 0)
1684 return NULL;
1685 sep = vsep.buf;
1686 seplen = vsep.len;
1687
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001688 i = 0;
1689 if (striptype != RIGHTSTRIP) {
1690 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1691 i++;
1692 }
1693 }
1694
1695 j = len;
1696 if (striptype != LEFTSTRIP) {
1697 do {
1698 j--;
1699 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1700 j++;
1701 }
1702
Guido van Rossum98297ee2007-11-06 21:34:58 +00001703 PyObject_ReleaseBuffer(sepobj, &vsep);
1704
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001705 if (i == 0 && j == len && PyString_CheckExact(self)) {
1706 Py_INCREF(self);
1707 return (PyObject*)self;
1708 }
1709 else
1710 return PyString_FromStringAndSize(s+i, j-i);
1711}
1712
1713
Thomas Wouters477c8d52006-05-27 19:21:47 +00001714Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001715do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716{
1717 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001718 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 i = 0;
1721 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001722 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 i++;
1724 }
1725 }
1726
1727 j = len;
1728 if (striptype != LEFTSTRIP) {
1729 do {
1730 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001731 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732 j++;
1733 }
1734
Tim Peters8fa5dd02001-09-12 02:18:30 +00001735 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736 Py_INCREF(self);
1737 return (PyObject*)self;
1738 }
1739 else
1740 return PyString_FromStringAndSize(s+i, j-i);
1741}
1742
1743
Thomas Wouters477c8d52006-05-27 19:21:47 +00001744Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001745do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1746{
1747 PyObject *sep = NULL;
1748
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001749 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001750 return NULL;
1751
1752 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001753 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001754 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001755 return do_strip(self, striptype);
1756}
1757
1758
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001759PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001760"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001762Strip leading and trailing bytes contained in the argument.\n\
1763If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001765string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001767 if (PyTuple_GET_SIZE(args) == 0)
1768 return do_strip(self, BOTHSTRIP); /* Common case */
1769 else
1770 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771}
1772
1773
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001774PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001775"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001777Strip leading bytes contained in the argument.\n\
1778If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001780string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001782 if (PyTuple_GET_SIZE(args) == 0)
1783 return do_strip(self, LEFTSTRIP); /* Common case */
1784 else
1785 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786}
1787
1788
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001789PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001790"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001792Strip trailing bytes contained in the argument.\n\
1793If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001795string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001797 if (PyTuple_GET_SIZE(args) == 0)
1798 return do_strip(self, RIGHTSTRIP); /* Common case */
1799 else
1800 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801}
1802
1803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001805"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001807Return the number of non-overlapping occurrences of substring sub in\n\
1808string S[start:end]. Optional arguments start and end are interpreted\n\
1809as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810
1811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001812string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001814 PyObject *sub_obj;
1815 const char *str = PyString_AS_STRING(self), *sub;
1816 Py_ssize_t sub_len;
1817 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818
Thomas Wouters477c8d52006-05-27 19:21:47 +00001819 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1820 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001822
Thomas Wouters477c8d52006-05-27 19:21:47 +00001823 if (PyString_Check(sub_obj)) {
1824 sub = PyString_AS_STRING(sub_obj);
1825 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001826 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001829 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001830 if (count == -1)
1831 return NULL;
1832 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00001833 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001834 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001835 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836 return NULL;
1837
Thomas Wouters477c8d52006-05-27 19:21:47 +00001838 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001839
Thomas Wouters477c8d52006-05-27 19:21:47 +00001840 return PyInt_FromSsize_t(
1841 stringlib_count(str + start, end - start, sub, sub_len)
1842 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843}
1844
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001846PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001847"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001849Return a copy of B, where all characters occurring in the\n\
1850optional argument deletechars are removed, and the remaining\n\
1851characters have been mapped through the given translation\n\
1852table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
1854static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001855string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001858 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001859 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001861 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001862 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863 PyObject *result;
1864 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001867 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870
1871 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001872 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001873 tablen = PyString_GET_SIZE(tableobj);
1874 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001875 else if (tableobj == Py_None) {
1876 table = NULL;
1877 tablen = 256;
1878 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001880 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881 parameter; instead a mapping to None will cause characters
1882 to be deleted. */
1883 if (delobj != NULL) {
1884 PyErr_SetString(PyExc_TypeError,
1885 "deletions are implemented differently for unicode");
1886 return NULL;
1887 }
1888 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1889 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001890 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001892
Martin v. Löwis00b61272002-12-12 20:03:19 +00001893 if (tablen != 256) {
1894 PyErr_SetString(PyExc_ValueError,
1895 "translation table must be 256 characters long");
1896 return NULL;
1897 }
1898
Guido van Rossum4c08d552000-03-10 22:55:18 +00001899 if (delobj != NULL) {
1900 if (PyString_Check(delobj)) {
1901 del_table = PyString_AS_STRING(delobj);
1902 dellen = PyString_GET_SIZE(delobj);
1903 }
1904 else if (PyUnicode_Check(delobj)) {
1905 PyErr_SetString(PyExc_TypeError,
1906 "deletions are implemented differently for unicode");
1907 return NULL;
1908 }
1909 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1910 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001911 }
1912 else {
1913 del_table = NULL;
1914 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915 }
1916
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001917 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 result = PyString_FromStringAndSize((char *)NULL, inlen);
1919 if (result == NULL)
1920 return NULL;
1921 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001922 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923
Guido van Rossumd8faa362007-04-27 19:54:29 +00001924 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 /* If no deletions are required, use faster code */
1926 for (i = inlen; --i >= 0; ) {
1927 c = Py_CHARMASK(*input++);
1928 if (Py_CHARMASK((*output++ = table[c])) != c)
1929 changed = 1;
1930 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001931 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932 return result;
1933 Py_DECREF(result);
1934 Py_INCREF(input_obj);
1935 return input_obj;
1936 }
1937
Guido van Rossumd8faa362007-04-27 19:54:29 +00001938 if (table == NULL) {
1939 for (i = 0; i < 256; i++)
1940 trans_table[i] = Py_CHARMASK(i);
1941 } else {
1942 for (i = 0; i < 256; i++)
1943 trans_table[i] = Py_CHARMASK(table[i]);
1944 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945
1946 for (i = 0; i < dellen; i++)
1947 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1948
1949 for (i = inlen; --i >= 0; ) {
1950 c = Py_CHARMASK(*input++);
1951 if (trans_table[c] != -1)
1952 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1953 continue;
1954 changed = 1;
1955 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001956 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957 Py_DECREF(result);
1958 Py_INCREF(input_obj);
1959 return input_obj;
1960 }
1961 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001962 if (inlen > 0)
1963 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 return result;
1965}
1966
1967
Thomas Wouters477c8d52006-05-27 19:21:47 +00001968#define FORWARD 1
1969#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970
Thomas Wouters477c8d52006-05-27 19:21:47 +00001971/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972
Thomas Wouters477c8d52006-05-27 19:21:47 +00001973#define findchar(target, target_len, c) \
1974 ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975
Thomas Wouters477c8d52006-05-27 19:21:47 +00001976/* String ops must return a string. */
1977/* If the object is subclass of string, create a copy */
1978Py_LOCAL(PyStringObject *)
1979return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001981 if (PyString_CheckExact(self)) {
1982 Py_INCREF(self);
1983 return self;
1984 }
1985 return (PyStringObject *)PyString_FromStringAndSize(
1986 PyString_AS_STRING(self),
1987 PyString_GET_SIZE(self));
1988}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989
Thomas Wouters477c8d52006-05-27 19:21:47 +00001990Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001991countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001992{
1993 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001994 const char *start=target;
1995 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996
Thomas Wouters477c8d52006-05-27 19:21:47 +00001997 while ( (start=findchar(start, end-start, c)) != NULL ) {
1998 count++;
1999 if (count >= maxcount)
2000 break;
2001 start += 1;
2002 }
2003 return count;
2004}
2005
2006Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002007findstring(const char *target, Py_ssize_t target_len,
2008 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002009 Py_ssize_t start,
2010 Py_ssize_t end,
2011 int direction)
2012{
2013 if (start < 0) {
2014 start += target_len;
2015 if (start < 0)
2016 start = 0;
2017 }
2018 if (end > target_len) {
2019 end = target_len;
2020 } else if (end < 0) {
2021 end += target_len;
2022 if (end < 0)
2023 end = 0;
2024 }
2025
2026 /* zero-length substrings always match at the first attempt */
2027 if (pattern_len == 0)
2028 return (direction > 0) ? start : end;
2029
2030 end -= pattern_len;
2031
2032 if (direction < 0) {
2033 for (; end >= start; end--)
2034 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2035 return end;
2036 } else {
2037 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002038 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002039 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040 }
2041 return -1;
2042}
2043
Thomas Wouters477c8d52006-05-27 19:21:47 +00002044Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002045countstring(const char *target, Py_ssize_t target_len,
2046 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002047 Py_ssize_t start,
2048 Py_ssize_t end,
2049 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002051 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052
Thomas Wouters477c8d52006-05-27 19:21:47 +00002053 if (start < 0) {
2054 start += target_len;
2055 if (start < 0)
2056 start = 0;
2057 }
2058 if (end > target_len) {
2059 end = target_len;
2060 } else if (end < 0) {
2061 end += target_len;
2062 if (end < 0)
2063 end = 0;
2064 }
2065
2066 /* zero-length substrings match everywhere */
2067 if (pattern_len == 0 || maxcount == 0) {
2068 if (target_len+1 < maxcount)
2069 return target_len+1;
2070 return maxcount;
2071 }
2072
2073 end -= pattern_len;
2074 if (direction < 0) {
2075 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002076 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002077 count++;
2078 if (--maxcount <= 0) break;
2079 end -= pattern_len-1;
2080 }
2081 } else {
2082 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002083 if (Py_STRING_MATCH(target, start,
2084 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002085 count++;
2086 if (--maxcount <= 0)
2087 break;
2088 start += pattern_len-1;
2089 }
2090 }
2091 return count;
2092}
2093
2094
2095/* Algorithms for different cases of string replacement */
2096
2097/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2098Py_LOCAL(PyStringObject *)
2099replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002100 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002101 Py_ssize_t maxcount)
2102{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002103 char *self_s, *result_s;
2104 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002105 Py_ssize_t count, i, product;
2106 PyStringObject *result;
2107
2108 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002109
Thomas Wouters477c8d52006-05-27 19:21:47 +00002110 /* 1 at the end plus 1 after every character */
2111 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002112 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002113 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002114
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115 /* Check for overflow */
2116 /* result_len = count * to_len + self_len; */
2117 product = count * to_len;
2118 if (product / to_len != count) {
2119 PyErr_SetString(PyExc_OverflowError,
2120 "replace string is too long");
2121 return NULL;
2122 }
2123 result_len = product + self_len;
2124 if (result_len < 0) {
2125 PyErr_SetString(PyExc_OverflowError,
2126 "replace string is too long");
2127 return NULL;
2128 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002129
Thomas Wouters477c8d52006-05-27 19:21:47 +00002130 if (! (result = (PyStringObject *)
2131 PyString_FromStringAndSize(NULL, result_len)) )
2132 return NULL;
2133
2134 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002135 result_s = PyString_AS_STRING(result);
2136
2137 /* TODO: special case single character, which doesn't need memcpy */
2138
2139 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002140 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002141 result_s += to_len;
2142 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002143
Thomas Wouters477c8d52006-05-27 19:21:47 +00002144 for (i=0; i<count; i++) {
2145 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002146 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147 result_s += to_len;
2148 }
2149
2150 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002151 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002152
2153 return result;
2154}
2155
2156/* Special case for deleting a single character */
2157/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2158Py_LOCAL(PyStringObject *)
2159replace_delete_single_character(PyStringObject *self,
2160 char from_c, Py_ssize_t maxcount)
2161{
2162 char *self_s, *result_s;
2163 char *start, *next, *end;
2164 Py_ssize_t self_len, result_len;
2165 Py_ssize_t count;
2166 PyStringObject *result;
2167
2168 self_len = PyString_GET_SIZE(self);
2169 self_s = PyString_AS_STRING(self);
2170
2171 count = countchar(self_s, self_len, from_c, maxcount);
2172 if (count == 0) {
2173 return return_self(self);
2174 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002175
Thomas Wouters477c8d52006-05-27 19:21:47 +00002176 result_len = self_len - count; /* from_len == 1 */
2177 assert(result_len>=0);
2178
2179 if ( (result = (PyStringObject *)
2180 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2181 return NULL;
2182 result_s = PyString_AS_STRING(result);
2183
2184 start = self_s;
2185 end = self_s + self_len;
2186 while (count-- > 0) {
2187 next = findchar(start, end-start, from_c);
2188 if (next == NULL)
2189 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002190 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002191 result_s += (next-start);
2192 start = next+1;
2193 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002194 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002195
Thomas Wouters477c8d52006-05-27 19:21:47 +00002196 return result;
2197}
2198
2199/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2200
2201Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002202replace_delete_substring(PyStringObject *self,
2203 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002204 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002205 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002206 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002207 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002208 Py_ssize_t count, offset;
2209 PyStringObject *result;
2210
2211 self_len = PyString_GET_SIZE(self);
2212 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002213
2214 count = countstring(self_s, self_len,
2215 from_s, from_len,
2216 0, self_len, 1,
2217 maxcount);
2218
2219 if (count == 0) {
2220 /* no matches */
2221 return return_self(self);
2222 }
2223
2224 result_len = self_len - (count * from_len);
2225 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002226
Thomas Wouters477c8d52006-05-27 19:21:47 +00002227 if ( (result = (PyStringObject *)
2228 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2229 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002230
Thomas Wouters477c8d52006-05-27 19:21:47 +00002231 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002232
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 start = self_s;
2234 end = self_s + self_len;
2235 while (count-- > 0) {
2236 offset = findstring(start, end-start,
2237 from_s, from_len,
2238 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 if (offset == -1)
2240 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002241 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002243 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002244
Thomas Wouters477c8d52006-05-27 19:21:47 +00002245 result_s += (next-start);
2246 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002248 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002249 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250}
2251
Thomas Wouters477c8d52006-05-27 19:21:47 +00002252/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2253Py_LOCAL(PyStringObject *)
2254replace_single_character_in_place(PyStringObject *self,
2255 char from_c, char to_c,
2256 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002258 char *self_s, *result_s, *start, *end, *next;
2259 Py_ssize_t self_len;
2260 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002261
Thomas Wouters477c8d52006-05-27 19:21:47 +00002262 /* The result string will be the same size */
2263 self_s = PyString_AS_STRING(self);
2264 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002265
Thomas Wouters477c8d52006-05-27 19:21:47 +00002266 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002267
Thomas Wouters477c8d52006-05-27 19:21:47 +00002268 if (next == NULL) {
2269 /* No matches; return the original string */
2270 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272
Thomas Wouters477c8d52006-05-27 19:21:47 +00002273 /* Need to make a new string */
2274 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2275 if (result == NULL)
2276 return NULL;
2277 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002278 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002279
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 /* change everything in-place, starting with this one */
2281 start = result_s + (next-self_s);
2282 *start = to_c;
2283 start++;
2284 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002285
Thomas Wouters477c8d52006-05-27 19:21:47 +00002286 while (--maxcount > 0) {
2287 next = findchar(start, end-start, from_c);
2288 if (next == NULL)
2289 break;
2290 *next = to_c;
2291 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002292 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002293
Thomas Wouters477c8d52006-05-27 19:21:47 +00002294 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295}
2296
Thomas Wouters477c8d52006-05-27 19:21:47 +00002297/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2298Py_LOCAL(PyStringObject *)
2299replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002300 const char *from_s, Py_ssize_t from_len,
2301 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002302 Py_ssize_t maxcount)
2303{
2304 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002305 char *self_s;
2306 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002307 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002308
Thomas Wouters477c8d52006-05-27 19:21:47 +00002309 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002310
Thomas Wouters477c8d52006-05-27 19:21:47 +00002311 self_s = PyString_AS_STRING(self);
2312 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002313
Thomas Wouters477c8d52006-05-27 19:21:47 +00002314 offset = findstring(self_s, self_len,
2315 from_s, from_len,
2316 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002317 if (offset == -1) {
2318 /* No matches; return the original string */
2319 return return_self(self);
2320 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002321
Thomas Wouters477c8d52006-05-27 19:21:47 +00002322 /* Need to make a new string */
2323 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2324 if (result == NULL)
2325 return NULL;
2326 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002327 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002328
Thomas Wouters477c8d52006-05-27 19:21:47 +00002329 /* change everything in-place, starting with this one */
2330 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002331 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002332 start += from_len;
2333 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002334
Thomas Wouters477c8d52006-05-27 19:21:47 +00002335 while ( --maxcount > 0) {
2336 offset = findstring(start, end-start,
2337 from_s, from_len,
2338 0, end-start, FORWARD);
2339 if (offset==-1)
2340 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002341 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002342 start += offset+from_len;
2343 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002344
Thomas Wouters477c8d52006-05-27 19:21:47 +00002345 return result;
2346}
2347
2348/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2349Py_LOCAL(PyStringObject *)
2350replace_single_character(PyStringObject *self,
2351 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002352 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002353 Py_ssize_t maxcount)
2354{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002355 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002356 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002357 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002358 Py_ssize_t count, product;
2359 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002360
Thomas Wouters477c8d52006-05-27 19:21:47 +00002361 self_s = PyString_AS_STRING(self);
2362 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002363
Thomas Wouters477c8d52006-05-27 19:21:47 +00002364 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 if (count == 0) {
2366 /* no matches, return unchanged */
2367 return return_self(self);
2368 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002369
Thomas Wouters477c8d52006-05-27 19:21:47 +00002370 /* use the difference between current and new, hence the "-1" */
2371 /* result_len = self_len + count * (to_len-1) */
2372 product = count * (to_len-1);
2373 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002374 PyErr_SetString(PyExc_OverflowError,
2375 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002376 return NULL;
2377 }
2378 result_len = self_len + product;
2379 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002380 PyErr_SetString(PyExc_OverflowError,
2381 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 return NULL;
2383 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002384
Thomas Wouters477c8d52006-05-27 19:21:47 +00002385 if ( (result = (PyStringObject *)
2386 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2387 return NULL;
2388 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002389
Thomas Wouters477c8d52006-05-27 19:21:47 +00002390 start = self_s;
2391 end = self_s + self_len;
2392 while (count-- > 0) {
2393 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002394 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002395 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002396
Thomas Wouters477c8d52006-05-27 19:21:47 +00002397 if (next == start) {
2398 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002399 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400 result_s += to_len;
2401 start += 1;
2402 } else {
2403 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002404 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002405 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002406 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 result_s += to_len;
2408 start = next+1;
2409 }
2410 }
2411 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002412 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002413
Thomas Wouters477c8d52006-05-27 19:21:47 +00002414 return result;
2415}
2416
2417/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2418Py_LOCAL(PyStringObject *)
2419replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002420 const char *from_s, Py_ssize_t from_len,
2421 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002422 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002423 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002424 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002425 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002426 Py_ssize_t count, offset, product;
2427 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002428
Thomas Wouters477c8d52006-05-27 19:21:47 +00002429 self_s = PyString_AS_STRING(self);
2430 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002431
Thomas Wouters477c8d52006-05-27 19:21:47 +00002432 count = countstring(self_s, self_len,
2433 from_s, from_len,
2434 0, self_len, FORWARD, maxcount);
2435 if (count == 0) {
2436 /* no matches, return unchanged */
2437 return return_self(self);
2438 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002439
Thomas Wouters477c8d52006-05-27 19:21:47 +00002440 /* Check for overflow */
2441 /* result_len = self_len + count * (to_len-from_len) */
2442 product = count * (to_len-from_len);
2443 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002444 PyErr_SetString(PyExc_OverflowError,
2445 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002446 return NULL;
2447 }
2448 result_len = self_len + product;
2449 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002450 PyErr_SetString(PyExc_OverflowError,
2451 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002452 return NULL;
2453 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002454
Thomas Wouters477c8d52006-05-27 19:21:47 +00002455 if ( (result = (PyStringObject *)
2456 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2457 return NULL;
2458 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459
Thomas Wouters477c8d52006-05-27 19:21:47 +00002460 start = self_s;
2461 end = self_s + self_len;
2462 while (count-- > 0) {
2463 offset = findstring(start, end-start,
2464 from_s, from_len,
2465 0, end-start, FORWARD);
2466 if (offset == -1)
2467 break;
2468 next = start+offset;
2469 if (next == start) {
2470 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002471 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002472 result_s += to_len;
2473 start += from_len;
2474 } else {
2475 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002476 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002477 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002478 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479 result_s += to_len;
2480 start = next+from_len;
2481 }
2482 }
2483 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002484 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002485
Thomas Wouters477c8d52006-05-27 19:21:47 +00002486 return result;
2487}
2488
2489
2490Py_LOCAL(PyStringObject *)
2491replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002492 const char *from_s, Py_ssize_t from_len,
2493 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002494 Py_ssize_t maxcount)
2495{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496 if (maxcount < 0) {
2497 maxcount = PY_SSIZE_T_MAX;
2498 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2499 /* nothing to do; return the original string */
2500 return return_self(self);
2501 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502
Thomas Wouters477c8d52006-05-27 19:21:47 +00002503 if (maxcount == 0 ||
2504 (from_len == 0 && to_len == 0)) {
2505 /* nothing to do; return the original string */
2506 return return_self(self);
2507 }
2508
2509 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002510
Thomas Wouters477c8d52006-05-27 19:21:47 +00002511 if (from_len == 0) {
2512 /* insert the 'to' string everywhere. */
2513 /* >>> "Python".replace("", ".") */
2514 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002515 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002516 }
2517
2518 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2519 /* point for an empty self string to generate a non-empty string */
2520 /* Special case so the remaining code always gets a non-empty string */
2521 if (PyString_GET_SIZE(self) == 0) {
2522 return return_self(self);
2523 }
2524
2525 if (to_len == 0) {
2526 /* delete all occurances of 'from' string */
2527 if (from_len == 1) {
2528 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002529 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002530 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002531 return replace_delete_substring(self, from_s,
2532 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002533 }
2534 }
2535
2536 /* Handle special case where both strings have the same length */
2537
2538 if (from_len == to_len) {
2539 if (from_len == 1) {
2540 return replace_single_character_in_place(
2541 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002542 from_s[0],
2543 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002544 maxcount);
2545 } else {
2546 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002547 self, from_s, from_len, to_s, to_len,
2548 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002549 }
2550 }
2551
2552 /* Otherwise use the more generic algorithms */
2553 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002554 return replace_single_character(self, from_s[0],
2555 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 } else {
2557 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002558 return replace_substring(self, from_s, from_len, to_s, to_len,
2559 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002560 }
2561}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002563PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002564"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002566Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002567old replaced by new. If the optional argument count is\n\
2568given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002569
2570static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002571string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002573 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002574 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002575 const char *from_s, *to_s;
2576 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002577
Thomas Wouters477c8d52006-05-27 19:21:47 +00002578 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580
Thomas Wouters477c8d52006-05-27 19:21:47 +00002581 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002582 from_s = PyString_AS_STRING(from);
2583 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002585 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002586 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002587 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002588 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589 return NULL;
2590
Thomas Wouters477c8d52006-05-27 19:21:47 +00002591 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002592 to_s = PyString_AS_STRING(to);
2593 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002595 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002596 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002597 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002598 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 return NULL;
2600
Thomas Wouters477c8d52006-05-27 19:21:47 +00002601 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002602 from_s, from_len,
2603 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002604}
2605
Thomas Wouters477c8d52006-05-27 19:21:47 +00002606/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002608/* Matches the end (direction >= 0) or start (direction < 0) of self
2609 * against substr, using the start and end arguments. Returns
2610 * -1 on error, 0 if not found and 1 if found.
2611 */
2612Py_LOCAL(int)
2613_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2614 Py_ssize_t end, int direction)
2615{
2616 Py_ssize_t len = PyString_GET_SIZE(self);
2617 Py_ssize_t slen;
2618 const char* sub;
2619 const char* str;
2620
2621 if (PyString_Check(substr)) {
2622 sub = PyString_AS_STRING(substr);
2623 slen = PyString_GET_SIZE(substr);
2624 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002625 else if (PyUnicode_Check(substr))
2626 return PyUnicode_Tailmatch((PyObject *)self,
2627 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002628 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2629 return -1;
2630 str = PyString_AS_STRING(self);
2631
2632 string_adjust_indices(&start, &end, len);
2633
2634 if (direction < 0) {
2635 /* startswith */
2636 if (start+slen > len)
2637 return 0;
2638 } else {
2639 /* endswith */
2640 if (end-start < slen || start > len)
2641 return 0;
2642
2643 if (end-slen > start)
2644 start = end - slen;
2645 }
2646 if (end-start >= slen)
2647 return ! memcmp(str+start, sub, slen);
2648 return 0;
2649}
2650
2651
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002652PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002653"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002655Return True if B starts with the specified prefix, False otherwise.\n\
2656With optional start, test B beginning at that position.\n\
2657With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002658prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659
2660static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002661string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002663 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002664 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002666 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002667
Guido van Rossumc6821402000-05-08 14:08:05 +00002668 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2669 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002670 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002671 if (PyTuple_Check(subobj)) {
2672 Py_ssize_t i;
2673 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2674 result = _string_tailmatch(self,
2675 PyTuple_GET_ITEM(subobj, i),
2676 start, end, -1);
2677 if (result == -1)
2678 return NULL;
2679 else if (result) {
2680 Py_RETURN_TRUE;
2681 }
2682 }
2683 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002685 result = _string_tailmatch(self, subobj, start, end, -1);
2686 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002688 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002690}
2691
2692
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002693PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002694"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002695\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002696Return True if B ends with the specified suffix, False otherwise.\n\
2697With optional start, test B beginning at that position.\n\
2698With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002699suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002700
2701static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002702string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002703{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002704 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002705 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002706 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002707 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002708
Guido van Rossumc6821402000-05-08 14:08:05 +00002709 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2710 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002711 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002712 if (PyTuple_Check(subobj)) {
2713 Py_ssize_t i;
2714 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2715 result = _string_tailmatch(self,
2716 PyTuple_GET_ITEM(subobj, i),
2717 start, end, +1);
2718 if (result == -1)
2719 return NULL;
2720 else if (result) {
2721 Py_RETURN_TRUE;
2722 }
2723 }
2724 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002725 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002726 result = _string_tailmatch(self, subobj, start, end, +1);
2727 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002728 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002729 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002730 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002731}
2732
2733
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002734PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002735"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002736\n\
2737Decodes S using the codec registered for encoding. encoding defaults\n\
2738to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002739handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2740a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002741as well as any other name registerd with codecs.register_error that is\n\
2742able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002743
2744static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002745string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002746{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002747 const char *encoding = NULL;
2748 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002749
Guido van Rossum98297ee2007-11-06 21:34:58 +00002750 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2751 return NULL;
2752 if (encoding == NULL)
2753 encoding = PyUnicode_GetDefaultEncoding();
2754 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002755}
2756
2757
Guido van Rossumae404e22007-10-26 21:46:44 +00002758PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002759"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002760\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002761Create a bytes object from a string of hexadecimal numbers.\n\
2762Spaces between two numbers are accepted.\n\
2763Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002764
2765static int
2766hex_digit_to_int(Py_UNICODE c)
2767{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002768 if (c >= 128)
2769 return -1;
2770 if (ISDIGIT(c))
2771 return c - '0';
2772 else {
2773 if (ISUPPER(c))
2774 c = TOLOWER(c);
2775 if (c >= 'a' && c <= 'f')
2776 return c - 'a' + 10;
2777 }
2778 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002779}
2780
2781static PyObject *
2782string_fromhex(PyObject *cls, PyObject *args)
2783{
2784 PyObject *newstring, *hexobj;
2785 char *buf;
2786 Py_UNICODE *hex;
2787 Py_ssize_t hexlen, byteslen, i, j;
2788 int top, bot;
2789
2790 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2791 return NULL;
2792 assert(PyUnicode_Check(hexobj));
2793 hexlen = PyUnicode_GET_SIZE(hexobj);
2794 hex = PyUnicode_AS_UNICODE(hexobj);
2795 byteslen = hexlen/2; /* This overestimates if there are spaces */
2796 newstring = PyString_FromStringAndSize(NULL, byteslen);
2797 if (!newstring)
2798 return NULL;
2799 buf = PyString_AS_STRING(newstring);
2800 for (i = j = 0; i < hexlen; i += 2) {
2801 /* skip over spaces in the input */
2802 while (hex[i] == ' ')
2803 i++;
2804 if (i >= hexlen)
2805 break;
2806 top = hex_digit_to_int(hex[i]);
2807 bot = hex_digit_to_int(hex[i+1]);
2808 if (top == -1 || bot == -1) {
2809 PyErr_Format(PyExc_ValueError,
2810 "non-hexadecimal number found in "
2811 "fromhex() arg at position %zd", i);
2812 goto error;
2813 }
2814 buf[j++] = (top << 4) + bot;
2815 }
2816 if (_PyString_Resize(&newstring, j) < 0)
2817 goto error;
2818 return newstring;
2819
2820 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002821 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002822 return NULL;
2823}
2824
2825
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002826static PyObject *
2827string_getnewargs(PyStringObject *v)
2828{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002829 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002830}
2831
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002832
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002833static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002834string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002835 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002836 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2837 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002838 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002839 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002840 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002841 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002842 endswith__doc__},
2843 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2844 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002845 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002846 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2847 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002848 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002849 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2850 _Py_isalnum__doc__},
2851 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2852 _Py_isalpha__doc__},
2853 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2854 _Py_isdigit__doc__},
2855 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2856 _Py_islower__doc__},
2857 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2858 _Py_isspace__doc__},
2859 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2860 _Py_istitle__doc__},
2861 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2862 _Py_isupper__doc__},
2863 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2864 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2865 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002866 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002867 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002868 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2869 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2870 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002871 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002872 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2873 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002874 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2875 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2876 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2877 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2878 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002879 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002880 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002881 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002882 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2883 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002884 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002885 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2886 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002887 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002888 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002889 {NULL, NULL} /* sentinel */
2890};
2891
Jeremy Hylton938ace62002-07-17 16:30:39 +00002892static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002893str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2894
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002895static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002896string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002897{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002898 PyObject *x = NULL, *it;
2899 PyObject *(*iternext)(PyObject *);
2900 const char *encoding = NULL;
2901 const char *errors = NULL;
2902 PyObject *new = NULL;
2903 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002904 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002905
Guido van Rossumae960af2001-08-30 03:11:59 +00002906 if (type != &PyString_Type)
2907 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002908 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002909 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002910 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002911 if (x == NULL) {
2912 if (encoding != NULL || errors != NULL) {
2913 PyErr_SetString(PyExc_TypeError,
2914 "encoding or errors without sequence "
2915 "argument");
2916 return NULL;
2917 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002918 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002919 }
2920
2921 if (PyUnicode_Check(x)) {
2922 /* Encode via the codec registry */
2923 if (encoding == NULL) {
2924 PyErr_SetString(PyExc_TypeError,
2925 "string argument without an encoding");
2926 return NULL;
2927 }
2928 new = PyCodec_Encode(x, encoding, errors);
2929 if (new == NULL)
2930 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002931 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002932 return new;
2933 }
2934
2935 /* If it's not unicode, there can't be encoding or errors */
2936 if (encoding != NULL || errors != NULL) {
2937 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002938 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002939 return NULL;
2940 }
2941
Guido van Rossum98297ee2007-11-06 21:34:58 +00002942 /* Is it an int? */
2943 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2944 if (size == -1 && PyErr_Occurred()) {
2945 PyErr_Clear();
2946 }
2947 else {
2948 if (size < 0) {
2949 PyErr_SetString(PyExc_ValueError, "negative count");
2950 return NULL;
2951 }
2952 new = PyString_FromStringAndSize(NULL, size);
2953 if (new == NULL) {
2954 return NULL;
2955 }
2956 if (size > 0) {
2957 memset(((PyStringObject*)new)->ob_sval, 0, size);
2958 }
2959 return new;
2960 }
2961
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002962 /* Use the modern buffer interface */
2963 if (PyObject_CheckBuffer(x)) {
2964 Py_buffer view;
2965 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2966 return NULL;
2967 new = PyString_FromStringAndSize(NULL, view.len);
2968 if (!new)
2969 goto fail;
2970 // XXX(brett.cannon): Better way to get to internal buffer?
2971 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2972 &view, view.len, 'C') < 0)
2973 goto fail;
2974 PyObject_ReleaseBuffer(x, &view);
2975 return new;
2976 fail:
2977 Py_XDECREF(new);
2978 PyObject_ReleaseBuffer(x, &view);
2979 return NULL;
2980 }
2981
Guido van Rossum98297ee2007-11-06 21:34:58 +00002982 /* For iterator version, create a string object and resize as needed */
2983 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2984 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2985 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002986 size = 64;
2987 new = PyString_FromStringAndSize(NULL, size);
2988 if (new == NULL)
2989 return NULL;
2990
2991 /* XXX Optimize this if the arguments is a list, tuple */
2992
2993 /* Get the iterator */
2994 it = PyObject_GetIter(x);
2995 if (it == NULL)
2996 goto error;
2997 // XXX(brett.cannon): No API for this?
2998 iternext = *Py_Type(it)->tp_iternext;
2999
3000 /* Run the iterator to exhaustion */
3001 for (i = 0; ; i++) {
3002 PyObject *item;
3003 Py_ssize_t value;
3004
3005 /* Get the next item */
3006 item = iternext(it);
3007 if (item == NULL) {
3008 if (PyErr_Occurred()) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003009 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
3010 goto error;
3011 PyErr_Clear();
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003012 }
3013 break;
3014 }
3015
3016 /* Interpret it as an int (__index__) */
3017 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3018 Py_DECREF(item);
3019 if (value == -1 && PyErr_Occurred())
3020 goto error;
3021
3022 /* Range check */
3023 if (value < 0 || value >= 256) {
3024 PyErr_SetString(PyExc_ValueError,
3025 "bytes must be in range(0, 256)");
3026 goto error;
3027 }
3028
3029 /* Append the byte */
3030 if (i >= size) {
3031 size *= 2;
3032 if (_PyString_Resize(&new, size) < 0)
3033 goto error;
3034 }
3035 ((PyStringObject *)new)->ob_sval[i] = value;
3036 }
3037 _PyString_Resize(&new, i);
3038
3039 /* Clean up and return success */
3040 Py_DECREF(it);
3041 return new;
3042
3043 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003044 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003045 Py_XDECREF(it);
3046 Py_DECREF(new);
3047 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048}
3049
Guido van Rossumae960af2001-08-30 03:11:59 +00003050static PyObject *
3051str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3052{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003053 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003054 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003055
3056 assert(PyType_IsSubtype(type, &PyString_Type));
3057 tmp = string_new(&PyString_Type, args, kwds);
3058 if (tmp == NULL)
3059 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003060 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003061 n = PyString_GET_SIZE(tmp);
3062 pnew = type->tp_alloc(type, n);
3063 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003064 Py_MEMCPY(PyString_AS_STRING(pnew),
3065 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003066 ((PyStringObject *)pnew)->ob_shash =
3067 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003068 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003069 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003070 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003071}
3072
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003073PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003074"bytes(iterable_of_ints) -> bytes.\n\
3075bytes(string, encoding[, errors]) -> bytes\n\
3076bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3077bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003078\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003079Construct an immutable array of bytes from:\n\
3080 - an iterable yielding integers in range(256)\n\
3081 - a text string encoded using the specified encoding\n\
3082 - a bytes or a buffer object\n\
3083 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003084
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003085static PyObject *str_iter(PyObject *seq);
3086
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003087PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003088 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003089 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003090 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003091 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003092 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003093 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003094 0, /* tp_getattr */
3095 0, /* tp_setattr */
3096 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003097 (reprfunc)string_repr, /* tp_repr */
3098 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003099 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003100 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003101 (hashfunc)string_hash, /* tp_hash */
3102 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003103 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003104 PyObject_GenericGetAttr, /* tp_getattro */
3105 0, /* tp_setattro */
3106 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003107 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3108 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003109 string_doc, /* tp_doc */
3110 0, /* tp_traverse */
3111 0, /* tp_clear */
3112 (richcmpfunc)string_richcompare, /* tp_richcompare */
3113 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003114 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003115 0, /* tp_iternext */
3116 string_methods, /* tp_methods */
3117 0, /* tp_members */
3118 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003119 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003120 0, /* tp_dict */
3121 0, /* tp_descr_get */
3122 0, /* tp_descr_set */
3123 0, /* tp_dictoffset */
3124 0, /* tp_init */
3125 0, /* tp_alloc */
3126 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003127 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003128};
3129
3130void
Fred Drakeba096332000-07-09 07:04:36 +00003131PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003132{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003134 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003135 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003136 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003137 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003138 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003139 *pv = NULL;
3140 return;
3141 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003142 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003144 *pv = v;
3145}
3146
Guido van Rossum013142a1994-08-30 08:19:36 +00003147void
Fred Drakeba096332000-07-09 07:04:36 +00003148PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003149{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 PyString_Concat(pv, w);
3151 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003152}
3153
3154
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003155/* The following function breaks the notion that strings are immutable:
3156 it changes the size of a string. We get away with this only if there
3157 is only one module referencing the object. You can also think of it
3158 as creating a new string object and destroying the old one, only
3159 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003160 already be known to some other part of the code...
3161 Note that if there's not enough memory to resize the string, the original
3162 string object at *pv is deallocated, *pv is set to NULL, an "out of
3163 memory" exception is set, and -1 is returned. Else (on success) 0 is
3164 returned, and the value in *pv may or may not be the same as on input.
3165 As always, an extra byte is allocated for a trailing \0 byte (newsize
3166 does *not* include that), and a trailing \0 byte is stored.
3167*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003168
3169int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003170_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003171{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003172 register PyObject *v;
3173 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003174 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003175 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003176 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003177 Py_DECREF(v);
3178 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003179 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003180 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003181 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003182 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003183 _Py_ForgetReference(v);
3184 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003185 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003186 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003187 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003188 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003189 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003190 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003191 _Py_NewReference(*pv);
3192 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003193 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003194 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003195 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003196 return 0;
3197}
Guido van Rossume5372401993-03-16 12:15:04 +00003198
Tim Peters38fd5b62000-09-21 05:43:11 +00003199/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3200 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3201 * Python's regular ints.
3202 * Return value: a new PyString*, or NULL if error.
3203 * . *pbuf is set to point into it,
3204 * *plen set to the # of chars following that.
3205 * Caller must decref it when done using pbuf.
3206 * The string starting at *pbuf is of the form
3207 * "-"? ("0x" | "0X")? digit+
3208 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003209 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003210 * There will be at least prec digits, zero-filled on the left if
3211 * necessary to get that many.
3212 * val object to be converted
3213 * flags bitmask of format flags; only F_ALT is looked at
3214 * prec minimum number of digits; 0-fill on left if needed
3215 * type a character in [duoxX]; u acts the same as d
3216 *
3217 * CAUTION: o, x and X conversions on regular ints can never
3218 * produce a '-' sign, but can for Python's unbounded ints.
3219 */
3220PyObject*
3221_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3222 char **pbuf, int *plen)
3223{
3224 PyObject *result = NULL;
3225 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003226 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003227 int sign; /* 1 if '-', else 0 */
3228 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003229 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003230 int numdigits; /* len == numnondigits + numdigits */
3231 int numnondigits = 0;
3232
Guido van Rossumddefaf32007-01-14 03:31:43 +00003233 /* Avoid exceeding SSIZE_T_MAX */
3234 if (prec > PY_SSIZE_T_MAX-3) {
3235 PyErr_SetString(PyExc_OverflowError,
3236 "precision too large");
3237 return NULL;
3238 }
3239
Tim Peters38fd5b62000-09-21 05:43:11 +00003240 switch (type) {
3241 case 'd':
3242 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003243 /* Special-case boolean: we want 0/1 */
3244 if (PyBool_Check(val))
3245 result = PyNumber_ToBase(val, 10);
3246 else
3247 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003248 break;
3249 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003250 numnondigits = 2;
3251 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003252 break;
3253 case 'x':
3254 case 'X':
3255 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003256 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003257 break;
3258 default:
3259 assert(!"'type' not in [duoxX]");
3260 }
3261 if (!result)
3262 return NULL;
3263
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003264 buf = PyString_AsString(result);
3265 if (!buf) {
3266 Py_DECREF(result);
3267 return NULL;
3268 }
3269
Tim Peters38fd5b62000-09-21 05:43:11 +00003270 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003271 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003272 PyErr_BadInternalCall();
3273 return NULL;
3274 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00003275 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003276 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003277 PyErr_SetString(PyExc_ValueError,
3278 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003279 return NULL;
3280 }
3281 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003282 if (buf[len-1] == 'L') {
3283 --len;
3284 buf[len] = '\0';
3285 }
3286 sign = buf[0] == '-';
3287 numnondigits += sign;
3288 numdigits = len - numnondigits;
3289 assert(numdigits > 0);
3290
Tim Petersfff53252001-04-12 18:38:48 +00003291 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003292 if (((flags & F_ALT) == 0 &&
3293 (type == 'o' || type == 'x' || type == 'X'))) {
3294 assert(buf[sign] == '0');
3295 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003296 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003297 numnondigits -= 2;
3298 buf += 2;
3299 len -= 2;
3300 if (sign)
3301 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003302 assert(len == numnondigits + numdigits);
3303 assert(numdigits > 0);
3304 }
3305
3306 /* Fill with leading zeroes to meet minimum width. */
3307 if (prec > numdigits) {
3308 PyObject *r1 = PyString_FromStringAndSize(NULL,
3309 numnondigits + prec);
3310 char *b1;
3311 if (!r1) {
3312 Py_DECREF(result);
3313 return NULL;
3314 }
3315 b1 = PyString_AS_STRING(r1);
3316 for (i = 0; i < numnondigits; ++i)
3317 *b1++ = *buf++;
3318 for (i = 0; i < prec - numdigits; i++)
3319 *b1++ = '0';
3320 for (i = 0; i < numdigits; i++)
3321 *b1++ = *buf++;
3322 *b1 = '\0';
3323 Py_DECREF(result);
3324 result = r1;
3325 buf = PyString_AS_STRING(result);
3326 len = numnondigits + prec;
3327 }
3328
3329 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003330 if (type == 'X') {
3331 /* Need to convert all lower case letters to upper case.
3332 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003333 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003334 if (buf[i] >= 'a' && buf[i] <= 'x')
3335 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003336 }
3337 *pbuf = buf;
3338 *plen = len;
3339 return result;
3340}
3341
Guido van Rossum8cf04761997-08-02 02:57:45 +00003342void
Fred Drakeba096332000-07-09 07:04:36 +00003343PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003344{
3345 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003346 for (i = 0; i < UCHAR_MAX + 1; i++) {
3347 Py_XDECREF(characters[i]);
3348 characters[i] = NULL;
3349 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003350 Py_XDECREF(nullstring);
3351 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003352}
Barry Warsawa903ad982001-02-23 16:40:48 +00003353
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003354/*********************** Str Iterator ****************************/
3355
3356typedef struct {
3357 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003358 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003359 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3360} striterobject;
3361
3362static void
3363striter_dealloc(striterobject *it)
3364{
3365 _PyObject_GC_UNTRACK(it);
3366 Py_XDECREF(it->it_seq);
3367 PyObject_GC_Del(it);
3368}
3369
3370static int
3371striter_traverse(striterobject *it, visitproc visit, void *arg)
3372{
3373 Py_VISIT(it->it_seq);
3374 return 0;
3375}
3376
3377static PyObject *
3378striter_next(striterobject *it)
3379{
3380 PyStringObject *seq;
3381 PyObject *item;
3382
3383 assert(it != NULL);
3384 seq = it->it_seq;
3385 if (seq == NULL)
3386 return NULL;
3387 assert(PyString_Check(seq));
3388
3389 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00003390 item = PyInt_FromLong(
3391 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003392 if (item != NULL)
3393 ++it->it_index;
3394 return item;
3395 }
3396
3397 Py_DECREF(seq);
3398 it->it_seq = NULL;
3399 return NULL;
3400}
3401
3402static PyObject *
3403striter_len(striterobject *it)
3404{
3405 Py_ssize_t len = 0;
3406 if (it->it_seq)
3407 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
3408 return PyInt_FromSsize_t(len);
3409}
3410
Guido van Rossum49d6b072006-08-17 21:11:47 +00003411PyDoc_STRVAR(length_hint_doc,
3412 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003413
3414static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003415 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3416 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003417 {NULL, NULL} /* sentinel */
3418};
3419
3420PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003421 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00003422 "striterator", /* tp_name */
3423 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003424 0, /* tp_itemsize */
3425 /* methods */
3426 (destructor)striter_dealloc, /* tp_dealloc */
3427 0, /* tp_print */
3428 0, /* tp_getattr */
3429 0, /* tp_setattr */
3430 0, /* tp_compare */
3431 0, /* tp_repr */
3432 0, /* tp_as_number */
3433 0, /* tp_as_sequence */
3434 0, /* tp_as_mapping */
3435 0, /* tp_hash */
3436 0, /* tp_call */
3437 0, /* tp_str */
3438 PyObject_GenericGetAttr, /* tp_getattro */
3439 0, /* tp_setattro */
3440 0, /* tp_as_buffer */
3441 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3442 0, /* tp_doc */
3443 (traverseproc)striter_traverse, /* tp_traverse */
3444 0, /* tp_clear */
3445 0, /* tp_richcompare */
3446 0, /* tp_weaklistoffset */
3447 PyObject_SelfIter, /* tp_iter */
3448 (iternextfunc)striter_next, /* tp_iternext */
3449 striter_methods, /* tp_methods */
3450 0,
3451};
3452
3453static PyObject *
3454str_iter(PyObject *seq)
3455{
3456 striterobject *it;
3457
3458 if (!PyString_Check(seq)) {
3459 PyErr_BadInternalCall();
3460 return NULL;
3461 }
3462 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3463 if (it == NULL)
3464 return NULL;
3465 it->it_index = 0;
3466 Py_INCREF(seq);
3467 it->it_seq = (PyStringObject *)seq;
3468 _PyObject_GC_TRACK(it);
3469 return (PyObject *)it;
3470}