blob: ae2a977425b8cf01bdf3d29e4802fcc795c20b5c [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the cached empty string
   (null_strings) or a cached one-byte string (one_strings) is handed out
   instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of shared one-byte string objects, indexed by the byte value
   (masked with UCHAR_MAX).  Entries are filled lazily by the constructors
   below and stay NULL until the corresponding string is first created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string, or NULL until it is first created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), `size'
   denotes the number of characters to allocate, not counting any null
   terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.  The string in
   the `str' parameter does not have to be null-terminated.  (Therefore it is
   safe to construct a substring by calling
   `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
507static /*const*/ char *
508string_getbuffer(register PyObject *op)
509{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000510 char *s;
511 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000512 if (PyString_AsStringAndSize(op, &s, &len))
513 return NULL;
514 return s;
515}
516
Martin v. Löwis18e16552006-02-15 17:27:45 +0000517Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000518PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000520 if (PyUnicode_Check(op)) {
521 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
522 if (!op)
523 return -1;
524 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000525 if (!PyString_Check(op))
526 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000527 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528}
529
530/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000531PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000532{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000533 if (PyUnicode_Check(op)) {
534 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
535 if (!op)
536 return NULL;
537 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 if (!PyString_Check(op))
539 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000540 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000541}
542
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543int
544PyString_AsStringAndSize(register PyObject *obj,
545 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000547{
548 if (s == NULL) {
549 PyErr_BadInternalCall();
550 return -1;
551 }
552
553 if (!PyString_Check(obj)) {
554 if (PyUnicode_Check(obj)) {
555 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
556 if (obj == NULL)
557 return -1;
558 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000559 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000560 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000561 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000562 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000563 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000564 return -1;
565 }
566 }
567
568 *s = PyString_AS_STRING(obj);
569 if (len != NULL)
570 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000571 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000572 PyErr_SetString(PyExc_TypeError,
573 "expected string without null bytes");
574 return -1;
575 }
576 return 0;
577}
578
Thomas Wouters477c8d52006-05-27 19:21:47 +0000579/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000580/* Methods */
581
Thomas Wouters477c8d52006-05-27 19:21:47 +0000582#define STRINGLIB_CHAR char
583
584#define STRINGLIB_CMP memcmp
585#define STRINGLIB_LEN PyString_GET_SIZE
586#define STRINGLIB_NEW PyString_FromStringAndSize
587#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000588/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000589
590#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000591#define STRINGLIB_CHECK_EXACT PyString_CheckExact
592#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000593
594#include "stringlib/fastsearch.h"
595
596#include "stringlib/count.h"
597#include "stringlib/find.h"
598#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000599#include "stringlib/ctype.h"
600#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000601
602
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603PyObject *
604PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000607 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000608 Py_ssize_t length = Py_Size(op);
609 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000610 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000611 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000612 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000613 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000614 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000615 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000616 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000617 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000618 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619 }
620 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000621 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000622 register Py_UNICODE c;
623 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000624 int quote;
625
Guido van Rossum98297ee2007-11-06 21:34:58 +0000626 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000627 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000628 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000629 char *test, *start;
630 start = PyString_AS_STRING(op);
631 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000632 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000633 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000634 goto decided;
635 }
636 else if (*test == '\'')
637 quote = '"';
638 }
639 decided:
640 ;
641 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000642
Guido van Rossum98297ee2007-11-06 21:34:58 +0000643 *p++ = 'b', *p++ = quote;
644 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000645 /* There's at least enough room for a hex escape
646 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000647 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000649 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000651 else if (c == '\t')
652 *p++ = '\\', *p++ = 't';
653 else if (c == '\n')
654 *p++ = '\\', *p++ = 'n';
655 else if (c == '\r')
656 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000657 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000658 *p++ = '\\';
659 *p++ = 'x';
660 *p++ = hexdigits[(c & 0xf0) >> 4];
661 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000662 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000663 else
664 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000666 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000667 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000669 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
670 Py_DECREF(v);
671 return NULL;
672 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000673 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675}
676
Guido van Rossum189f1df2001-05-01 16:51:53 +0000677static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000678string_repr(PyObject *op)
679{
680 return PyString_Repr(op, 1);
681}
682
683static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000684string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000686 if (Py_BytesWarningFlag) {
687 if (PyErr_WarnEx(PyExc_BytesWarning,
688 "str() on a bytes instance", 1))
689 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000690 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000691 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000692}
693
Martin v. Löwis18e16552006-02-15 17:27:45 +0000694static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000695string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000697 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698}
699
Guido van Rossum98297ee2007-11-06 21:34:58 +0000700/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000702string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000704 Py_ssize_t size;
705 Py_buffer va, vb;
706 PyObject *result = NULL;
707
708 va.len = -1;
709 vb.len = -1;
710 if (_getbuffer(a, &va) < 0 ||
711 _getbuffer(b, &vb) < 0) {
712 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
713 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
714 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000715 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000716
Guido van Rossum98297ee2007-11-06 21:34:58 +0000717 /* Optimize end cases */
718 if (va.len == 0 && PyString_CheckExact(b)) {
719 result = b;
720 Py_INCREF(result);
721 goto done;
722 }
723 if (vb.len == 0 && PyString_CheckExact(a)) {
724 result = a;
725 Py_INCREF(result);
726 goto done;
727 }
728
729 size = va.len + vb.len;
730 if (size < 0) {
731 PyErr_NoMemory();
732 goto done;
733 }
734
735 result = PyString_FromStringAndSize(NULL, size);
736 if (result != NULL) {
737 memcpy(PyString_AS_STRING(result), va.buf, va.len);
738 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
739 }
740
741 done:
742 if (va.len != -1)
743 PyObject_ReleaseBuffer(a, &va);
744 if (vb.len != -1)
745 PyObject_ReleaseBuffer(b, &vb);
746 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747}
748
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000749static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000750string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000752 register Py_ssize_t i;
753 register Py_ssize_t j;
754 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000755 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000756 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000757 if (n < 0)
758 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000759 /* watch out for overflows: the size can overflow int,
760 * and the # of bytes needed can overflow size_t
761 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000762 size = Py_Size(a) * n;
763 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000764 PyErr_SetString(PyExc_OverflowError,
765 "repeated string is too long");
766 return NULL;
767 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000768 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000769 Py_INCREF(a);
770 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771 }
Tim Peterse7c05322004-06-27 17:24:49 +0000772 nbytes = (size_t)size;
773 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000774 PyErr_SetString(PyExc_OverflowError,
775 "repeated string is too long");
776 return NULL;
777 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000778 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000779 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000780 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000781 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000782 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000783 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000784 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000785 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000786 memset(op->ob_sval, a->ob_sval[0] , n);
787 return (PyObject *) op;
788 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000789 i = 0;
790 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000791 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
792 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000793 }
794 while (i < size) {
795 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000796 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000797 i += j;
798 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000799 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Guido van Rossum9284a572000-03-07 15:53:43 +0000802static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000804{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000805 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
806 if (ival == -1 && PyErr_Occurred()) {
807 Py_buffer varg;
808 int pos;
809 PyErr_Clear();
810 if (_getbuffer(arg, &varg) < 0)
811 return -1;
812 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
813 varg.buf, varg.len, 0);
814 PyObject_ReleaseBuffer(arg, &varg);
815 return pos >= 0;
816 }
817 if (ival < 0 || ival >= 256) {
818 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
819 return -1;
820 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000821
Guido van Rossum98297ee2007-11-06 21:34:58 +0000822 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
823}
824
825static PyObject *
826string_item(PyStringObject *a, register Py_ssize_t i)
827{
828 if (i < 0 || i >= Py_Size(a)) {
829 PyErr_SetString(PyExc_IndexError, "string index out of range");
830 return NULL;
831 }
832 return PyInt_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000833}
834
Martin v. Löwiscd353062001-05-24 16:56:35 +0000835static PyObject*
836string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000838 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000839 Py_ssize_t len_a, len_b;
840 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000841 PyObject *result;
842
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000843 /* Make sure both arguments are strings. */
844 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000845 if (Py_BytesWarningFlag && (op == Py_EQ) &&
846 (PyObject_IsInstance((PyObject*)a,
847 (PyObject*)&PyUnicode_Type) ||
848 PyObject_IsInstance((PyObject*)b,
849 (PyObject*)&PyUnicode_Type))) {
850 if (PyErr_WarnEx(PyExc_BytesWarning,
851 "Comparsion between bytes and string", 1))
852 return NULL;
853 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 result = Py_NotImplemented;
855 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000856 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000857 if (a == b) {
858 switch (op) {
859 case Py_EQ:case Py_LE:case Py_GE:
860 result = Py_True;
861 goto out;
862 case Py_NE:case Py_LT:case Py_GT:
863 result = Py_False;
864 goto out;
865 }
866 }
867 if (op == Py_EQ) {
868 /* Supporting Py_NE here as well does not save
869 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000870 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000871 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000872 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000873 result = Py_True;
874 } else {
875 result = Py_False;
876 }
877 goto out;
878 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000880 min_len = (len_a < len_b) ? len_a : len_b;
881 if (min_len > 0) {
882 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
883 if (c==0)
884 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000885 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000886 c = 0;
887 if (c == 0)
888 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
889 switch (op) {
890 case Py_LT: c = c < 0; break;
891 case Py_LE: c = c <= 0; break;
892 case Py_EQ: assert(0); break; /* unreachable */
893 case Py_NE: c = c != 0; break;
894 case Py_GT: c = c > 0; break;
895 case Py_GE: c = c >= 0; break;
896 default:
897 result = Py_NotImplemented;
898 goto out;
899 }
900 result = c ? Py_True : Py_False;
901 out:
902 Py_INCREF(result);
903 return result;
904}
905
906int
907_PyString_Eq(PyObject *o1, PyObject *o2)
908{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000909 PyStringObject *a = (PyStringObject*) o1;
910 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000911 return Py_Size(a) == Py_Size(b)
912 && *a->ob_sval == *b->ob_sval
913 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914}
915
Guido van Rossum9bfef441993-03-29 10:43:31 +0000916static long
Fred Drakeba096332000-07-09 07:04:36 +0000917string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000918{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000919 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000920 register unsigned char *p;
921 register long x;
922
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 if (a->ob_shash != -1)
924 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000925 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 p = (unsigned char *) a->ob_sval;
927 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000928 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000929 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000930 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000931 if (x == -1)
932 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000933 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 return x;
935}
936
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000937static PyObject*
938string_subscript(PyStringObject* self, PyObject* item)
939{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000940 if (PyIndex_Check(item)) {
941 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000942 if (i == -1 && PyErr_Occurred())
943 return NULL;
944 if (i < 0)
945 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000946 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000947 PyErr_SetString(PyExc_IndexError,
948 "string index out of range");
949 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000950 }
951 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000952 }
953 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000954 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000955 char* source_buf;
956 char* result_buf;
957 PyObject* result;
958
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 PyString_GET_SIZE(self),
961 &start, &stop, &step, &slicelength) < 0) {
962 return NULL;
963 }
964
965 if (slicelength <= 0) {
966 return PyString_FromStringAndSize("", 0);
967 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000968 else if (start == 0 && step == 1 &&
969 slicelength == PyString_GET_SIZE(self) &&
970 PyString_CheckExact(self)) {
971 Py_INCREF(self);
972 return (PyObject *)self;
973 }
974 else if (step == 1) {
975 return PyString_FromStringAndSize(
976 PyString_AS_STRING(self) + start,
977 slicelength);
978 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000979 else {
980 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000981 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000982 if (result_buf == NULL)
983 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000984
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000985 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000986 cur += step, i++) {
987 result_buf[i] = source_buf[cur];
988 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000989
990 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000991 slicelength);
992 PyMem_Free(result_buf);
993 return result;
994 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000996 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000997 PyErr_Format(PyExc_TypeError,
998 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000999 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001000 return NULL;
1001 }
1002}
1003
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001004static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001005string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001006{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001007 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
1008 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001009}
1010
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001012 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001013 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +00001015 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +00001016 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001017 0, /*sq_ass_item*/
1018 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001019 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020};
1021
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001022static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001023 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001024 (binaryfunc)string_subscript,
1025 0,
1026};
1027
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001028static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001029 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001030 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001031};
1032
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033
/* Which end(s) of the object a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip type i: the format string with its three-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001042
Thomas Wouters477c8d52006-05-27 19:21:47 +00001043
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly; memcmp() handles the bytes in between.

   Don't call if length < 2 (the memcmp length would be negative).

   Every argument is parenthesized so that expression arguments expand
   correctly; note that arguments are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                    \
    ((target)[(offset)] == (pattern)[0] &&                                  \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1049
1050
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits, capped at MAX_PREALLOC.
   5 splits gives 6 elements.
   The argument is parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1063
/* Append the bytes data[left:right] to the result list being built.

   Relies on names from the enclosing function: `str` (scratch PyObject *),
   `list` (the result list), `count` (items stored so far) and an `onError`
   label that is jumped to on failure.  The first MAX_PREALLOC items are
   stored directly into the preallocated slots; later ones are appended.

   Wrapped in do { } while (0) so that the macro behaves as a single
   statement and must be followed by a semicolon. */
#define SPLIT_ADD(data, left, right) do {                               \
        str = PyString_FromStringAndSize((data) + (left),               \
                                         (right) - (left));             \
        if (str == NULL)                                                \
            goto onError;                                               \
        if (count < MAX_PREALLOC) {                                     \
            /* The preallocated slot takes over the reference. */       \
            PyList_SET_ITEM(list, count, str);                          \
        } else {                                                        \
            if (PyList_Append(list, str)) {                             \
                Py_DECREF(str);                                         \
                goto onError;                                           \
            }                                                           \
            else                                                        \
                Py_DECREF(str);                                         \
        }                                                               \
        count++;                                                        \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080
/* Always force the list to the expected size: overwrite the list's stored
   size with `count`, a variable taken from the enclosing function.
   The expansion is parenthesized so it stays a single expression. */
#define FIX_PREALLOC_SIZE(list) (Py_Size(list) = count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083
/* Cursor helpers for whitespace splitting.  `i` must be a modifiable index
   variable; it is advanced (SKIP_*) or moved backwards (RSKIP_*) while
   s[i] is / is not whitespace according to ISSPACE and the index is still
   inside the bounds.  Arguments are parenthesized so expression arguments
   expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !ISSPACE((s)[i])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001088
1089Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001090split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001092 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001093 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001094 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095
1096 if (list == NULL)
1097 return NULL;
1098
Thomas Wouters477c8d52006-05-27 19:21:47 +00001099 i = j = 0;
1100
1101 while (maxsplit-- > 0) {
1102 SKIP_SPACE(s, i, len);
1103 if (i==len) break;
1104 j = i; i++;
1105 SKIP_NONSPACE(s, i, len);
1106 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001107 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001108
1109 if (i < len) {
1110 /* Only occurs when maxsplit was reached */
1111 /* Skip any remaining whitespace and copy to end of string */
1112 SKIP_SPACE(s, i, len);
1113 if (i != len)
1114 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001115 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001116 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001118 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119 Py_DECREF(list);
1120 return NULL;
1121}
1122
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001124split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001125{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001126 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001127 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001128 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001129
1130 if (list == NULL)
1131 return NULL;
1132
Thomas Wouters477c8d52006-05-27 19:21:47 +00001133 i = j = 0;
1134 while ((j < len) && (maxcount-- > 0)) {
1135 for(; j<len; j++) {
1136 /* I found that using memchr makes no difference */
1137 if (s[j] == ch) {
1138 SPLIT_ADD(s, i, j);
1139 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001140 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001141 }
1142 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001143 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001144 if (i <= len) {
1145 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001146 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001147 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001148 return list;
1149
1150 onError:
1151 Py_DECREF(list);
1152 return NULL;
1153}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001155PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001156"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001158Return a list of the sections in B, using sep as the delimiter.\n\
1159If sep is not given, B is split on ASCII whitespace characters\n\
1160(space, tab, return, newline, formfeed, vertical tab).\n\
1161If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001162
1163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001164string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001166 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001167 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001169 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001170 PyObject *list, *str, *subobj = Py_None;
1171#ifdef USE_FAST
1172 Py_ssize_t pos;
1173#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001175 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001177 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001178 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 return split_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001181 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001182 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001183 sub = vsub.buf;
1184 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001185
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 if (n == 0) {
1187 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001188 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001189 return NULL;
1190 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001191 else if (n == 1) {
1192 char ch = sub[0];
1193 PyObject_ReleaseBuffer(subobj, &vsub);
1194 return split_char(s, len, ch, maxsplit);
1195 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196
Thomas Wouters477c8d52006-05-27 19:21:47 +00001197 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001198 if (list == NULL) {
1199 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001201 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202
Thomas Wouters477c8d52006-05-27 19:21:47 +00001203#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001204 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001205 while (maxsplit-- > 0) {
1206 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1207 if (pos < 0)
1208 break;
1209 j = i+pos;
1210 SPLIT_ADD(s, i, j);
1211 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001212 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001213#else
1214 i = j = 0;
1215 while ((j+n <= len) && (maxsplit-- > 0)) {
1216 for (; j+n <= len; j++) {
1217 if (Py_STRING_MATCH(s, j, sub, n)) {
1218 SPLIT_ADD(s, i, j);
1219 i = j = j + n;
1220 break;
1221 }
1222 }
1223 }
1224#endif
1225 SPLIT_ADD(s, i, len);
1226 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001227 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228 return list;
1229
Thomas Wouters477c8d52006-05-27 19:21:47 +00001230 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001232 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233 return NULL;
1234}
1235
Thomas Wouters477c8d52006-05-27 19:21:47 +00001236PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001237"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001238\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001239Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001240the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001241found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001242
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001243static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001244string_partition(PyStringObject *self, PyObject *sep_obj)
1245{
1246 const char *sep;
1247 Py_ssize_t sep_len;
1248
1249 if (PyString_Check(sep_obj)) {
1250 sep = PyString_AS_STRING(sep_obj);
1251 sep_len = PyString_GET_SIZE(sep_obj);
1252 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001253 else if (PyUnicode_Check(sep_obj))
1254 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001255 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1256 return NULL;
1257
1258 return stringlib_partition(
1259 (PyObject*) self,
1260 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1261 sep_obj, sep, sep_len
1262 );
1263}
1264
1265PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001266"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001267\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001268Searches for the separator sep in B, starting at the end of B,\n\
1269and returns the part before it, the separator itself, and the\n\
1270part after it. If the separator is not found, returns two empty\n\
1271bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001272
1273static PyObject *
1274string_rpartition(PyStringObject *self, PyObject *sep_obj)
1275{
1276 const char *sep;
1277 Py_ssize_t sep_len;
1278
1279 if (PyString_Check(sep_obj)) {
1280 sep = PyString_AS_STRING(sep_obj);
1281 sep_len = PyString_GET_SIZE(sep_obj);
1282 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001283 else if (PyUnicode_Check(sep_obj))
1284 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1286 return NULL;
1287
1288 return stringlib_rpartition(
1289 (PyObject*) self,
1290 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1291 sep_obj, sep, sep_len
1292 );
1293}
1294
1295Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001296rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001297{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001298 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001299 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001300 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001301
1302 if (list == NULL)
1303 return NULL;
1304
Thomas Wouters477c8d52006-05-27 19:21:47 +00001305 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001306
Thomas Wouters477c8d52006-05-27 19:21:47 +00001307 while (maxsplit-- > 0) {
1308 RSKIP_SPACE(s, i);
1309 if (i<0) break;
1310 j = i; i--;
1311 RSKIP_NONSPACE(s, i);
1312 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001313 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001315 /* Only occurs when maxsplit was reached. Skip any remaining
1316 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001317 RSKIP_SPACE(s, i);
1318 if (i >= 0)
1319 SPLIT_ADD(s, 0, i + 1);
1320
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001321 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 FIX_PREALLOC_SIZE(list);
1323 if (PyList_Reverse(list) < 0)
1324 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001325 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001326 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001327 Py_DECREF(list);
1328 return NULL;
1329}
1330
Thomas Wouters477c8d52006-05-27 19:21:47 +00001331Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001332rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001333{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001334 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001335 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001336 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337
1338 if (list == NULL)
1339 return NULL;
1340
Thomas Wouters477c8d52006-05-27 19:21:47 +00001341 i = j = len - 1;
1342 while ((i >= 0) && (maxcount-- > 0)) {
1343 for (; i >= 0; i--) {
1344 if (s[i] == ch) {
1345 SPLIT_ADD(s, i + 1, j + 1);
1346 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001348 }
1349 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001350 }
1351 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001352 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001353 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001354 FIX_PREALLOC_SIZE(list);
1355 if (PyList_Reverse(list) < 0)
1356 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001357 return list;
1358
1359 onError:
1360 Py_DECREF(list);
1361 return NULL;
1362}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001363
1364PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001366\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367Return a list of the sections in B, using sep as the delimiter,\n\
1368starting at the end of B and working to the front.\n\
1369If sep is not given, B is split on ASCII whitespace characters\n\
1370(space, tab, return, newline, formfeed, vertical tab).\n\
1371If maxsplit is given, at most maxsplit splits are done.");
1372
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001373
1374static PyObject *
1375string_rsplit(PyStringObject *self, PyObject *args)
1376{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001377 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001378 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001379 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001380 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001381 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001382
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001383 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001384 return NULL;
1385 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001386 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387 if (subobj == Py_None)
1388 return rsplit_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001389 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001390 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001391 sub = vsub.buf;
1392 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001393
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001394 if (n == 0) {
1395 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001396 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001397 return NULL;
1398 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001399 else if (n == 1) {
1400 char ch = sub[0];
1401 PyObject_ReleaseBuffer(subobj, &vsub);
1402 return rsplit_char(s, len, ch, maxsplit);
1403 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001404
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001406 if (list == NULL) {
1407 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001408 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001409 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001410
1411 j = len;
1412 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001413
Thomas Wouters477c8d52006-05-27 19:21:47 +00001414 while ( (i >= 0) && (maxsplit-- > 0) ) {
1415 for (; i>=0; i--) {
1416 if (Py_STRING_MATCH(s, i, sub, n)) {
1417 SPLIT_ADD(s, i + n, j);
1418 j = i;
1419 i -= n;
1420 break;
1421 }
1422 }
1423 }
1424 SPLIT_ADD(s, 0, j);
1425 FIX_PREALLOC_SIZE(list);
1426 if (PyList_Reverse(list) < 0)
1427 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001428 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001429 return list;
1430
Thomas Wouters477c8d52006-05-27 19:21:47 +00001431onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001432 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001433 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001434 return NULL;
1435}
1436
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001437#undef SPLIT_ADD
1438#undef MAX_PREALLOC
1439#undef PREALLOC_SIZE
1440
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001442PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001443"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001445Concatenates any number of bytes objects, with B in between each pair.\n\
1446Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001449string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
1451 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001452 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001455 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001456 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001457 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001458 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459
Tim Peters19fe14e2001-01-19 03:03:47 +00001460 seq = PySequence_Fast(orig, "");
1461 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 return NULL;
1463 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001464
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001465 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 if (seqlen == 0) {
1467 Py_DECREF(seq);
1468 return PyString_FromString("");
1469 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001471 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001472 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001473 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001474 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001475 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001476 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001478
Raymond Hettinger674f2412004-08-23 23:23:54 +00001479 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001481 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001482 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001484 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001485 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001487 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001488 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001489 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001490 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001491 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001492 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001493 Py_DECREF(seq);
1494 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001495 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001496 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001497 if (i != 0)
1498 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001499 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001500 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001502 Py_DECREF(seq);
1503 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001505 }
1506
1507 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001508 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001509 if (res == NULL) {
1510 Py_DECREF(seq);
1511 return NULL;
1512 }
1513
1514 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001515 /* I'm not worried about a PyBytes item growing because there's
1516 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001517 p = PyString_AS_STRING(res);
1518 for (i = 0; i < seqlen; ++i) {
1519 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001520 char *q;
1521 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001522 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001523 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001524 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001525 item = PySequence_Fast_GET_ITEM(seq, i);
1526 n = Py_Size(item);
1527 if (PyString_Check(item))
1528 q = PyString_AS_STRING(item);
1529 else
1530 q = PyBytes_AS_STRING(item);
1531 Py_MEMCPY(p, q, n);
1532 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001534
Jeremy Hylton49048292000-07-11 03:28:17 +00001535 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537}
1538
Tim Peters52e155e2001-06-16 05:42:57 +00001539PyObject *
1540_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001541{
Tim Petersa7259592001-06-16 05:11:17 +00001542 assert(sep != NULL && PyString_Check(sep));
1543 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001544 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001545}
1546
Thomas Wouters477c8d52006-05-27 19:21:47 +00001547Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001548string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001549{
1550 if (*end > len)
1551 *end = len;
1552 else if (*end < 0)
1553 *end += len;
1554 if (*end < 0)
1555 *end = 0;
1556 if (*start < 0)
1557 *start += len;
1558 if (*start < 0)
1559 *start = 0;
1560}
1561
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001563string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 const char *sub;
1567 Py_ssize_t sub_len;
1568 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001569 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570
Christian Heimes9cd17752007-11-18 19:35:23 +00001571 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1572 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001574 /* To support None in "start" and "end" arguments, meaning
1575 the same as if they were not passed.
1576 */
1577 if (obj_start != Py_None)
1578 if (!_PyEval_SliceIndex(obj_start, &start))
1579 return -2;
1580 if (obj_end != Py_None)
1581 if (!_PyEval_SliceIndex(obj_end, &end))
1582 return -2;
1583
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 if (PyString_Check(subobj)) {
1585 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001586 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001587 }
1588 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001589 return PyUnicode_Find(
1590 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001591 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001592 /* XXX - the "expected a character buffer object" is pretty
1593 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594 return -2;
1595
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 if (dir > 0)
1597 return stringlib_find_slice(
1598 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1599 sub, sub_len, start, end);
1600 else
1601 return stringlib_rfind_slice(
1602 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1603 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604}
1605
1606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001608"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609\n\
1610Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001611such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612arguments start and end are interpreted as in slice notation.\n\
1613\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001614Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615
1616static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001617string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001619 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 if (result == -2)
1621 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001622 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623}
1624
1625
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001626PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001627"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001629Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630
1631static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001632string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001634 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 if (result == -2)
1636 return NULL;
1637 if (result == -1) {
1638 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001639 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 return NULL;
1641 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001642 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643}
1644
1645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001646PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001647"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001649Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001650such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651arguments start and end are interpreted as in slice notation.\n\
1652\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001653Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654
1655static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001656string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001658 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659 if (result == -2)
1660 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001661 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662}
1663
1664
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001665PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001666"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001668Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669
1670static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001671string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001673 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674 if (result == -2)
1675 return NULL;
1676 if (result == -1) {
1677 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001678 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679 return NULL;
1680 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682}
1683
1684
Thomas Wouters477c8d52006-05-27 19:21:47 +00001685Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001686do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1687{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001688 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001689 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001690 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001691 char *sep;
1692 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001693 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001694
Guido van Rossum98297ee2007-11-06 21:34:58 +00001695 if (_getbuffer(sepobj, &vsep) < 0)
1696 return NULL;
1697 sep = vsep.buf;
1698 seplen = vsep.len;
1699
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001700 i = 0;
1701 if (striptype != RIGHTSTRIP) {
1702 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1703 i++;
1704 }
1705 }
1706
1707 j = len;
1708 if (striptype != LEFTSTRIP) {
1709 do {
1710 j--;
1711 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1712 j++;
1713 }
1714
Guido van Rossum98297ee2007-11-06 21:34:58 +00001715 PyObject_ReleaseBuffer(sepobj, &vsep);
1716
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001717 if (i == 0 && j == len && PyString_CheckExact(self)) {
1718 Py_INCREF(self);
1719 return (PyObject*)self;
1720 }
1721 else
1722 return PyString_FromStringAndSize(s+i, j-i);
1723}
1724
1725
Thomas Wouters477c8d52006-05-27 19:21:47 +00001726Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001727do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728{
1729 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001732 i = 0;
1733 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001734 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735 i++;
1736 }
1737 }
1738
1739 j = len;
1740 if (striptype != LEFTSTRIP) {
1741 do {
1742 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001743 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 j++;
1745 }
1746
Tim Peters8fa5dd02001-09-12 02:18:30 +00001747 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748 Py_INCREF(self);
1749 return (PyObject*)self;
1750 }
1751 else
1752 return PyString_FromStringAndSize(s+i, j-i);
1753}
1754
1755
Thomas Wouters477c8d52006-05-27 19:21:47 +00001756Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001757do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1758{
1759 PyObject *sep = NULL;
1760
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001761 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001762 return NULL;
1763
1764 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001765 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001766 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001767 return do_strip(self, striptype);
1768}
1769
1770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001772"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001774Strip leading and trailing bytes contained in the argument.\n\
1775If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001777string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001779 if (PyTuple_GET_SIZE(args) == 0)
1780 return do_strip(self, BOTHSTRIP); /* Common case */
1781 else
1782 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783}
1784
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001787"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001789Strip leading bytes contained in the argument.\n\
1790If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001792string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001794 if (PyTuple_GET_SIZE(args) == 0)
1795 return do_strip(self, LEFTSTRIP); /* Common case */
1796 else
1797 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798}
1799
1800
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001801PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001802"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001804Strip trailing bytes contained in the argument.\n\
1805If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001807string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001809 if (PyTuple_GET_SIZE(args) == 0)
1810 return do_strip(self, RIGHTSTRIP); /* Common case */
1811 else
1812 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813}
1814
1815
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001816PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001817"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001819Return the number of non-overlapping occurrences of substring sub in\n\
1820string S[start:end]. Optional arguments start and end are interpreted\n\
1821as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822
1823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001824string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001826 PyObject *sub_obj;
1827 const char *str = PyString_AS_STRING(self), *sub;
1828 Py_ssize_t sub_len;
1829 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830
Thomas Wouters477c8d52006-05-27 19:21:47 +00001831 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1832 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001834
Thomas Wouters477c8d52006-05-27 19:21:47 +00001835 if (PyString_Check(sub_obj)) {
1836 sub = PyString_AS_STRING(sub_obj);
1837 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001838 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001839 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001840 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001841 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001842 if (count == -1)
1843 return NULL;
1844 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00001845 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001846 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001847 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848 return NULL;
1849
Thomas Wouters477c8d52006-05-27 19:21:47 +00001850 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001851
Thomas Wouters477c8d52006-05-27 19:21:47 +00001852 return PyInt_FromSsize_t(
1853 stringlib_count(str + start, end - start, sub, sub_len)
1854 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855}
1856
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001858PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001859"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001861Return a copy of B, where all characters occurring in the\n\
1862optional argument deletechars are removed, and the remaining\n\
1863characters have been mapped through the given translation\n\
1864table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865
1866static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001867string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001870 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001871 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001873 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875 PyObject *result;
1876 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001879 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001880 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001882
1883 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001884 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 tablen = PyString_GET_SIZE(tableobj);
1886 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001887 else if (tableobj == Py_None) {
1888 table = NULL;
1889 tablen = 256;
1890 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001892 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893 parameter; instead a mapping to None will cause characters
1894 to be deleted. */
1895 if (delobj != NULL) {
1896 PyErr_SetString(PyExc_TypeError,
1897 "deletions are implemented differently for unicode");
1898 return NULL;
1899 }
1900 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1901 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001902 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904
Martin v. Löwis00b61272002-12-12 20:03:19 +00001905 if (tablen != 256) {
1906 PyErr_SetString(PyExc_ValueError,
1907 "translation table must be 256 characters long");
1908 return NULL;
1909 }
1910
Guido van Rossum4c08d552000-03-10 22:55:18 +00001911 if (delobj != NULL) {
1912 if (PyString_Check(delobj)) {
1913 del_table = PyString_AS_STRING(delobj);
1914 dellen = PyString_GET_SIZE(delobj);
1915 }
1916 else if (PyUnicode_Check(delobj)) {
1917 PyErr_SetString(PyExc_TypeError,
1918 "deletions are implemented differently for unicode");
1919 return NULL;
1920 }
1921 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1922 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 }
1924 else {
1925 del_table = NULL;
1926 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927 }
1928
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001929 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930 result = PyString_FromStringAndSize((char *)NULL, inlen);
1931 if (result == NULL)
1932 return NULL;
1933 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001934 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935
Guido van Rossumd8faa362007-04-27 19:54:29 +00001936 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937 /* If no deletions are required, use faster code */
1938 for (i = inlen; --i >= 0; ) {
1939 c = Py_CHARMASK(*input++);
1940 if (Py_CHARMASK((*output++ = table[c])) != c)
1941 changed = 1;
1942 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001943 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 return result;
1945 Py_DECREF(result);
1946 Py_INCREF(input_obj);
1947 return input_obj;
1948 }
1949
Guido van Rossumd8faa362007-04-27 19:54:29 +00001950 if (table == NULL) {
1951 for (i = 0; i < 256; i++)
1952 trans_table[i] = Py_CHARMASK(i);
1953 } else {
1954 for (i = 0; i < 256; i++)
1955 trans_table[i] = Py_CHARMASK(table[i]);
1956 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957
1958 for (i = 0; i < dellen; i++)
1959 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1960
1961 for (i = inlen; --i >= 0; ) {
1962 c = Py_CHARMASK(*input++);
1963 if (trans_table[c] != -1)
1964 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1965 continue;
1966 changed = 1;
1967 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001968 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969 Py_DECREF(result);
1970 Py_INCREF(input_obj);
1971 return input_obj;
1972 }
1973 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001974 if (inlen > 0)
1975 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976 return result;
1977}
1978
1979
/* Search direction selectors.  NOTE(review): presumably these are the
   values passed as the `direction' argument of findstring() and
   countstring(), which only test its sign -- confirm at the call sites. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Look for byte c in the first target_len bytes of target using
   memchr(); evaluates to a char * to the first hit, or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987
Thomas Wouters477c8d52006-05-27 19:21:47 +00001988/* String ops must return a string. */
1989/* If the object is subclass of string, create a copy */
1990Py_LOCAL(PyStringObject *)
1991return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001993 if (PyString_CheckExact(self)) {
1994 Py_INCREF(self);
1995 return self;
1996 }
1997 return (PyStringObject *)PyString_FromStringAndSize(
1998 PyString_AS_STRING(self),
1999 PyString_GET_SIZE(self));
2000}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
Thomas Wouters477c8d52006-05-27 19:21:47 +00002002Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002003countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002004{
2005 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002006 const char *start=target;
2007 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
Thomas Wouters477c8d52006-05-27 19:21:47 +00002009 while ( (start=findchar(start, end-start, c)) != NULL ) {
2010 count++;
2011 if (count >= maxcount)
2012 break;
2013 start += 1;
2014 }
2015 return count;
2016}
2017
2018Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002019findstring(const char *target, Py_ssize_t target_len,
2020 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002021 Py_ssize_t start,
2022 Py_ssize_t end,
2023 int direction)
2024{
2025 if (start < 0) {
2026 start += target_len;
2027 if (start < 0)
2028 start = 0;
2029 }
2030 if (end > target_len) {
2031 end = target_len;
2032 } else if (end < 0) {
2033 end += target_len;
2034 if (end < 0)
2035 end = 0;
2036 }
2037
2038 /* zero-length substrings always match at the first attempt */
2039 if (pattern_len == 0)
2040 return (direction > 0) ? start : end;
2041
2042 end -= pattern_len;
2043
2044 if (direction < 0) {
2045 for (; end >= start; end--)
2046 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2047 return end;
2048 } else {
2049 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002050 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002051 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052 }
2053 return -1;
2054}
2055
Thomas Wouters477c8d52006-05-27 19:21:47 +00002056Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002057countstring(const char *target, Py_ssize_t target_len,
2058 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002059 Py_ssize_t start,
2060 Py_ssize_t end,
2061 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002063 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064
Thomas Wouters477c8d52006-05-27 19:21:47 +00002065 if (start < 0) {
2066 start += target_len;
2067 if (start < 0)
2068 start = 0;
2069 }
2070 if (end > target_len) {
2071 end = target_len;
2072 } else if (end < 0) {
2073 end += target_len;
2074 if (end < 0)
2075 end = 0;
2076 }
2077
2078 /* zero-length substrings match everywhere */
2079 if (pattern_len == 0 || maxcount == 0) {
2080 if (target_len+1 < maxcount)
2081 return target_len+1;
2082 return maxcount;
2083 }
2084
2085 end -= pattern_len;
2086 if (direction < 0) {
2087 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002088 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002089 count++;
2090 if (--maxcount <= 0) break;
2091 end -= pattern_len-1;
2092 }
2093 } else {
2094 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002095 if (Py_STRING_MATCH(target, start,
2096 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002097 count++;
2098 if (--maxcount <= 0)
2099 break;
2100 start += pattern_len-1;
2101 }
2102 }
2103 return count;
2104}
2105
2106
2107/* Algorithms for different cases of string replacement */
2108
2109/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2110Py_LOCAL(PyStringObject *)
2111replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002112 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002113 Py_ssize_t maxcount)
2114{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002115 char *self_s, *result_s;
2116 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002117 Py_ssize_t count, i, product;
2118 PyStringObject *result;
2119
2120 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002121
Thomas Wouters477c8d52006-05-27 19:21:47 +00002122 /* 1 at the end plus 1 after every character */
2123 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002124 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002125 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002126
Thomas Wouters477c8d52006-05-27 19:21:47 +00002127 /* Check for overflow */
2128 /* result_len = count * to_len + self_len; */
2129 product = count * to_len;
2130 if (product / to_len != count) {
2131 PyErr_SetString(PyExc_OverflowError,
2132 "replace string is too long");
2133 return NULL;
2134 }
2135 result_len = product + self_len;
2136 if (result_len < 0) {
2137 PyErr_SetString(PyExc_OverflowError,
2138 "replace string is too long");
2139 return NULL;
2140 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002141
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142 if (! (result = (PyStringObject *)
2143 PyString_FromStringAndSize(NULL, result_len)) )
2144 return NULL;
2145
2146 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002147 result_s = PyString_AS_STRING(result);
2148
2149 /* TODO: special case single character, which doesn't need memcpy */
2150
2151 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002152 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002153 result_s += to_len;
2154 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002155
Thomas Wouters477c8d52006-05-27 19:21:47 +00002156 for (i=0; i<count; i++) {
2157 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002158 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002159 result_s += to_len;
2160 }
2161
2162 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002163 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164
2165 return result;
2166}
2167
2168/* Special case for deleting a single character */
2169/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2170Py_LOCAL(PyStringObject *)
2171replace_delete_single_character(PyStringObject *self,
2172 char from_c, Py_ssize_t maxcount)
2173{
2174 char *self_s, *result_s;
2175 char *start, *next, *end;
2176 Py_ssize_t self_len, result_len;
2177 Py_ssize_t count;
2178 PyStringObject *result;
2179
2180 self_len = PyString_GET_SIZE(self);
2181 self_s = PyString_AS_STRING(self);
2182
2183 count = countchar(self_s, self_len, from_c, maxcount);
2184 if (count == 0) {
2185 return return_self(self);
2186 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002187
Thomas Wouters477c8d52006-05-27 19:21:47 +00002188 result_len = self_len - count; /* from_len == 1 */
2189 assert(result_len>=0);
2190
2191 if ( (result = (PyStringObject *)
2192 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2193 return NULL;
2194 result_s = PyString_AS_STRING(result);
2195
2196 start = self_s;
2197 end = self_s + self_len;
2198 while (count-- > 0) {
2199 next = findchar(start, end-start, from_c);
2200 if (next == NULL)
2201 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002202 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002203 result_s += (next-start);
2204 start = next+1;
2205 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002206 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002207
Thomas Wouters477c8d52006-05-27 19:21:47 +00002208 return result;
2209}
2210
2211/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2212
2213Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002214replace_delete_substring(PyStringObject *self,
2215 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002216 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002217 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002218 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002219 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002220 Py_ssize_t count, offset;
2221 PyStringObject *result;
2222
2223 self_len = PyString_GET_SIZE(self);
2224 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002225
2226 count = countstring(self_s, self_len,
2227 from_s, from_len,
2228 0, self_len, 1,
2229 maxcount);
2230
2231 if (count == 0) {
2232 /* no matches */
2233 return return_self(self);
2234 }
2235
2236 result_len = self_len - (count * from_len);
2237 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002238
Thomas Wouters477c8d52006-05-27 19:21:47 +00002239 if ( (result = (PyStringObject *)
2240 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2241 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242
Thomas Wouters477c8d52006-05-27 19:21:47 +00002243 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002244
Thomas Wouters477c8d52006-05-27 19:21:47 +00002245 start = self_s;
2246 end = self_s + self_len;
2247 while (count-- > 0) {
2248 offset = findstring(start, end-start,
2249 from_s, from_len,
2250 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 if (offset == -1)
2252 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002254
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002255 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002256
Thomas Wouters477c8d52006-05-27 19:21:47 +00002257 result_s += (next-start);
2258 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002260 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002261 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262}
2263
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2265Py_LOCAL(PyStringObject *)
2266replace_single_character_in_place(PyStringObject *self,
2267 char from_c, char to_c,
2268 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002270 char *self_s, *result_s, *start, *end, *next;
2271 Py_ssize_t self_len;
2272 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002273
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 /* The result string will be the same size */
2275 self_s = PyString_AS_STRING(self);
2276 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002277
Thomas Wouters477c8d52006-05-27 19:21:47 +00002278 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002279
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 if (next == NULL) {
2281 /* No matches; return the original string */
2282 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002284
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 /* Need to make a new string */
2286 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2287 if (result == NULL)
2288 return NULL;
2289 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002290 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002291
Thomas Wouters477c8d52006-05-27 19:21:47 +00002292 /* change everything in-place, starting with this one */
2293 start = result_s + (next-self_s);
2294 *start = to_c;
2295 start++;
2296 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002297
Thomas Wouters477c8d52006-05-27 19:21:47 +00002298 while (--maxcount > 0) {
2299 next = findchar(start, end-start, from_c);
2300 if (next == NULL)
2301 break;
2302 *next = to_c;
2303 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002304 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002305
Thomas Wouters477c8d52006-05-27 19:21:47 +00002306 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307}
2308
Thomas Wouters477c8d52006-05-27 19:21:47 +00002309/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2310Py_LOCAL(PyStringObject *)
2311replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002312 const char *from_s, Py_ssize_t from_len,
2313 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002314 Py_ssize_t maxcount)
2315{
2316 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002317 char *self_s;
2318 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002319 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002320
Thomas Wouters477c8d52006-05-27 19:21:47 +00002321 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322
Thomas Wouters477c8d52006-05-27 19:21:47 +00002323 self_s = PyString_AS_STRING(self);
2324 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002325
Thomas Wouters477c8d52006-05-27 19:21:47 +00002326 offset = findstring(self_s, self_len,
2327 from_s, from_len,
2328 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002329 if (offset == -1) {
2330 /* No matches; return the original string */
2331 return return_self(self);
2332 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002333
Thomas Wouters477c8d52006-05-27 19:21:47 +00002334 /* Need to make a new string */
2335 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2336 if (result == NULL)
2337 return NULL;
2338 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002339 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002340
Thomas Wouters477c8d52006-05-27 19:21:47 +00002341 /* change everything in-place, starting with this one */
2342 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002343 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002344 start += from_len;
2345 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002346
Thomas Wouters477c8d52006-05-27 19:21:47 +00002347 while ( --maxcount > 0) {
2348 offset = findstring(start, end-start,
2349 from_s, from_len,
2350 0, end-start, FORWARD);
2351 if (offset==-1)
2352 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002353 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002354 start += offset+from_len;
2355 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 return result;
2358}
2359
2360/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2361Py_LOCAL(PyStringObject *)
2362replace_single_character(PyStringObject *self,
2363 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 Py_ssize_t maxcount)
2366{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002367 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002368 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002369 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002370 Py_ssize_t count, product;
2371 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002372
Thomas Wouters477c8d52006-05-27 19:21:47 +00002373 self_s = PyString_AS_STRING(self);
2374 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002375
Thomas Wouters477c8d52006-05-27 19:21:47 +00002376 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002377 if (count == 0) {
2378 /* no matches, return unchanged */
2379 return return_self(self);
2380 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002381
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 /* use the difference between current and new, hence the "-1" */
2383 /* result_len = self_len + count * (to_len-1) */
2384 product = count * (to_len-1);
2385 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002386 PyErr_SetString(PyExc_OverflowError,
2387 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002388 return NULL;
2389 }
2390 result_len = self_len + product;
2391 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002392 PyErr_SetString(PyExc_OverflowError,
2393 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002394 return NULL;
2395 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002396
Thomas Wouters477c8d52006-05-27 19:21:47 +00002397 if ( (result = (PyStringObject *)
2398 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2399 return NULL;
2400 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002401
Thomas Wouters477c8d52006-05-27 19:21:47 +00002402 start = self_s;
2403 end = self_s + self_len;
2404 while (count-- > 0) {
2405 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002406 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002408
Thomas Wouters477c8d52006-05-27 19:21:47 +00002409 if (next == start) {
2410 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002411 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002412 result_s += to_len;
2413 start += 1;
2414 } else {
2415 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002416 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002417 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002418 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002419 result_s += to_len;
2420 start = next+1;
2421 }
2422 }
2423 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002424 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002425
Thomas Wouters477c8d52006-05-27 19:21:47 +00002426 return result;
2427}
2428
2429/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2430Py_LOCAL(PyStringObject *)
2431replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002432 const char *from_s, Py_ssize_t from_len,
2433 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002434 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002435 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002436 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002437 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002438 Py_ssize_t count, offset, product;
2439 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002440
Thomas Wouters477c8d52006-05-27 19:21:47 +00002441 self_s = PyString_AS_STRING(self);
2442 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002443
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 count = countstring(self_s, self_len,
2445 from_s, from_len,
2446 0, self_len, FORWARD, maxcount);
2447 if (count == 0) {
2448 /* no matches, return unchanged */
2449 return return_self(self);
2450 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002451
Thomas Wouters477c8d52006-05-27 19:21:47 +00002452 /* Check for overflow */
2453 /* result_len = self_len + count * (to_len-from_len) */
2454 product = count * (to_len-from_len);
2455 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002456 PyErr_SetString(PyExc_OverflowError,
2457 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002458 return NULL;
2459 }
2460 result_len = self_len + product;
2461 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002462 PyErr_SetString(PyExc_OverflowError,
2463 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002464 return NULL;
2465 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002466
Thomas Wouters477c8d52006-05-27 19:21:47 +00002467 if ( (result = (PyStringObject *)
2468 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2469 return NULL;
2470 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002471
Thomas Wouters477c8d52006-05-27 19:21:47 +00002472 start = self_s;
2473 end = self_s + self_len;
2474 while (count-- > 0) {
2475 offset = findstring(start, end-start,
2476 from_s, from_len,
2477 0, end-start, FORWARD);
2478 if (offset == -1)
2479 break;
2480 next = start+offset;
2481 if (next == start) {
2482 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002483 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002484 result_s += to_len;
2485 start += from_len;
2486 } else {
2487 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002488 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002489 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002490 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002491 result_s += to_len;
2492 start = next+from_len;
2493 }
2494 }
2495 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002496 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002497
Thomas Wouters477c8d52006-05-27 19:21:47 +00002498 return result;
2499}
2500
2501
2502Py_LOCAL(PyStringObject *)
2503replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002504 const char *from_s, Py_ssize_t from_len,
2505 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002506 Py_ssize_t maxcount)
2507{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002508 if (maxcount < 0) {
2509 maxcount = PY_SSIZE_T_MAX;
2510 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2511 /* nothing to do; return the original string */
2512 return return_self(self);
2513 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002514
Thomas Wouters477c8d52006-05-27 19:21:47 +00002515 if (maxcount == 0 ||
2516 (from_len == 0 && to_len == 0)) {
2517 /* nothing to do; return the original string */
2518 return return_self(self);
2519 }
2520
2521 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002522
Thomas Wouters477c8d52006-05-27 19:21:47 +00002523 if (from_len == 0) {
2524 /* insert the 'to' string everywhere. */
2525 /* >>> "Python".replace("", ".") */
2526 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002527 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002528 }
2529
2530 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2531 /* point for an empty self string to generate a non-empty string */
2532 /* Special case so the remaining code always gets a non-empty string */
2533 if (PyString_GET_SIZE(self) == 0) {
2534 return return_self(self);
2535 }
2536
2537 if (to_len == 0) {
2538 /* delete all occurances of 'from' string */
2539 if (from_len == 1) {
2540 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002541 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002542 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002543 return replace_delete_substring(self, from_s,
2544 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002545 }
2546 }
2547
2548 /* Handle special case where both strings have the same length */
2549
2550 if (from_len == to_len) {
2551 if (from_len == 1) {
2552 return replace_single_character_in_place(
2553 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002554 from_s[0],
2555 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 maxcount);
2557 } else {
2558 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002559 self, from_s, from_len, to_s, to_len,
2560 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002561 }
2562 }
2563
2564 /* Otherwise use the more generic algorithms */
2565 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002566 return replace_single_character(self, from_s[0],
2567 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002568 } else {
2569 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002570 return replace_substring(self, from_s, from_len, to_s, to_len,
2571 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002572 }
2573}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002575PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002576"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002577\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002578Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002579old replaced by new. If the optional argument count is\n\
2580given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002581
2582static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002583string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002584{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002585 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002586 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587 const char *from_s, *to_s;
2588 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002589
Thomas Wouters477c8d52006-05-27 19:21:47 +00002590 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002591 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592
Thomas Wouters477c8d52006-05-27 19:21:47 +00002593 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002594 from_s = PyString_AS_STRING(from);
2595 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002597 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002598 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002599 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002600 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 return NULL;
2602
Thomas Wouters477c8d52006-05-27 19:21:47 +00002603 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002604 to_s = PyString_AS_STRING(to);
2605 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002608 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002609 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002610 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 return NULL;
2612
Thomas Wouters477c8d52006-05-27 19:21:47 +00002613 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002614 from_s, from_len,
2615 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002616}
2617
Thomas Wouters477c8d52006-05-27 19:21:47 +00002618/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002620/* Matches the end (direction >= 0) or start (direction < 0) of self
2621 * against substr, using the start and end arguments. Returns
2622 * -1 on error, 0 if not found and 1 if found.
2623 */
2624Py_LOCAL(int)
2625_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2626 Py_ssize_t end, int direction)
2627{
2628 Py_ssize_t len = PyString_GET_SIZE(self);
2629 Py_ssize_t slen;
2630 const char* sub;
2631 const char* str;
2632
2633 if (PyString_Check(substr)) {
2634 sub = PyString_AS_STRING(substr);
2635 slen = PyString_GET_SIZE(substr);
2636 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002637 else if (PyUnicode_Check(substr))
2638 return PyUnicode_Tailmatch((PyObject *)self,
2639 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002640 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2641 return -1;
2642 str = PyString_AS_STRING(self);
2643
2644 string_adjust_indices(&start, &end, len);
2645
2646 if (direction < 0) {
2647 /* startswith */
2648 if (start+slen > len)
2649 return 0;
2650 } else {
2651 /* endswith */
2652 if (end-start < slen || start > len)
2653 return 0;
2654
2655 if (end-slen > start)
2656 start = end - slen;
2657 }
2658 if (end-start >= slen)
2659 return ! memcmp(str+start, sub, slen);
2660 return 0;
2661}
2662
2663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002664PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002665"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002667Return True if B starts with the specified prefix, False otherwise.\n\
2668With optional start, test B beginning at that position.\n\
2669With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002670prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671
2672static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002673string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002674{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002675 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002676 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002677 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679
Guido van Rossumc6821402000-05-08 14:08:05 +00002680 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2681 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002683 if (PyTuple_Check(subobj)) {
2684 Py_ssize_t i;
2685 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2686 result = _string_tailmatch(self,
2687 PyTuple_GET_ITEM(subobj, i),
2688 start, end, -1);
2689 if (result == -1)
2690 return NULL;
2691 else if (result) {
2692 Py_RETURN_TRUE;
2693 }
2694 }
2695 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002696 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002697 result = _string_tailmatch(self, subobj, start, end, -1);
2698 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002699 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002700 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002701 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002702}
2703
2704
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002705PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002706"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002707\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002708Return True if B ends with the specified suffix, False otherwise.\n\
2709With optional start, test B beginning at that position.\n\
2710With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002711suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002712
2713static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002714string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002715{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002716 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002717 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002718 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002719 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002720
Guido van Rossumc6821402000-05-08 14:08:05 +00002721 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2722 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002724 if (PyTuple_Check(subobj)) {
2725 Py_ssize_t i;
2726 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2727 result = _string_tailmatch(self,
2728 PyTuple_GET_ITEM(subobj, i),
2729 start, end, +1);
2730 if (result == -1)
2731 return NULL;
2732 else if (result) {
2733 Py_RETURN_TRUE;
2734 }
2735 }
2736 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002737 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002738 result = _string_tailmatch(self, subobj, start, end, +1);
2739 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002740 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002741 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002742 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002743}
2744
2745
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002746PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002747"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002748\n\
2749Decodes S using the codec registered for encoding. encoding defaults\n\
2750to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002751handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2752a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002753as well as any other name registerd with codecs.register_error that is\n\
2754able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002755
2756static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002757string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002758{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002759 const char *encoding = NULL;
2760 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002761
Guido van Rossum98297ee2007-11-06 21:34:58 +00002762 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2763 return NULL;
2764 if (encoding == NULL)
2765 encoding = PyUnicode_GetDefaultEncoding();
2766 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002767}
2768
2769
Guido van Rossumae404e22007-10-26 21:46:44 +00002770PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002771"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002772\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002773Create a bytes object from a string of hexadecimal numbers.\n\
2774Spaces between two numbers are accepted.\n\
2775Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002776
2777static int
2778hex_digit_to_int(Py_UNICODE c)
2779{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002780 if (c >= 128)
2781 return -1;
2782 if (ISDIGIT(c))
2783 return c - '0';
2784 else {
2785 if (ISUPPER(c))
2786 c = TOLOWER(c);
2787 if (c >= 'a' && c <= 'f')
2788 return c - 'a' + 10;
2789 }
2790 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002791}
2792
2793static PyObject *
2794string_fromhex(PyObject *cls, PyObject *args)
2795{
2796 PyObject *newstring, *hexobj;
2797 char *buf;
2798 Py_UNICODE *hex;
2799 Py_ssize_t hexlen, byteslen, i, j;
2800 int top, bot;
2801
2802 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2803 return NULL;
2804 assert(PyUnicode_Check(hexobj));
2805 hexlen = PyUnicode_GET_SIZE(hexobj);
2806 hex = PyUnicode_AS_UNICODE(hexobj);
2807 byteslen = hexlen/2; /* This overestimates if there are spaces */
2808 newstring = PyString_FromStringAndSize(NULL, byteslen);
2809 if (!newstring)
2810 return NULL;
2811 buf = PyString_AS_STRING(newstring);
2812 for (i = j = 0; i < hexlen; i += 2) {
2813 /* skip over spaces in the input */
2814 while (hex[i] == ' ')
2815 i++;
2816 if (i >= hexlen)
2817 break;
2818 top = hex_digit_to_int(hex[i]);
2819 bot = hex_digit_to_int(hex[i+1]);
2820 if (top == -1 || bot == -1) {
2821 PyErr_Format(PyExc_ValueError,
2822 "non-hexadecimal number found in "
2823 "fromhex() arg at position %zd", i);
2824 goto error;
2825 }
2826 buf[j++] = (top << 4) + bot;
2827 }
2828 if (_PyString_Resize(&newstring, j) < 0)
2829 goto error;
2830 return newstring;
2831
2832 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002833 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002834 return NULL;
2835}
2836
2837
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002838static PyObject *
2839string_getnewargs(PyStringObject *v)
2840{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002841 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002842}
2843
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002844
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002845static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002846string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002847 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002848 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2849 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002850 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002851 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002852 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002853 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002854 endswith__doc__},
2855 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2856 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002857 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002858 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2859 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002860 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002861 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2862 _Py_isalnum__doc__},
2863 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2864 _Py_isalpha__doc__},
2865 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2866 _Py_isdigit__doc__},
2867 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2868 _Py_islower__doc__},
2869 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2870 _Py_isspace__doc__},
2871 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2872 _Py_istitle__doc__},
2873 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2874 _Py_isupper__doc__},
2875 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2876 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2877 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002878 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002879 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002880 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2881 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2882 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002883 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2885 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002886 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2887 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2888 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2889 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2890 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002891 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002892 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002893 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002894 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2895 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002896 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002897 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2898 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002899 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002900 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002901 {NULL, NULL} /* sentinel */
2902};
2903
Jeremy Hylton938ace62002-07-17 16:30:39 +00002904static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002905str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2906
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002907static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002908string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002909{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002910 PyObject *x = NULL, *it;
2911 PyObject *(*iternext)(PyObject *);
2912 const char *encoding = NULL;
2913 const char *errors = NULL;
2914 PyObject *new = NULL;
2915 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002916 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002917
Guido van Rossumae960af2001-08-30 03:11:59 +00002918 if (type != &PyString_Type)
2919 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002920 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002921 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002922 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002923 if (x == NULL) {
2924 if (encoding != NULL || errors != NULL) {
2925 PyErr_SetString(PyExc_TypeError,
2926 "encoding or errors without sequence "
2927 "argument");
2928 return NULL;
2929 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002930 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002931 }
2932
2933 if (PyUnicode_Check(x)) {
2934 /* Encode via the codec registry */
2935 if (encoding == NULL) {
2936 PyErr_SetString(PyExc_TypeError,
2937 "string argument without an encoding");
2938 return NULL;
2939 }
2940 new = PyCodec_Encode(x, encoding, errors);
2941 if (new == NULL)
2942 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002943 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002944 return new;
2945 }
2946
2947 /* If it's not unicode, there can't be encoding or errors */
2948 if (encoding != NULL || errors != NULL) {
2949 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002950 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002951 return NULL;
2952 }
2953
Guido van Rossum98297ee2007-11-06 21:34:58 +00002954 /* Is it an int? */
2955 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2956 if (size == -1 && PyErr_Occurred()) {
2957 PyErr_Clear();
2958 }
2959 else {
2960 if (size < 0) {
2961 PyErr_SetString(PyExc_ValueError, "negative count");
2962 return NULL;
2963 }
2964 new = PyString_FromStringAndSize(NULL, size);
2965 if (new == NULL) {
2966 return NULL;
2967 }
2968 if (size > 0) {
2969 memset(((PyStringObject*)new)->ob_sval, 0, size);
2970 }
2971 return new;
2972 }
2973
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002974 /* Use the modern buffer interface */
2975 if (PyObject_CheckBuffer(x)) {
2976 Py_buffer view;
2977 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2978 return NULL;
2979 new = PyString_FromStringAndSize(NULL, view.len);
2980 if (!new)
2981 goto fail;
2982 // XXX(brett.cannon): Better way to get to internal buffer?
2983 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2984 &view, view.len, 'C') < 0)
2985 goto fail;
2986 PyObject_ReleaseBuffer(x, &view);
2987 return new;
2988 fail:
2989 Py_XDECREF(new);
2990 PyObject_ReleaseBuffer(x, &view);
2991 return NULL;
2992 }
2993
Guido van Rossum98297ee2007-11-06 21:34:58 +00002994 /* For iterator version, create a string object and resize as needed */
2995 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2996 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2997 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002998 size = 64;
2999 new = PyString_FromStringAndSize(NULL, size);
3000 if (new == NULL)
3001 return NULL;
3002
3003 /* XXX Optimize this if the arguments is a list, tuple */
3004
3005 /* Get the iterator */
3006 it = PyObject_GetIter(x);
3007 if (it == NULL)
3008 goto error;
3009 // XXX(brett.cannon): No API for this?
3010 iternext = *Py_Type(it)->tp_iternext;
3011
3012 /* Run the iterator to exhaustion */
3013 for (i = 0; ; i++) {
3014 PyObject *item;
3015 Py_ssize_t value;
3016
3017 /* Get the next item */
3018 item = iternext(it);
3019 if (item == NULL) {
3020 if (PyErr_Occurred()) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003021 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
3022 goto error;
3023 PyErr_Clear();
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003024 }
3025 break;
3026 }
3027
3028 /* Interpret it as an int (__index__) */
3029 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3030 Py_DECREF(item);
3031 if (value == -1 && PyErr_Occurred())
3032 goto error;
3033
3034 /* Range check */
3035 if (value < 0 || value >= 256) {
3036 PyErr_SetString(PyExc_ValueError,
3037 "bytes must be in range(0, 256)");
3038 goto error;
3039 }
3040
3041 /* Append the byte */
3042 if (i >= size) {
3043 size *= 2;
3044 if (_PyString_Resize(&new, size) < 0)
3045 goto error;
3046 }
3047 ((PyStringObject *)new)->ob_sval[i] = value;
3048 }
3049 _PyString_Resize(&new, i);
3050
3051 /* Clean up and return success */
3052 Py_DECREF(it);
3053 return new;
3054
3055 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003056 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003057 Py_XDECREF(it);
3058 Py_DECREF(new);
3059 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003060}
3061
Guido van Rossumae960af2001-08-30 03:11:59 +00003062static PyObject *
3063str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3064{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003065 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003066 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003067
3068 assert(PyType_IsSubtype(type, &PyString_Type));
3069 tmp = string_new(&PyString_Type, args, kwds);
3070 if (tmp == NULL)
3071 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003072 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003073 n = PyString_GET_SIZE(tmp);
3074 pnew = type->tp_alloc(type, n);
3075 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003076 Py_MEMCPY(PyString_AS_STRING(pnew),
3077 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003078 ((PyStringObject *)pnew)->ob_shash =
3079 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003080 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003081 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003082 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003083}
3084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003085PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003086"bytes(iterable_of_ints) -> bytes.\n\
3087bytes(string, encoding[, errors]) -> bytes\n\
3088bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3089bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003090\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003091Construct an immutable array of bytes from:\n\
3092 - an iterable yielding integers in range(256)\n\
3093 - a text string encoded using the specified encoding\n\
3094 - a bytes or a buffer object\n\
3095 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003096
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003097static PyObject *str_iter(PyObject *seq);
3098
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003099PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003100 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003101 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003103 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003104 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003105 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003106 0, /* tp_getattr */
3107 0, /* tp_setattr */
3108 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003109 (reprfunc)string_repr, /* tp_repr */
3110 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003111 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003112 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003113 (hashfunc)string_hash, /* tp_hash */
3114 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003115 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003116 PyObject_GenericGetAttr, /* tp_getattro */
3117 0, /* tp_setattro */
3118 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003119 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3120 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003121 string_doc, /* tp_doc */
3122 0, /* tp_traverse */
3123 0, /* tp_clear */
3124 (richcmpfunc)string_richcompare, /* tp_richcompare */
3125 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003126 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003127 0, /* tp_iternext */
3128 string_methods, /* tp_methods */
3129 0, /* tp_members */
3130 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003131 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003132 0, /* tp_dict */
3133 0, /* tp_descr_get */
3134 0, /* tp_descr_set */
3135 0, /* tp_dictoffset */
3136 0, /* tp_init */
3137 0, /* tp_alloc */
3138 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003139 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003140};
3141
3142void
Fred Drakeba096332000-07-09 07:04:36 +00003143PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003144{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003145 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003146 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003147 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003148 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003149 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003151 *pv = NULL;
3152 return;
3153 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003154 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003155 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003156 *pv = v;
3157}
3158
Guido van Rossum013142a1994-08-30 08:19:36 +00003159void
Fred Drakeba096332000-07-09 07:04:36 +00003160PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003161{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003162 PyString_Concat(pv, w);
3163 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003164}
3165
3166
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003167/* The following function breaks the notion that strings are immutable:
3168 it changes the size of a string. We get away with this only if there
3169 is only one module referencing the object. You can also think of it
3170 as creating a new string object and destroying the old one, only
3171 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003172 already be known to some other part of the code...
3173 Note that if there's not enough memory to resize the string, the original
3174 string object at *pv is deallocated, *pv is set to NULL, an "out of
3175 memory" exception is set, and -1 is returned. Else (on success) 0 is
3176 returned, and the value in *pv may or may not be the same as on input.
3177 As always, an extra byte is allocated for a trailing \0 byte (newsize
3178 does *not* include that), and a trailing \0 byte is stored.
3179*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003180
3181int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003182_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003183{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 register PyObject *v;
3185 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003186 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003187 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003188 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003189 Py_DECREF(v);
3190 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003191 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003192 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003193 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003194 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003195 _Py_ForgetReference(v);
3196 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003197 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003198 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003199 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003201 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003202 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003203 _Py_NewReference(*pv);
3204 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003205 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003206 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003207 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003208 return 0;
3209}
Guido van Rossume5372401993-03-16 12:15:04 +00003210
Tim Peters38fd5b62000-09-21 05:43:11 +00003211/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3212 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3213 * Python's regular ints.
3214 * Return value: a new PyString*, or NULL if error.
3215 * . *pbuf is set to point into it,
3216 * *plen set to the # of chars following that.
3217 * Caller must decref it when done using pbuf.
3218 * The string starting at *pbuf is of the form
3219 * "-"? ("0x" | "0X")? digit+
3220 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003221 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003222 * There will be at least prec digits, zero-filled on the left if
3223 * necessary to get that many.
3224 * val object to be converted
3225 * flags bitmask of format flags; only F_ALT is looked at
3226 * prec minimum number of digits; 0-fill on left if needed
3227 * type a character in [duoxX]; u acts the same as d
3228 *
3229 * CAUTION: o, x and X conversions on regular ints can never
3230 * produce a '-' sign, but can for Python's unbounded ints.
3231 */
3232PyObject*
3233_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3234 char **pbuf, int *plen)
3235{
3236 PyObject *result = NULL;
3237 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003238 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003239 int sign; /* 1 if '-', else 0 */
3240 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003241 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003242 int numdigits; /* len == numnondigits + numdigits */
3243 int numnondigits = 0;
3244
Guido van Rossumddefaf32007-01-14 03:31:43 +00003245 /* Avoid exceeding SSIZE_T_MAX */
3246 if (prec > PY_SSIZE_T_MAX-3) {
3247 PyErr_SetString(PyExc_OverflowError,
3248 "precision too large");
3249 return NULL;
3250 }
3251
Tim Peters38fd5b62000-09-21 05:43:11 +00003252 switch (type) {
3253 case 'd':
3254 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003255 /* Special-case boolean: we want 0/1 */
3256 if (PyBool_Check(val))
3257 result = PyNumber_ToBase(val, 10);
3258 else
3259 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003260 break;
3261 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003262 numnondigits = 2;
3263 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003264 break;
3265 case 'x':
3266 case 'X':
3267 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003268 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003269 break;
3270 default:
3271 assert(!"'type' not in [duoxX]");
3272 }
3273 if (!result)
3274 return NULL;
3275
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003276 buf = PyString_AsString(result);
3277 if (!buf) {
3278 Py_DECREF(result);
3279 return NULL;
3280 }
3281
Tim Peters38fd5b62000-09-21 05:43:11 +00003282 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003283 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003284 PyErr_BadInternalCall();
3285 return NULL;
3286 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00003287 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003288 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003289 PyErr_SetString(PyExc_ValueError,
3290 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003291 return NULL;
3292 }
3293 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003294 if (buf[len-1] == 'L') {
3295 --len;
3296 buf[len] = '\0';
3297 }
3298 sign = buf[0] == '-';
3299 numnondigits += sign;
3300 numdigits = len - numnondigits;
3301 assert(numdigits > 0);
3302
Tim Petersfff53252001-04-12 18:38:48 +00003303 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003304 if (((flags & F_ALT) == 0 &&
3305 (type == 'o' || type == 'x' || type == 'X'))) {
3306 assert(buf[sign] == '0');
3307 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003308 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003309 numnondigits -= 2;
3310 buf += 2;
3311 len -= 2;
3312 if (sign)
3313 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003314 assert(len == numnondigits + numdigits);
3315 assert(numdigits > 0);
3316 }
3317
3318 /* Fill with leading zeroes to meet minimum width. */
3319 if (prec > numdigits) {
3320 PyObject *r1 = PyString_FromStringAndSize(NULL,
3321 numnondigits + prec);
3322 char *b1;
3323 if (!r1) {
3324 Py_DECREF(result);
3325 return NULL;
3326 }
3327 b1 = PyString_AS_STRING(r1);
3328 for (i = 0; i < numnondigits; ++i)
3329 *b1++ = *buf++;
3330 for (i = 0; i < prec - numdigits; i++)
3331 *b1++ = '0';
3332 for (i = 0; i < numdigits; i++)
3333 *b1++ = *buf++;
3334 *b1 = '\0';
3335 Py_DECREF(result);
3336 result = r1;
3337 buf = PyString_AS_STRING(result);
3338 len = numnondigits + prec;
3339 }
3340
3341 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003342 if (type == 'X') {
3343 /* Need to convert all lower case letters to upper case.
3344 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003345 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003346 if (buf[i] >= 'a' && buf[i] <= 'x')
3347 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003348 }
3349 *pbuf = buf;
3350 *plen = len;
3351 return result;
3352}
3353
Guido van Rossum8cf04761997-08-02 02:57:45 +00003354void
Fred Drakeba096332000-07-09 07:04:36 +00003355PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003356{
3357 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003358 for (i = 0; i < UCHAR_MAX + 1; i++) {
3359 Py_XDECREF(characters[i]);
3360 characters[i] = NULL;
3361 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003362 Py_XDECREF(nullstring);
3363 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003364}
Barry Warsawa903ad982001-02-23 16:40:48 +00003365
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003366/*********************** Str Iterator ****************************/
3367
3368typedef struct {
3369 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003370 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003371 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3372} striterobject;
3373
3374static void
3375striter_dealloc(striterobject *it)
3376{
3377 _PyObject_GC_UNTRACK(it);
3378 Py_XDECREF(it->it_seq);
3379 PyObject_GC_Del(it);
3380}
3381
3382static int
3383striter_traverse(striterobject *it, visitproc visit, void *arg)
3384{
3385 Py_VISIT(it->it_seq);
3386 return 0;
3387}
3388
3389static PyObject *
3390striter_next(striterobject *it)
3391{
3392 PyStringObject *seq;
3393 PyObject *item;
3394
3395 assert(it != NULL);
3396 seq = it->it_seq;
3397 if (seq == NULL)
3398 return NULL;
3399 assert(PyString_Check(seq));
3400
3401 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00003402 item = PyInt_FromLong(
3403 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003404 if (item != NULL)
3405 ++it->it_index;
3406 return item;
3407 }
3408
3409 Py_DECREF(seq);
3410 it->it_seq = NULL;
3411 return NULL;
3412}
3413
3414static PyObject *
3415striter_len(striterobject *it)
3416{
3417 Py_ssize_t len = 0;
3418 if (it->it_seq)
3419 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
3420 return PyInt_FromSsize_t(len);
3421}
3422
Guido van Rossum49d6b072006-08-17 21:11:47 +00003423PyDoc_STRVAR(length_hint_doc,
3424 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003425
3426static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003427 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3428 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003429 {NULL, NULL} /* sentinel */
3430};
3431
3432PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003433 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003434 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003435 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003436 0, /* tp_itemsize */
3437 /* methods */
3438 (destructor)striter_dealloc, /* tp_dealloc */
3439 0, /* tp_print */
3440 0, /* tp_getattr */
3441 0, /* tp_setattr */
3442 0, /* tp_compare */
3443 0, /* tp_repr */
3444 0, /* tp_as_number */
3445 0, /* tp_as_sequence */
3446 0, /* tp_as_mapping */
3447 0, /* tp_hash */
3448 0, /* tp_call */
3449 0, /* tp_str */
3450 PyObject_GenericGetAttr, /* tp_getattro */
3451 0, /* tp_setattro */
3452 0, /* tp_as_buffer */
3453 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3454 0, /* tp_doc */
3455 (traverseproc)striter_traverse, /* tp_traverse */
3456 0, /* tp_clear */
3457 0, /* tp_richcompare */
3458 0, /* tp_weaklistoffset */
3459 PyObject_SelfIter, /* tp_iter */
3460 (iternextfunc)striter_next, /* tp_iternext */
3461 striter_methods, /* tp_methods */
3462 0,
3463};
3464
3465static PyObject *
3466str_iter(PyObject *seq)
3467{
3468 striterobject *it;
3469
3470 if (!PyString_Check(seq)) {
3471 PyErr_BadInternalCall();
3472 return NULL;
3473 }
3474 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3475 if (it == NULL)
3476 return NULL;
3477 it->it_index = 0;
3478 Py_INCREF(seq);
3479 it->it_seq = (PyStringObject *)seq;
3480 _PyObject_GC_TRACK(it);
3481 return (PyObject *)it;
3482}