blob: 887b28a70ba5a7c28566bccd1c86858a4484d0d1 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 For PyString_FromStringAndSize(), the parameter the parameter `str' is
46 either NULL or else points to a string containing at least `size' bytes.
47 For PyString_FromStringAndSize(), the string in the `str' parameter does
48 not have to be null-terminated. (Therefore it is safe to construct a
49 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
50 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
51 bytes (setting the last byte to the null terminating character) and you can
52 fill in the data yourself. If `str' is non-NULL then the resulting
53 PyString object must be treated as immutable and you must not fill in nor
54 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000056 The PyObject member `op->ob_size', which denotes the number of "extra
57 items" in a variable-size object, will contain the number of bytes
58 allocated for string data, not counting the null terminating character. It
59 is therefore equal to the equal to the `size' parameter (for
60 PyString_FromStringAndSize()) or the length of the string in the `str'
61 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
Martin v. Löwis18e16552006-02-15 17:27:45 +0000507Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000512 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
515/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000518 if (!PyString_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
520 "expected bytes, %.200s found", Py_Type(op)->tp_name);
521 return NULL;
522 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000523 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524}
525
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000526int
527PyString_AsStringAndSize(register PyObject *obj,
528 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000529 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530{
531 if (s == NULL) {
532 PyErr_BadInternalCall();
533 return -1;
534 }
535
536 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000537 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000538 "expected bytes, %.200s found", Py_Type(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000539 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540 }
541
542 *s = PyString_AS_STRING(obj);
543 if (len != NULL)
544 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000545 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000546 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000547 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000548 return -1;
549 }
550 return 0;
551}
552
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554/* Methods */
555
Thomas Wouters477c8d52006-05-27 19:21:47 +0000556#define STRINGLIB_CHAR char
557
558#define STRINGLIB_CMP memcmp
559#define STRINGLIB_LEN PyString_GET_SIZE
560#define STRINGLIB_NEW PyString_FromStringAndSize
561#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000562/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000563
564#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000565#define STRINGLIB_CHECK_EXACT PyString_CheckExact
566#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000567
568#include "stringlib/fastsearch.h"
569
570#include "stringlib/count.h"
571#include "stringlib/find.h"
572#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000573#include "stringlib/ctype.h"
574#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000575
576
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577PyObject *
578PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000580 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000582 Py_ssize_t length = Py_Size(op);
583 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000584 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000585 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000586 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000587 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000588 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000589 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000590 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000591 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 }
594 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000595 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000596 register Py_UNICODE c;
597 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000598 int quote;
599
Guido van Rossum98297ee2007-11-06 21:34:58 +0000600 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000601 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000602 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000603 char *test, *start;
604 start = PyString_AS_STRING(op);
605 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000607 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000608 goto decided;
609 }
610 else if (*test == '\'')
611 quote = '"';
612 }
613 decided:
614 ;
615 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000616
Guido van Rossum98297ee2007-11-06 21:34:58 +0000617 *p++ = 'b', *p++ = quote;
618 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000619 /* There's at least enough room for a hex escape
620 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000621 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000631 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000632 *p++ = '\\';
633 *p++ = 'x';
634 *p++ = hexdigits[(c & 0xf0) >> 4];
635 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000636 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000637 else
638 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000640 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000641 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000643 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
644 Py_DECREF(v);
645 return NULL;
646 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000647 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649}
650
Guido van Rossum189f1df2001-05-01 16:51:53 +0000651static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652string_repr(PyObject *op)
653{
654 return PyString_Repr(op, 1);
655}
656
657static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000658string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000659{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000660 if (Py_BytesWarningFlag) {
661 if (PyErr_WarnEx(PyExc_BytesWarning,
662 "str() on a bytes instance", 1))
663 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000664 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000665 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000666}
667
Martin v. Löwis18e16552006-02-15 17:27:45 +0000668static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000669string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000671 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672}
673
Guido van Rossum98297ee2007-11-06 21:34:58 +0000674/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000675static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000676string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 Py_ssize_t size;
679 Py_buffer va, vb;
680 PyObject *result = NULL;
681
682 va.len = -1;
683 vb.len = -1;
684 if (_getbuffer(a, &va) < 0 ||
685 _getbuffer(b, &vb) < 0) {
686 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
687 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
688 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000689 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000690
Guido van Rossum98297ee2007-11-06 21:34:58 +0000691 /* Optimize end cases */
692 if (va.len == 0 && PyString_CheckExact(b)) {
693 result = b;
694 Py_INCREF(result);
695 goto done;
696 }
697 if (vb.len == 0 && PyString_CheckExact(a)) {
698 result = a;
699 Py_INCREF(result);
700 goto done;
701 }
702
703 size = va.len + vb.len;
704 if (size < 0) {
705 PyErr_NoMemory();
706 goto done;
707 }
708
709 result = PyString_FromStringAndSize(NULL, size);
710 if (result != NULL) {
711 memcpy(PyString_AS_STRING(result), va.buf, va.len);
712 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
713 }
714
715 done:
716 if (va.len != -1)
717 PyObject_ReleaseBuffer(a, &va);
718 if (vb.len != -1)
719 PyObject_ReleaseBuffer(b, &vb);
720 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000726 register Py_ssize_t i;
727 register Py_ssize_t j;
728 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000730 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731 if (n < 0)
732 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000733 /* watch out for overflows: the size can overflow int,
734 * and the # of bytes needed can overflow size_t
735 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000736 size = Py_Size(a) * n;
737 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000738 PyErr_SetString(PyExc_OverflowError,
739 "repeated string is too long");
740 return NULL;
741 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000742 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000743 Py_INCREF(a);
744 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745 }
Tim Peterse7c05322004-06-27 17:24:49 +0000746 nbytes = (size_t)size;
747 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000748 PyErr_SetString(PyExc_OverflowError,
749 "repeated string is too long");
750 return NULL;
751 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000753 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000754 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000755 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000756 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000757 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000758 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000759 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000760 memset(op->ob_sval, a->ob_sval[0] , n);
761 return (PyObject *) op;
762 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000763 i = 0;
764 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000765 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
766 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000767 }
768 while (i < size) {
769 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000770 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000771 i += j;
772 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774}
775
Guido van Rossum9284a572000-03-07 15:53:43 +0000776static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000778{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000779 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
780 if (ival == -1 && PyErr_Occurred()) {
781 Py_buffer varg;
782 int pos;
783 PyErr_Clear();
784 if (_getbuffer(arg, &varg) < 0)
785 return -1;
786 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
787 varg.buf, varg.len, 0);
788 PyObject_ReleaseBuffer(arg, &varg);
789 return pos >= 0;
790 }
791 if (ival < 0 || ival >= 256) {
792 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
793 return -1;
794 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000795
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
797}
798
799static PyObject *
800string_item(PyStringObject *a, register Py_ssize_t i)
801{
802 if (i < 0 || i >= Py_Size(a)) {
803 PyErr_SetString(PyExc_IndexError, "string index out of range");
804 return NULL;
805 }
806 return PyInt_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000807}
808
Martin v. Löwiscd353062001-05-24 16:56:35 +0000809static PyObject*
810string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000812 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000813 Py_ssize_t len_a, len_b;
814 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000815 PyObject *result;
816
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000817 /* Make sure both arguments are strings. */
818 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000819 if (Py_BytesWarningFlag && (op == Py_EQ) &&
820 (PyObject_IsInstance((PyObject*)a,
821 (PyObject*)&PyUnicode_Type) ||
822 PyObject_IsInstance((PyObject*)b,
823 (PyObject*)&PyUnicode_Type))) {
824 if (PyErr_WarnEx(PyExc_BytesWarning,
825 "Comparsion between bytes and string", 1))
826 return NULL;
827 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000828 result = Py_NotImplemented;
829 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000830 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 if (a == b) {
832 switch (op) {
833 case Py_EQ:case Py_LE:case Py_GE:
834 result = Py_True;
835 goto out;
836 case Py_NE:case Py_LT:case Py_GT:
837 result = Py_False;
838 goto out;
839 }
840 }
841 if (op == Py_EQ) {
842 /* Supporting Py_NE here as well does not save
843 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000844 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000846 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000847 result = Py_True;
848 } else {
849 result = Py_False;
850 }
851 goto out;
852 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000853 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 min_len = (len_a < len_b) ? len_a : len_b;
855 if (min_len > 0) {
856 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
857 if (c==0)
858 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000859 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000860 c = 0;
861 if (c == 0)
862 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
863 switch (op) {
864 case Py_LT: c = c < 0; break;
865 case Py_LE: c = c <= 0; break;
866 case Py_EQ: assert(0); break; /* unreachable */
867 case Py_NE: c = c != 0; break;
868 case Py_GT: c = c > 0; break;
869 case Py_GE: c = c >= 0; break;
870 default:
871 result = Py_NotImplemented;
872 goto out;
873 }
874 result = c ? Py_True : Py_False;
875 out:
876 Py_INCREF(result);
877 return result;
878}
879
880int
881_PyString_Eq(PyObject *o1, PyObject *o2)
882{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000883 PyStringObject *a = (PyStringObject*) o1;
884 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000885 return Py_Size(a) == Py_Size(b)
886 && *a->ob_sval == *b->ob_sval
887 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888}
889
Guido van Rossum9bfef441993-03-29 10:43:31 +0000890static long
Fred Drakeba096332000-07-09 07:04:36 +0000891string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000893 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000894 register unsigned char *p;
895 register long x;
896
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000897 if (a->ob_shash != -1)
898 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000899 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000900 p = (unsigned char *) a->ob_sval;
901 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000902 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000903 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000904 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000905 if (x == -1)
906 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000907 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000908 return x;
909}
910
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000911static PyObject*
912string_subscript(PyStringObject* self, PyObject* item)
913{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000914 if (PyIndex_Check(item)) {
915 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000916 if (i == -1 && PyErr_Occurred())
917 return NULL;
918 if (i < 0)
919 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000920 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000921 PyErr_SetString(PyExc_IndexError,
922 "string index out of range");
923 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000924 }
925 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000926 }
927 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000928 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000929 char* source_buf;
930 char* result_buf;
931 PyObject* result;
932
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000933 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000934 PyString_GET_SIZE(self),
935 &start, &stop, &step, &slicelength) < 0) {
936 return NULL;
937 }
938
939 if (slicelength <= 0) {
940 return PyString_FromStringAndSize("", 0);
941 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000942 else if (start == 0 && step == 1 &&
943 slicelength == PyString_GET_SIZE(self) &&
944 PyString_CheckExact(self)) {
945 Py_INCREF(self);
946 return (PyObject *)self;
947 }
948 else if (step == 1) {
949 return PyString_FromStringAndSize(
950 PyString_AS_STRING(self) + start,
951 slicelength);
952 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000953 else {
954 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000955 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000956 if (result_buf == NULL)
957 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000958
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 cur += step, i++) {
961 result_buf[i] = source_buf[cur];
962 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000963
964 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000965 slicelength);
966 PyMem_Free(result_buf);
967 return result;
968 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000969 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000970 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000971 PyErr_Format(PyExc_TypeError,
972 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000973 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000974 return NULL;
975 }
976}
977
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000978static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000979string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000981 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
982 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000983}
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000986 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000987 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000988 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000989 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000990 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000991 0, /*sq_ass_item*/
992 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000993 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994};
995
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000996static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000997 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000998 (binaryfunc)string_subscript,
999 0,
1000};
1001
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001002static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001003 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001004 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001005};
1006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001007
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for a strip mode: skips the 3-character "|O:" prefix
   of the corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001016
Thomas Wouters477c8d52006-05-27 19:21:47 +00001017
/* Test whether `length` bytes of `target`, starting at `offset`, equal
   `pattern`.  The first and last bytes are compared directly and only the
   interior goes through memcmp().
   Don't call if length < 2 (the interior length would be negative).
   Every argument is parenthesized so that expression arguments such as
   `base + 1` expand correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1023
1024
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that expression arguments (e.g. a
   conditional expression) are not re-associated by the expansion. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the bytes data[left:right] to the result list.
   Relies on these names in the expanding function: `str` (scratch
   PyObject *), `list` (result list), `count` (items stored so far) and the
   label `onError`.  The first MAX_PREALLOC items are stored directly into
   the preallocated slots; later ones are appended. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_Size(list) = count)

/* Scanning helpers: advance (or, for the R variants, retreat) index `i`
   over bytes of `s` that are / are not whitespace.  `i` must be an lvalue
   and is evaluated more than once. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001062
1063Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001064split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001066 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001067 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001068 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001069
1070 if (list == NULL)
1071 return NULL;
1072
Thomas Wouters477c8d52006-05-27 19:21:47 +00001073 i = j = 0;
1074
1075 while (maxsplit-- > 0) {
1076 SKIP_SPACE(s, i, len);
1077 if (i==len) break;
1078 j = i; i++;
1079 SKIP_NONSPACE(s, i, len);
1080 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001082
1083 if (i < len) {
1084 /* Only occurs when maxsplit was reached */
1085 /* Skip any remaining whitespace and copy to end of string */
1086 SKIP_SPACE(s, i, len);
1087 if (i != len)
1088 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001090 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001092 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093 Py_DECREF(list);
1094 return NULL;
1095}
1096
Thomas Wouters477c8d52006-05-27 19:21:47 +00001097Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001099{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001100 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001101 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001102 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001103
1104 if (list == NULL)
1105 return NULL;
1106
Thomas Wouters477c8d52006-05-27 19:21:47 +00001107 i = j = 0;
1108 while ((j < len) && (maxcount-- > 0)) {
1109 for(; j<len; j++) {
1110 /* I found that using memchr makes no difference */
1111 if (s[j] == ch) {
1112 SPLIT_ADD(s, i, j);
1113 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001114 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001115 }
1116 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001117 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001118 if (i <= len) {
1119 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001120 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001121 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001122 return list;
1123
1124 onError:
1125 Py_DECREF(list);
1126 return NULL;
1127}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001128
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001129PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001130"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001131\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001132Return a list of the sections in B, using sep as the delimiter.\n\
1133If sep is not given, B is split on ASCII whitespace characters\n\
1134(space, tab, return, newline, formfeed, vertical tab).\n\
1135If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001136
1137static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001138string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001140 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001141 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001142 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001143 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001144 PyObject *list, *str, *subobj = Py_None;
1145#ifdef USE_FAST
1146 Py_ssize_t pos;
1147#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001148
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001149 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001151 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001152 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return split_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001155 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001156 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001157 sub = vsub.buf;
1158 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001159
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001160 if (n == 0) {
1161 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001162 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163 return NULL;
1164 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001165 else if (n == 1) {
1166 char ch = sub[0];
1167 PyObject_ReleaseBuffer(subobj, &vsub);
1168 return split_char(s, len, ch, maxsplit);
1169 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170
Thomas Wouters477c8d52006-05-27 19:21:47 +00001171 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001172 if (list == NULL) {
1173 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001175 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176
Thomas Wouters477c8d52006-05-27 19:21:47 +00001177#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001179 while (maxsplit-- > 0) {
1180 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1181 if (pos < 0)
1182 break;
1183 j = i+pos;
1184 SPLIT_ADD(s, i, j);
1185 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001187#else
1188 i = j = 0;
1189 while ((j+n <= len) && (maxsplit-- > 0)) {
1190 for (; j+n <= len; j++) {
1191 if (Py_STRING_MATCH(s, j, sub, n)) {
1192 SPLIT_ADD(s, i, j);
1193 i = j = j + n;
1194 break;
1195 }
1196 }
1197 }
1198#endif
1199 SPLIT_ADD(s, i, len);
1200 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001201 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202 return list;
1203
Thomas Wouters477c8d52006-05-27 19:21:47 +00001204 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001206 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001207 return NULL;
1208}
1209
Thomas Wouters477c8d52006-05-27 19:21:47 +00001210PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001211"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001212\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001213Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001214the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001215found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001216
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001217static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001218string_partition(PyStringObject *self, PyObject *sep_obj)
1219{
1220 const char *sep;
1221 Py_ssize_t sep_len;
1222
1223 if (PyString_Check(sep_obj)) {
1224 sep = PyString_AS_STRING(sep_obj);
1225 sep_len = PyString_GET_SIZE(sep_obj);
1226 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001227 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1228 return NULL;
1229
1230 return stringlib_partition(
1231 (PyObject*) self,
1232 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1233 sep_obj, sep, sep_len
1234 );
1235}
1236
1237PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001238"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001239\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001240Searches for the separator sep in B, starting at the end of B,\n\
1241and returns the part before it, the separator itself, and the\n\
1242part after it. If the separator is not found, returns two empty\n\
1243bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001244
1245static PyObject *
1246string_rpartition(PyStringObject *self, PyObject *sep_obj)
1247{
1248 const char *sep;
1249 Py_ssize_t sep_len;
1250
1251 if (PyString_Check(sep_obj)) {
1252 sep = PyString_AS_STRING(sep_obj);
1253 sep_len = PyString_GET_SIZE(sep_obj);
1254 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001255 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1256 return NULL;
1257
1258 return stringlib_rpartition(
1259 (PyObject*) self,
1260 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1261 sep_obj, sep, sep_len
1262 );
1263}
1264
1265Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001266rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001267{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001268 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001269 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001270 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001271
1272 if (list == NULL)
1273 return NULL;
1274
Thomas Wouters477c8d52006-05-27 19:21:47 +00001275 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001276
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 while (maxsplit-- > 0) {
1278 RSKIP_SPACE(s, i);
1279 if (i<0) break;
1280 j = i; i--;
1281 RSKIP_NONSPACE(s, i);
1282 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001283 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001284 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001285 /* Only occurs when maxsplit was reached. Skip any remaining
1286 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001287 RSKIP_SPACE(s, i);
1288 if (i >= 0)
1289 SPLIT_ADD(s, 0, i + 1);
1290
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001291 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001292 FIX_PREALLOC_SIZE(list);
1293 if (PyList_Reverse(list) < 0)
1294 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001295 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001296 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001297 Py_DECREF(list);
1298 return NULL;
1299}
1300
Thomas Wouters477c8d52006-05-27 19:21:47 +00001301Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001302rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001303{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001304 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001305 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001306 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001307
1308 if (list == NULL)
1309 return NULL;
1310
Thomas Wouters477c8d52006-05-27 19:21:47 +00001311 i = j = len - 1;
1312 while ((i >= 0) && (maxcount-- > 0)) {
1313 for (; i >= 0; i--) {
1314 if (s[i] == ch) {
1315 SPLIT_ADD(s, i + 1, j + 1);
1316 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001317 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001318 }
1319 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001320 }
1321 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001323 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001324 FIX_PREALLOC_SIZE(list);
1325 if (PyList_Reverse(list) < 0)
1326 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001327 return list;
1328
1329 onError:
1330 Py_DECREF(list);
1331 return NULL;
1332}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001333
1334PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001335"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001336\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001337Return a list of the sections in B, using sep as the delimiter,\n\
1338starting at the end of B and working to the front.\n\
1339If sep is not given, B is split on ASCII whitespace characters\n\
1340(space, tab, return, newline, formfeed, vertical tab).\n\
1341If maxsplit is given, at most maxsplit splits are done.");
1342
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001343
1344static PyObject *
1345string_rsplit(PyStringObject *self, PyObject *args)
1346{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001347 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001348 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001349 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001350 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001351 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001352
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001353 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001354 return NULL;
1355 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001356 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001357 if (subobj == Py_None)
1358 return rsplit_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001359 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001360 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001361 sub = vsub.buf;
1362 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001363
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001364 if (n == 0) {
1365 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001367 return NULL;
1368 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001369 else if (n == 1) {
1370 char ch = sub[0];
1371 PyObject_ReleaseBuffer(subobj, &vsub);
1372 return rsplit_char(s, len, ch, maxsplit);
1373 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001376 if (list == NULL) {
1377 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001378 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001379 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001380
1381 j = len;
1382 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001383
Thomas Wouters477c8d52006-05-27 19:21:47 +00001384 while ( (i >= 0) && (maxsplit-- > 0) ) {
1385 for (; i>=0; i--) {
1386 if (Py_STRING_MATCH(s, i, sub, n)) {
1387 SPLIT_ADD(s, i + n, j);
1388 j = i;
1389 i -= n;
1390 break;
1391 }
1392 }
1393 }
1394 SPLIT_ADD(s, 0, j);
1395 FIX_PREALLOC_SIZE(list);
1396 if (PyList_Reverse(list) < 0)
1397 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001398 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001399 return list;
1400
Thomas Wouters477c8d52006-05-27 19:21:47 +00001401onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001402 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001403 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001404 return NULL;
1405}
1406
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001407#undef SPLIT_ADD
1408#undef MAX_PREALLOC
1409#undef PREALLOC_SIZE
1410
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001412PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001413"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001415Concatenates any number of bytes objects, with B in between each pair.\n\
1416Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417
1418static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001419string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420{
1421 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001422 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001425 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001426 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001427 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001428 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429
Tim Peters19fe14e2001-01-19 03:03:47 +00001430 seq = PySequence_Fast(orig, "");
1431 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001432 return NULL;
1433 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001434
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001435 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001436 if (seqlen == 0) {
1437 Py_DECREF(seq);
1438 return PyString_FromString("");
1439 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001441 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001442 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001443 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001444 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001445 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001446 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001448
Raymond Hettinger674f2412004-08-23 23:23:54 +00001449 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001450 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001451 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001452 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001453 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001454 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001455 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001456 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001458 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001459 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001460 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001461 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001462 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 Py_DECREF(seq);
1464 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001465 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001466 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001467 if (i != 0)
1468 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001469 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001470 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001471 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001472 Py_DECREF(seq);
1473 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 }
1476
1477 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001478 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001479 if (res == NULL) {
1480 Py_DECREF(seq);
1481 return NULL;
1482 }
1483
1484 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001485 /* I'm not worried about a PyBytes item growing because there's
1486 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001487 p = PyString_AS_STRING(res);
1488 for (i = 0; i < seqlen; ++i) {
1489 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001490 char *q;
1491 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001492 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001493 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001494 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001495 item = PySequence_Fast_GET_ITEM(seq, i);
1496 n = Py_Size(item);
1497 if (PyString_Check(item))
1498 q = PyString_AS_STRING(item);
1499 else
1500 q = PyBytes_AS_STRING(item);
1501 Py_MEMCPY(p, q, n);
1502 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001504
Jeremy Hylton49048292000-07-11 03:28:17 +00001505 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507}
1508
Tim Peters52e155e2001-06-16 05:42:57 +00001509PyObject *
1510_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001511{
Tim Petersa7259592001-06-16 05:11:17 +00001512 assert(sep != NULL && PyString_Check(sep));
1513 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001514 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001515}
1516
Thomas Wouters477c8d52006-05-27 19:21:47 +00001517Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001518string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001519{
1520 if (*end > len)
1521 *end = len;
1522 else if (*end < 0)
1523 *end += len;
1524 if (*end < 0)
1525 *end = 0;
1526 if (*start < 0)
1527 *start += len;
1528 if (*start < 0)
1529 *start = 0;
1530}
1531
Thomas Wouters477c8d52006-05-27 19:21:47 +00001532Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001533string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001536 const char *sub;
1537 Py_ssize_t sub_len;
1538 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001539 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540
Christian Heimes9cd17752007-11-18 19:35:23 +00001541 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1542 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001544 /* To support None in "start" and "end" arguments, meaning
1545 the same as if they were not passed.
1546 */
1547 if (obj_start != Py_None)
1548 if (!_PyEval_SliceIndex(obj_start, &start))
1549 return -2;
1550 if (obj_end != Py_None)
1551 if (!_PyEval_SliceIndex(obj_end, &end))
1552 return -2;
1553
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 if (PyString_Check(subobj)) {
1555 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001556 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001558 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001559 /* XXX - the "expected a character buffer object" is pretty
1560 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561 return -2;
1562
Thomas Wouters477c8d52006-05-27 19:21:47 +00001563 if (dir > 0)
1564 return stringlib_find_slice(
1565 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1566 sub, sub_len, start, end);
1567 else
1568 return stringlib_rfind_slice(
1569 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1570 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571}
1572
1573
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001574PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001575"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576\n\
1577Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001578such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579arguments start and end are interpreted as in slice notation.\n\
1580\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001581Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582
1583static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001584string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001586 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 if (result == -2)
1588 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001589 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590}
1591
1592
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001594"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001596Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597
1598static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001599string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001601 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (result == -2)
1603 return NULL;
1604 if (result == -1) {
1605 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001606 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 return NULL;
1608 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001609 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610}
1611
1612
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001614"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001616Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001617such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618arguments start and end are interpreted as in slice notation.\n\
1619\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001620Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621
1622static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001623string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001625 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 if (result == -2)
1627 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001628 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629}
1630
1631
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001633"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001635Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
1637static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001638string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001640 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 if (result == -2)
1642 return NULL;
1643 if (result == -1) {
1644 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001645 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646 return NULL;
1647 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001648 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649}
1650
1651
Thomas Wouters477c8d52006-05-27 19:21:47 +00001652Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001653do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1654{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001655 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001656 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001657 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001658 char *sep;
1659 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001660 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001661
Guido van Rossum98297ee2007-11-06 21:34:58 +00001662 if (_getbuffer(sepobj, &vsep) < 0)
1663 return NULL;
1664 sep = vsep.buf;
1665 seplen = vsep.len;
1666
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001667 i = 0;
1668 if (striptype != RIGHTSTRIP) {
1669 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1670 i++;
1671 }
1672 }
1673
1674 j = len;
1675 if (striptype != LEFTSTRIP) {
1676 do {
1677 j--;
1678 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1679 j++;
1680 }
1681
Guido van Rossum98297ee2007-11-06 21:34:58 +00001682 PyObject_ReleaseBuffer(sepobj, &vsep);
1683
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001684 if (i == 0 && j == len && PyString_CheckExact(self)) {
1685 Py_INCREF(self);
1686 return (PyObject*)self;
1687 }
1688 else
1689 return PyString_FromStringAndSize(s+i, j-i);
1690}
1691
1692
Thomas Wouters477c8d52006-05-27 19:21:47 +00001693Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001694do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695{
1696 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001697 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 i = 0;
1700 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001701 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 i++;
1703 }
1704 }
1705
1706 j = len;
1707 if (striptype != LEFTSTRIP) {
1708 do {
1709 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001710 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 j++;
1712 }
1713
Tim Peters8fa5dd02001-09-12 02:18:30 +00001714 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715 Py_INCREF(self);
1716 return (PyObject*)self;
1717 }
1718 else
1719 return PyString_FromStringAndSize(s+i, j-i);
1720}
1721
1722
Thomas Wouters477c8d52006-05-27 19:21:47 +00001723Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001724do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1725{
1726 PyObject *sep = NULL;
1727
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001728 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001729 return NULL;
1730
1731 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001732 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001733 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001734 return do_strip(self, striptype);
1735}
1736
1737
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001738PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001739"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001741Strip leading and trailing bytes contained in the argument.\n\
1742If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001744string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001746 if (PyTuple_GET_SIZE(args) == 0)
1747 return do_strip(self, BOTHSTRIP); /* Common case */
1748 else
1749 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750}
1751
1752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001754"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001756Strip leading bytes contained in the argument.\n\
1757If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001759string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001761 if (PyTuple_GET_SIZE(args) == 0)
1762 return do_strip(self, LEFTSTRIP); /* Common case */
1763 else
1764 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765}
1766
1767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001768PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001769"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001771Strip trailing bytes contained in the argument.\n\
1772If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001774string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776 if (PyTuple_GET_SIZE(args) == 0)
1777 return do_strip(self, RIGHTSTRIP); /* Common case */
1778 else
1779 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780}
1781
1782
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001783PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001784"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001786Return the number of non-overlapping occurrences of substring sub in\n\
1787string S[start:end]. Optional arguments start and end are interpreted\n\
1788as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789
1790static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001791string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001793 PyObject *sub_obj;
1794 const char *str = PyString_AS_STRING(self), *sub;
1795 Py_ssize_t sub_len;
1796 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797
Thomas Wouters477c8d52006-05-27 19:21:47 +00001798 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1799 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001801
Thomas Wouters477c8d52006-05-27 19:21:47 +00001802 if (PyString_Check(sub_obj)) {
1803 sub = PyString_AS_STRING(sub_obj);
1804 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001805 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001806 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001807 return NULL;
1808
Thomas Wouters477c8d52006-05-27 19:21:47 +00001809 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001810
Thomas Wouters477c8d52006-05-27 19:21:47 +00001811 return PyInt_FromSsize_t(
1812 stringlib_count(str + start, end - start, sub, sub_len)
1813 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814}
1815
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001817PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001818"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001820Return a copy of B, where all characters occurring in the\n\
1821optional argument deletechars are removed, and the remaining\n\
1822characters have been mapped through the given translation\n\
1823table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
1825static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001826string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001828 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001829 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001830 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001832 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001833 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 PyObject *result;
1835 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001838 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001839 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001841
1842 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001843 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001844 tablen = PyString_GET_SIZE(tableobj);
1845 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001846 else if (tableobj == Py_None) {
1847 table = NULL;
1848 tablen = 256;
1849 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001850 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852
Martin v. Löwis00b61272002-12-12 20:03:19 +00001853 if (tablen != 256) {
1854 PyErr_SetString(PyExc_ValueError,
1855 "translation table must be 256 characters long");
1856 return NULL;
1857 }
1858
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 if (delobj != NULL) {
1860 if (PyString_Check(delobj)) {
1861 del_table = PyString_AS_STRING(delobj);
1862 dellen = PyString_GET_SIZE(delobj);
1863 }
1864 else if (PyUnicode_Check(delobj)) {
1865 PyErr_SetString(PyExc_TypeError,
1866 "deletions are implemented differently for unicode");
1867 return NULL;
1868 }
1869 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1870 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 }
1872 else {
1873 del_table = NULL;
1874 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001875 }
1876
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001877 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 result = PyString_FromStringAndSize((char *)NULL, inlen);
1879 if (result == NULL)
1880 return NULL;
1881 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001882 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Guido van Rossumd8faa362007-04-27 19:54:29 +00001884 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885 /* If no deletions are required, use faster code */
1886 for (i = inlen; --i >= 0; ) {
1887 c = Py_CHARMASK(*input++);
1888 if (Py_CHARMASK((*output++ = table[c])) != c)
1889 changed = 1;
1890 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001891 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 return result;
1893 Py_DECREF(result);
1894 Py_INCREF(input_obj);
1895 return input_obj;
1896 }
1897
Guido van Rossumd8faa362007-04-27 19:54:29 +00001898 if (table == NULL) {
1899 for (i = 0; i < 256; i++)
1900 trans_table[i] = Py_CHARMASK(i);
1901 } else {
1902 for (i = 0; i < 256; i++)
1903 trans_table[i] = Py_CHARMASK(table[i]);
1904 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905
1906 for (i = 0; i < dellen; i++)
1907 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1908
1909 for (i = inlen; --i >= 0; ) {
1910 c = Py_CHARMASK(*input++);
1911 if (trans_table[c] != -1)
1912 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1913 continue;
1914 changed = 1;
1915 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001916 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917 Py_DECREF(result);
1918 Py_INCREF(input_obj);
1919 return input_obj;
1920 }
1921 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001922 if (inlen > 0)
1923 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 return result;
1925}
1926
1927
/* Search directions.  REVERSE is parenthesized so the expansion stays a
   single operand wherever it is used. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL when there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935
Thomas Wouters477c8d52006-05-27 19:21:47 +00001936/* String ops must return a string. */
1937/* If the object is subclass of string, create a copy */
1938Py_LOCAL(PyStringObject *)
1939return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001941 if (PyString_CheckExact(self)) {
1942 Py_INCREF(self);
1943 return self;
1944 }
1945 return (PyStringObject *)PyString_FromStringAndSize(
1946 PyString_AS_STRING(self),
1947 PyString_GET_SIZE(self));
1948}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949
Thomas Wouters477c8d52006-05-27 19:21:47 +00001950Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001951countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001952{
1953 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001954 const char *start=target;
1955 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956
Thomas Wouters477c8d52006-05-27 19:21:47 +00001957 while ( (start=findchar(start, end-start, c)) != NULL ) {
1958 count++;
1959 if (count >= maxcount)
1960 break;
1961 start += 1;
1962 }
1963 return count;
1964}
1965
1966Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001967findstring(const char *target, Py_ssize_t target_len,
1968 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001969 Py_ssize_t start,
1970 Py_ssize_t end,
1971 int direction)
1972{
1973 if (start < 0) {
1974 start += target_len;
1975 if (start < 0)
1976 start = 0;
1977 }
1978 if (end > target_len) {
1979 end = target_len;
1980 } else if (end < 0) {
1981 end += target_len;
1982 if (end < 0)
1983 end = 0;
1984 }
1985
1986 /* zero-length substrings always match at the first attempt */
1987 if (pattern_len == 0)
1988 return (direction > 0) ? start : end;
1989
1990 end -= pattern_len;
1991
1992 if (direction < 0) {
1993 for (; end >= start; end--)
1994 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1995 return end;
1996 } else {
1997 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001998 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001999 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 }
2001 return -1;
2002}
2003
Thomas Wouters477c8d52006-05-27 19:21:47 +00002004Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002005countstring(const char *target, Py_ssize_t target_len,
2006 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007 Py_ssize_t start,
2008 Py_ssize_t end,
2009 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002011 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
Thomas Wouters477c8d52006-05-27 19:21:47 +00002013 if (start < 0) {
2014 start += target_len;
2015 if (start < 0)
2016 start = 0;
2017 }
2018 if (end > target_len) {
2019 end = target_len;
2020 } else if (end < 0) {
2021 end += target_len;
2022 if (end < 0)
2023 end = 0;
2024 }
2025
2026 /* zero-length substrings match everywhere */
2027 if (pattern_len == 0 || maxcount == 0) {
2028 if (target_len+1 < maxcount)
2029 return target_len+1;
2030 return maxcount;
2031 }
2032
2033 end -= pattern_len;
2034 if (direction < 0) {
2035 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002036 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002037 count++;
2038 if (--maxcount <= 0) break;
2039 end -= pattern_len-1;
2040 }
2041 } else {
2042 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002043 if (Py_STRING_MATCH(target, start,
2044 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002045 count++;
2046 if (--maxcount <= 0)
2047 break;
2048 start += pattern_len-1;
2049 }
2050 }
2051 return count;
2052}
2053
2054
2055/* Algorithms for different cases of string replacement */
2056
2057/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2058Py_LOCAL(PyStringObject *)
2059replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002060 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002061 Py_ssize_t maxcount)
2062{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002063 char *self_s, *result_s;
2064 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002065 Py_ssize_t count, i, product;
2066 PyStringObject *result;
2067
2068 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002069
Thomas Wouters477c8d52006-05-27 19:21:47 +00002070 /* 1 at the end plus 1 after every character */
2071 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002072 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002073 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002074
Thomas Wouters477c8d52006-05-27 19:21:47 +00002075 /* Check for overflow */
2076 /* result_len = count * to_len + self_len; */
2077 product = count * to_len;
2078 if (product / to_len != count) {
2079 PyErr_SetString(PyExc_OverflowError,
2080 "replace string is too long");
2081 return NULL;
2082 }
2083 result_len = product + self_len;
2084 if (result_len < 0) {
2085 PyErr_SetString(PyExc_OverflowError,
2086 "replace string is too long");
2087 return NULL;
2088 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002089
Thomas Wouters477c8d52006-05-27 19:21:47 +00002090 if (! (result = (PyStringObject *)
2091 PyString_FromStringAndSize(NULL, result_len)) )
2092 return NULL;
2093
2094 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002095 result_s = PyString_AS_STRING(result);
2096
2097 /* TODO: special case single character, which doesn't need memcpy */
2098
2099 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002100 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002101 result_s += to_len;
2102 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002103
Thomas Wouters477c8d52006-05-27 19:21:47 +00002104 for (i=0; i<count; i++) {
2105 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002106 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002107 result_s += to_len;
2108 }
2109
2110 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002111 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002112
2113 return result;
2114}
2115
2116/* Special case for deleting a single character */
2117/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2118Py_LOCAL(PyStringObject *)
2119replace_delete_single_character(PyStringObject *self,
2120 char from_c, Py_ssize_t maxcount)
2121{
2122 char *self_s, *result_s;
2123 char *start, *next, *end;
2124 Py_ssize_t self_len, result_len;
2125 Py_ssize_t count;
2126 PyStringObject *result;
2127
2128 self_len = PyString_GET_SIZE(self);
2129 self_s = PyString_AS_STRING(self);
2130
2131 count = countchar(self_s, self_len, from_c, maxcount);
2132 if (count == 0) {
2133 return return_self(self);
2134 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002135
Thomas Wouters477c8d52006-05-27 19:21:47 +00002136 result_len = self_len - count; /* from_len == 1 */
2137 assert(result_len>=0);
2138
2139 if ( (result = (PyStringObject *)
2140 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2141 return NULL;
2142 result_s = PyString_AS_STRING(result);
2143
2144 start = self_s;
2145 end = self_s + self_len;
2146 while (count-- > 0) {
2147 next = findchar(start, end-start, from_c);
2148 if (next == NULL)
2149 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002150 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002151 result_s += (next-start);
2152 start = next+1;
2153 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002154 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002155
Thomas Wouters477c8d52006-05-27 19:21:47 +00002156 return result;
2157}
2158
2159/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2160
2161Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002162replace_delete_substring(PyStringObject *self,
2163 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002165 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002166 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002167 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168 Py_ssize_t count, offset;
2169 PyStringObject *result;
2170
2171 self_len = PyString_GET_SIZE(self);
2172 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002173
2174 count = countstring(self_s, self_len,
2175 from_s, from_len,
2176 0, self_len, 1,
2177 maxcount);
2178
2179 if (count == 0) {
2180 /* no matches */
2181 return return_self(self);
2182 }
2183
2184 result_len = self_len - (count * from_len);
2185 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002186
Thomas Wouters477c8d52006-05-27 19:21:47 +00002187 if ( (result = (PyStringObject *)
2188 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2189 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002190
Thomas Wouters477c8d52006-05-27 19:21:47 +00002191 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002192
Thomas Wouters477c8d52006-05-27 19:21:47 +00002193 start = self_s;
2194 end = self_s + self_len;
2195 while (count-- > 0) {
2196 offset = findstring(start, end-start,
2197 from_s, from_len,
2198 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 if (offset == -1)
2200 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002201 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002202
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002203 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002204
Thomas Wouters477c8d52006-05-27 19:21:47 +00002205 result_s += (next-start);
2206 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002208 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002209 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
Thomas Wouters477c8d52006-05-27 19:21:47 +00002212/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2213Py_LOCAL(PyStringObject *)
2214replace_single_character_in_place(PyStringObject *self,
2215 char from_c, char to_c,
2216 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002218 char *self_s, *result_s, *start, *end, *next;
2219 Py_ssize_t self_len;
2220 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002221
Thomas Wouters477c8d52006-05-27 19:21:47 +00002222 /* The result string will be the same size */
2223 self_s = PyString_AS_STRING(self);
2224 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002225
Thomas Wouters477c8d52006-05-27 19:21:47 +00002226 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002227
Thomas Wouters477c8d52006-05-27 19:21:47 +00002228 if (next == NULL) {
2229 /* No matches; return the original string */
2230 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002232
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 /* Need to make a new string */
2234 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2235 if (result == NULL)
2236 return NULL;
2237 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002238 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002239
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240 /* change everything in-place, starting with this one */
2241 start = result_s + (next-self_s);
2242 *start = to_c;
2243 start++;
2244 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002245
Thomas Wouters477c8d52006-05-27 19:21:47 +00002246 while (--maxcount > 0) {
2247 next = findchar(start, end-start, from_c);
2248 if (next == NULL)
2249 break;
2250 *next = to_c;
2251 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002252 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002253
Thomas Wouters477c8d52006-05-27 19:21:47 +00002254 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255}
2256
Thomas Wouters477c8d52006-05-27 19:21:47 +00002257/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2258Py_LOCAL(PyStringObject *)
2259replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002260 const char *from_s, Py_ssize_t from_len,
2261 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002262 Py_ssize_t maxcount)
2263{
2264 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002265 char *self_s;
2266 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002267 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002270
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271 self_s = PyString_AS_STRING(self);
2272 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002273
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 offset = findstring(self_s, self_len,
2275 from_s, from_len,
2276 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002277 if (offset == -1) {
2278 /* No matches; return the original string */
2279 return return_self(self);
2280 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002281
Thomas Wouters477c8d52006-05-27 19:21:47 +00002282 /* Need to make a new string */
2283 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2284 if (result == NULL)
2285 return NULL;
2286 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002287 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002288
Thomas Wouters477c8d52006-05-27 19:21:47 +00002289 /* change everything in-place, starting with this one */
2290 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002291 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002292 start += from_len;
2293 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002294
Thomas Wouters477c8d52006-05-27 19:21:47 +00002295 while ( --maxcount > 0) {
2296 offset = findstring(start, end-start,
2297 from_s, from_len,
2298 0, end-start, FORWARD);
2299 if (offset==-1)
2300 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002301 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002302 start += offset+from_len;
2303 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002304
Thomas Wouters477c8d52006-05-27 19:21:47 +00002305 return result;
2306}
2307
2308/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2309Py_LOCAL(PyStringObject *)
2310replace_single_character(PyStringObject *self,
2311 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002312 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002313 Py_ssize_t maxcount)
2314{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002315 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002316 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002317 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002318 Py_ssize_t count, product;
2319 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002320
Thomas Wouters477c8d52006-05-27 19:21:47 +00002321 self_s = PyString_AS_STRING(self);
2322 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002323
Thomas Wouters477c8d52006-05-27 19:21:47 +00002324 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002325 if (count == 0) {
2326 /* no matches, return unchanged */
2327 return return_self(self);
2328 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329
Thomas Wouters477c8d52006-05-27 19:21:47 +00002330 /* use the difference between current and new, hence the "-1" */
2331 /* result_len = self_len + count * (to_len-1) */
2332 product = count * (to_len-1);
2333 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002334 PyErr_SetString(PyExc_OverflowError,
2335 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002336 return NULL;
2337 }
2338 result_len = self_len + product;
2339 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002340 PyErr_SetString(PyExc_OverflowError,
2341 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002342 return NULL;
2343 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002344
Thomas Wouters477c8d52006-05-27 19:21:47 +00002345 if ( (result = (PyStringObject *)
2346 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2347 return NULL;
2348 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002349
Thomas Wouters477c8d52006-05-27 19:21:47 +00002350 start = self_s;
2351 end = self_s + self_len;
2352 while (count-- > 0) {
2353 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002354 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002355 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 if (next == start) {
2358 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002359 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002360 result_s += to_len;
2361 start += 1;
2362 } else {
2363 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002364 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002366 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002367 result_s += to_len;
2368 start = next+1;
2369 }
2370 }
2371 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002372 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002373
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 return result;
2375}
2376
2377/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2378Py_LOCAL(PyStringObject *)
2379replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002380 const char *from_s, Py_ssize_t from_len,
2381 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002383 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002384 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002385 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002386 Py_ssize_t count, offset, product;
2387 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002388
Thomas Wouters477c8d52006-05-27 19:21:47 +00002389 self_s = PyString_AS_STRING(self);
2390 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002391
Thomas Wouters477c8d52006-05-27 19:21:47 +00002392 count = countstring(self_s, self_len,
2393 from_s, from_len,
2394 0, self_len, FORWARD, maxcount);
2395 if (count == 0) {
2396 /* no matches, return unchanged */
2397 return return_self(self);
2398 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002399
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400 /* Check for overflow */
2401 /* result_len = self_len + count * (to_len-from_len) */
2402 product = count * (to_len-from_len);
2403 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002404 PyErr_SetString(PyExc_OverflowError,
2405 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002406 return NULL;
2407 }
2408 result_len = self_len + product;
2409 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002410 PyErr_SetString(PyExc_OverflowError,
2411 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002412 return NULL;
2413 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002414
Thomas Wouters477c8d52006-05-27 19:21:47 +00002415 if ( (result = (PyStringObject *)
2416 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2417 return NULL;
2418 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002419
Thomas Wouters477c8d52006-05-27 19:21:47 +00002420 start = self_s;
2421 end = self_s + self_len;
2422 while (count-- > 0) {
2423 offset = findstring(start, end-start,
2424 from_s, from_len,
2425 0, end-start, FORWARD);
2426 if (offset == -1)
2427 break;
2428 next = start+offset;
2429 if (next == start) {
2430 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002431 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002432 result_s += to_len;
2433 start += from_len;
2434 } else {
2435 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002436 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002437 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002438 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002439 result_s += to_len;
2440 start = next+from_len;
2441 }
2442 }
2443 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002444 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002445
Thomas Wouters477c8d52006-05-27 19:21:47 +00002446 return result;
2447}
2448
2449
2450Py_LOCAL(PyStringObject *)
2451replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002452 const char *from_s, Py_ssize_t from_len,
2453 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 Py_ssize_t maxcount)
2455{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002456 if (maxcount < 0) {
2457 maxcount = PY_SSIZE_T_MAX;
2458 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2459 /* nothing to do; return the original string */
2460 return return_self(self);
2461 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002462
Thomas Wouters477c8d52006-05-27 19:21:47 +00002463 if (maxcount == 0 ||
2464 (from_len == 0 && to_len == 0)) {
2465 /* nothing to do; return the original string */
2466 return return_self(self);
2467 }
2468
2469 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002470
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471 if (from_len == 0) {
2472 /* insert the 'to' string everywhere. */
2473 /* >>> "Python".replace("", ".") */
2474 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002475 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002476 }
2477
2478 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2479 /* point for an empty self string to generate a non-empty string */
2480 /* Special case so the remaining code always gets a non-empty string */
2481 if (PyString_GET_SIZE(self) == 0) {
2482 return return_self(self);
2483 }
2484
2485 if (to_len == 0) {
2486 /* delete all occurances of 'from' string */
2487 if (from_len == 1) {
2488 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002489 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002490 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002491 return replace_delete_substring(self, from_s,
2492 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002493 }
2494 }
2495
2496 /* Handle special case where both strings have the same length */
2497
2498 if (from_len == to_len) {
2499 if (from_len == 1) {
2500 return replace_single_character_in_place(
2501 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502 from_s[0],
2503 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 maxcount);
2505 } else {
2506 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002507 self, from_s, from_len, to_s, to_len,
2508 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002509 }
2510 }
2511
2512 /* Otherwise use the more generic algorithms */
2513 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002514 return replace_single_character(self, from_s[0],
2515 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002516 } else {
2517 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002518 return replace_substring(self, from_s, from_len, to_s, to_len,
2519 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002520 }
2521}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002523PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002524"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002526Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002527old replaced by new. If the optional argument count is\n\
2528given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529
2530static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002531string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002533 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002535 const char *from_s, *to_s;
2536 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Thomas Wouters477c8d52006-05-27 19:21:47 +00002538 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540
Thomas Wouters477c8d52006-05-27 19:21:47 +00002541 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002542 from_s = PyString_AS_STRING(from);
2543 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002545 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 return NULL;
2547
Thomas Wouters477c8d52006-05-27 19:21:47 +00002548 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002549 to_s = PyString_AS_STRING(to);
2550 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002552 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 return NULL;
2554
Thomas Wouters477c8d52006-05-27 19:21:47 +00002555 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002556 from_s, from_len,
2557 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002558}
2559
Thomas Wouters477c8d52006-05-27 19:21:47 +00002560/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002562/* Matches the end (direction >= 0) or start (direction < 0) of self
2563 * against substr, using the start and end arguments. Returns
2564 * -1 on error, 0 if not found and 1 if found.
2565 */
2566Py_LOCAL(int)
2567_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2568 Py_ssize_t end, int direction)
2569{
2570 Py_ssize_t len = PyString_GET_SIZE(self);
2571 Py_ssize_t slen;
2572 const char* sub;
2573 const char* str;
2574
2575 if (PyString_Check(substr)) {
2576 sub = PyString_AS_STRING(substr);
2577 slen = PyString_GET_SIZE(substr);
2578 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002579 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2580 return -1;
2581 str = PyString_AS_STRING(self);
2582
2583 string_adjust_indices(&start, &end, len);
2584
2585 if (direction < 0) {
2586 /* startswith */
2587 if (start+slen > len)
2588 return 0;
2589 } else {
2590 /* endswith */
2591 if (end-start < slen || start > len)
2592 return 0;
2593
2594 if (end-slen > start)
2595 start = end - slen;
2596 }
2597 if (end-start >= slen)
2598 return ! memcmp(str+start, sub, slen);
2599 return 0;
2600}
2601
2602
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002603PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002604"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002606Return True if B starts with the specified prefix, False otherwise.\n\
2607With optional start, test B beginning at that position.\n\
2608With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002609prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002610
2611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002612string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002614 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002615 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002616 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002617 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618
Guido van Rossumc6821402000-05-08 14:08:05 +00002619 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2620 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002622 if (PyTuple_Check(subobj)) {
2623 Py_ssize_t i;
2624 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2625 result = _string_tailmatch(self,
2626 PyTuple_GET_ITEM(subobj, i),
2627 start, end, -1);
2628 if (result == -1)
2629 return NULL;
2630 else if (result) {
2631 Py_RETURN_TRUE;
2632 }
2633 }
2634 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002636 result = _string_tailmatch(self, subobj, start, end, -1);
2637 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002639 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002640 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002641}
2642
2643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002644PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002645"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002646\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002647Return True if B ends with the specified suffix, False otherwise.\n\
2648With optional start, test B beginning at that position.\n\
2649With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002650suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
2652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002653string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002655 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002656 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002658 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659
Guido van Rossumc6821402000-05-08 14:08:05 +00002660 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2661 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002663 if (PyTuple_Check(subobj)) {
2664 Py_ssize_t i;
2665 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2666 result = _string_tailmatch(self,
2667 PyTuple_GET_ITEM(subobj, i),
2668 start, end, +1);
2669 if (result == -1)
2670 return NULL;
2671 else if (result) {
2672 Py_RETURN_TRUE;
2673 }
2674 }
2675 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002676 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677 result = _string_tailmatch(self, subobj, start, end, +1);
2678 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002680 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002681 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002682}
2683
2684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002685PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002686"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002687\n\
2688Decodes S using the codec registered for encoding. encoding defaults\n\
2689to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002690handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2691a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002692as well as any other name registerd with codecs.register_error that is\n\
2693able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002694
2695static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002696string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002697{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002698 const char *encoding = NULL;
2699 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002700
Guido van Rossum98297ee2007-11-06 21:34:58 +00002701 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2702 return NULL;
2703 if (encoding == NULL)
2704 encoding = PyUnicode_GetDefaultEncoding();
2705 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002706}
2707
2708
Guido van Rossumae404e22007-10-26 21:46:44 +00002709PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002710"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002711\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002712Create a bytes object from a string of hexadecimal numbers.\n\
2713Spaces between two numbers are accepted.\n\
2714Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002715
2716static int
2717hex_digit_to_int(Py_UNICODE c)
2718{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002719 if (c >= 128)
2720 return -1;
2721 if (ISDIGIT(c))
2722 return c - '0';
2723 else {
2724 if (ISUPPER(c))
2725 c = TOLOWER(c);
2726 if (c >= 'a' && c <= 'f')
2727 return c - 'a' + 10;
2728 }
2729 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002730}
2731
2732static PyObject *
2733string_fromhex(PyObject *cls, PyObject *args)
2734{
2735 PyObject *newstring, *hexobj;
2736 char *buf;
2737 Py_UNICODE *hex;
2738 Py_ssize_t hexlen, byteslen, i, j;
2739 int top, bot;
2740
2741 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2742 return NULL;
2743 assert(PyUnicode_Check(hexobj));
2744 hexlen = PyUnicode_GET_SIZE(hexobj);
2745 hex = PyUnicode_AS_UNICODE(hexobj);
2746 byteslen = hexlen/2; /* This overestimates if there are spaces */
2747 newstring = PyString_FromStringAndSize(NULL, byteslen);
2748 if (!newstring)
2749 return NULL;
2750 buf = PyString_AS_STRING(newstring);
2751 for (i = j = 0; i < hexlen; i += 2) {
2752 /* skip over spaces in the input */
2753 while (hex[i] == ' ')
2754 i++;
2755 if (i >= hexlen)
2756 break;
2757 top = hex_digit_to_int(hex[i]);
2758 bot = hex_digit_to_int(hex[i+1]);
2759 if (top == -1 || bot == -1) {
2760 PyErr_Format(PyExc_ValueError,
2761 "non-hexadecimal number found in "
2762 "fromhex() arg at position %zd", i);
2763 goto error;
2764 }
2765 buf[j++] = (top << 4) + bot;
2766 }
2767 if (_PyString_Resize(&newstring, j) < 0)
2768 goto error;
2769 return newstring;
2770
2771 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002772 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002773 return NULL;
2774}
2775
2776
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002777static PyObject *
2778string_getnewargs(PyStringObject *v)
2779{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002780 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002781}
2782
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002783
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002784static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002785string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002786 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002787 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2788 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002789 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002790 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002791 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002792 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002793 endswith__doc__},
2794 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2795 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002796 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002797 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2798 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002799 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002800 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2801 _Py_isalnum__doc__},
2802 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2803 _Py_isalpha__doc__},
2804 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2805 _Py_isdigit__doc__},
2806 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2807 _Py_islower__doc__},
2808 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2809 _Py_isspace__doc__},
2810 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2811 _Py_istitle__doc__},
2812 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2813 _Py_isupper__doc__},
2814 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2815 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2816 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002817 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002818 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002819 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2820 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2821 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002822 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002823 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2824 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002825 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2826 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2827 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2828 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2829 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002830 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002831 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002832 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002833 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2834 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002835 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002836 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2837 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002838 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002839 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002840 {NULL, NULL} /* sentinel */
2841};
2842
Jeremy Hylton938ace62002-07-17 16:30:39 +00002843static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002844str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2845
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002846static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002847string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002848{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002849 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002850 const char *encoding = NULL;
2851 const char *errors = NULL;
2852 PyObject *new = NULL;
2853 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002854 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002855
Guido van Rossumae960af2001-08-30 03:11:59 +00002856 if (type != &PyString_Type)
2857 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002858 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002859 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002860 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002861 if (x == NULL) {
2862 if (encoding != NULL || errors != NULL) {
2863 PyErr_SetString(PyExc_TypeError,
2864 "encoding or errors without sequence "
2865 "argument");
2866 return NULL;
2867 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002868 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002869 }
2870
2871 if (PyUnicode_Check(x)) {
2872 /* Encode via the codec registry */
2873 if (encoding == NULL) {
2874 PyErr_SetString(PyExc_TypeError,
2875 "string argument without an encoding");
2876 return NULL;
2877 }
2878 new = PyCodec_Encode(x, encoding, errors);
2879 if (new == NULL)
2880 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002881 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002882 return new;
2883 }
2884
2885 /* If it's not unicode, there can't be encoding or errors */
2886 if (encoding != NULL || errors != NULL) {
2887 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002888 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002889 return NULL;
2890 }
2891
Guido van Rossum98297ee2007-11-06 21:34:58 +00002892 /* Is it an int? */
2893 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2894 if (size == -1 && PyErr_Occurred()) {
2895 PyErr_Clear();
2896 }
2897 else {
2898 if (size < 0) {
2899 PyErr_SetString(PyExc_ValueError, "negative count");
2900 return NULL;
2901 }
2902 new = PyString_FromStringAndSize(NULL, size);
2903 if (new == NULL) {
2904 return NULL;
2905 }
2906 if (size > 0) {
2907 memset(((PyStringObject*)new)->ob_sval, 0, size);
2908 }
2909 return new;
2910 }
2911
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002912 /* Use the modern buffer interface */
2913 if (PyObject_CheckBuffer(x)) {
2914 Py_buffer view;
2915 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2916 return NULL;
2917 new = PyString_FromStringAndSize(NULL, view.len);
2918 if (!new)
2919 goto fail;
2920 // XXX(brett.cannon): Better way to get to internal buffer?
2921 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2922 &view, view.len, 'C') < 0)
2923 goto fail;
2924 PyObject_ReleaseBuffer(x, &view);
2925 return new;
2926 fail:
2927 Py_XDECREF(new);
2928 PyObject_ReleaseBuffer(x, &view);
2929 return NULL;
2930 }
2931
Guido van Rossum98297ee2007-11-06 21:34:58 +00002932 /* For iterator version, create a string object and resize as needed */
2933 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2934 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2935 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002936 size = 64;
2937 new = PyString_FromStringAndSize(NULL, size);
2938 if (new == NULL)
2939 return NULL;
2940
2941 /* XXX Optimize this if the arguments is a list, tuple */
2942
2943 /* Get the iterator */
2944 it = PyObject_GetIter(x);
2945 if (it == NULL)
2946 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002947
2948 /* Run the iterator to exhaustion */
2949 for (i = 0; ; i++) {
2950 PyObject *item;
2951 Py_ssize_t value;
2952
2953 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002954 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002955 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002956 if (PyErr_Occurred())
2957 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002958 break;
2959 }
2960
2961 /* Interpret it as an int (__index__) */
2962 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2963 Py_DECREF(item);
2964 if (value == -1 && PyErr_Occurred())
2965 goto error;
2966
2967 /* Range check */
2968 if (value < 0 || value >= 256) {
2969 PyErr_SetString(PyExc_ValueError,
2970 "bytes must be in range(0, 256)");
2971 goto error;
2972 }
2973
2974 /* Append the byte */
2975 if (i >= size) {
2976 size *= 2;
2977 if (_PyString_Resize(&new, size) < 0)
2978 goto error;
2979 }
2980 ((PyStringObject *)new)->ob_sval[i] = value;
2981 }
2982 _PyString_Resize(&new, i);
2983
2984 /* Clean up and return success */
2985 Py_DECREF(it);
2986 return new;
2987
2988 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002989 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002990 Py_XDECREF(it);
2991 Py_DECREF(new);
2992 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002993}
2994
Guido van Rossumae960af2001-08-30 03:11:59 +00002995static PyObject *
2996str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2997{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002998 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002999 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003000
3001 assert(PyType_IsSubtype(type, &PyString_Type));
3002 tmp = string_new(&PyString_Type, args, kwds);
3003 if (tmp == NULL)
3004 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003005 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003006 n = PyString_GET_SIZE(tmp);
3007 pnew = type->tp_alloc(type, n);
3008 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003009 Py_MEMCPY(PyString_AS_STRING(pnew),
3010 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003011 ((PyStringObject *)pnew)->ob_shash =
3012 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003013 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003014 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003015 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003016}
3017
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003018PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003019"bytes(iterable_of_ints) -> bytes.\n\
3020bytes(string, encoding[, errors]) -> bytes\n\
3021bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3022bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003023\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003024Construct an immutable array of bytes from:\n\
3025 - an iterable yielding integers in range(256)\n\
3026 - a text string encoded using the specified encoding\n\
3027 - a bytes or a buffer object\n\
3028 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003029
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003030static PyObject *str_iter(PyObject *seq);
3031
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003032PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003033 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003034 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003035 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003036 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003037 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003038 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003039 0, /* tp_getattr */
3040 0, /* tp_setattr */
3041 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003042 (reprfunc)string_repr, /* tp_repr */
3043 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003044 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003045 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003046 (hashfunc)string_hash, /* tp_hash */
3047 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003048 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003049 PyObject_GenericGetAttr, /* tp_getattro */
3050 0, /* tp_setattro */
3051 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003052 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3053 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003054 string_doc, /* tp_doc */
3055 0, /* tp_traverse */
3056 0, /* tp_clear */
3057 (richcmpfunc)string_richcompare, /* tp_richcompare */
3058 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003059 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003060 0, /* tp_iternext */
3061 string_methods, /* tp_methods */
3062 0, /* tp_members */
3063 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003064 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003065 0, /* tp_dict */
3066 0, /* tp_descr_get */
3067 0, /* tp_descr_set */
3068 0, /* tp_dictoffset */
3069 0, /* tp_init */
3070 0, /* tp_alloc */
3071 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003072 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003073};
3074
3075void
Fred Drakeba096332000-07-09 07:04:36 +00003076PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003077{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003078 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003079 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003080 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003081 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003082 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003083 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003084 *pv = NULL;
3085 return;
3086 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003087 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003088 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003089 *pv = v;
3090}
3091
Guido van Rossum013142a1994-08-30 08:19:36 +00003092void
Fred Drakeba096332000-07-09 07:04:36 +00003093PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003094{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 PyString_Concat(pv, w);
3096 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003097}
3098
3099
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003100/* The following function breaks the notion that strings are immutable:
3101 it changes the size of a string. We get away with this only if there
3102 is only one module referencing the object. You can also think of it
3103 as creating a new string object and destroying the old one, only
3104 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003105 already be known to some other part of the code...
3106 Note that if there's not enough memory to resize the string, the original
3107 string object at *pv is deallocated, *pv is set to NULL, an "out of
3108 memory" exception is set, and -1 is returned. Else (on success) 0 is
3109 returned, and the value in *pv may or may not be the same as on input.
3110 As always, an extra byte is allocated for a trailing \0 byte (newsize
3111 does *not* include that), and a trailing \0 byte is stored.
3112*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003113
3114int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003115_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003116{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003117 register PyObject *v;
3118 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003119 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003120 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003121 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122 Py_DECREF(v);
3123 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003124 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003125 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003126 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003127 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003128 _Py_ForgetReference(v);
3129 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003130 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003131 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003132 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003134 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003135 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136 _Py_NewReference(*pv);
3137 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003138 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003139 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003140 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003141 return 0;
3142}
Guido van Rossume5372401993-03-16 12:15:04 +00003143
Tim Peters38fd5b62000-09-21 05:43:11 +00003144/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3145 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3146 * Python's regular ints.
3147 * Return value: a new PyString*, or NULL if error.
3148 * . *pbuf is set to point into it,
3149 * *plen set to the # of chars following that.
3150 * Caller must decref it when done using pbuf.
3151 * The string starting at *pbuf is of the form
3152 * "-"? ("0x" | "0X")? digit+
3153 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003154 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003155 * There will be at least prec digits, zero-filled on the left if
3156 * necessary to get that many.
3157 * val object to be converted
3158 * flags bitmask of format flags; only F_ALT is looked at
3159 * prec minimum number of digits; 0-fill on left if needed
3160 * type a character in [duoxX]; u acts the same as d
3161 *
3162 * CAUTION: o, x and X conversions on regular ints can never
3163 * produce a '-' sign, but can for Python's unbounded ints.
3164 */
3165PyObject*
3166_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3167 char **pbuf, int *plen)
3168{
3169 PyObject *result = NULL;
3170 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003171 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003172 int sign; /* 1 if '-', else 0 */
3173 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003174 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003175 int numdigits; /* len == numnondigits + numdigits */
3176 int numnondigits = 0;
3177
Guido van Rossumddefaf32007-01-14 03:31:43 +00003178 /* Avoid exceeding SSIZE_T_MAX */
3179 if (prec > PY_SSIZE_T_MAX-3) {
3180 PyErr_SetString(PyExc_OverflowError,
3181 "precision too large");
3182 return NULL;
3183 }
3184
Tim Peters38fd5b62000-09-21 05:43:11 +00003185 switch (type) {
3186 case 'd':
3187 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003188 /* Special-case boolean: we want 0/1 */
3189 if (PyBool_Check(val))
3190 result = PyNumber_ToBase(val, 10);
3191 else
3192 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003193 break;
3194 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003195 numnondigits = 2;
3196 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003197 break;
3198 case 'x':
3199 case 'X':
3200 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003201 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003202 break;
3203 default:
3204 assert(!"'type' not in [duoxX]");
3205 }
3206 if (!result)
3207 return NULL;
3208
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003209 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003210 if (!buf) {
3211 Py_DECREF(result);
3212 return NULL;
3213 }
3214
Tim Peters38fd5b62000-09-21 05:43:11 +00003215 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003216 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003217 PyErr_BadInternalCall();
3218 return NULL;
3219 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003220 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003221 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003222 PyErr_SetString(PyExc_ValueError,
3223 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003224 return NULL;
3225 }
3226 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003227 if (buf[len-1] == 'L') {
3228 --len;
3229 buf[len] = '\0';
3230 }
3231 sign = buf[0] == '-';
3232 numnondigits += sign;
3233 numdigits = len - numnondigits;
3234 assert(numdigits > 0);
3235
Tim Petersfff53252001-04-12 18:38:48 +00003236 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003237 if (((flags & F_ALT) == 0 &&
3238 (type == 'o' || type == 'x' || type == 'X'))) {
3239 assert(buf[sign] == '0');
3240 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003241 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003242 numnondigits -= 2;
3243 buf += 2;
3244 len -= 2;
3245 if (sign)
3246 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003247 assert(len == numnondigits + numdigits);
3248 assert(numdigits > 0);
3249 }
3250
3251 /* Fill with leading zeroes to meet minimum width. */
3252 if (prec > numdigits) {
3253 PyObject *r1 = PyString_FromStringAndSize(NULL,
3254 numnondigits + prec);
3255 char *b1;
3256 if (!r1) {
3257 Py_DECREF(result);
3258 return NULL;
3259 }
3260 b1 = PyString_AS_STRING(r1);
3261 for (i = 0; i < numnondigits; ++i)
3262 *b1++ = *buf++;
3263 for (i = 0; i < prec - numdigits; i++)
3264 *b1++ = '0';
3265 for (i = 0; i < numdigits; i++)
3266 *b1++ = *buf++;
3267 *b1 = '\0';
3268 Py_DECREF(result);
3269 result = r1;
3270 buf = PyString_AS_STRING(result);
3271 len = numnondigits + prec;
3272 }
3273
3274 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003275 if (type == 'X') {
3276 /* Need to convert all lower case letters to upper case.
3277 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003278 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003279 if (buf[i] >= 'a' && buf[i] <= 'x')
3280 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003281 }
3282 *pbuf = buf;
3283 *plen = len;
3284 return result;
3285}
3286
Guido van Rossum8cf04761997-08-02 02:57:45 +00003287void
Fred Drakeba096332000-07-09 07:04:36 +00003288PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003289{
3290 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003291 for (i = 0; i < UCHAR_MAX + 1; i++) {
3292 Py_XDECREF(characters[i]);
3293 characters[i] = NULL;
3294 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003295 Py_XDECREF(nullstring);
3296 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003297}
Barry Warsawa903ad982001-02-23 16:40:48 +00003298
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003299/*********************** Str Iterator ****************************/
3300
3301typedef struct {
3302 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003303 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003304 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3305} striterobject;
3306
3307static void
3308striter_dealloc(striterobject *it)
3309{
3310 _PyObject_GC_UNTRACK(it);
3311 Py_XDECREF(it->it_seq);
3312 PyObject_GC_Del(it);
3313}
3314
3315static int
3316striter_traverse(striterobject *it, visitproc visit, void *arg)
3317{
3318 Py_VISIT(it->it_seq);
3319 return 0;
3320}
3321
3322static PyObject *
3323striter_next(striterobject *it)
3324{
3325 PyStringObject *seq;
3326 PyObject *item;
3327
3328 assert(it != NULL);
3329 seq = it->it_seq;
3330 if (seq == NULL)
3331 return NULL;
3332 assert(PyString_Check(seq));
3333
3334 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00003335 item = PyInt_FromLong(
3336 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003337 if (item != NULL)
3338 ++it->it_index;
3339 return item;
3340 }
3341
3342 Py_DECREF(seq);
3343 it->it_seq = NULL;
3344 return NULL;
3345}
3346
3347static PyObject *
3348striter_len(striterobject *it)
3349{
3350 Py_ssize_t len = 0;
3351 if (it->it_seq)
3352 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
3353 return PyInt_FromSsize_t(len);
3354}
3355
Guido van Rossum49d6b072006-08-17 21:11:47 +00003356PyDoc_STRVAR(length_hint_doc,
3357 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003358
3359static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003360 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3361 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003362 {NULL, NULL} /* sentinel */
3363};
3364
3365PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003366 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003367 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003368 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003369 0, /* tp_itemsize */
3370 /* methods */
3371 (destructor)striter_dealloc, /* tp_dealloc */
3372 0, /* tp_print */
3373 0, /* tp_getattr */
3374 0, /* tp_setattr */
3375 0, /* tp_compare */
3376 0, /* tp_repr */
3377 0, /* tp_as_number */
3378 0, /* tp_as_sequence */
3379 0, /* tp_as_mapping */
3380 0, /* tp_hash */
3381 0, /* tp_call */
3382 0, /* tp_str */
3383 PyObject_GenericGetAttr, /* tp_getattro */
3384 0, /* tp_setattro */
3385 0, /* tp_as_buffer */
3386 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3387 0, /* tp_doc */
3388 (traverseproc)striter_traverse, /* tp_traverse */
3389 0, /* tp_clear */
3390 0, /* tp_richcompare */
3391 0, /* tp_weaklistoffset */
3392 PyObject_SelfIter, /* tp_iter */
3393 (iternextfunc)striter_next, /* tp_iternext */
3394 striter_methods, /* tp_methods */
3395 0,
3396};
3397
3398static PyObject *
3399str_iter(PyObject *seq)
3400{
3401 striterobject *it;
3402
3403 if (!PyString_Check(seq)) {
3404 PyErr_BadInternalCall();
3405 return NULL;
3406 }
3407 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3408 if (it == NULL)
3409 return NULL;
3410 it->it_index = 0;
3411 Py_INCREF(seq);
3412 it->it_seq = (PyStringObject *)seq;
3413 _PyObject_GC_TRACK(it);
3414 return (PyObject *)it;
3415}