blob: 0125a99c7de0be251cb96cccfc65a591f21b821b [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
Christian Heimes90aa7642007-12-19 02:45:37 +000015 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossum98297ee2007-11-06 21:34:58 +000016
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000021 Py_TYPE(obj)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +000022 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Christian Heimes90aa7642007-12-19 02:45:37 +0000354 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
Martin v. Löwis18e16552006-02-15 17:27:45 +0000507Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getsize(op);
Christian Heimes90aa7642007-12-19 02:45:37 +0000512 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
515/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000518 if (!PyString_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000520 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000521 return NULL;
522 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000523 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524}
525
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000526int
527PyString_AsStringAndSize(register PyObject *obj,
528 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000529 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530{
531 if (s == NULL) {
532 PyErr_BadInternalCall();
533 return -1;
534 }
535
536 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000537 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000538 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000539 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540 }
541
542 *s = PyString_AS_STRING(obj);
543 if (len != NULL)
544 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000545 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000546 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000547 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000548 return -1;
549 }
550 return 0;
551}
552
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554/* Methods */
555
Thomas Wouters477c8d52006-05-27 19:21:47 +0000556#define STRINGLIB_CHAR char
557
558#define STRINGLIB_CMP memcmp
559#define STRINGLIB_LEN PyString_GET_SIZE
560#define STRINGLIB_NEW PyString_FromStringAndSize
561#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000562/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000563
564#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000565#define STRINGLIB_CHECK_EXACT PyString_CheckExact
566#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000567
568#include "stringlib/fastsearch.h"
569
570#include "stringlib/count.h"
571#include "stringlib/find.h"
572#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000573#include "stringlib/ctype.h"
574#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000575
576
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577PyObject *
578PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000580 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes90aa7642007-12-19 02:45:37 +0000582 Py_ssize_t length = Py_SIZE(op);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000583 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000584 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000585 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000586 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000587 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000588 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000589 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000590 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000591 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 }
594 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000595 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000596 register Py_UNICODE c;
597 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000598 int quote;
599
Guido van Rossum98297ee2007-11-06 21:34:58 +0000600 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000601 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000602 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000603 char *test, *start;
604 start = PyString_AS_STRING(op);
605 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000607 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000608 goto decided;
609 }
610 else if (*test == '\'')
611 quote = '"';
612 }
613 decided:
614 ;
615 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000616
Guido van Rossum98297ee2007-11-06 21:34:58 +0000617 *p++ = 'b', *p++ = quote;
618 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000619 /* There's at least enough room for a hex escape
620 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000621 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000631 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000632 *p++ = '\\';
633 *p++ = 'x';
634 *p++ = hexdigits[(c & 0xf0) >> 4];
635 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000636 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000637 else
638 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000640 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000641 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000643 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
644 Py_DECREF(v);
645 return NULL;
646 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000647 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649}
650
Guido van Rossum189f1df2001-05-01 16:51:53 +0000651static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652string_repr(PyObject *op)
653{
654 return PyString_Repr(op, 1);
655}
656
657static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000658string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000659{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000660 if (Py_BytesWarningFlag) {
661 if (PyErr_WarnEx(PyExc_BytesWarning,
662 "str() on a bytes instance", 1))
663 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000664 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000665 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000666}
667
Martin v. Löwis18e16552006-02-15 17:27:45 +0000668static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000669string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670{
Christian Heimes90aa7642007-12-19 02:45:37 +0000671 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672}
673
Guido van Rossum98297ee2007-11-06 21:34:58 +0000674/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000675static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000676string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 Py_ssize_t size;
679 Py_buffer va, vb;
680 PyObject *result = NULL;
681
682 va.len = -1;
683 vb.len = -1;
684 if (_getbuffer(a, &va) < 0 ||
685 _getbuffer(b, &vb) < 0) {
686 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000687 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000688 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000689 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000690
Guido van Rossum98297ee2007-11-06 21:34:58 +0000691 /* Optimize end cases */
692 if (va.len == 0 && PyString_CheckExact(b)) {
693 result = b;
694 Py_INCREF(result);
695 goto done;
696 }
697 if (vb.len == 0 && PyString_CheckExact(a)) {
698 result = a;
699 Py_INCREF(result);
700 goto done;
701 }
702
703 size = va.len + vb.len;
704 if (size < 0) {
705 PyErr_NoMemory();
706 goto done;
707 }
708
709 result = PyString_FromStringAndSize(NULL, size);
710 if (result != NULL) {
711 memcpy(PyString_AS_STRING(result), va.buf, va.len);
712 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
713 }
714
715 done:
716 if (va.len != -1)
717 PyObject_ReleaseBuffer(a, &va);
718 if (vb.len != -1)
719 PyObject_ReleaseBuffer(b, &vb);
720 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000726 register Py_ssize_t i;
727 register Py_ssize_t j;
728 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000730 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731 if (n < 0)
732 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000733 /* watch out for overflows: the size can overflow int,
734 * and the # of bytes needed can overflow size_t
735 */
Christian Heimes90aa7642007-12-19 02:45:37 +0000736 size = Py_SIZE(a) * n;
737 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000738 PyErr_SetString(PyExc_OverflowError,
739 "repeated string is too long");
740 return NULL;
741 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000742 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000743 Py_INCREF(a);
744 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745 }
Tim Peterse7c05322004-06-27 17:24:49 +0000746 nbytes = (size_t)size;
747 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000748 PyErr_SetString(PyExc_OverflowError,
749 "repeated string is too long");
750 return NULL;
751 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000753 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000754 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000755 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000756 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000757 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000758 op->ob_sval[size] = '\0';
Christian Heimes90aa7642007-12-19 02:45:37 +0000759 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000760 memset(op->ob_sval, a->ob_sval[0] , n);
761 return (PyObject *) op;
762 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000763 i = 0;
764 if (i < size) {
Christian Heimes90aa7642007-12-19 02:45:37 +0000765 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
766 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000767 }
768 while (i < size) {
769 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000770 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000771 i += j;
772 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774}
775
Guido van Rossum9284a572000-03-07 15:53:43 +0000776static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000778{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000779 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
780 if (ival == -1 && PyErr_Occurred()) {
781 Py_buffer varg;
782 int pos;
783 PyErr_Clear();
784 if (_getbuffer(arg, &varg) < 0)
785 return -1;
Christian Heimes90aa7642007-12-19 02:45:37 +0000786 pos = stringlib_find(PyString_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000787 varg.buf, varg.len, 0);
788 PyObject_ReleaseBuffer(arg, &varg);
789 return pos >= 0;
790 }
791 if (ival < 0 || ival >= 256) {
792 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
793 return -1;
794 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000795
Christian Heimes90aa7642007-12-19 02:45:37 +0000796 return memchr(PyString_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000797}
798
799static PyObject *
800string_item(PyStringObject *a, register Py_ssize_t i)
801{
Christian Heimes90aa7642007-12-19 02:45:37 +0000802 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803 PyErr_SetString(PyExc_IndexError, "string index out of range");
804 return NULL;
805 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000806 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000807}
808
Martin v. Löwiscd353062001-05-24 16:56:35 +0000809static PyObject*
810string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000812 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000813 Py_ssize_t len_a, len_b;
814 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000815 PyObject *result;
816
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000817 /* Make sure both arguments are strings. */
818 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000819 if (Py_BytesWarningFlag && (op == Py_EQ) &&
820 (PyObject_IsInstance((PyObject*)a,
821 (PyObject*)&PyUnicode_Type) ||
822 PyObject_IsInstance((PyObject*)b,
823 (PyObject*)&PyUnicode_Type))) {
824 if (PyErr_WarnEx(PyExc_BytesWarning,
825 "Comparsion between bytes and string", 1))
826 return NULL;
827 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000828 result = Py_NotImplemented;
829 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000830 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 if (a == b) {
832 switch (op) {
833 case Py_EQ:case Py_LE:case Py_GE:
834 result = Py_True;
835 goto out;
836 case Py_NE:case Py_LT:case Py_GT:
837 result = Py_False;
838 goto out;
839 }
840 }
841 if (op == Py_EQ) {
842 /* Supporting Py_NE here as well does not save
843 much time, since Py_NE is rarely used. */
Christian Heimes90aa7642007-12-19 02:45:37 +0000844 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimes90aa7642007-12-19 02:45:37 +0000846 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000847 result = Py_True;
848 } else {
849 result = Py_False;
850 }
851 goto out;
852 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000853 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 min_len = (len_a < len_b) ? len_a : len_b;
855 if (min_len > 0) {
856 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
857 if (c==0)
858 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000859 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000860 c = 0;
861 if (c == 0)
862 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
863 switch (op) {
864 case Py_LT: c = c < 0; break;
865 case Py_LE: c = c <= 0; break;
866 case Py_EQ: assert(0); break; /* unreachable */
867 case Py_NE: c = c != 0; break;
868 case Py_GT: c = c > 0; break;
869 case Py_GE: c = c >= 0; break;
870 default:
871 result = Py_NotImplemented;
872 goto out;
873 }
874 result = c ? Py_True : Py_False;
875 out:
876 Py_INCREF(result);
877 return result;
878}
879
Guido van Rossum9bfef441993-03-29 10:43:31 +0000880static long
Fred Drakeba096332000-07-09 07:04:36 +0000881string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000882{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000883 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000884 register unsigned char *p;
885 register long x;
886
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000887 if (a->ob_shash != -1)
888 return a->ob_shash;
Christian Heimes90aa7642007-12-19 02:45:37 +0000889 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000890 p = (unsigned char *) a->ob_sval;
891 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000893 x = (1000003*x) ^ *p++;
Christian Heimes90aa7642007-12-19 02:45:37 +0000894 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000895 if (x == -1)
896 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000897 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000898 return x;
899}
900
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000901static PyObject*
902string_subscript(PyStringObject* self, PyObject* item)
903{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000904 if (PyIndex_Check(item)) {
905 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000906 if (i == -1 && PyErr_Occurred())
907 return NULL;
908 if (i < 0)
909 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000910 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000911 PyErr_SetString(PyExc_IndexError,
912 "string index out of range");
913 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000914 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000915 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000916 }
917 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000918 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000919 char* source_buf;
920 char* result_buf;
921 PyObject* result;
922
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000923 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000924 PyString_GET_SIZE(self),
925 &start, &stop, &step, &slicelength) < 0) {
926 return NULL;
927 }
928
929 if (slicelength <= 0) {
930 return PyString_FromStringAndSize("", 0);
931 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000932 else if (start == 0 && step == 1 &&
933 slicelength == PyString_GET_SIZE(self) &&
934 PyString_CheckExact(self)) {
935 Py_INCREF(self);
936 return (PyObject *)self;
937 }
938 else if (step == 1) {
939 return PyString_FromStringAndSize(
940 PyString_AS_STRING(self) + start,
941 slicelength);
942 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000943 else {
944 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000945 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000946 if (result_buf == NULL)
947 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000948
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000949 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000950 cur += step, i++) {
951 result_buf[i] = source_buf[cur];
952 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000953
954 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000955 slicelength);
956 PyMem_Free(result_buf);
957 return result;
958 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000961 PyErr_Format(PyExc_TypeError,
962 "string indices must be integers, not %.200s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000963 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000964 return NULL;
965 }
966}
967
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000968static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000969string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000970{
Christian Heimes90aa7642007-12-19 02:45:37 +0000971 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000972 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000973}
974
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000976 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000977 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000978 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000979 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000980 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000981 0, /*sq_ass_item*/
982 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000983 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984};
985
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000986static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000987 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000988 (binaryfunc)string_subscript,
989 0,
990};
991
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000992static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000993 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000994 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000995};
996
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997
/* Selectors for the three strip variants; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Skip the leading "|O:" (3 characters) to get the bare method name. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001006
Thomas Wouters477c8d52006-05-27 19:21:47 +00001007
/* True when the `length` bytes of `pattern` occur in `target` starting
   at `offset`.  The first and last bytes are compared directly before
   memcmp() checks the rest.
   Don't call if length < 2.
   Every argument is parenthesized so that expression arguments expand
   with the intended precedence; arguments are evaluated more than once,
   so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
  ((target)[(offset)] == (pattern)[0] &&                                  \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1013
1014
1015/* Overallocate the initial list to reduce the number of reallocs for small
1016 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1017 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1018 text (roughly 11 words per line) and field delimited data (usually 1-10
1019 fields). For large strings the split algorithms are bandwidth limited
1020 so increasing the preallocation likely will not improve things.*/
1021
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* Number of slots to preallocate for `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.
   The argument is parenthesized so expression arguments expand with the
   intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1027
/* Append the bytes data[left:right] to the result list as a new string
   object.  The macro relies on names from the enclosing function:
   `str` (scratch PyObject *), `list` (the result list), `count` (number
   of items stored so far) and the label `onError` (jumped to on
   failure).  The first MAX_PREALLOC items are stored straight into the
   preallocated slots; later ones go through PyList_Append(), which takes
   its own reference, so ours is dropped afterwards. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count >= MAX_PREALLOC) {                                \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } else {                                                    \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001047
/* Cursor helpers for the whitespace splitters.  Each advances (SKIP_*)
   or retreats (RSKIP_*) the index `i` over `s` while the current byte is
   (or is not) whitespace according to ISSPACE(), stopping at `len` or
   below 0 respectively.  `i` must be a modifiable lvalue; arguments are
   parenthesized so expression arguments expand as intended. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 &&  ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001052
1053Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001054split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055{
Christian Heimes895627f2007-12-08 17:28:33 +00001056 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001058 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001059 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001060
1061 if (list == NULL)
1062 return NULL;
1063
Thomas Wouters477c8d52006-05-27 19:21:47 +00001064 i = j = 0;
1065
1066 while (maxsplit-- > 0) {
1067 SKIP_SPACE(s, i, len);
1068 if (i==len) break;
1069 j = i; i++;
1070 SKIP_NONSPACE(s, i, len);
Christian Heimes895627f2007-12-08 17:28:33 +00001071 if (j == 0 && i == len && PyString_CheckExact(self)) {
1072 /* No whitespace in self, so just use it as list[0] */
1073 Py_INCREF(self);
1074 PyList_SET_ITEM(list, 0, (PyObject *)self);
1075 count++;
1076 break;
1077 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001078 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001079 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001080
1081 if (i < len) {
1082 /* Only occurs when maxsplit was reached */
1083 /* Skip any remaining whitespace and copy to end of string */
1084 SKIP_SPACE(s, i, len);
1085 if (i != len)
1086 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001088 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001090 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091 Py_DECREF(list);
1092 return NULL;
1093}
1094
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001096split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001097{
Christian Heimes895627f2007-12-08 17:28:33 +00001098 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001099 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001100 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001101 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001102
1103 if (list == NULL)
1104 return NULL;
1105
Thomas Wouters477c8d52006-05-27 19:21:47 +00001106 i = j = 0;
1107 while ((j < len) && (maxcount-- > 0)) {
1108 for(; j<len; j++) {
1109 /* I found that using memchr makes no difference */
1110 if (s[j] == ch) {
1111 SPLIT_ADD(s, i, j);
1112 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001113 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001114 }
1115 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001116 }
Christian Heimes895627f2007-12-08 17:28:33 +00001117 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1118 /* ch not in self, so just use self as list[0] */
1119 Py_INCREF(self);
1120 PyList_SET_ITEM(list, 0, (PyObject *)self);
1121 count++;
1122 }
1123 else if (i <= len) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001124 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001125 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001126 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001127 return list;
1128
1129 onError:
1130 Py_DECREF(list);
1131 return NULL;
1132}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001133
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001134PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001135"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001136\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001137Return a list of the sections in B, using sep as the delimiter.\n\
1138If sep is not given, B is split on ASCII whitespace characters\n\
1139(space, tab, return, newline, formfeed, vertical tab).\n\
1140If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141
1142static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001143string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001145 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001146 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001147 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001148 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001149 PyObject *list, *str, *subobj = Py_None;
1150#ifdef USE_FAST
1151 Py_ssize_t pos;
1152#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001153
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001154 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001155 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001156 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001157 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001159 return split_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001160 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001161 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001162 sub = vsub.buf;
1163 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165 if (n == 0) {
1166 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001167 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001168 return NULL;
1169 }
Christian Heimes895627f2007-12-08 17:28:33 +00001170 else if (n == 1)
1171 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172
Thomas Wouters477c8d52006-05-27 19:21:47 +00001173 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001174 if (list == NULL) {
1175 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001177 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178
Thomas Wouters477c8d52006-05-27 19:21:47 +00001179#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001181 while (maxsplit-- > 0) {
1182 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1183 if (pos < 0)
1184 break;
1185 j = i+pos;
1186 SPLIT_ADD(s, i, j);
1187 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001189#else
1190 i = j = 0;
1191 while ((j+n <= len) && (maxsplit-- > 0)) {
1192 for (; j+n <= len; j++) {
1193 if (Py_STRING_MATCH(s, j, sub, n)) {
1194 SPLIT_ADD(s, i, j);
1195 i = j = j + n;
1196 break;
1197 }
1198 }
1199 }
1200#endif
1201 SPLIT_ADD(s, i, len);
1202 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001203 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001204 return list;
1205
Thomas Wouters477c8d52006-05-27 19:21:47 +00001206 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001207 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001208 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209 return NULL;
1210}
1211
Thomas Wouters477c8d52006-05-27 19:21:47 +00001212PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001213"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001214\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001215Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001216the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001217found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001218
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001219static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001220string_partition(PyStringObject *self, PyObject *sep_obj)
1221{
1222 const char *sep;
1223 Py_ssize_t sep_len;
1224
1225 if (PyString_Check(sep_obj)) {
1226 sep = PyString_AS_STRING(sep_obj);
1227 sep_len = PyString_GET_SIZE(sep_obj);
1228 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001229 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1230 return NULL;
1231
1232 return stringlib_partition(
1233 (PyObject*) self,
1234 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1235 sep_obj, sep, sep_len
1236 );
1237}
1238
1239PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001240"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001241\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001242Searches for the separator sep in B, starting at the end of B,\n\
1243and returns the part before it, the separator itself, and the\n\
1244part after it. If the separator is not found, returns two empty\n\
1245bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001246
1247static PyObject *
1248string_rpartition(PyStringObject *self, PyObject *sep_obj)
1249{
1250 const char *sep;
1251 Py_ssize_t sep_len;
1252
1253 if (PyString_Check(sep_obj)) {
1254 sep = PyString_AS_STRING(sep_obj);
1255 sep_len = PyString_GET_SIZE(sep_obj);
1256 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001257 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1258 return NULL;
1259
1260 return stringlib_rpartition(
1261 (PyObject*) self,
1262 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1263 sep_obj, sep, sep_len
1264 );
1265}
1266
1267Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001268rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001269{
Christian Heimes895627f2007-12-08 17:28:33 +00001270 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001271 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001272 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001273 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001274
1275 if (list == NULL)
1276 return NULL;
1277
Thomas Wouters477c8d52006-05-27 19:21:47 +00001278 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001279
Thomas Wouters477c8d52006-05-27 19:21:47 +00001280 while (maxsplit-- > 0) {
1281 RSKIP_SPACE(s, i);
1282 if (i<0) break;
1283 j = i; i--;
1284 RSKIP_NONSPACE(s, i);
Christian Heimes895627f2007-12-08 17:28:33 +00001285 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1286 /* No whitespace in self, so just use it as list[0] */
1287 Py_INCREF(self);
1288 PyList_SET_ITEM(list, 0, (PyObject *)self);
1289 count++;
1290 break;
1291 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001292 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001293 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001294 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001295 /* Only occurs when maxsplit was reached. Skip any remaining
1296 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001297 RSKIP_SPACE(s, i);
1298 if (i >= 0)
1299 SPLIT_ADD(s, 0, i + 1);
1300
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001301 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001302 FIX_PREALLOC_SIZE(list);
1303 if (PyList_Reverse(list) < 0)
1304 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001305 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001306 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001307 Py_DECREF(list);
1308 return NULL;
1309}
1310
Thomas Wouters477c8d52006-05-27 19:21:47 +00001311Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001312rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001313{
Christian Heimes895627f2007-12-08 17:28:33 +00001314 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001315 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001316 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001317 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001318
1319 if (list == NULL)
1320 return NULL;
1321
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 i = j = len - 1;
1323 while ((i >= 0) && (maxcount-- > 0)) {
1324 for (; i >= 0; i--) {
1325 if (s[i] == ch) {
1326 SPLIT_ADD(s, i + 1, j + 1);
1327 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001328 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001329 }
1330 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001331 }
Christian Heimes895627f2007-12-08 17:28:33 +00001332 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1333 /* ch not in self, so just use self as list[0] */
1334 Py_INCREF(self);
1335 PyList_SET_ITEM(list, 0, (PyObject *)self);
1336 count++;
1337 }
1338 else if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001339 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001340 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001341 FIX_PREALLOC_SIZE(list);
1342 if (PyList_Reverse(list) < 0)
1343 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001344 return list;
1345
1346 onError:
1347 Py_DECREF(list);
1348 return NULL;
1349}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001350
1351PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001352"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001353\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001354Return a list of the sections in B, using sep as the delimiter,\n\
1355starting at the end of B and working to the front.\n\
1356If sep is not given, B is split on ASCII whitespace characters\n\
1357(space, tab, return, newline, formfeed, vertical tab).\n\
1358If maxsplit is given, at most maxsplit splits are done.");
1359
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001360
1361static PyObject *
1362string_rsplit(PyStringObject *self, PyObject *args)
1363{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001364 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365 Py_ssize_t maxsplit = -1, count=0;
Christian Heimes895627f2007-12-08 17:28:33 +00001366 const char *s, *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001368 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001369
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001370 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001371 return NULL;
1372 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001373 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001375 return rsplit_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001376 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001377 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001378 sub = vsub.buf;
1379 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001380
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001381 if (n == 0) {
1382 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001384 return NULL;
1385 }
Christian Heimes895627f2007-12-08 17:28:33 +00001386 else if (n == 1)
1387 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001388
Thomas Wouters477c8d52006-05-27 19:21:47 +00001389 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001390 if (list == NULL) {
1391 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001392 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001393 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001394
1395 j = len;
1396 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001397
Christian Heimes895627f2007-12-08 17:28:33 +00001398 s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001399 while ( (i >= 0) && (maxsplit-- > 0) ) {
1400 for (; i>=0; i--) {
1401 if (Py_STRING_MATCH(s, i, sub, n)) {
1402 SPLIT_ADD(s, i + n, j);
1403 j = i;
1404 i -= n;
1405 break;
1406 }
1407 }
1408 }
1409 SPLIT_ADD(s, 0, j);
1410 FIX_PREALLOC_SIZE(list);
1411 if (PyList_Reverse(list) < 0)
1412 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001413 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001414 return list;
1415
Thomas Wouters477c8d52006-05-27 19:21:47 +00001416onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001417 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001418 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001419 return NULL;
1420}
1421
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001422#undef SPLIT_ADD
1423#undef MAX_PREALLOC
1424#undef PREALLOC_SIZE
1425
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001427PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001428"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001430Concatenates any number of bytes objects, with B in between each pair.\n\
1431Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432
1433static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001434string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435{
1436 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001437 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001440 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001441 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001442 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001443 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444
Tim Peters19fe14e2001-01-19 03:03:47 +00001445 seq = PySequence_Fast(orig, "");
1446 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001447 return NULL;
1448 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001449
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001450 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001451 if (seqlen == 0) {
1452 Py_DECREF(seq);
1453 return PyString_FromString("");
1454 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001456 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001457 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001458 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001459 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001460 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001461 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001463
Raymond Hettinger674f2412004-08-23 23:23:54 +00001464 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001465 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001466 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001467 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001468 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001469 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001470 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001471 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001472 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001473 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001474 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001475 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001476 " %.80s found",
Christian Heimes90aa7642007-12-19 02:45:37 +00001477 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001478 Py_DECREF(seq);
1479 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001480 }
Christian Heimes90aa7642007-12-19 02:45:37 +00001481 sz += Py_SIZE(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 if (i != 0)
1483 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001484 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001485 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001486 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001487 Py_DECREF(seq);
1488 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001490 }
1491
1492 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001493 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001494 if (res == NULL) {
1495 Py_DECREF(seq);
1496 return NULL;
1497 }
1498
1499 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001500 /* I'm not worried about a PyBytes item growing because there's
1501 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001502 p = PyString_AS_STRING(res);
1503 for (i = 0; i < seqlen; ++i) {
1504 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 char *q;
1506 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001507 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001508 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001509 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001510 item = PySequence_Fast_GET_ITEM(seq, i);
Christian Heimes90aa7642007-12-19 02:45:37 +00001511 n = Py_SIZE(item);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001512 if (PyString_Check(item))
1513 q = PyString_AS_STRING(item);
1514 else
1515 q = PyBytes_AS_STRING(item);
1516 Py_MEMCPY(p, q, n);
1517 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001519
Jeremy Hylton49048292000-07-11 03:28:17 +00001520 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522}
1523
Tim Peters52e155e2001-06-16 05:42:57 +00001524PyObject *
1525_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001526{
Tim Petersa7259592001-06-16 05:11:17 +00001527 assert(sep != NULL && PyString_Check(sep));
1528 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001529 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001530}
1531
Thomas Wouters477c8d52006-05-27 19:21:47 +00001532Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001533string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001534{
1535 if (*end > len)
1536 *end = len;
1537 else if (*end < 0)
1538 *end += len;
1539 if (*end < 0)
1540 *end = 0;
1541 if (*start < 0)
1542 *start += len;
1543 if (*start < 0)
1544 *start = 0;
1545}
1546
Thomas Wouters477c8d52006-05-27 19:21:47 +00001547Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001548string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001550 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001551 const char *sub;
1552 Py_ssize_t sub_len;
1553 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001554 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Christian Heimes9cd17752007-11-18 19:35:23 +00001556 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1557 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001559 /* To support None in "start" and "end" arguments, meaning
1560 the same as if they were not passed.
1561 */
1562 if (obj_start != Py_None)
1563 if (!_PyEval_SliceIndex(obj_start, &start))
1564 return -2;
1565 if (obj_end != Py_None)
1566 if (!_PyEval_SliceIndex(obj_end, &end))
1567 return -2;
1568
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (PyString_Check(subobj)) {
1570 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001571 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001573 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001574 /* XXX - the "expected a character buffer object" is pretty
1575 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576 return -2;
1577
Thomas Wouters477c8d52006-05-27 19:21:47 +00001578 if (dir > 0)
1579 return stringlib_find_slice(
1580 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1581 sub, sub_len, start, end);
1582 else
1583 return stringlib_rfind_slice(
1584 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1585 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586}
1587
1588
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001589PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001590"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591\n\
1592Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001593such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594arguments start and end are interpreted as in slice notation.\n\
1595\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001596Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597
1598static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001599string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001601 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (result == -2)
1603 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001604 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605}
1606
1607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001608PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001609"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001611Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612
1613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001614string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001616 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 if (result == -2)
1618 return NULL;
1619 if (result == -1) {
1620 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001621 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622 return NULL;
1623 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001624 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625}
1626
1627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001628PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001629"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001631Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001632such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633arguments start and end are interpreted as in slice notation.\n\
1634\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
1637static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001638string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001640 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 if (result == -2)
1642 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001643 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644}
1645
1646
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001647PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001648"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001650Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651
1652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001653string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001655 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 if (result == -2)
1657 return NULL;
1658 if (result == -1) {
1659 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001660 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661 return NULL;
1662 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001663 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664}
1665
1666
Thomas Wouters477c8d52006-05-27 19:21:47 +00001667Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001668do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1669{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001670 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001671 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001673 char *sep;
1674 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001675 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001676
Guido van Rossum98297ee2007-11-06 21:34:58 +00001677 if (_getbuffer(sepobj, &vsep) < 0)
1678 return NULL;
1679 sep = vsep.buf;
1680 seplen = vsep.len;
1681
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001682 i = 0;
1683 if (striptype != RIGHTSTRIP) {
1684 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1685 i++;
1686 }
1687 }
1688
1689 j = len;
1690 if (striptype != LEFTSTRIP) {
1691 do {
1692 j--;
1693 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1694 j++;
1695 }
1696
Guido van Rossum98297ee2007-11-06 21:34:58 +00001697 PyObject_ReleaseBuffer(sepobj, &vsep);
1698
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001699 if (i == 0 && j == len && PyString_CheckExact(self)) {
1700 Py_INCREF(self);
1701 return (PyObject*)self;
1702 }
1703 else
1704 return PyString_FromStringAndSize(s+i, j-i);
1705}
1706
1707
Thomas Wouters477c8d52006-05-27 19:21:47 +00001708Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001709do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710{
1711 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001712 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 i = 0;
1715 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001716 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 i++;
1718 }
1719 }
1720
1721 j = len;
1722 if (striptype != LEFTSTRIP) {
1723 do {
1724 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001725 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726 j++;
1727 }
1728
Tim Peters8fa5dd02001-09-12 02:18:30 +00001729 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 Py_INCREF(self);
1731 return (PyObject*)self;
1732 }
1733 else
1734 return PyString_FromStringAndSize(s+i, j-i);
1735}
1736
1737
Thomas Wouters477c8d52006-05-27 19:21:47 +00001738Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001739do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1740{
1741 PyObject *sep = NULL;
1742
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001743 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001744 return NULL;
1745
1746 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001747 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001748 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001749 return do_strip(self, striptype);
1750}
1751
1752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001754"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001756Strip leading and trailing bytes contained in the argument.\n\
1757If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001759string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001761 if (PyTuple_GET_SIZE(args) == 0)
1762 return do_strip(self, BOTHSTRIP); /* Common case */
1763 else
1764 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765}
1766
1767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001768PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001769"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001771Strip leading bytes contained in the argument.\n\
1772If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001774string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776 if (PyTuple_GET_SIZE(args) == 0)
1777 return do_strip(self, LEFTSTRIP); /* Common case */
1778 else
1779 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780}
1781
1782
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001783PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001784"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001786Strip trailing bytes contained in the argument.\n\
1787If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001789string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001791 if (PyTuple_GET_SIZE(args) == 0)
1792 return do_strip(self, RIGHTSTRIP); /* Common case */
1793 else
1794 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795}
1796
1797
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001798PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001799"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001801Return the number of non-overlapping occurrences of substring sub in\n\
1802string S[start:end]. Optional arguments start and end are interpreted\n\
1803as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804
1805static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001806string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001808 PyObject *sub_obj;
1809 const char *str = PyString_AS_STRING(self), *sub;
1810 Py_ssize_t sub_len;
1811 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812
Thomas Wouters477c8d52006-05-27 19:21:47 +00001813 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1814 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001816
Thomas Wouters477c8d52006-05-27 19:21:47 +00001817 if (PyString_Check(sub_obj)) {
1818 sub = PyString_AS_STRING(sub_obj);
1819 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001820 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001821 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001822 return NULL;
1823
Thomas Wouters477c8d52006-05-27 19:21:47 +00001824 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001825
Christian Heimes217cfd12007-12-02 14:31:20 +00001826 return PyLong_FromSsize_t(
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827 stringlib_count(str + start, end - start, sub, sub_len)
1828 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829}
1830
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001833"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001835Return a copy of B, where all characters occurring in the\n\
1836optional argument deletechars are removed, and the remaining\n\
1837characters have been mapped through the given translation\n\
1838table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
1840static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001841string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001844 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001845 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001847 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001848 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 PyObject *result;
1850 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001851 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001853 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001854 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856
1857 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001858 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 tablen = PyString_GET_SIZE(tableobj);
1860 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001861 else if (tableobj == Py_None) {
1862 table = NULL;
1863 tablen = 256;
1864 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001865 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867
Martin v. Löwis00b61272002-12-12 20:03:19 +00001868 if (tablen != 256) {
1869 PyErr_SetString(PyExc_ValueError,
1870 "translation table must be 256 characters long");
1871 return NULL;
1872 }
1873
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 if (delobj != NULL) {
1875 if (PyString_Check(delobj)) {
1876 del_table = PyString_AS_STRING(delobj);
1877 dellen = PyString_GET_SIZE(delobj);
1878 }
1879 else if (PyUnicode_Check(delobj)) {
1880 PyErr_SetString(PyExc_TypeError,
1881 "deletions are implemented differently for unicode");
1882 return NULL;
1883 }
1884 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1885 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001886 }
1887 else {
1888 del_table = NULL;
1889 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890 }
1891
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001892 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893 result = PyString_FromStringAndSize((char *)NULL, inlen);
1894 if (result == NULL)
1895 return NULL;
1896 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001897 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898
Guido van Rossumd8faa362007-04-27 19:54:29 +00001899 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 /* If no deletions are required, use faster code */
1901 for (i = inlen; --i >= 0; ) {
1902 c = Py_CHARMASK(*input++);
1903 if (Py_CHARMASK((*output++ = table[c])) != c)
1904 changed = 1;
1905 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001906 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907 return result;
1908 Py_DECREF(result);
1909 Py_INCREF(input_obj);
1910 return input_obj;
1911 }
1912
Guido van Rossumd8faa362007-04-27 19:54:29 +00001913 if (table == NULL) {
1914 for (i = 0; i < 256; i++)
1915 trans_table[i] = Py_CHARMASK(i);
1916 } else {
1917 for (i = 0; i < 256; i++)
1918 trans_table[i] = Py_CHARMASK(table[i]);
1919 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921 for (i = 0; i < dellen; i++)
1922 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1923
1924 for (i = inlen; --i >= 0; ) {
1925 c = Py_CHARMASK(*input++);
1926 if (trans_table[c] != -1)
1927 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1928 continue;
1929 changed = 1;
1930 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001931 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932 Py_DECREF(result);
1933 Py_INCREF(input_obj);
1934 return input_obj;
1935 }
1936 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001937 if (inlen > 0)
1938 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939 return result;
1940}
1941
1942
/* Search directions.  REVERSE is parenthesized so the negative constant
   stays a single operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte c in the first
   target_len bytes of target, or NULL if there is none (memchr). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950
Thomas Wouters477c8d52006-05-27 19:21:47 +00001951/* String ops must return a string. */
1952/* If the object is subclass of string, create a copy */
1953Py_LOCAL(PyStringObject *)
1954return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001956 if (PyString_CheckExact(self)) {
1957 Py_INCREF(self);
1958 return self;
1959 }
1960 return (PyStringObject *)PyString_FromStringAndSize(
1961 PyString_AS_STRING(self),
1962 PyString_GET_SIZE(self));
1963}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964
Thomas Wouters477c8d52006-05-27 19:21:47 +00001965Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001966countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001967{
1968 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001969 const char *start=target;
1970 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971
Thomas Wouters477c8d52006-05-27 19:21:47 +00001972 while ( (start=findchar(start, end-start, c)) != NULL ) {
1973 count++;
1974 if (count >= maxcount)
1975 break;
1976 start += 1;
1977 }
1978 return count;
1979}
1980
1981Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001982findstring(const char *target, Py_ssize_t target_len,
1983 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001984 Py_ssize_t start,
1985 Py_ssize_t end,
1986 int direction)
1987{
1988 if (start < 0) {
1989 start += target_len;
1990 if (start < 0)
1991 start = 0;
1992 }
1993 if (end > target_len) {
1994 end = target_len;
1995 } else if (end < 0) {
1996 end += target_len;
1997 if (end < 0)
1998 end = 0;
1999 }
2000
2001 /* zero-length substrings always match at the first attempt */
2002 if (pattern_len == 0)
2003 return (direction > 0) ? start : end;
2004
2005 end -= pattern_len;
2006
2007 if (direction < 0) {
2008 for (; end >= start; end--)
2009 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2010 return end;
2011 } else {
2012 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002013 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002014 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 }
2016 return -1;
2017}
2018
Thomas Wouters477c8d52006-05-27 19:21:47 +00002019Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002020countstring(const char *target, Py_ssize_t target_len,
2021 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002022 Py_ssize_t start,
2023 Py_ssize_t end,
2024 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002026 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
Thomas Wouters477c8d52006-05-27 19:21:47 +00002028 if (start < 0) {
2029 start += target_len;
2030 if (start < 0)
2031 start = 0;
2032 }
2033 if (end > target_len) {
2034 end = target_len;
2035 } else if (end < 0) {
2036 end += target_len;
2037 if (end < 0)
2038 end = 0;
2039 }
2040
2041 /* zero-length substrings match everywhere */
2042 if (pattern_len == 0 || maxcount == 0) {
2043 if (target_len+1 < maxcount)
2044 return target_len+1;
2045 return maxcount;
2046 }
2047
2048 end -= pattern_len;
2049 if (direction < 0) {
2050 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002051 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002052 count++;
2053 if (--maxcount <= 0) break;
2054 end -= pattern_len-1;
2055 }
2056 } else {
2057 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002058 if (Py_STRING_MATCH(target, start,
2059 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002060 count++;
2061 if (--maxcount <= 0)
2062 break;
2063 start += pattern_len-1;
2064 }
2065 }
2066 return count;
2067}
2068
2069
2070/* Algorithms for different cases of string replacement */
2071
2072/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2073Py_LOCAL(PyStringObject *)
2074replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002075 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002076 Py_ssize_t maxcount)
2077{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002078 char *self_s, *result_s;
2079 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002080 Py_ssize_t count, i, product;
2081 PyStringObject *result;
2082
2083 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002084
Thomas Wouters477c8d52006-05-27 19:21:47 +00002085 /* 1 at the end plus 1 after every character */
2086 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002087 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002088 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002089
Thomas Wouters477c8d52006-05-27 19:21:47 +00002090 /* Check for overflow */
2091 /* result_len = count * to_len + self_len; */
2092 product = count * to_len;
2093 if (product / to_len != count) {
2094 PyErr_SetString(PyExc_OverflowError,
2095 "replace string is too long");
2096 return NULL;
2097 }
2098 result_len = product + self_len;
2099 if (result_len < 0) {
2100 PyErr_SetString(PyExc_OverflowError,
2101 "replace string is too long");
2102 return NULL;
2103 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002104
Thomas Wouters477c8d52006-05-27 19:21:47 +00002105 if (! (result = (PyStringObject *)
2106 PyString_FromStringAndSize(NULL, result_len)) )
2107 return NULL;
2108
2109 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002110 result_s = PyString_AS_STRING(result);
2111
2112 /* TODO: special case single character, which doesn't need memcpy */
2113
2114 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002115 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002116 result_s += to_len;
2117 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002118
Thomas Wouters477c8d52006-05-27 19:21:47 +00002119 for (i=0; i<count; i++) {
2120 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002121 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002122 result_s += to_len;
2123 }
2124
2125 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002126 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002127
2128 return result;
2129}
2130
2131/* Special case for deleting a single character */
2132/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2133Py_LOCAL(PyStringObject *)
2134replace_delete_single_character(PyStringObject *self,
2135 char from_c, Py_ssize_t maxcount)
2136{
2137 char *self_s, *result_s;
2138 char *start, *next, *end;
2139 Py_ssize_t self_len, result_len;
2140 Py_ssize_t count;
2141 PyStringObject *result;
2142
2143 self_len = PyString_GET_SIZE(self);
2144 self_s = PyString_AS_STRING(self);
2145
2146 count = countchar(self_s, self_len, from_c, maxcount);
2147 if (count == 0) {
2148 return return_self(self);
2149 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002150
Thomas Wouters477c8d52006-05-27 19:21:47 +00002151 result_len = self_len - count; /* from_len == 1 */
2152 assert(result_len>=0);
2153
2154 if ( (result = (PyStringObject *)
2155 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2156 return NULL;
2157 result_s = PyString_AS_STRING(result);
2158
2159 start = self_s;
2160 end = self_s + self_len;
2161 while (count-- > 0) {
2162 next = findchar(start, end-start, from_c);
2163 if (next == NULL)
2164 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002165 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002166 result_s += (next-start);
2167 start = next+1;
2168 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002169 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002170
Thomas Wouters477c8d52006-05-27 19:21:47 +00002171 return result;
2172}
2173
2174/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2175
2176Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002177replace_delete_substring(PyStringObject *self,
2178 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002179 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002180 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002181 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002182 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002183 Py_ssize_t count, offset;
2184 PyStringObject *result;
2185
2186 self_len = PyString_GET_SIZE(self);
2187 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002188
2189 count = countstring(self_s, self_len,
2190 from_s, from_len,
2191 0, self_len, 1,
2192 maxcount);
2193
2194 if (count == 0) {
2195 /* no matches */
2196 return return_self(self);
2197 }
2198
2199 result_len = self_len - (count * from_len);
2200 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002201
Thomas Wouters477c8d52006-05-27 19:21:47 +00002202 if ( (result = (PyStringObject *)
2203 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2204 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002205
Thomas Wouters477c8d52006-05-27 19:21:47 +00002206 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002207
Thomas Wouters477c8d52006-05-27 19:21:47 +00002208 start = self_s;
2209 end = self_s + self_len;
2210 while (count-- > 0) {
2211 offset = findstring(start, end-start,
2212 from_s, from_len,
2213 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214 if (offset == -1)
2215 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002216 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002217
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002218 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002219
Thomas Wouters477c8d52006-05-27 19:21:47 +00002220 result_s += (next-start);
2221 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002223 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002224 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225}
2226
Thomas Wouters477c8d52006-05-27 19:21:47 +00002227/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2228Py_LOCAL(PyStringObject *)
2229replace_single_character_in_place(PyStringObject *self,
2230 char from_c, char to_c,
2231 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 char *self_s, *result_s, *start, *end, *next;
2234 Py_ssize_t self_len;
2235 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002236
Thomas Wouters477c8d52006-05-27 19:21:47 +00002237 /* The result string will be the same size */
2238 self_s = PyString_AS_STRING(self);
2239 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002240
Thomas Wouters477c8d52006-05-27 19:21:47 +00002241 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242
Thomas Wouters477c8d52006-05-27 19:21:47 +00002243 if (next == NULL) {
2244 /* No matches; return the original string */
2245 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002247
Thomas Wouters477c8d52006-05-27 19:21:47 +00002248 /* Need to make a new string */
2249 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2250 if (result == NULL)
2251 return NULL;
2252 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002253 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002254
Thomas Wouters477c8d52006-05-27 19:21:47 +00002255 /* change everything in-place, starting with this one */
2256 start = result_s + (next-self_s);
2257 *start = to_c;
2258 start++;
2259 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002260
Thomas Wouters477c8d52006-05-27 19:21:47 +00002261 while (--maxcount > 0) {
2262 next = findchar(start, end-start, from_c);
2263 if (next == NULL)
2264 break;
2265 *next = to_c;
2266 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002267 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270}
2271
Thomas Wouters477c8d52006-05-27 19:21:47 +00002272/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2273Py_LOCAL(PyStringObject *)
2274replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002275 const char *from_s, Py_ssize_t from_len,
2276 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002277 Py_ssize_t maxcount)
2278{
2279 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280 char *self_s;
2281 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002282 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002283
Thomas Wouters477c8d52006-05-27 19:21:47 +00002284 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002285
Thomas Wouters477c8d52006-05-27 19:21:47 +00002286 self_s = PyString_AS_STRING(self);
2287 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002288
Thomas Wouters477c8d52006-05-27 19:21:47 +00002289 offset = findstring(self_s, self_len,
2290 from_s, from_len,
2291 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002292 if (offset == -1) {
2293 /* No matches; return the original string */
2294 return return_self(self);
2295 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002296
Thomas Wouters477c8d52006-05-27 19:21:47 +00002297 /* Need to make a new string */
2298 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2299 if (result == NULL)
2300 return NULL;
2301 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002302 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303
Thomas Wouters477c8d52006-05-27 19:21:47 +00002304 /* change everything in-place, starting with this one */
2305 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002306 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002307 start += from_len;
2308 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002309
Thomas Wouters477c8d52006-05-27 19:21:47 +00002310 while ( --maxcount > 0) {
2311 offset = findstring(start, end-start,
2312 from_s, from_len,
2313 0, end-start, FORWARD);
2314 if (offset==-1)
2315 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002316 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002317 start += offset+from_len;
2318 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002319
Thomas Wouters477c8d52006-05-27 19:21:47 +00002320 return result;
2321}
2322
2323/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2324Py_LOCAL(PyStringObject *)
2325replace_single_character(PyStringObject *self,
2326 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002327 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002328 Py_ssize_t maxcount)
2329{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002330 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002331 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002332 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002333 Py_ssize_t count, product;
2334 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002335
Thomas Wouters477c8d52006-05-27 19:21:47 +00002336 self_s = PyString_AS_STRING(self);
2337 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002338
Thomas Wouters477c8d52006-05-27 19:21:47 +00002339 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002340 if (count == 0) {
2341 /* no matches, return unchanged */
2342 return return_self(self);
2343 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002344
Thomas Wouters477c8d52006-05-27 19:21:47 +00002345 /* use the difference between current and new, hence the "-1" */
2346 /* result_len = self_len + count * (to_len-1) */
2347 product = count * (to_len-1);
2348 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002349 PyErr_SetString(PyExc_OverflowError,
2350 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002351 return NULL;
2352 }
2353 result_len = self_len + product;
2354 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002355 PyErr_SetString(PyExc_OverflowError,
2356 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 return NULL;
2358 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002359
Thomas Wouters477c8d52006-05-27 19:21:47 +00002360 if ( (result = (PyStringObject *)
2361 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2362 return NULL;
2363 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 start = self_s;
2366 end = self_s + self_len;
2367 while (count-- > 0) {
2368 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002369 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002370 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002371
Thomas Wouters477c8d52006-05-27 19:21:47 +00002372 if (next == start) {
2373 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002374 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002375 result_s += to_len;
2376 start += 1;
2377 } else {
2378 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002379 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002380 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002381 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 result_s += to_len;
2383 start = next+1;
2384 }
2385 }
2386 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002387 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002388
Thomas Wouters477c8d52006-05-27 19:21:47 +00002389 return result;
2390}
2391
2392/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2393Py_LOCAL(PyStringObject *)
2394replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002395 const char *from_s, Py_ssize_t from_len,
2396 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002397 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002398 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002399 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002400 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002401 Py_ssize_t count, offset, product;
2402 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002403
Thomas Wouters477c8d52006-05-27 19:21:47 +00002404 self_s = PyString_AS_STRING(self);
2405 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002406
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 count = countstring(self_s, self_len,
2408 from_s, from_len,
2409 0, self_len, FORWARD, maxcount);
2410 if (count == 0) {
2411 /* no matches, return unchanged */
2412 return return_self(self);
2413 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002414
Thomas Wouters477c8d52006-05-27 19:21:47 +00002415 /* Check for overflow */
2416 /* result_len = self_len + count * (to_len-from_len) */
2417 product = count * (to_len-from_len);
2418 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002419 PyErr_SetString(PyExc_OverflowError,
2420 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002421 return NULL;
2422 }
2423 result_len = self_len + product;
2424 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002425 PyErr_SetString(PyExc_OverflowError,
2426 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002427 return NULL;
2428 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002429
Thomas Wouters477c8d52006-05-27 19:21:47 +00002430 if ( (result = (PyStringObject *)
2431 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2432 return NULL;
2433 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002434
Thomas Wouters477c8d52006-05-27 19:21:47 +00002435 start = self_s;
2436 end = self_s + self_len;
2437 while (count-- > 0) {
2438 offset = findstring(start, end-start,
2439 from_s, from_len,
2440 0, end-start, FORWARD);
2441 if (offset == -1)
2442 break;
2443 next = start+offset;
2444 if (next == start) {
2445 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002446 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002447 result_s += to_len;
2448 start += from_len;
2449 } else {
2450 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002451 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002452 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002453 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 result_s += to_len;
2455 start = next+from_len;
2456 }
2457 }
2458 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002459 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002460
Thomas Wouters477c8d52006-05-27 19:21:47 +00002461 return result;
2462}
2463
2464
2465Py_LOCAL(PyStringObject *)
2466replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002467 const char *from_s, Py_ssize_t from_len,
2468 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002469 Py_ssize_t maxcount)
2470{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471 if (maxcount < 0) {
2472 maxcount = PY_SSIZE_T_MAX;
2473 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2474 /* nothing to do; return the original string */
2475 return return_self(self);
2476 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002477
Thomas Wouters477c8d52006-05-27 19:21:47 +00002478 if (maxcount == 0 ||
2479 (from_len == 0 && to_len == 0)) {
2480 /* nothing to do; return the original string */
2481 return return_self(self);
2482 }
2483
2484 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002485
Thomas Wouters477c8d52006-05-27 19:21:47 +00002486 if (from_len == 0) {
2487 /* insert the 'to' string everywhere. */
2488 /* >>> "Python".replace("", ".") */
2489 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002490 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002491 }
2492
2493 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2494 /* point for an empty self string to generate a non-empty string */
2495 /* Special case so the remaining code always gets a non-empty string */
2496 if (PyString_GET_SIZE(self) == 0) {
2497 return return_self(self);
2498 }
2499
2500 if (to_len == 0) {
2501 /* delete all occurances of 'from' string */
2502 if (from_len == 1) {
2503 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002504 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002505 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002506 return replace_delete_substring(self, from_s,
2507 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002508 }
2509 }
2510
2511 /* Handle special case where both strings have the same length */
2512
2513 if (from_len == to_len) {
2514 if (from_len == 1) {
2515 return replace_single_character_in_place(
2516 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002517 from_s[0],
2518 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002519 maxcount);
2520 } else {
2521 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002522 self, from_s, from_len, to_s, to_len,
2523 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002524 }
2525 }
2526
2527 /* Otherwise use the more generic algorithms */
2528 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002529 return replace_single_character(self, from_s[0],
2530 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002531 } else {
2532 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002533 return replace_substring(self, from_s, from_len, to_s, to_len,
2534 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002535 }
2536}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002538PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002539"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002541Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002542old replaced by new. If the optional argument count is\n\
2543given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544
2545static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002546string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002548 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002549 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002550 const char *from_s, *to_s;
2551 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552
Thomas Wouters477c8d52006-05-27 19:21:47 +00002553 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002554 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002555
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002557 from_s = PyString_AS_STRING(from);
2558 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 return NULL;
2562
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002564 to_s = PyString_AS_STRING(to);
2565 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002567 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002568 return NULL;
2569
Thomas Wouters477c8d52006-05-27 19:21:47 +00002570 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002571 from_s, from_len,
2572 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002573}
2574
Thomas Wouters477c8d52006-05-27 19:21:47 +00002575/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002577/* Matches the end (direction >= 0) or start (direction < 0) of self
2578 * against substr, using the start and end arguments. Returns
2579 * -1 on error, 0 if not found and 1 if found.
2580 */
2581Py_LOCAL(int)
2582_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2583 Py_ssize_t end, int direction)
2584{
2585 Py_ssize_t len = PyString_GET_SIZE(self);
2586 Py_ssize_t slen;
2587 const char* sub;
2588 const char* str;
2589
2590 if (PyString_Check(substr)) {
2591 sub = PyString_AS_STRING(substr);
2592 slen = PyString_GET_SIZE(substr);
2593 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002594 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2595 return -1;
2596 str = PyString_AS_STRING(self);
2597
2598 string_adjust_indices(&start, &end, len);
2599
2600 if (direction < 0) {
2601 /* startswith */
2602 if (start+slen > len)
2603 return 0;
2604 } else {
2605 /* endswith */
2606 if (end-start < slen || start > len)
2607 return 0;
2608
2609 if (end-slen > start)
2610 start = end - slen;
2611 }
2612 if (end-start >= slen)
2613 return ! memcmp(str+start, sub, slen);
2614 return 0;
2615}
2616
2617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002618PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002619"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002620\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002621Return True if B starts with the specified prefix, False otherwise.\n\
2622With optional start, test B beginning at that position.\n\
2623With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002624prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625
2626static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002627string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002628{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002629 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002630 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002632 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002633
Guido van Rossumc6821402000-05-08 14:08:05 +00002634 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2635 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002636 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002637 if (PyTuple_Check(subobj)) {
2638 Py_ssize_t i;
2639 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2640 result = _string_tailmatch(self,
2641 PyTuple_GET_ITEM(subobj, i),
2642 start, end, -1);
2643 if (result == -1)
2644 return NULL;
2645 else if (result) {
2646 Py_RETURN_TRUE;
2647 }
2648 }
2649 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002651 result = _string_tailmatch(self, subobj, start, end, -1);
2652 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002653 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002654 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002655 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002656}
2657
2658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002659PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002660"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002661\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002662Return True if B ends with the specified suffix, False otherwise.\n\
2663With optional start, test B beginning at that position.\n\
2664With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666
2667static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002668string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002669{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002670 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002671 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002673 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002674
Guido van Rossumc6821402000-05-08 14:08:05 +00002675 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2676 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002677 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 if (PyTuple_Check(subobj)) {
2679 Py_ssize_t i;
2680 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2681 result = _string_tailmatch(self,
2682 PyTuple_GET_ITEM(subobj, i),
2683 start, end, +1);
2684 if (result == -1)
2685 return NULL;
2686 else if (result) {
2687 Py_RETURN_TRUE;
2688 }
2689 }
2690 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002691 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002692 result = _string_tailmatch(self, subobj, start, end, +1);
2693 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002694 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002695 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002696 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002697}
2698
2699
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002700PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002701"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002702\n\
2703Decodes S using the codec registered for encoding. encoding defaults\n\
2704to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002705handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2706a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002707as well as any other name registerd with codecs.register_error that is\n\
2708able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002709
2710static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002711string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002713 const char *encoding = NULL;
2714 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002715
Guido van Rossum98297ee2007-11-06 21:34:58 +00002716 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2717 return NULL;
2718 if (encoding == NULL)
2719 encoding = PyUnicode_GetDefaultEncoding();
2720 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002721}
2722
2723
Guido van Rossumae404e22007-10-26 21:46:44 +00002724PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002725"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002726\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727Create a bytes object from a string of hexadecimal numbers.\n\
2728Spaces between two numbers are accepted.\n\
2729Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002730
2731static int
2732hex_digit_to_int(Py_UNICODE c)
2733{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002734 if (c >= 128)
2735 return -1;
2736 if (ISDIGIT(c))
2737 return c - '0';
2738 else {
2739 if (ISUPPER(c))
2740 c = TOLOWER(c);
2741 if (c >= 'a' && c <= 'f')
2742 return c - 'a' + 10;
2743 }
2744 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002745}
2746
2747static PyObject *
2748string_fromhex(PyObject *cls, PyObject *args)
2749{
2750 PyObject *newstring, *hexobj;
2751 char *buf;
2752 Py_UNICODE *hex;
2753 Py_ssize_t hexlen, byteslen, i, j;
2754 int top, bot;
2755
2756 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2757 return NULL;
2758 assert(PyUnicode_Check(hexobj));
2759 hexlen = PyUnicode_GET_SIZE(hexobj);
2760 hex = PyUnicode_AS_UNICODE(hexobj);
2761 byteslen = hexlen/2; /* This overestimates if there are spaces */
2762 newstring = PyString_FromStringAndSize(NULL, byteslen);
2763 if (!newstring)
2764 return NULL;
2765 buf = PyString_AS_STRING(newstring);
2766 for (i = j = 0; i < hexlen; i += 2) {
2767 /* skip over spaces in the input */
2768 while (hex[i] == ' ')
2769 i++;
2770 if (i >= hexlen)
2771 break;
2772 top = hex_digit_to_int(hex[i]);
2773 bot = hex_digit_to_int(hex[i+1]);
2774 if (top == -1 || bot == -1) {
2775 PyErr_Format(PyExc_ValueError,
2776 "non-hexadecimal number found in "
2777 "fromhex() arg at position %zd", i);
2778 goto error;
2779 }
2780 buf[j++] = (top << 4) + bot;
2781 }
2782 if (_PyString_Resize(&newstring, j) < 0)
2783 goto error;
2784 return newstring;
2785
2786 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002787 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002788 return NULL;
2789}
2790
2791
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002792static PyObject *
2793string_getnewargs(PyStringObject *v)
2794{
Christian Heimes90aa7642007-12-19 02:45:37 +00002795 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002796}
2797
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002798
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002799static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002800string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002801 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002802 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2803 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002804 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002805 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002806 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002807 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002808 endswith__doc__},
2809 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2810 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002811 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002812 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2813 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002814 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002815 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2816 _Py_isalnum__doc__},
2817 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2818 _Py_isalpha__doc__},
2819 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2820 _Py_isdigit__doc__},
2821 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2822 _Py_islower__doc__},
2823 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2824 _Py_isspace__doc__},
2825 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2826 _Py_istitle__doc__},
2827 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2828 _Py_isupper__doc__},
2829 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2830 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2831 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002832 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002833 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002834 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2835 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2836 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002837 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002838 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2839 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002840 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2841 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2842 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2843 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2844 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002845 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002846 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002847 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002848 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2849 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002850 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002851 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2852 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002853 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002854 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002855 {NULL, NULL} /* sentinel */
2856};
2857
Jeremy Hylton938ace62002-07-17 16:30:39 +00002858static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002859str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2860
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002861static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002862string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002863{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002864 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002865 const char *encoding = NULL;
2866 const char *errors = NULL;
2867 PyObject *new = NULL;
2868 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002869 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002870
Guido van Rossumae960af2001-08-30 03:11:59 +00002871 if (type != &PyString_Type)
2872 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002873 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002874 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002875 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002876 if (x == NULL) {
2877 if (encoding != NULL || errors != NULL) {
2878 PyErr_SetString(PyExc_TypeError,
2879 "encoding or errors without sequence "
2880 "argument");
2881 return NULL;
2882 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002883 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002884 }
2885
2886 if (PyUnicode_Check(x)) {
2887 /* Encode via the codec registry */
2888 if (encoding == NULL) {
2889 PyErr_SetString(PyExc_TypeError,
2890 "string argument without an encoding");
2891 return NULL;
2892 }
2893 new = PyCodec_Encode(x, encoding, errors);
2894 if (new == NULL)
2895 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002896 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002897 return new;
2898 }
2899
2900 /* If it's not unicode, there can't be encoding or errors */
2901 if (encoding != NULL || errors != NULL) {
2902 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002903 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002904 return NULL;
2905 }
2906
Guido van Rossum98297ee2007-11-06 21:34:58 +00002907 /* Is it an int? */
2908 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2909 if (size == -1 && PyErr_Occurred()) {
2910 PyErr_Clear();
2911 }
2912 else {
2913 if (size < 0) {
2914 PyErr_SetString(PyExc_ValueError, "negative count");
2915 return NULL;
2916 }
2917 new = PyString_FromStringAndSize(NULL, size);
2918 if (new == NULL) {
2919 return NULL;
2920 }
2921 if (size > 0) {
2922 memset(((PyStringObject*)new)->ob_sval, 0, size);
2923 }
2924 return new;
2925 }
2926
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002927 /* Use the modern buffer interface */
2928 if (PyObject_CheckBuffer(x)) {
2929 Py_buffer view;
2930 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2931 return NULL;
2932 new = PyString_FromStringAndSize(NULL, view.len);
2933 if (!new)
2934 goto fail;
2935 // XXX(brett.cannon): Better way to get to internal buffer?
2936 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2937 &view, view.len, 'C') < 0)
2938 goto fail;
2939 PyObject_ReleaseBuffer(x, &view);
2940 return new;
2941 fail:
2942 Py_XDECREF(new);
2943 PyObject_ReleaseBuffer(x, &view);
2944 return NULL;
2945 }
2946
Guido van Rossum98297ee2007-11-06 21:34:58 +00002947 /* For iterator version, create a string object and resize as needed */
2948 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2949 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2950 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002951 size = 64;
2952 new = PyString_FromStringAndSize(NULL, size);
2953 if (new == NULL)
2954 return NULL;
2955
2956 /* XXX Optimize this if the arguments is a list, tuple */
2957
2958 /* Get the iterator */
2959 it = PyObject_GetIter(x);
2960 if (it == NULL)
2961 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002962
2963 /* Run the iterator to exhaustion */
2964 for (i = 0; ; i++) {
2965 PyObject *item;
2966 Py_ssize_t value;
2967
2968 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002969 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002970 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002971 if (PyErr_Occurred())
2972 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002973 break;
2974 }
2975
2976 /* Interpret it as an int (__index__) */
2977 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2978 Py_DECREF(item);
2979 if (value == -1 && PyErr_Occurred())
2980 goto error;
2981
2982 /* Range check */
2983 if (value < 0 || value >= 256) {
2984 PyErr_SetString(PyExc_ValueError,
2985 "bytes must be in range(0, 256)");
2986 goto error;
2987 }
2988
2989 /* Append the byte */
2990 if (i >= size) {
2991 size *= 2;
2992 if (_PyString_Resize(&new, size) < 0)
2993 goto error;
2994 }
2995 ((PyStringObject *)new)->ob_sval[i] = value;
2996 }
2997 _PyString_Resize(&new, i);
2998
2999 /* Clean up and return success */
3000 Py_DECREF(it);
3001 return new;
3002
3003 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003004 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003005 Py_XDECREF(it);
3006 Py_DECREF(new);
3007 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003008}
3009
Guido van Rossumae960af2001-08-30 03:11:59 +00003010static PyObject *
3011str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3012{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003013 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003014 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003015
3016 assert(PyType_IsSubtype(type, &PyString_Type));
3017 tmp = string_new(&PyString_Type, args, kwds);
3018 if (tmp == NULL)
3019 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003020 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003021 n = PyString_GET_SIZE(tmp);
3022 pnew = type->tp_alloc(type, n);
3023 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003024 Py_MEMCPY(PyString_AS_STRING(pnew),
3025 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003026 ((PyStringObject *)pnew)->ob_shash =
3027 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003028 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003029 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003030 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003031}
3032
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003033PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003034"bytes(iterable_of_ints) -> bytes.\n\
3035bytes(string, encoding[, errors]) -> bytes\n\
3036bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3037bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003038\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003039Construct an immutable array of bytes from:\n\
3040 - an iterable yielding integers in range(256)\n\
3041 - a text string encoded using the specified encoding\n\
3042 - a bytes or a buffer object\n\
3043 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003045static PyObject *str_iter(PyObject *seq);
3046
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003047PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003048 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003049 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003050 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003051 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003052 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003053 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003054 0, /* tp_getattr */
3055 0, /* tp_setattr */
3056 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003057 (reprfunc)string_repr, /* tp_repr */
3058 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003059 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003060 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003061 (hashfunc)string_hash, /* tp_hash */
3062 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003063 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003064 PyObject_GenericGetAttr, /* tp_getattro */
3065 0, /* tp_setattro */
3066 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003067 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3068 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003069 string_doc, /* tp_doc */
3070 0, /* tp_traverse */
3071 0, /* tp_clear */
3072 (richcmpfunc)string_richcompare, /* tp_richcompare */
3073 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003074 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003075 0, /* tp_iternext */
3076 string_methods, /* tp_methods */
3077 0, /* tp_members */
3078 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003079 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003080 0, /* tp_dict */
3081 0, /* tp_descr_get */
3082 0, /* tp_descr_set */
3083 0, /* tp_dictoffset */
3084 0, /* tp_init */
3085 0, /* tp_alloc */
3086 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003087 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003088};
3089
3090void
Fred Drakeba096332000-07-09 07:04:36 +00003091PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003092{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003094 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003095 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003096 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003097 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003098 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003099 *pv = NULL;
3100 return;
3101 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003102 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003103 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003104 *pv = v;
3105}
3106
Guido van Rossum013142a1994-08-30 08:19:36 +00003107void
Fred Drakeba096332000-07-09 07:04:36 +00003108PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003109{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 PyString_Concat(pv, w);
3111 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003112}
3113
3114
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003115/* The following function breaks the notion that strings are immutable:
3116 it changes the size of a string. We get away with this only if there
3117 is only one module referencing the object. You can also think of it
3118 as creating a new string object and destroying the old one, only
3119 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003120 already be known to some other part of the code...
3121 Note that if there's not enough memory to resize the string, the original
3122 string object at *pv is deallocated, *pv is set to NULL, an "out of
3123 memory" exception is set, and -1 is returned. Else (on success) 0 is
3124 returned, and the value in *pv may or may not be the same as on input.
3125 As always, an extra byte is allocated for a trailing \0 byte (newsize
3126 does *not* include that), and a trailing \0 byte is stored.
3127*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003128
3129int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003130_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003131{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003132 register PyObject *v;
3133 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003134 v = *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003135 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003136 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003137 Py_DECREF(v);
3138 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003139 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003140 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003141 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003142 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003143 _Py_ForgetReference(v);
3144 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003145 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003146 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003147 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003148 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003149 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003151 _Py_NewReference(*pv);
3152 sv = (PyStringObject *) *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003153 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003154 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003155 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003156 return 0;
3157}
Guido van Rossume5372401993-03-16 12:15:04 +00003158
Tim Peters38fd5b62000-09-21 05:43:11 +00003159/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3160 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3161 * Python's regular ints.
3162 * Return value: a new PyString*, or NULL if error.
3163 * . *pbuf is set to point into it,
3164 * *plen set to the # of chars following that.
3165 * Caller must decref it when done using pbuf.
3166 * The string starting at *pbuf is of the form
3167 * "-"? ("0x" | "0X")? digit+
3168 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003169 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003170 * There will be at least prec digits, zero-filled on the left if
3171 * necessary to get that many.
3172 * val object to be converted
3173 * flags bitmask of format flags; only F_ALT is looked at
3174 * prec minimum number of digits; 0-fill on left if needed
3175 * type a character in [duoxX]; u acts the same as d
3176 *
3177 * CAUTION: o, x and X conversions on regular ints can never
3178 * produce a '-' sign, but can for Python's unbounded ints.
3179 */
3180PyObject*
3181_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3182 char **pbuf, int *plen)
3183{
3184 PyObject *result = NULL;
3185 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003186 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003187 int sign; /* 1 if '-', else 0 */
3188 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003189 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003190 int numdigits; /* len == numnondigits + numdigits */
3191 int numnondigits = 0;
3192
Guido van Rossumddefaf32007-01-14 03:31:43 +00003193 /* Avoid exceeding SSIZE_T_MAX */
3194 if (prec > PY_SSIZE_T_MAX-3) {
3195 PyErr_SetString(PyExc_OverflowError,
3196 "precision too large");
3197 return NULL;
3198 }
3199
Tim Peters38fd5b62000-09-21 05:43:11 +00003200 switch (type) {
3201 case 'd':
3202 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003203 /* Special-case boolean: we want 0/1 */
3204 if (PyBool_Check(val))
3205 result = PyNumber_ToBase(val, 10);
3206 else
Christian Heimes90aa7642007-12-19 02:45:37 +00003207 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003208 break;
3209 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003210 numnondigits = 2;
3211 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003212 break;
3213 case 'x':
3214 case 'X':
3215 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003216 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003217 break;
3218 default:
3219 assert(!"'type' not in [duoxX]");
3220 }
3221 if (!result)
3222 return NULL;
3223
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003224 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003225 if (!buf) {
3226 Py_DECREF(result);
3227 return NULL;
3228 }
3229
Tim Peters38fd5b62000-09-21 05:43:11 +00003230 /* To modify the string in-place, there can only be one reference. */
Christian Heimes90aa7642007-12-19 02:45:37 +00003231 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003232 PyErr_BadInternalCall();
3233 return NULL;
3234 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003235 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003236 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003237 PyErr_SetString(PyExc_ValueError,
3238 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003239 return NULL;
3240 }
3241 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003242 if (buf[len-1] == 'L') {
3243 --len;
3244 buf[len] = '\0';
3245 }
3246 sign = buf[0] == '-';
3247 numnondigits += sign;
3248 numdigits = len - numnondigits;
3249 assert(numdigits > 0);
3250
Tim Petersfff53252001-04-12 18:38:48 +00003251 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003252 if (((flags & F_ALT) == 0 &&
3253 (type == 'o' || type == 'x' || type == 'X'))) {
3254 assert(buf[sign] == '0');
3255 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003256 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003257 numnondigits -= 2;
3258 buf += 2;
3259 len -= 2;
3260 if (sign)
3261 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003262 assert(len == numnondigits + numdigits);
3263 assert(numdigits > 0);
3264 }
3265
3266 /* Fill with leading zeroes to meet minimum width. */
3267 if (prec > numdigits) {
3268 PyObject *r1 = PyString_FromStringAndSize(NULL,
3269 numnondigits + prec);
3270 char *b1;
3271 if (!r1) {
3272 Py_DECREF(result);
3273 return NULL;
3274 }
3275 b1 = PyString_AS_STRING(r1);
3276 for (i = 0; i < numnondigits; ++i)
3277 *b1++ = *buf++;
3278 for (i = 0; i < prec - numdigits; i++)
3279 *b1++ = '0';
3280 for (i = 0; i < numdigits; i++)
3281 *b1++ = *buf++;
3282 *b1 = '\0';
3283 Py_DECREF(result);
3284 result = r1;
3285 buf = PyString_AS_STRING(result);
3286 len = numnondigits + prec;
3287 }
3288
3289 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003290 if (type == 'X') {
3291 /* Need to convert all lower case letters to upper case.
3292 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003293 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003294 if (buf[i] >= 'a' && buf[i] <= 'x')
3295 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003296 }
3297 *pbuf = buf;
3298 *plen = len;
3299 return result;
3300}
3301
Guido van Rossum8cf04761997-08-02 02:57:45 +00003302void
Fred Drakeba096332000-07-09 07:04:36 +00003303PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003304{
3305 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003306 for (i = 0; i < UCHAR_MAX + 1; i++) {
3307 Py_XDECREF(characters[i]);
3308 characters[i] = NULL;
3309 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003310 Py_XDECREF(nullstring);
3311 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003312}
Barry Warsawa903ad982001-02-23 16:40:48 +00003313
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003314/*********************** Str Iterator ****************************/
3315
3316typedef struct {
3317 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003318 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003319 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3320} striterobject;
3321
3322static void
3323striter_dealloc(striterobject *it)
3324{
3325 _PyObject_GC_UNTRACK(it);
3326 Py_XDECREF(it->it_seq);
3327 PyObject_GC_Del(it);
3328}
3329
3330static int
3331striter_traverse(striterobject *it, visitproc visit, void *arg)
3332{
3333 Py_VISIT(it->it_seq);
3334 return 0;
3335}
3336
3337static PyObject *
3338striter_next(striterobject *it)
3339{
3340 PyStringObject *seq;
3341 PyObject *item;
3342
3343 assert(it != NULL);
3344 seq = it->it_seq;
3345 if (seq == NULL)
3346 return NULL;
3347 assert(PyString_Check(seq));
3348
3349 if (it->it_index < PyString_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003350 item = PyLong_FromLong(
Guido van Rossum75a902d2007-10-19 22:06:24 +00003351 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003352 if (item != NULL)
3353 ++it->it_index;
3354 return item;
3355 }
3356
3357 Py_DECREF(seq);
3358 it->it_seq = NULL;
3359 return NULL;
3360}
3361
3362static PyObject *
3363striter_len(striterobject *it)
3364{
3365 Py_ssize_t len = 0;
3366 if (it->it_seq)
3367 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003368 return PyLong_FromSsize_t(len);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003369}
3370
Guido van Rossum49d6b072006-08-17 21:11:47 +00003371PyDoc_STRVAR(length_hint_doc,
3372 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003373
3374static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003375 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3376 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003377 {NULL, NULL} /* sentinel */
3378};
3379
3380PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003381 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003382 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003383 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003384 0, /* tp_itemsize */
3385 /* methods */
3386 (destructor)striter_dealloc, /* tp_dealloc */
3387 0, /* tp_print */
3388 0, /* tp_getattr */
3389 0, /* tp_setattr */
3390 0, /* tp_compare */
3391 0, /* tp_repr */
3392 0, /* tp_as_number */
3393 0, /* tp_as_sequence */
3394 0, /* tp_as_mapping */
3395 0, /* tp_hash */
3396 0, /* tp_call */
3397 0, /* tp_str */
3398 PyObject_GenericGetAttr, /* tp_getattro */
3399 0, /* tp_setattro */
3400 0, /* tp_as_buffer */
3401 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3402 0, /* tp_doc */
3403 (traverseproc)striter_traverse, /* tp_traverse */
3404 0, /* tp_clear */
3405 0, /* tp_richcompare */
3406 0, /* tp_weaklistoffset */
3407 PyObject_SelfIter, /* tp_iter */
3408 (iternextfunc)striter_next, /* tp_iternext */
3409 striter_methods, /* tp_methods */
3410 0,
3411};
3412
3413static PyObject *
3414str_iter(PyObject *seq)
3415{
3416 striterobject *it;
3417
3418 if (!PyString_Check(seq)) {
3419 PyErr_BadInternalCall();
3420 return NULL;
3421 }
3422 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3423 if (it == NULL)
3424 return NULL;
3425 it->it_index = 0;
3426 Py_INCREF(seq);
3427 it->it_seq = (PyStringObject *)seq;
3428 _PyObject_GC_TRACK(it);
3429 return (PyObject *)it;
3430}