blob: 011fc323abde7d010d1b33063e3ba08a2e122306 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
Christian Heimes90aa7642007-12-19 02:45:37 +000015 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossum98297ee2007-11-06 21:34:58 +000016
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000021 Py_TYPE(obj)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +000022 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
   For PyString_FromStringAndSize(), the parameter `str' is
46 either NULL or else points to a string containing at least `size' bytes.
47 For PyString_FromStringAndSize(), the string in the `str' parameter does
48 not have to be null-terminated. (Therefore it is safe to construct a
49 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
50 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
51 bytes (setting the last byte to the null terminating character) and you can
52 fill in the data yourself. If `str' is non-NULL then the resulting
53 PyString object must be treated as immutable and you must not fill in nor
54 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000056 The PyObject member `op->ob_size', which denotes the number of "extra
57 items" in a variable-size object, will contain the number of bytes
58 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
60 PyString_FromStringAndSize()) or the length of the string in the `str'
61 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Christian Heimes90aa7642007-12-19 02:45:37 +0000354 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000498PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000499{
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000500 if (!PyString_Check(op)) {
501 PyErr_Format(PyExc_TypeError,
502 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
503 return -1;
504 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000505 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506}
507
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000508char *
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000511 if (!PyString_Check(op)) {
512 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000513 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000514 return NULL;
515 }
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000516 return ((PyStringObject *)op)->ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517}
518
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519int
520PyString_AsStringAndSize(register PyObject *obj,
521 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000522 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523{
524 if (s == NULL) {
525 PyErr_BadInternalCall();
526 return -1;
527 }
528
529 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000530 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000531 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000532 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000533 }
534
535 *s = PyString_AS_STRING(obj);
536 if (len != NULL)
537 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000538 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000539 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000540 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000541 return -1;
542 }
543 return 0;
544}
545
Thomas Wouters477c8d52006-05-27 19:21:47 +0000546/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547/* Methods */
548
/* Configuration macros defined ahead of the stringlib headers included
   below (presumably read by those headers -- verify against them):
   element type, comparison, accessors, constructor, and the shared
   empty object for this string type. */
#define STRINGLIB_CHAR           char

#define STRINGLIB_CMP            memcmp
#define STRINGLIB_LEN            PyString_GET_SIZE
#define STRINGLIB_NEW            PyString_FromStringAndSize
#define STRINGLIB_STR            PyString_AS_STRING
/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */

#define STRINGLIB_EMPTY          nullstring
#define STRINGLIB_CHECK_EXACT    PyString_CheckExact
#define STRINGLIB_MUTABLE        0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000560
561#include "stringlib/fastsearch.h"
562
563#include "stringlib/count.h"
564#include "stringlib/find.h"
565#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000566#include "stringlib/ctype.h"
567#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000568
569
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000570PyObject *
571PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000572{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000573 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000574 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes90aa7642007-12-19 02:45:37 +0000575 Py_ssize_t length = Py_SIZE(op);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000576 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000577 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000578 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000579 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000580 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000581 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000582 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000583 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000584 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000585 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000586 }
587 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000588 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000589 register Py_UNICODE c;
590 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 int quote;
592
Guido van Rossum98297ee2007-11-06 21:34:58 +0000593 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000594 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000595 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000596 char *test, *start;
597 start = PyString_AS_STRING(op);
598 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000599 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000600 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000601 goto decided;
602 }
603 else if (*test == '\'')
604 quote = '"';
605 }
606 decided:
607 ;
608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609
Guido van Rossum98297ee2007-11-06 21:34:58 +0000610 *p++ = 'b', *p++ = quote;
611 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000612 /* There's at least enough room for a hex escape
613 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000614 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000616 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000617 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000618 else if (c == '\t')
619 *p++ = '\\', *p++ = 't';
620 else if (c == '\n')
621 *p++ = '\\', *p++ = 'n';
622 else if (c == '\r')
623 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000624 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000625 *p++ = '\\';
626 *p++ = 'x';
627 *p++ = hexdigits[(c & 0xf0) >> 4];
628 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000629 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000630 else
631 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000632 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000633 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000634 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000635 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000636 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
637 Py_DECREF(v);
638 return NULL;
639 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000640 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000641 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642}
643
Guido van Rossum189f1df2001-05-01 16:51:53 +0000644static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645string_repr(PyObject *op)
646{
647 return PyString_Repr(op, 1);
648}
649
650static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000651string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000652{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000653 if (Py_BytesWarningFlag) {
654 if (PyErr_WarnEx(PyExc_BytesWarning,
655 "str() on a bytes instance", 1))
656 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000657 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000658 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000659}
660
Martin v. Löwis18e16552006-02-15 17:27:45 +0000661static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000662string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663{
Christian Heimes90aa7642007-12-19 02:45:37 +0000664 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665}
666
Guido van Rossum98297ee2007-11-06 21:34:58 +0000667/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000668static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000669string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000671 Py_ssize_t size;
672 Py_buffer va, vb;
673 PyObject *result = NULL;
674
675 va.len = -1;
676 vb.len = -1;
677 if (_getbuffer(a, &va) < 0 ||
678 _getbuffer(b, &vb) < 0) {
679 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000680 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000682 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000683
Guido van Rossum98297ee2007-11-06 21:34:58 +0000684 /* Optimize end cases */
685 if (va.len == 0 && PyString_CheckExact(b)) {
686 result = b;
687 Py_INCREF(result);
688 goto done;
689 }
690 if (vb.len == 0 && PyString_CheckExact(a)) {
691 result = a;
692 Py_INCREF(result);
693 goto done;
694 }
695
696 size = va.len + vb.len;
697 if (size < 0) {
698 PyErr_NoMemory();
699 goto done;
700 }
701
702 result = PyString_FromStringAndSize(NULL, size);
703 if (result != NULL) {
704 memcpy(PyString_AS_STRING(result), va.buf, va.len);
705 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
706 }
707
708 done:
709 if (va.len != -1)
710 PyObject_ReleaseBuffer(a, &va);
711 if (vb.len != -1)
712 PyObject_ReleaseBuffer(b, &vb);
713 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714}
715
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000719 register Py_ssize_t i;
720 register Py_ssize_t j;
721 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000723 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724 if (n < 0)
725 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000726 /* watch out for overflows: the size can overflow int,
727 * and the # of bytes needed can overflow size_t
728 */
Christian Heimes90aa7642007-12-19 02:45:37 +0000729 size = Py_SIZE(a) * n;
730 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000731 PyErr_SetString(PyExc_OverflowError,
732 "repeated string is too long");
733 return NULL;
734 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000735 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 Py_INCREF(a);
737 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738 }
Tim Peterse7c05322004-06-27 17:24:49 +0000739 nbytes = (size_t)size;
740 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000741 PyErr_SetString(PyExc_OverflowError,
742 "repeated string is too long");
743 return NULL;
744 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000745 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000746 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000747 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000748 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000749 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000750 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000751 op->ob_sval[size] = '\0';
Christian Heimes90aa7642007-12-19 02:45:37 +0000752 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000753 memset(op->ob_sval, a->ob_sval[0] , n);
754 return (PyObject *) op;
755 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000756 i = 0;
757 if (i < size) {
Christian Heimes90aa7642007-12-19 02:45:37 +0000758 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
759 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000760 }
761 while (i < size) {
762 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000763 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000764 i += j;
765 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000766 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767}
768
Guido van Rossum9284a572000-03-07 15:53:43 +0000769static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000770string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000771{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
773 if (ival == -1 && PyErr_Occurred()) {
774 Py_buffer varg;
775 int pos;
776 PyErr_Clear();
777 if (_getbuffer(arg, &varg) < 0)
778 return -1;
Christian Heimes90aa7642007-12-19 02:45:37 +0000779 pos = stringlib_find(PyString_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000780 varg.buf, varg.len, 0);
781 PyObject_ReleaseBuffer(arg, &varg);
782 return pos >= 0;
783 }
784 if (ival < 0 || ival >= 256) {
785 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
786 return -1;
787 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000788
Christian Heimes90aa7642007-12-19 02:45:37 +0000789 return memchr(PyString_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790}
791
792static PyObject *
793string_item(PyStringObject *a, register Py_ssize_t i)
794{
Christian Heimes90aa7642007-12-19 02:45:37 +0000795 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796 PyErr_SetString(PyExc_IndexError, "string index out of range");
797 return NULL;
798 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000799 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000800}
801
Martin v. Löwiscd353062001-05-24 16:56:35 +0000802static PyObject*
803string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000805 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000806 Py_ssize_t len_a, len_b;
807 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000808 PyObject *result;
809
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000810 /* Make sure both arguments are strings. */
811 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000812 if (Py_BytesWarningFlag && (op == Py_EQ) &&
813 (PyObject_IsInstance((PyObject*)a,
814 (PyObject*)&PyUnicode_Type) ||
815 PyObject_IsInstance((PyObject*)b,
816 (PyObject*)&PyUnicode_Type))) {
817 if (PyErr_WarnEx(PyExc_BytesWarning,
818 "Comparsion between bytes and string", 1))
819 return NULL;
820 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000821 result = Py_NotImplemented;
822 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000823 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000824 if (a == b) {
825 switch (op) {
826 case Py_EQ:case Py_LE:case Py_GE:
827 result = Py_True;
828 goto out;
829 case Py_NE:case Py_LT:case Py_GT:
830 result = Py_False;
831 goto out;
832 }
833 }
834 if (op == Py_EQ) {
835 /* Supporting Py_NE here as well does not save
836 much time, since Py_NE is rarely used. */
Christian Heimes90aa7642007-12-19 02:45:37 +0000837 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000838 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimes90aa7642007-12-19 02:45:37 +0000839 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000840 result = Py_True;
841 } else {
842 result = Py_False;
843 }
844 goto out;
845 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000846 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000847 min_len = (len_a < len_b) ? len_a : len_b;
848 if (min_len > 0) {
849 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
850 if (c==0)
851 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000852 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 c = 0;
854 if (c == 0)
855 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
856 switch (op) {
857 case Py_LT: c = c < 0; break;
858 case Py_LE: c = c <= 0; break;
859 case Py_EQ: assert(0); break; /* unreachable */
860 case Py_NE: c = c != 0; break;
861 case Py_GT: c = c > 0; break;
862 case Py_GE: c = c >= 0; break;
863 default:
864 result = Py_NotImplemented;
865 goto out;
866 }
867 result = c ? Py_True : Py_False;
868 out:
869 Py_INCREF(result);
870 return result;
871}
872
Guido van Rossum9bfef441993-03-29 10:43:31 +0000873static long
Fred Drakeba096332000-07-09 07:04:36 +0000874string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000875{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000876 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000877 register unsigned char *p;
878 register long x;
879
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000880 if (a->ob_shash != -1)
881 return a->ob_shash;
Christian Heimes90aa7642007-12-19 02:45:37 +0000882 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000883 p = (unsigned char *) a->ob_sval;
884 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000885 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000886 x = (1000003*x) ^ *p++;
Christian Heimes90aa7642007-12-19 02:45:37 +0000887 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000888 if (x == -1)
889 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000890 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000891 return x;
892}
893
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000894static PyObject*
895string_subscript(PyStringObject* self, PyObject* item)
896{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000897 if (PyIndex_Check(item)) {
898 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000899 if (i == -1 && PyErr_Occurred())
900 return NULL;
901 if (i < 0)
902 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000903 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000904 PyErr_SetString(PyExc_IndexError,
905 "string index out of range");
906 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000907 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000908 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000909 }
910 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000911 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000912 char* source_buf;
913 char* result_buf;
914 PyObject* result;
915
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000916 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000917 PyString_GET_SIZE(self),
918 &start, &stop, &step, &slicelength) < 0) {
919 return NULL;
920 }
921
922 if (slicelength <= 0) {
923 return PyString_FromStringAndSize("", 0);
924 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000925 else if (start == 0 && step == 1 &&
926 slicelength == PyString_GET_SIZE(self) &&
927 PyString_CheckExact(self)) {
928 Py_INCREF(self);
929 return (PyObject *)self;
930 }
931 else if (step == 1) {
932 return PyString_FromStringAndSize(
933 PyString_AS_STRING(self) + start,
934 slicelength);
935 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000936 else {
937 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000938 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000939 if (result_buf == NULL)
940 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000941
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000942 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000943 cur += step, i++) {
944 result_buf[i] = source_buf[cur];
945 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000946
947 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000948 slicelength);
949 PyMem_Free(result_buf);
950 return result;
951 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000952 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000953 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000954 PyErr_Format(PyExc_TypeError,
955 "string indices must be integers, not %.200s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000956 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000957 return NULL;
958 }
959}
960
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000961static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000962string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000963{
Christian Heimes90aa7642007-12-19 02:45:37 +0000964 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000965 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000966}
967
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000969 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000970 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000971 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000972 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000973 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000974 0, /*sq_ass_item*/
975 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000976 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977};
978
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000979static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000980 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000981 (binaryfunc)string_subscript,
982 0,
983};
984
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000985static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000986 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000987 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000988};
989
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000990
/* Indices into stripformat[] below, one per strip method variant. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skip the 3-character "|O:" prefix of the format string, leaving just
   the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +0000999
Thomas Wouters477c8d52006-05-27 19:21:47 +00001000
/* True when the `length' bytes of `target' starting at `offset' equal
   `pattern'.  The first and last bytes are compared directly before
   falling back to memcmp() for the middle.
   Don't call if length < 2.
   Every argument is parenthesized so that expressions such as `buf + k'
   can be passed safely; note that arguments are evaluated more than
   once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1006
1007
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument and the whole expansion are parenthesized so the macro can
   be used with, and inside, arbitrary expressions.  The comparison is made
   before adding 1, so a very large maxsplit never reaches the addition. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1020
/* Add the bytes data[left:right] to the result list as a new object.
   Uses names from the expanding function rather than macro parameters:
   `str' (scratch PyObject *), `list', `count', and the label `onError',
   which is jumped to on failure.
   While count < MAX_PREALLOC the item is stored directly in list slot
   `count' with PyList_SET_ITEM; after that it goes through PyList_Append()
   and the local reference in `str' is dropped in both outcomes. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }

/* Always force the list to the expected size. */
/* Uses the local `count' of the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001040
/* Cursor helpers for the whitespace split routines.  `i' must be a
   modifiable index variable; it is advanced (SKIP_*) or moved backwards
   (RSKIP_*) in place.  The forward forms stop at `len', the reverse forms
   stop once `i' drops below 0.  Arguments are parenthesized so that
   expressions can be passed for `s' and `len'. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !ISSPACE((s)[i])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001045
1046Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001047split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001048{
Christian Heimes895627f2007-12-08 17:28:33 +00001049 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001050 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001051 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001052 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053
1054 if (list == NULL)
1055 return NULL;
1056
Thomas Wouters477c8d52006-05-27 19:21:47 +00001057 i = j = 0;
1058
1059 while (maxsplit-- > 0) {
1060 SKIP_SPACE(s, i, len);
1061 if (i==len) break;
1062 j = i; i++;
1063 SKIP_NONSPACE(s, i, len);
Christian Heimes895627f2007-12-08 17:28:33 +00001064 if (j == 0 && i == len && PyString_CheckExact(self)) {
1065 /* No whitespace in self, so just use it as list[0] */
1066 Py_INCREF(self);
1067 PyList_SET_ITEM(list, 0, (PyObject *)self);
1068 count++;
1069 break;
1070 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001071 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001073
1074 if (i < len) {
1075 /* Only occurs when maxsplit was reached */
1076 /* Skip any remaining whitespace and copy to end of string */
1077 SKIP_SPACE(s, i, len);
1078 if (i != len)
1079 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001081 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001082 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001083 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001084 Py_DECREF(list);
1085 return NULL;
1086}
1087
Thomas Wouters477c8d52006-05-27 19:21:47 +00001088Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001089split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001090{
Christian Heimes895627f2007-12-08 17:28:33 +00001091 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001092 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001093 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001094 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001095
1096 if (list == NULL)
1097 return NULL;
1098
Thomas Wouters477c8d52006-05-27 19:21:47 +00001099 i = j = 0;
1100 while ((j < len) && (maxcount-- > 0)) {
1101 for(; j<len; j++) {
1102 /* I found that using memchr makes no difference */
1103 if (s[j] == ch) {
1104 SPLIT_ADD(s, i, j);
1105 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001106 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001107 }
1108 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001109 }
Christian Heimes895627f2007-12-08 17:28:33 +00001110 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1111 /* ch not in self, so just use self as list[0] */
1112 Py_INCREF(self);
1113 PyList_SET_ITEM(list, 0, (PyObject *)self);
1114 count++;
1115 }
1116 else if (i <= len) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001117 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001118 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001119 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001120 return list;
1121
1122 onError:
1123 Py_DECREF(list);
1124 return NULL;
1125}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001127PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001128"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001129\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001130Return a list of the sections in B, using sep as the delimiter.\n\
1131If sep is not given, B is split on ASCII whitespace characters\n\
1132(space, tab, return, newline, formfeed, vertical tab).\n\
1133If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001134
1135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001136string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001138 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001139 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001140 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001141 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001142 PyObject *list, *str, *subobj = Py_None;
1143#ifdef USE_FAST
1144 Py_ssize_t pos;
1145#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001147 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001148 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001149 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001150 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001151 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001152 return split_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001153 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001154 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001155 sub = vsub.buf;
1156 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001157
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001158 if (n == 0) {
1159 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001160 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001161 return NULL;
1162 }
Christian Heimes895627f2007-12-08 17:28:33 +00001163 else if (n == 1)
1164 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165
Thomas Wouters477c8d52006-05-27 19:21:47 +00001166 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001167 if (list == NULL) {
1168 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001170 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001171
Thomas Wouters477c8d52006-05-27 19:21:47 +00001172#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001173 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001174 while (maxsplit-- > 0) {
1175 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1176 if (pos < 0)
1177 break;
1178 j = i+pos;
1179 SPLIT_ADD(s, i, j);
1180 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001181 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001182#else
1183 i = j = 0;
1184 while ((j+n <= len) && (maxsplit-- > 0)) {
1185 for (; j+n <= len; j++) {
1186 if (Py_STRING_MATCH(s, j, sub, n)) {
1187 SPLIT_ADD(s, i, j);
1188 i = j = j + n;
1189 break;
1190 }
1191 }
1192 }
1193#endif
1194 SPLIT_ADD(s, i, len);
1195 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001196 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001197 return list;
1198
Thomas Wouters477c8d52006-05-27 19:21:47 +00001199 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001201 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202 return NULL;
1203}
1204
Thomas Wouters477c8d52006-05-27 19:21:47 +00001205PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001206"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001207\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001208Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001209the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001210found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001211
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001212static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001213string_partition(PyStringObject *self, PyObject *sep_obj)
1214{
1215 const char *sep;
1216 Py_ssize_t sep_len;
1217
1218 if (PyString_Check(sep_obj)) {
1219 sep = PyString_AS_STRING(sep_obj);
1220 sep_len = PyString_GET_SIZE(sep_obj);
1221 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001222 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1223 return NULL;
1224
1225 return stringlib_partition(
1226 (PyObject*) self,
1227 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1228 sep_obj, sep, sep_len
1229 );
1230}
1231
1232PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001233"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001234\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001235Searches for the separator sep in B, starting at the end of B,\n\
1236and returns the part before it, the separator itself, and the\n\
1237part after it. If the separator is not found, returns two empty\n\
1238bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001239
1240static PyObject *
1241string_rpartition(PyStringObject *self, PyObject *sep_obj)
1242{
1243 const char *sep;
1244 Py_ssize_t sep_len;
1245
1246 if (PyString_Check(sep_obj)) {
1247 sep = PyString_AS_STRING(sep_obj);
1248 sep_len = PyString_GET_SIZE(sep_obj);
1249 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001250 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1251 return NULL;
1252
1253 return stringlib_rpartition(
1254 (PyObject*) self,
1255 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1256 sep_obj, sep, sep_len
1257 );
1258}
1259
1260Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001261rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001262{
Christian Heimes895627f2007-12-08 17:28:33 +00001263 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001264 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001265 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001266 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001267
1268 if (list == NULL)
1269 return NULL;
1270
Thomas Wouters477c8d52006-05-27 19:21:47 +00001271 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001272
Thomas Wouters477c8d52006-05-27 19:21:47 +00001273 while (maxsplit-- > 0) {
1274 RSKIP_SPACE(s, i);
1275 if (i<0) break;
1276 j = i; i--;
1277 RSKIP_NONSPACE(s, i);
Christian Heimes895627f2007-12-08 17:28:33 +00001278 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1279 /* No whitespace in self, so just use it as list[0] */
1280 Py_INCREF(self);
1281 PyList_SET_ITEM(list, 0, (PyObject *)self);
1282 count++;
1283 break;
1284 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001286 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001287 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001288 /* Only occurs when maxsplit was reached. Skip any remaining
1289 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001290 RSKIP_SPACE(s, i);
1291 if (i >= 0)
1292 SPLIT_ADD(s, 0, i + 1);
1293
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001294 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001295 FIX_PREALLOC_SIZE(list);
1296 if (PyList_Reverse(list) < 0)
1297 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001298 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001299 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001300 Py_DECREF(list);
1301 return NULL;
1302}
1303
Thomas Wouters477c8d52006-05-27 19:21:47 +00001304Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001305rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001306{
Christian Heimes895627f2007-12-08 17:28:33 +00001307 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001308 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001309 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001310 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001311
1312 if (list == NULL)
1313 return NULL;
1314
Thomas Wouters477c8d52006-05-27 19:21:47 +00001315 i = j = len - 1;
1316 while ((i >= 0) && (maxcount-- > 0)) {
1317 for (; i >= 0; i--) {
1318 if (s[i] == ch) {
1319 SPLIT_ADD(s, i + 1, j + 1);
1320 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001321 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 }
1323 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 }
Christian Heimes895627f2007-12-08 17:28:33 +00001325 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1326 /* ch not in self, so just use self as list[0] */
1327 Py_INCREF(self);
1328 PyList_SET_ITEM(list, 0, (PyObject *)self);
1329 count++;
1330 }
1331 else if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001332 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001333 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001334 FIX_PREALLOC_SIZE(list);
1335 if (PyList_Reverse(list) < 0)
1336 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337 return list;
1338
1339 onError:
1340 Py_DECREF(list);
1341 return NULL;
1342}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001343
1344PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001345"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001346\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001347Return a list of the sections in B, using sep as the delimiter,\n\
1348starting at the end of B and working to the front.\n\
1349If sep is not given, B is split on ASCII whitespace characters\n\
1350(space, tab, return, newline, formfeed, vertical tab).\n\
1351If maxsplit is given, at most maxsplit splits are done.");
1352
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001353
1354static PyObject *
1355string_rsplit(PyStringObject *self, PyObject *args)
1356{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001357 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001358 Py_ssize_t maxsplit = -1, count=0;
Christian Heimes895627f2007-12-08 17:28:33 +00001359 const char *s, *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001360 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001361 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001362
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001363 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001364 return NULL;
1365 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001366 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001367 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001368 return rsplit_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001369 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001370 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001371 sub = vsub.buf;
1372 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001373
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374 if (n == 0) {
1375 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001376 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001377 return NULL;
1378 }
Christian Heimes895627f2007-12-08 17:28:33 +00001379 else if (n == 1)
1380 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001381
Thomas Wouters477c8d52006-05-27 19:21:47 +00001382 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 if (list == NULL) {
1384 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001385 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387
1388 j = len;
1389 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001390
Christian Heimes895627f2007-12-08 17:28:33 +00001391 s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001392 while ( (i >= 0) && (maxsplit-- > 0) ) {
1393 for (; i>=0; i--) {
1394 if (Py_STRING_MATCH(s, i, sub, n)) {
1395 SPLIT_ADD(s, i + n, j);
1396 j = i;
1397 i -= n;
1398 break;
1399 }
1400 }
1401 }
1402 SPLIT_ADD(s, 0, j);
1403 FIX_PREALLOC_SIZE(list);
1404 if (PyList_Reverse(list) < 0)
1405 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001406 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001407 return list;
1408
Thomas Wouters477c8d52006-05-27 19:21:47 +00001409onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001410 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001411 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001412 return NULL;
1413}
1414
/* The split helper macros are not used past this point in the file. */
#undef PREALLOC_SIZE
#undef MAX_PREALLOC
#undef SPLIT_ADD
1418
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001420PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001421"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001423Concatenates any number of bytes objects, with B in between each pair.\n\
1424Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425
1426static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001427string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428{
1429 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001430 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001433 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001434 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001435 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001436 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437
Tim Peters19fe14e2001-01-19 03:03:47 +00001438 seq = PySequence_Fast(orig, "");
1439 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001440 return NULL;
1441 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001442
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001443 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001444 if (seqlen == 0) {
1445 Py_DECREF(seq);
1446 return PyString_FromString("");
1447 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001449 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001450 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001451 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001452 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001453 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001454 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001456
Raymond Hettinger674f2412004-08-23 23:23:54 +00001457 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001459 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001460 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001461 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001462 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001463 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001464 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001465 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001466 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001468 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001469 " %.80s found",
Christian Heimes90aa7642007-12-19 02:45:37 +00001470 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001471 Py_DECREF(seq);
1472 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001473 }
Christian Heimes90aa7642007-12-19 02:45:37 +00001474 sz += Py_SIZE(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 if (i != 0)
1476 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001477 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001478 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001479 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001480 Py_DECREF(seq);
1481 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 }
1484
1485 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001486 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001487 if (res == NULL) {
1488 Py_DECREF(seq);
1489 return NULL;
1490 }
1491
1492 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001493 /* I'm not worried about a PyBytes item growing because there's
1494 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 p = PyString_AS_STRING(res);
1496 for (i = 0; i < seqlen; ++i) {
1497 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001498 char *q;
1499 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001500 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001501 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001502 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001503 item = PySequence_Fast_GET_ITEM(seq, i);
Christian Heimes90aa7642007-12-19 02:45:37 +00001504 n = Py_SIZE(item);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 if (PyString_Check(item))
1506 q = PyString_AS_STRING(item);
1507 else
1508 q = PyBytes_AS_STRING(item);
1509 Py_MEMCPY(p, q, n);
1510 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001512
Jeremy Hylton49048292000-07-11 03:28:17 +00001513 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515}
1516
Tim Peters52e155e2001-06-16 05:42:57 +00001517PyObject *
1518_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001519{
Tim Petersa7259592001-06-16 05:11:17 +00001520 assert(sep != NULL && PyString_Check(sep));
1521 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001522 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001523}
1524
Thomas Wouters477c8d52006-05-27 19:21:47 +00001525Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001526string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001527{
1528 if (*end > len)
1529 *end = len;
1530 else if (*end < 0)
1531 *end += len;
1532 if (*end < 0)
1533 *end = 0;
1534 if (*start < 0)
1535 *start += len;
1536 if (*start < 0)
1537 *start = 0;
1538}
1539
Thomas Wouters477c8d52006-05-27 19:21:47 +00001540Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001541string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001544 const char *sub;
1545 Py_ssize_t sub_len;
1546 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001547 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548
Christian Heimes9cd17752007-11-18 19:35:23 +00001549 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1550 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001552 /* To support None in "start" and "end" arguments, meaning
1553 the same as if they were not passed.
1554 */
1555 if (obj_start != Py_None)
1556 if (!_PyEval_SliceIndex(obj_start, &start))
1557 return -2;
1558 if (obj_end != Py_None)
1559 if (!_PyEval_SliceIndex(obj_end, &end))
1560 return -2;
1561
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 if (PyString_Check(subobj)) {
1563 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001567 /* XXX - the "expected a character buffer object" is pretty
1568 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569 return -2;
1570
Thomas Wouters477c8d52006-05-27 19:21:47 +00001571 if (dir > 0)
1572 return stringlib_find_slice(
1573 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1574 sub, sub_len, start, end);
1575 else
1576 return stringlib_rfind_slice(
1577 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1578 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579}
1580
1581
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001582PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001583"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584\n\
1585Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001586such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587arguments start and end are interpreted as in slice notation.\n\
1588\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001589Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
1591static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001592string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001594 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595 if (result == -2)
1596 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001597 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598}
1599
1600
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001601PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001602"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001604Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605
1606static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001607string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001609 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610 if (result == -2)
1611 return NULL;
1612 if (result == -1) {
1613 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001614 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 return NULL;
1616 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001617 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618}
1619
1620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001621PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001622"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001624Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001625such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626arguments start and end are interpreted as in slice notation.\n\
1627\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001628Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629
1630static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001631string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001633 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634 if (result == -2)
1635 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001636 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637}
1638
1639
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001640PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001641"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001643Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644
1645static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001646string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001648 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649 if (result == -2)
1650 return NULL;
1651 if (result == -1) {
1652 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001653 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 return NULL;
1655 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001656 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657}
1658
1659
Thomas Wouters477c8d52006-05-27 19:21:47 +00001660Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001661do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1662{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001663 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001664 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001665 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001666 char *sep;
1667 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001668 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001669
Guido van Rossum98297ee2007-11-06 21:34:58 +00001670 if (_getbuffer(sepobj, &vsep) < 0)
1671 return NULL;
1672 sep = vsep.buf;
1673 seplen = vsep.len;
1674
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001675 i = 0;
1676 if (striptype != RIGHTSTRIP) {
1677 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1678 i++;
1679 }
1680 }
1681
1682 j = len;
1683 if (striptype != LEFTSTRIP) {
1684 do {
1685 j--;
1686 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1687 j++;
1688 }
1689
Guido van Rossum98297ee2007-11-06 21:34:58 +00001690 PyObject_ReleaseBuffer(sepobj, &vsep);
1691
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001692 if (i == 0 && j == len && PyString_CheckExact(self)) {
1693 Py_INCREF(self);
1694 return (PyObject*)self;
1695 }
1696 else
1697 return PyString_FromStringAndSize(s+i, j-i);
1698}
1699
1700
Thomas Wouters477c8d52006-05-27 19:21:47 +00001701Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001702do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703{
1704 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001705 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707 i = 0;
1708 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001709 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710 i++;
1711 }
1712 }
1713
1714 j = len;
1715 if (striptype != LEFTSTRIP) {
1716 do {
1717 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001718 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 j++;
1720 }
1721
Tim Peters8fa5dd02001-09-12 02:18:30 +00001722 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 Py_INCREF(self);
1724 return (PyObject*)self;
1725 }
1726 else
1727 return PyString_FromStringAndSize(s+i, j-i);
1728}
1729
1730
Thomas Wouters477c8d52006-05-27 19:21:47 +00001731Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001732do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1733{
1734 PyObject *sep = NULL;
1735
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001736 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001737 return NULL;
1738
1739 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001740 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001741 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001742 return do_strip(self, striptype);
1743}
1744
1745
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001746PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001747"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001749Strip leading and trailing bytes contained in the argument.\n\
1750If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001752string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001753{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001754 if (PyTuple_GET_SIZE(args) == 0)
1755 return do_strip(self, BOTHSTRIP); /* Common case */
1756 else
1757 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758}
1759
1760
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001761PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001762"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001764Strip leading bytes contained in the argument.\n\
1765If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001767string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001769 if (PyTuple_GET_SIZE(args) == 0)
1770 return do_strip(self, LEFTSTRIP); /* Common case */
1771 else
1772 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773}
1774
1775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001776PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001777"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001779Strip trailing bytes contained in the argument.\n\
1780If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001782string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001784 if (PyTuple_GET_SIZE(args) == 0)
1785 return do_strip(self, RIGHTSTRIP); /* Common case */
1786 else
1787 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788}
1789
1790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001792"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001794Return the number of non-overlapping occurrences of substring sub in\n\
1795string S[start:end]. Optional arguments start and end are interpreted\n\
1796as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797
1798static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001799string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001801 PyObject *sub_obj;
1802 const char *str = PyString_AS_STRING(self), *sub;
1803 Py_ssize_t sub_len;
1804 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805
Thomas Wouters477c8d52006-05-27 19:21:47 +00001806 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1807 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001809
Thomas Wouters477c8d52006-05-27 19:21:47 +00001810 if (PyString_Check(sub_obj)) {
1811 sub = PyString_AS_STRING(sub_obj);
1812 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001813 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001814 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001815 return NULL;
1816
Thomas Wouters477c8d52006-05-27 19:21:47 +00001817 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001818
Christian Heimes217cfd12007-12-02 14:31:20 +00001819 return PyLong_FromSsize_t(
Thomas Wouters477c8d52006-05-27 19:21:47 +00001820 stringlib_count(str + start, end - start, sub, sub_len)
1821 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822}
1823
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001826"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001828Return a copy of B, where all characters occurring in the\n\
1829optional argument deletechars are removed, and the remaining\n\
1830characters have been mapped through the given translation\n\
1831table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832
1833static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001834string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001837 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001838 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001840 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001841 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 PyObject *result;
1843 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001844 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001846 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001847 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849
1850 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001851 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852 tablen = PyString_GET_SIZE(tableobj);
1853 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001854 else if (tableobj == Py_None) {
1855 table = NULL;
1856 tablen = 256;
1857 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001858 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001860
Martin v. Löwis00b61272002-12-12 20:03:19 +00001861 if (tablen != 256) {
1862 PyErr_SetString(PyExc_ValueError,
1863 "translation table must be 256 characters long");
1864 return NULL;
1865 }
1866
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867 if (delobj != NULL) {
1868 if (PyString_Check(delobj)) {
1869 del_table = PyString_AS_STRING(delobj);
1870 dellen = PyString_GET_SIZE(delobj);
1871 }
1872 else if (PyUnicode_Check(delobj)) {
1873 PyErr_SetString(PyExc_TypeError,
1874 "deletions are implemented differently for unicode");
1875 return NULL;
1876 }
1877 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1878 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 }
1880 else {
1881 del_table = NULL;
1882 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883 }
1884
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001885 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886 result = PyString_FromStringAndSize((char *)NULL, inlen);
1887 if (result == NULL)
1888 return NULL;
1889 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001890 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891
Guido van Rossumd8faa362007-04-27 19:54:29 +00001892 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893 /* If no deletions are required, use faster code */
1894 for (i = inlen; --i >= 0; ) {
1895 c = Py_CHARMASK(*input++);
1896 if (Py_CHARMASK((*output++ = table[c])) != c)
1897 changed = 1;
1898 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001899 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 return result;
1901 Py_DECREF(result);
1902 Py_INCREF(input_obj);
1903 return input_obj;
1904 }
1905
Guido van Rossumd8faa362007-04-27 19:54:29 +00001906 if (table == NULL) {
1907 for (i = 0; i < 256; i++)
1908 trans_table[i] = Py_CHARMASK(i);
1909 } else {
1910 for (i = 0; i < 256; i++)
1911 trans_table[i] = Py_CHARMASK(table[i]);
1912 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
1914 for (i = 0; i < dellen; i++)
1915 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1916
1917 for (i = inlen; --i >= 0; ) {
1918 c = Py_CHARMASK(*input++);
1919 if (trans_table[c] != -1)
1920 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1921 continue;
1922 changed = 1;
1923 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001924 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 Py_DECREF(result);
1926 Py_INCREF(input_obj);
1927 return input_obj;
1928 }
1929 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001930 if (inlen > 0)
1931 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932 return result;
1933}
1934
1935
/* Direction constants; presumably meant for the `direction` argument of
   the search helpers below, which only test its sign -- confirm. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* findchar: memchr() over the first target_len bytes of target, with the
   result cast to char * (NULL when c does not occur). */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943
Thomas Wouters477c8d52006-05-27 19:21:47 +00001944/* String ops must return a string. */
1945/* If the object is subclass of string, create a copy */
1946Py_LOCAL(PyStringObject *)
1947return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001949 if (PyString_CheckExact(self)) {
1950 Py_INCREF(self);
1951 return self;
1952 }
1953 return (PyStringObject *)PyString_FromStringAndSize(
1954 PyString_AS_STRING(self),
1955 PyString_GET_SIZE(self));
1956}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957
Thomas Wouters477c8d52006-05-27 19:21:47 +00001958Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001959countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001960{
1961 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001962 const char *start=target;
1963 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964
Thomas Wouters477c8d52006-05-27 19:21:47 +00001965 while ( (start=findchar(start, end-start, c)) != NULL ) {
1966 count++;
1967 if (count >= maxcount)
1968 break;
1969 start += 1;
1970 }
1971 return count;
1972}
1973
1974Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001975findstring(const char *target, Py_ssize_t target_len,
1976 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001977 Py_ssize_t start,
1978 Py_ssize_t end,
1979 int direction)
1980{
1981 if (start < 0) {
1982 start += target_len;
1983 if (start < 0)
1984 start = 0;
1985 }
1986 if (end > target_len) {
1987 end = target_len;
1988 } else if (end < 0) {
1989 end += target_len;
1990 if (end < 0)
1991 end = 0;
1992 }
1993
1994 /* zero-length substrings always match at the first attempt */
1995 if (pattern_len == 0)
1996 return (direction > 0) ? start : end;
1997
1998 end -= pattern_len;
1999
2000 if (direction < 0) {
2001 for (; end >= start; end--)
2002 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2003 return end;
2004 } else {
2005 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002006 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002007 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 }
2009 return -1;
2010}
2011
Thomas Wouters477c8d52006-05-27 19:21:47 +00002012Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002013countstring(const char *target, Py_ssize_t target_len,
2014 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002015 Py_ssize_t start,
2016 Py_ssize_t end,
2017 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002019 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020
Thomas Wouters477c8d52006-05-27 19:21:47 +00002021 if (start < 0) {
2022 start += target_len;
2023 if (start < 0)
2024 start = 0;
2025 }
2026 if (end > target_len) {
2027 end = target_len;
2028 } else if (end < 0) {
2029 end += target_len;
2030 if (end < 0)
2031 end = 0;
2032 }
2033
2034 /* zero-length substrings match everywhere */
2035 if (pattern_len == 0 || maxcount == 0) {
2036 if (target_len+1 < maxcount)
2037 return target_len+1;
2038 return maxcount;
2039 }
2040
2041 end -= pattern_len;
2042 if (direction < 0) {
2043 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002044 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002045 count++;
2046 if (--maxcount <= 0) break;
2047 end -= pattern_len-1;
2048 }
2049 } else {
2050 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002051 if (Py_STRING_MATCH(target, start,
2052 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002053 count++;
2054 if (--maxcount <= 0)
2055 break;
2056 start += pattern_len-1;
2057 }
2058 }
2059 return count;
2060}
2061
2062
2063/* Algorithms for different cases of string replacement */
2064
2065/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2066Py_LOCAL(PyStringObject *)
2067replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002068 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002069 Py_ssize_t maxcount)
2070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002071 char *self_s, *result_s;
2072 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002073 Py_ssize_t count, i, product;
2074 PyStringObject *result;
2075
2076 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002077
Thomas Wouters477c8d52006-05-27 19:21:47 +00002078 /* 1 at the end plus 1 after every character */
2079 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002080 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002081 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002082
Thomas Wouters477c8d52006-05-27 19:21:47 +00002083 /* Check for overflow */
2084 /* result_len = count * to_len + self_len; */
2085 product = count * to_len;
2086 if (product / to_len != count) {
2087 PyErr_SetString(PyExc_OverflowError,
2088 "replace string is too long");
2089 return NULL;
2090 }
2091 result_len = product + self_len;
2092 if (result_len < 0) {
2093 PyErr_SetString(PyExc_OverflowError,
2094 "replace string is too long");
2095 return NULL;
2096 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002097
Thomas Wouters477c8d52006-05-27 19:21:47 +00002098 if (! (result = (PyStringObject *)
2099 PyString_FromStringAndSize(NULL, result_len)) )
2100 return NULL;
2101
2102 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002103 result_s = PyString_AS_STRING(result);
2104
2105 /* TODO: special case single character, which doesn't need memcpy */
2106
2107 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002108 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002109 result_s += to_len;
2110 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002111
Thomas Wouters477c8d52006-05-27 19:21:47 +00002112 for (i=0; i<count; i++) {
2113 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002114 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115 result_s += to_len;
2116 }
2117
2118 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002119 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002120
2121 return result;
2122}
2123
2124/* Special case for deleting a single character */
2125/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2126Py_LOCAL(PyStringObject *)
2127replace_delete_single_character(PyStringObject *self,
2128 char from_c, Py_ssize_t maxcount)
2129{
2130 char *self_s, *result_s;
2131 char *start, *next, *end;
2132 Py_ssize_t self_len, result_len;
2133 Py_ssize_t count;
2134 PyStringObject *result;
2135
2136 self_len = PyString_GET_SIZE(self);
2137 self_s = PyString_AS_STRING(self);
2138
2139 count = countchar(self_s, self_len, from_c, maxcount);
2140 if (count == 0) {
2141 return return_self(self);
2142 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002143
Thomas Wouters477c8d52006-05-27 19:21:47 +00002144 result_len = self_len - count; /* from_len == 1 */
2145 assert(result_len>=0);
2146
2147 if ( (result = (PyStringObject *)
2148 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2149 return NULL;
2150 result_s = PyString_AS_STRING(result);
2151
2152 start = self_s;
2153 end = self_s + self_len;
2154 while (count-- > 0) {
2155 next = findchar(start, end-start, from_c);
2156 if (next == NULL)
2157 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002158 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002159 result_s += (next-start);
2160 start = next+1;
2161 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002162 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002163
Thomas Wouters477c8d52006-05-27 19:21:47 +00002164 return result;
2165}
2166
2167/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2168
2169Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002170replace_delete_substring(PyStringObject *self,
2171 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002172 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002173 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002174 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002175 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002176 Py_ssize_t count, offset;
2177 PyStringObject *result;
2178
2179 self_len = PyString_GET_SIZE(self);
2180 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002181
2182 count = countstring(self_s, self_len,
2183 from_s, from_len,
2184 0, self_len, 1,
2185 maxcount);
2186
2187 if (count == 0) {
2188 /* no matches */
2189 return return_self(self);
2190 }
2191
2192 result_len = self_len - (count * from_len);
2193 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002194
Thomas Wouters477c8d52006-05-27 19:21:47 +00002195 if ( (result = (PyStringObject *)
2196 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2197 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002198
Thomas Wouters477c8d52006-05-27 19:21:47 +00002199 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002200
Thomas Wouters477c8d52006-05-27 19:21:47 +00002201 start = self_s;
2202 end = self_s + self_len;
2203 while (count-- > 0) {
2204 offset = findstring(start, end-start,
2205 from_s, from_len,
2206 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 if (offset == -1)
2208 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002209 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002210
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002211 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002212
Thomas Wouters477c8d52006-05-27 19:21:47 +00002213 result_s += (next-start);
2214 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002216 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002217 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218}
2219
Thomas Wouters477c8d52006-05-27 19:21:47 +00002220/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2221Py_LOCAL(PyStringObject *)
2222replace_single_character_in_place(PyStringObject *self,
2223 char from_c, char to_c,
2224 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002226 char *self_s, *result_s, *start, *end, *next;
2227 Py_ssize_t self_len;
2228 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002229
Thomas Wouters477c8d52006-05-27 19:21:47 +00002230 /* The result string will be the same size */
2231 self_s = PyString_AS_STRING(self);
2232 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002233
Thomas Wouters477c8d52006-05-27 19:21:47 +00002234 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002235
Thomas Wouters477c8d52006-05-27 19:21:47 +00002236 if (next == NULL) {
2237 /* No matches; return the original string */
2238 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002240
Thomas Wouters477c8d52006-05-27 19:21:47 +00002241 /* Need to make a new string */
2242 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2243 if (result == NULL)
2244 return NULL;
2245 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002246 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002247
Thomas Wouters477c8d52006-05-27 19:21:47 +00002248 /* change everything in-place, starting with this one */
2249 start = result_s + (next-self_s);
2250 *start = to_c;
2251 start++;
2252 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002253
Thomas Wouters477c8d52006-05-27 19:21:47 +00002254 while (--maxcount > 0) {
2255 next = findchar(start, end-start, from_c);
2256 if (next == NULL)
2257 break;
2258 *next = to_c;
2259 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002260 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002261
Thomas Wouters477c8d52006-05-27 19:21:47 +00002262 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263}
2264
Thomas Wouters477c8d52006-05-27 19:21:47 +00002265/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2266Py_LOCAL(PyStringObject *)
2267replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268 const char *from_s, Py_ssize_t from_len,
2269 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002270 Py_ssize_t maxcount)
2271{
2272 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002273 char *self_s;
2274 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002275 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002276
Thomas Wouters477c8d52006-05-27 19:21:47 +00002277 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002278
Thomas Wouters477c8d52006-05-27 19:21:47 +00002279 self_s = PyString_AS_STRING(self);
2280 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002281
Thomas Wouters477c8d52006-05-27 19:21:47 +00002282 offset = findstring(self_s, self_len,
2283 from_s, from_len,
2284 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 if (offset == -1) {
2286 /* No matches; return the original string */
2287 return return_self(self);
2288 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002289
Thomas Wouters477c8d52006-05-27 19:21:47 +00002290 /* Need to make a new string */
2291 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2292 if (result == NULL)
2293 return NULL;
2294 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002295 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002296
Thomas Wouters477c8d52006-05-27 19:21:47 +00002297 /* change everything in-place, starting with this one */
2298 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002299 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002300 start += from_len;
2301 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002302
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303 while ( --maxcount > 0) {
2304 offset = findstring(start, end-start,
2305 from_s, from_len,
2306 0, end-start, FORWARD);
2307 if (offset==-1)
2308 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002309 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002310 start += offset+from_len;
2311 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002312
Thomas Wouters477c8d52006-05-27 19:21:47 +00002313 return result;
2314}
2315
2316/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2317Py_LOCAL(PyStringObject *)
2318replace_single_character(PyStringObject *self,
2319 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002320 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002321 Py_ssize_t maxcount)
2322{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002323 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002324 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002325 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002326 Py_ssize_t count, product;
2327 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002328
Thomas Wouters477c8d52006-05-27 19:21:47 +00002329 self_s = PyString_AS_STRING(self);
2330 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002331
Thomas Wouters477c8d52006-05-27 19:21:47 +00002332 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002333 if (count == 0) {
2334 /* no matches, return unchanged */
2335 return return_self(self);
2336 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002337
Thomas Wouters477c8d52006-05-27 19:21:47 +00002338 /* use the difference between current and new, hence the "-1" */
2339 /* result_len = self_len + count * (to_len-1) */
2340 product = count * (to_len-1);
2341 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002342 PyErr_SetString(PyExc_OverflowError,
2343 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002344 return NULL;
2345 }
2346 result_len = self_len + product;
2347 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002348 PyErr_SetString(PyExc_OverflowError,
2349 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002350 return NULL;
2351 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002352
Thomas Wouters477c8d52006-05-27 19:21:47 +00002353 if ( (result = (PyStringObject *)
2354 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2355 return NULL;
2356 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002357
Thomas Wouters477c8d52006-05-27 19:21:47 +00002358 start = self_s;
2359 end = self_s + self_len;
2360 while (count-- > 0) {
2361 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002362 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002363 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 if (next == start) {
2366 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002367 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002368 result_s += to_len;
2369 start += 1;
2370 } else {
2371 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002372 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002373 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002374 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002375 result_s += to_len;
2376 start = next+1;
2377 }
2378 }
2379 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002380 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002381
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 return result;
2383}
2384
2385/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2386Py_LOCAL(PyStringObject *)
2387replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002388 const char *from_s, Py_ssize_t from_len,
2389 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002390 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002391 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002392 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002393 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002394 Py_ssize_t count, offset, product;
2395 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002396
Thomas Wouters477c8d52006-05-27 19:21:47 +00002397 self_s = PyString_AS_STRING(self);
2398 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002399
Thomas Wouters477c8d52006-05-27 19:21:47 +00002400 count = countstring(self_s, self_len,
2401 from_s, from_len,
2402 0, self_len, FORWARD, maxcount);
2403 if (count == 0) {
2404 /* no matches, return unchanged */
2405 return return_self(self);
2406 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002407
Thomas Wouters477c8d52006-05-27 19:21:47 +00002408 /* Check for overflow */
2409 /* result_len = self_len + count * (to_len-from_len) */
2410 product = count * (to_len-from_len);
2411 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002412 PyErr_SetString(PyExc_OverflowError,
2413 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002414 return NULL;
2415 }
2416 result_len = self_len + product;
2417 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002418 PyErr_SetString(PyExc_OverflowError,
2419 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002420 return NULL;
2421 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002422
Thomas Wouters477c8d52006-05-27 19:21:47 +00002423 if ( (result = (PyStringObject *)
2424 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2425 return NULL;
2426 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002427
Thomas Wouters477c8d52006-05-27 19:21:47 +00002428 start = self_s;
2429 end = self_s + self_len;
2430 while (count-- > 0) {
2431 offset = findstring(start, end-start,
2432 from_s, from_len,
2433 0, end-start, FORWARD);
2434 if (offset == -1)
2435 break;
2436 next = start+offset;
2437 if (next == start) {
2438 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002439 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002440 result_s += to_len;
2441 start += from_len;
2442 } else {
2443 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002444 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002445 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002446 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002447 result_s += to_len;
2448 start = next+from_len;
2449 }
2450 }
2451 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002452 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002453
Thomas Wouters477c8d52006-05-27 19:21:47 +00002454 return result;
2455}
2456
2457
2458Py_LOCAL(PyStringObject *)
2459replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002460 const char *from_s, Py_ssize_t from_len,
2461 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002462 Py_ssize_t maxcount)
2463{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002464 if (maxcount < 0) {
2465 maxcount = PY_SSIZE_T_MAX;
2466 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2467 /* nothing to do; return the original string */
2468 return return_self(self);
2469 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002470
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471 if (maxcount == 0 ||
2472 (from_len == 0 && to_len == 0)) {
2473 /* nothing to do; return the original string */
2474 return return_self(self);
2475 }
2476
2477 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002478
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479 if (from_len == 0) {
2480 /* insert the 'to' string everywhere. */
2481 /* >>> "Python".replace("", ".") */
2482 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002483 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002484 }
2485
2486 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2487 /* point for an empty self string to generate a non-empty string */
2488 /* Special case so the remaining code always gets a non-empty string */
2489 if (PyString_GET_SIZE(self) == 0) {
2490 return return_self(self);
2491 }
2492
2493 if (to_len == 0) {
2494 /* delete all occurances of 'from' string */
2495 if (from_len == 1) {
2496 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002497 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002498 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002499 return replace_delete_substring(self, from_s,
2500 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002501 }
2502 }
2503
2504 /* Handle special case where both strings have the same length */
2505
2506 if (from_len == to_len) {
2507 if (from_len == 1) {
2508 return replace_single_character_in_place(
2509 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002510 from_s[0],
2511 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002512 maxcount);
2513 } else {
2514 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002515 self, from_s, from_len, to_s, to_len,
2516 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002517 }
2518 }
2519
2520 /* Otherwise use the more generic algorithms */
2521 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002522 return replace_single_character(self, from_s[0],
2523 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002524 } else {
2525 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002526 return replace_substring(self, from_s, from_len, to_s, to_len,
2527 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002528 }
2529}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002531PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002532"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002534Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002535old replaced by new. If the optional argument count is\n\
2536given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
2538static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002539string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002541 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002542 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002543 const char *from_s, *to_s;
2544 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002545
Thomas Wouters477c8d52006-05-27 19:21:47 +00002546 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002548
Thomas Wouters477c8d52006-05-27 19:21:47 +00002549 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002550 from_s = PyString_AS_STRING(from);
2551 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002552 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002553 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 return NULL;
2555
Thomas Wouters477c8d52006-05-27 19:21:47 +00002556 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002557 to_s = PyString_AS_STRING(to);
2558 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 return NULL;
2562
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002564 from_s, from_len,
2565 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566}
2567
Thomas Wouters477c8d52006-05-27 19:21:47 +00002568/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002569
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002570/* Matches the end (direction >= 0) or start (direction < 0) of self
2571 * against substr, using the start and end arguments. Returns
2572 * -1 on error, 0 if not found and 1 if found.
2573 */
2574Py_LOCAL(int)
2575_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2576 Py_ssize_t end, int direction)
2577{
2578 Py_ssize_t len = PyString_GET_SIZE(self);
2579 Py_ssize_t slen;
2580 const char* sub;
2581 const char* str;
2582
2583 if (PyString_Check(substr)) {
2584 sub = PyString_AS_STRING(substr);
2585 slen = PyString_GET_SIZE(substr);
2586 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2588 return -1;
2589 str = PyString_AS_STRING(self);
2590
2591 string_adjust_indices(&start, &end, len);
2592
2593 if (direction < 0) {
2594 /* startswith */
2595 if (start+slen > len)
2596 return 0;
2597 } else {
2598 /* endswith */
2599 if (end-start < slen || start > len)
2600 return 0;
2601
2602 if (end-slen > start)
2603 start = end - slen;
2604 }
2605 if (end-start >= slen)
2606 return ! memcmp(str+start, sub, slen);
2607 return 0;
2608}
2609
2610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002611PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002612"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002614Return True if B starts with the specified prefix, False otherwise.\n\
2615With optional start, test B beginning at that position.\n\
2616With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002617prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618
2619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002620string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002622 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002623 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002625 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002626
Guido van Rossumc6821402000-05-08 14:08:05 +00002627 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2628 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002629 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002630 if (PyTuple_Check(subobj)) {
2631 Py_ssize_t i;
2632 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2633 result = _string_tailmatch(self,
2634 PyTuple_GET_ITEM(subobj, i),
2635 start, end, -1);
2636 if (result == -1)
2637 return NULL;
2638 else if (result) {
2639 Py_RETURN_TRUE;
2640 }
2641 }
2642 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002644 result = _string_tailmatch(self, subobj, start, end, -1);
2645 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002646 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002647 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002648 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002649}
2650
2651
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002652PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002653"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002655Return True if B ends with the specified suffix, False otherwise.\n\
2656With optional start, test B beginning at that position.\n\
2657With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002658suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659
2660static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002661string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002663 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002664 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002666 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002667
Guido van Rossumc6821402000-05-08 14:08:05 +00002668 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2669 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002670 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002671 if (PyTuple_Check(subobj)) {
2672 Py_ssize_t i;
2673 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2674 result = _string_tailmatch(self,
2675 PyTuple_GET_ITEM(subobj, i),
2676 start, end, +1);
2677 if (result == -1)
2678 return NULL;
2679 else if (result) {
2680 Py_RETURN_TRUE;
2681 }
2682 }
2683 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002685 result = _string_tailmatch(self, subobj, start, end, +1);
2686 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002688 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002690}
2691
2692
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002693PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002694"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002695\n\
2696Decodes S using the codec registered for encoding. encoding defaults\n\
2697to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002698handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2699a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002700as well as any other name registerd with codecs.register_error that is\n\
2701able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002702
2703static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002704string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002705{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002706 const char *encoding = NULL;
2707 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002708
Guido van Rossum98297ee2007-11-06 21:34:58 +00002709 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2710 return NULL;
2711 if (encoding == NULL)
2712 encoding = PyUnicode_GetDefaultEncoding();
2713 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002714}
2715
2716
Guido van Rossumae404e22007-10-26 21:46:44 +00002717PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002718"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002719\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002720Create a bytes object from a string of hexadecimal numbers.\n\
2721Spaces between two numbers are accepted.\n\
2722Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002723
2724static int
2725hex_digit_to_int(Py_UNICODE c)
2726{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727 if (c >= 128)
2728 return -1;
2729 if (ISDIGIT(c))
2730 return c - '0';
2731 else {
2732 if (ISUPPER(c))
2733 c = TOLOWER(c);
2734 if (c >= 'a' && c <= 'f')
2735 return c - 'a' + 10;
2736 }
2737 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002738}
2739
2740static PyObject *
2741string_fromhex(PyObject *cls, PyObject *args)
2742{
2743 PyObject *newstring, *hexobj;
2744 char *buf;
2745 Py_UNICODE *hex;
2746 Py_ssize_t hexlen, byteslen, i, j;
2747 int top, bot;
2748
2749 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2750 return NULL;
2751 assert(PyUnicode_Check(hexobj));
2752 hexlen = PyUnicode_GET_SIZE(hexobj);
2753 hex = PyUnicode_AS_UNICODE(hexobj);
2754 byteslen = hexlen/2; /* This overestimates if there are spaces */
2755 newstring = PyString_FromStringAndSize(NULL, byteslen);
2756 if (!newstring)
2757 return NULL;
2758 buf = PyString_AS_STRING(newstring);
2759 for (i = j = 0; i < hexlen; i += 2) {
2760 /* skip over spaces in the input */
2761 while (hex[i] == ' ')
2762 i++;
2763 if (i >= hexlen)
2764 break;
2765 top = hex_digit_to_int(hex[i]);
2766 bot = hex_digit_to_int(hex[i+1]);
2767 if (top == -1 || bot == -1) {
2768 PyErr_Format(PyExc_ValueError,
2769 "non-hexadecimal number found in "
2770 "fromhex() arg at position %zd", i);
2771 goto error;
2772 }
2773 buf[j++] = (top << 4) + bot;
2774 }
2775 if (_PyString_Resize(&newstring, j) < 0)
2776 goto error;
2777 return newstring;
2778
2779 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002780 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002781 return NULL;
2782}
2783
2784
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002785static PyObject *
2786string_getnewargs(PyStringObject *v)
2787{
Christian Heimes90aa7642007-12-19 02:45:37 +00002788 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002789}
2790
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002791
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002792static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002793string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002794 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002795 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2796 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002797 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002798 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002799 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002800 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002801 endswith__doc__},
2802 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2803 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002804 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002805 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2806 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002807 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002808 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2809 _Py_isalnum__doc__},
2810 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2811 _Py_isalpha__doc__},
2812 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2813 _Py_isdigit__doc__},
2814 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2815 _Py_islower__doc__},
2816 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2817 _Py_isspace__doc__},
2818 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2819 _Py_istitle__doc__},
2820 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2821 _Py_isupper__doc__},
2822 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2823 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2824 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002825 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002826 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002827 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2828 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2829 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002830 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002831 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2832 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002833 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2834 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2835 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2836 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2837 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002838 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002839 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002840 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002841 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2842 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002843 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002844 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2845 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002846 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002847 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002848 {NULL, NULL} /* sentinel */
2849};
2850
Jeremy Hylton938ace62002-07-17 16:30:39 +00002851static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002852str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2853
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002854static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002855string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002856{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002857 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002858 const char *encoding = NULL;
2859 const char *errors = NULL;
2860 PyObject *new = NULL;
2861 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002862 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002863
Guido van Rossumae960af2001-08-30 03:11:59 +00002864 if (type != &PyString_Type)
2865 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002866 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002867 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002868 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002869 if (x == NULL) {
2870 if (encoding != NULL || errors != NULL) {
2871 PyErr_SetString(PyExc_TypeError,
2872 "encoding or errors without sequence "
2873 "argument");
2874 return NULL;
2875 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002876 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002877 }
2878
2879 if (PyUnicode_Check(x)) {
2880 /* Encode via the codec registry */
2881 if (encoding == NULL) {
2882 PyErr_SetString(PyExc_TypeError,
2883 "string argument without an encoding");
2884 return NULL;
2885 }
2886 new = PyCodec_Encode(x, encoding, errors);
2887 if (new == NULL)
2888 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002889 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002890 return new;
2891 }
2892
2893 /* If it's not unicode, there can't be encoding or errors */
2894 if (encoding != NULL || errors != NULL) {
2895 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002896 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002897 return NULL;
2898 }
2899
Guido van Rossum98297ee2007-11-06 21:34:58 +00002900 /* Is it an int? */
2901 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2902 if (size == -1 && PyErr_Occurred()) {
2903 PyErr_Clear();
2904 }
2905 else {
2906 if (size < 0) {
2907 PyErr_SetString(PyExc_ValueError, "negative count");
2908 return NULL;
2909 }
2910 new = PyString_FromStringAndSize(NULL, size);
2911 if (new == NULL) {
2912 return NULL;
2913 }
2914 if (size > 0) {
2915 memset(((PyStringObject*)new)->ob_sval, 0, size);
2916 }
2917 return new;
2918 }
2919
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002920 /* Use the modern buffer interface */
2921 if (PyObject_CheckBuffer(x)) {
2922 Py_buffer view;
2923 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2924 return NULL;
2925 new = PyString_FromStringAndSize(NULL, view.len);
2926 if (!new)
2927 goto fail;
2928 // XXX(brett.cannon): Better way to get to internal buffer?
2929 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2930 &view, view.len, 'C') < 0)
2931 goto fail;
2932 PyObject_ReleaseBuffer(x, &view);
2933 return new;
2934 fail:
2935 Py_XDECREF(new);
2936 PyObject_ReleaseBuffer(x, &view);
2937 return NULL;
2938 }
2939
Guido van Rossum98297ee2007-11-06 21:34:58 +00002940 /* For iterator version, create a string object and resize as needed */
2941 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2942 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2943 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002944 size = 64;
2945 new = PyString_FromStringAndSize(NULL, size);
2946 if (new == NULL)
2947 return NULL;
2948
2949 /* XXX Optimize this if the arguments is a list, tuple */
2950
2951 /* Get the iterator */
2952 it = PyObject_GetIter(x);
2953 if (it == NULL)
2954 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002955
2956 /* Run the iterator to exhaustion */
2957 for (i = 0; ; i++) {
2958 PyObject *item;
2959 Py_ssize_t value;
2960
2961 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002962 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002963 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002964 if (PyErr_Occurred())
2965 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002966 break;
2967 }
2968
2969 /* Interpret it as an int (__index__) */
2970 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2971 Py_DECREF(item);
2972 if (value == -1 && PyErr_Occurred())
2973 goto error;
2974
2975 /* Range check */
2976 if (value < 0 || value >= 256) {
2977 PyErr_SetString(PyExc_ValueError,
2978 "bytes must be in range(0, 256)");
2979 goto error;
2980 }
2981
2982 /* Append the byte */
2983 if (i >= size) {
2984 size *= 2;
2985 if (_PyString_Resize(&new, size) < 0)
2986 goto error;
2987 }
2988 ((PyStringObject *)new)->ob_sval[i] = value;
2989 }
2990 _PyString_Resize(&new, i);
2991
2992 /* Clean up and return success */
2993 Py_DECREF(it);
2994 return new;
2995
2996 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002997 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002998 Py_XDECREF(it);
2999 Py_DECREF(new);
3000 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003001}
3002
Guido van Rossumae960af2001-08-30 03:11:59 +00003003static PyObject *
3004str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3005{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003006 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003007 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003008
3009 assert(PyType_IsSubtype(type, &PyString_Type));
3010 tmp = string_new(&PyString_Type, args, kwds);
3011 if (tmp == NULL)
3012 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003013 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003014 n = PyString_GET_SIZE(tmp);
3015 pnew = type->tp_alloc(type, n);
3016 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003017 Py_MEMCPY(PyString_AS_STRING(pnew),
3018 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003019 ((PyStringObject *)pnew)->ob_shash =
3020 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003021 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003022 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003023 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003024}
3025
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003026PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003027"bytes(iterable_of_ints) -> bytes.\n\
3028bytes(string, encoding[, errors]) -> bytes\n\
3029bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3030bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003031\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003032Construct an immutable array of bytes from:\n\
3033 - an iterable yielding integers in range(256)\n\
3034 - a text string encoded using the specified encoding\n\
3035 - a bytes or a buffer object\n\
3036 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003037
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003038static PyObject *str_iter(PyObject *seq);
3039
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003040PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003041 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum98297ee2007-11-06 21:34:58 +00003042 "bytes",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003043 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003044 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003045 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00003046 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003047 0, /* tp_getattr */
3048 0, /* tp_setattr */
3049 0, /* tp_compare */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003050 (reprfunc)string_repr, /* tp_repr */
3051 0, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003052 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003053 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003054 (hashfunc)string_hash, /* tp_hash */
3055 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003056 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003057 PyObject_GenericGetAttr, /* tp_getattro */
3058 0, /* tp_setattro */
3059 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00003060 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3061 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003062 string_doc, /* tp_doc */
3063 0, /* tp_traverse */
3064 0, /* tp_clear */
3065 (richcmpfunc)string_richcompare, /* tp_richcompare */
3066 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003067 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003068 0, /* tp_iternext */
3069 string_methods, /* tp_methods */
3070 0, /* tp_members */
3071 0, /* tp_getset */
Guido van Rossum3172c5d2007-10-16 18:12:55 +00003072 &PyBaseObject_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003073 0, /* tp_dict */
3074 0, /* tp_descr_get */
3075 0, /* tp_descr_set */
3076 0, /* tp_dictoffset */
3077 0, /* tp_init */
3078 0, /* tp_alloc */
3079 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003080 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003081};
3082
3083void
Fred Drakeba096332000-07-09 07:04:36 +00003084PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003085{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003086 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003087 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003088 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003089 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003090 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003092 *pv = NULL;
3093 return;
3094 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003095 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003096 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003097 *pv = v;
3098}
3099
Guido van Rossum013142a1994-08-30 08:19:36 +00003100void
Fred Drakeba096332000-07-09 07:04:36 +00003101PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003102{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003103 PyString_Concat(pv, w);
3104 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003105}
3106
3107
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003108/* The following function breaks the notion that strings are immutable:
3109 it changes the size of a string. We get away with this only if there
3110 is only one module referencing the object. You can also think of it
3111 as creating a new string object and destroying the old one, only
3112 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003113 already be known to some other part of the code...
3114 Note that if there's not enough memory to resize the string, the original
3115 string object at *pv is deallocated, *pv is set to NULL, an "out of
3116 memory" exception is set, and -1 is returned. Else (on success) 0 is
3117 returned, and the value in *pv may or may not be the same as on input.
3118 As always, an extra byte is allocated for a trailing \0 byte (newsize
3119 does *not* include that), and a trailing \0 byte is stored.
3120*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003121
3122int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003123_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003124{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003125 register PyObject *v;
3126 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003127 v = *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003128 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003129 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003130 Py_DECREF(v);
3131 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003132 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003133 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003134 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003135 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136 _Py_ForgetReference(v);
3137 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003138 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003139 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003140 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003141 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003142 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 _Py_NewReference(*pv);
3145 sv = (PyStringObject *) *pv;
Christian Heimes90aa7642007-12-19 02:45:37 +00003146 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003147 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003148 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003149 return 0;
3150}
Guido van Rossume5372401993-03-16 12:15:04 +00003151
Tim Peters38fd5b62000-09-21 05:43:11 +00003152/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3153 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3154 * Python's regular ints.
3155 * Return value: a new PyString*, or NULL if error.
3156 * . *pbuf is set to point into it,
3157 * *plen set to the # of chars following that.
3158 * Caller must decref it when done using pbuf.
3159 * The string starting at *pbuf is of the form
3160 * "-"? ("0x" | "0X")? digit+
3161 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003162 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003163 * There will be at least prec digits, zero-filled on the left if
3164 * necessary to get that many.
3165 * val object to be converted
3166 * flags bitmask of format flags; only F_ALT is looked at
3167 * prec minimum number of digits; 0-fill on left if needed
3168 * type a character in [duoxX]; u acts the same as d
3169 *
3170 * CAUTION: o, x and X conversions on regular ints can never
3171 * produce a '-' sign, but can for Python's unbounded ints.
3172 */
3173PyObject*
3174_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3175 char **pbuf, int *plen)
3176{
3177 PyObject *result = NULL;
3178 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003179 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003180 int sign; /* 1 if '-', else 0 */
3181 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003182 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003183 int numdigits; /* len == numnondigits + numdigits */
3184 int numnondigits = 0;
3185
Guido van Rossumddefaf32007-01-14 03:31:43 +00003186 /* Avoid exceeding SSIZE_T_MAX */
3187 if (prec > PY_SSIZE_T_MAX-3) {
3188 PyErr_SetString(PyExc_OverflowError,
3189 "precision too large");
3190 return NULL;
3191 }
3192
Tim Peters38fd5b62000-09-21 05:43:11 +00003193 switch (type) {
3194 case 'd':
3195 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003196 /* Special-case boolean: we want 0/1 */
3197 if (PyBool_Check(val))
3198 result = PyNumber_ToBase(val, 10);
3199 else
Christian Heimes90aa7642007-12-19 02:45:37 +00003200 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003201 break;
3202 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003203 numnondigits = 2;
3204 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003205 break;
3206 case 'x':
3207 case 'X':
3208 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003209 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003210 break;
3211 default:
3212 assert(!"'type' not in [duoxX]");
3213 }
3214 if (!result)
3215 return NULL;
3216
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003217 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003218 if (!buf) {
3219 Py_DECREF(result);
3220 return NULL;
3221 }
3222
Tim Peters38fd5b62000-09-21 05:43:11 +00003223 /* To modify the string in-place, there can only be one reference. */
Christian Heimes90aa7642007-12-19 02:45:37 +00003224 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003225 PyErr_BadInternalCall();
3226 return NULL;
3227 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003228 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003229 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003230 PyErr_SetString(PyExc_ValueError,
3231 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003232 return NULL;
3233 }
3234 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003235 if (buf[len-1] == 'L') {
3236 --len;
3237 buf[len] = '\0';
3238 }
3239 sign = buf[0] == '-';
3240 numnondigits += sign;
3241 numdigits = len - numnondigits;
3242 assert(numdigits > 0);
3243
Tim Petersfff53252001-04-12 18:38:48 +00003244 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003245 if (((flags & F_ALT) == 0 &&
3246 (type == 'o' || type == 'x' || type == 'X'))) {
3247 assert(buf[sign] == '0');
3248 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003249 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003250 numnondigits -= 2;
3251 buf += 2;
3252 len -= 2;
3253 if (sign)
3254 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003255 assert(len == numnondigits + numdigits);
3256 assert(numdigits > 0);
3257 }
3258
3259 /* Fill with leading zeroes to meet minimum width. */
3260 if (prec > numdigits) {
3261 PyObject *r1 = PyString_FromStringAndSize(NULL,
3262 numnondigits + prec);
3263 char *b1;
3264 if (!r1) {
3265 Py_DECREF(result);
3266 return NULL;
3267 }
3268 b1 = PyString_AS_STRING(r1);
3269 for (i = 0; i < numnondigits; ++i)
3270 *b1++ = *buf++;
3271 for (i = 0; i < prec - numdigits; i++)
3272 *b1++ = '0';
3273 for (i = 0; i < numdigits; i++)
3274 *b1++ = *buf++;
3275 *b1 = '\0';
3276 Py_DECREF(result);
3277 result = r1;
3278 buf = PyString_AS_STRING(result);
3279 len = numnondigits + prec;
3280 }
3281
3282 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003283 if (type == 'X') {
3284 /* Need to convert all lower case letters to upper case.
3285 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003286 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003287 if (buf[i] >= 'a' && buf[i] <= 'x')
3288 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003289 }
3290 *pbuf = buf;
3291 *plen = len;
3292 return result;
3293}
3294
Guido van Rossum8cf04761997-08-02 02:57:45 +00003295void
Fred Drakeba096332000-07-09 07:04:36 +00003296PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003297{
3298 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003299 for (i = 0; i < UCHAR_MAX + 1; i++) {
3300 Py_XDECREF(characters[i]);
3301 characters[i] = NULL;
3302 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003303 Py_XDECREF(nullstring);
3304 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003305}
Barry Warsawa903ad982001-02-23 16:40:48 +00003306
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003307/*********************** Str Iterator ****************************/
3308
3309typedef struct {
3310 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00003311 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003312 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
3313} striterobject;
3314
3315static void
3316striter_dealloc(striterobject *it)
3317{
3318 _PyObject_GC_UNTRACK(it);
3319 Py_XDECREF(it->it_seq);
3320 PyObject_GC_Del(it);
3321}
3322
3323static int
3324striter_traverse(striterobject *it, visitproc visit, void *arg)
3325{
3326 Py_VISIT(it->it_seq);
3327 return 0;
3328}
3329
3330static PyObject *
3331striter_next(striterobject *it)
3332{
3333 PyStringObject *seq;
3334 PyObject *item;
3335
3336 assert(it != NULL);
3337 seq = it->it_seq;
3338 if (seq == NULL)
3339 return NULL;
3340 assert(PyString_Check(seq));
3341
3342 if (it->it_index < PyString_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003343 item = PyLong_FromLong(
Guido van Rossum75a902d2007-10-19 22:06:24 +00003344 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003345 if (item != NULL)
3346 ++it->it_index;
3347 return item;
3348 }
3349
3350 Py_DECREF(seq);
3351 it->it_seq = NULL;
3352 return NULL;
3353}
3354
3355static PyObject *
3356striter_len(striterobject *it)
3357{
3358 Py_ssize_t len = 0;
3359 if (it->it_seq)
3360 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003361 return PyLong_FromSsize_t(len);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003362}
3363
Guido van Rossum49d6b072006-08-17 21:11:47 +00003364PyDoc_STRVAR(length_hint_doc,
3365 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003366
3367static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003368 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3369 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003370 {NULL, NULL} /* sentinel */
3371};
3372
3373PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003374 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003375 "bytes_iterator", /* tp_name */
Guido van Rossum49d6b072006-08-17 21:11:47 +00003376 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003377 0, /* tp_itemsize */
3378 /* methods */
3379 (destructor)striter_dealloc, /* tp_dealloc */
3380 0, /* tp_print */
3381 0, /* tp_getattr */
3382 0, /* tp_setattr */
3383 0, /* tp_compare */
3384 0, /* tp_repr */
3385 0, /* tp_as_number */
3386 0, /* tp_as_sequence */
3387 0, /* tp_as_mapping */
3388 0, /* tp_hash */
3389 0, /* tp_call */
3390 0, /* tp_str */
3391 PyObject_GenericGetAttr, /* tp_getattro */
3392 0, /* tp_setattro */
3393 0, /* tp_as_buffer */
3394 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3395 0, /* tp_doc */
3396 (traverseproc)striter_traverse, /* tp_traverse */
3397 0, /* tp_clear */
3398 0, /* tp_richcompare */
3399 0, /* tp_weaklistoffset */
3400 PyObject_SelfIter, /* tp_iter */
3401 (iternextfunc)striter_next, /* tp_iternext */
3402 striter_methods, /* tp_methods */
3403 0,
3404};
3405
3406static PyObject *
3407str_iter(PyObject *seq)
3408{
3409 striterobject *it;
3410
3411 if (!PyString_Check(seq)) {
3412 PyErr_BadInternalCall();
3413 return NULL;
3414 }
3415 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3416 if (it == NULL)
3417 return NULL;
3418 it->it_index = 0;
3419 Py_INCREF(seq);
3420 it->it_seq = (PyStringObject *)seq;
3421 _PyObject_GC_TRACK(it);
3422 return (PyObject *)it;
3423}