blob: 855f6cd0e04a8664b0fa589ec58ddfce3b175af7 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
Martin v. Löwis18e16552006-02-15 17:27:45 +0000507Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000508PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000510 if (!PyString_Check(op))
511 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000512 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
515/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000516PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000518 if (!PyString_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
520 "expected bytes, %.200s found", Py_Type(op)->tp_name);
521 return NULL;
522 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000523 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524}
525
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000526int
527PyString_AsStringAndSize(register PyObject *obj,
528 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000529 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530{
531 if (s == NULL) {
532 PyErr_BadInternalCall();
533 return -1;
534 }
535
536 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000537 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000538 "expected bytes, %.200s found", Py_Type(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000539 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540 }
541
542 *s = PyString_AS_STRING(obj);
543 if (len != NULL)
544 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000545 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000546 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000547 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000548 return -1;
549 }
550 return 0;
551}
552
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554/* Methods */
555
/* Configuration macros defined ahead of the stringlib headers included
   below.  NOTE(review): presumably those headers expand these names to
   specialise their generic code for this type -- confirm in stringlib/. */

/* element type */
#define STRINGLIB_CHAR           char

/* comparison, length, constructor and data accessors */
#define STRINGLIB_CMP            memcmp
#define STRINGLIB_LEN            PyString_GET_SIZE
#define STRINGLIB_NEW            PyString_FromStringAndSize
#define STRINGLIB_STR            PyString_AS_STRING
/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */

/* shared empty object, exact-type test, mutability flag */
#define STRINGLIB_EMPTY          nullstring
#define STRINGLIB_CHECK_EXACT    PyString_CheckExact
#define STRINGLIB_MUTABLE        0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000567
568#include "stringlib/fastsearch.h"
569
570#include "stringlib/count.h"
571#include "stringlib/find.h"
572#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000573#include "stringlib/ctype.h"
574#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000575
576
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000577PyObject *
578PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000580 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000581 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000582 Py_ssize_t length = Py_Size(op);
583 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000584 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000585 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000586 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000587 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000588 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000589 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000590 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000591 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 }
594 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000595 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000596 register Py_UNICODE c;
597 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000598 int quote;
599
Guido van Rossum98297ee2007-11-06 21:34:58 +0000600 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000601 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000602 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000603 char *test, *start;
604 start = PyString_AS_STRING(op);
605 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000607 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000608 goto decided;
609 }
610 else if (*test == '\'')
611 quote = '"';
612 }
613 decided:
614 ;
615 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000616
Guido van Rossum98297ee2007-11-06 21:34:58 +0000617 *p++ = 'b', *p++ = quote;
618 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000619 /* There's at least enough room for a hex escape
620 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000621 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000631 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000632 *p++ = '\\';
633 *p++ = 'x';
634 *p++ = hexdigits[(c & 0xf0) >> 4];
635 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000636 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000637 else
638 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000640 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000641 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000643 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
644 Py_DECREF(v);
645 return NULL;
646 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000647 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649}
650
Guido van Rossum189f1df2001-05-01 16:51:53 +0000651static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652string_repr(PyObject *op)
653{
654 return PyString_Repr(op, 1);
655}
656
657static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000658string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000659{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000660 if (Py_BytesWarningFlag) {
661 if (PyErr_WarnEx(PyExc_BytesWarning,
662 "str() on a bytes instance", 1))
663 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000664 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000665 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000666}
667
Martin v. Löwis18e16552006-02-15 17:27:45 +0000668static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000669string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000671 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672}
673
Guido van Rossum98297ee2007-11-06 21:34:58 +0000674/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000675static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000676string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000677{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000678 Py_ssize_t size;
679 Py_buffer va, vb;
680 PyObject *result = NULL;
681
682 va.len = -1;
683 vb.len = -1;
684 if (_getbuffer(a, &va) < 0 ||
685 _getbuffer(b, &vb) < 0) {
686 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
687 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
688 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000689 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000690
Guido van Rossum98297ee2007-11-06 21:34:58 +0000691 /* Optimize end cases */
692 if (va.len == 0 && PyString_CheckExact(b)) {
693 result = b;
694 Py_INCREF(result);
695 goto done;
696 }
697 if (vb.len == 0 && PyString_CheckExact(a)) {
698 result = a;
699 Py_INCREF(result);
700 goto done;
701 }
702
703 size = va.len + vb.len;
704 if (size < 0) {
705 PyErr_NoMemory();
706 goto done;
707 }
708
709 result = PyString_FromStringAndSize(NULL, size);
710 if (result != NULL) {
711 memcpy(PyString_AS_STRING(result), va.buf, va.len);
712 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
713 }
714
715 done:
716 if (va.len != -1)
717 PyObject_ReleaseBuffer(a, &va);
718 if (vb.len != -1)
719 PyObject_ReleaseBuffer(b, &vb);
720 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000726 register Py_ssize_t i;
727 register Py_ssize_t j;
728 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000730 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731 if (n < 0)
732 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000733 /* watch out for overflows: the size can overflow int,
734 * and the # of bytes needed can overflow size_t
735 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000736 size = Py_Size(a) * n;
737 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000738 PyErr_SetString(PyExc_OverflowError,
739 "repeated string is too long");
740 return NULL;
741 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000742 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000743 Py_INCREF(a);
744 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745 }
Tim Peterse7c05322004-06-27 17:24:49 +0000746 nbytes = (size_t)size;
747 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000748 PyErr_SetString(PyExc_OverflowError,
749 "repeated string is too long");
750 return NULL;
751 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000753 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000754 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000755 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000756 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000757 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000758 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000759 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000760 memset(op->ob_sval, a->ob_sval[0] , n);
761 return (PyObject *) op;
762 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000763 i = 0;
764 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000765 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
766 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000767 }
768 while (i < size) {
769 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000770 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000771 i += j;
772 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774}
775
Guido van Rossum9284a572000-03-07 15:53:43 +0000776static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000778{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000779 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
780 if (ival == -1 && PyErr_Occurred()) {
781 Py_buffer varg;
782 int pos;
783 PyErr_Clear();
784 if (_getbuffer(arg, &varg) < 0)
785 return -1;
786 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
787 varg.buf, varg.len, 0);
788 PyObject_ReleaseBuffer(arg, &varg);
789 return pos >= 0;
790 }
791 if (ival < 0 || ival >= 256) {
792 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
793 return -1;
794 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000795
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
797}
798
799static PyObject *
800string_item(PyStringObject *a, register Py_ssize_t i)
801{
802 if (i < 0 || i >= Py_Size(a)) {
803 PyErr_SetString(PyExc_IndexError, "string index out of range");
804 return NULL;
805 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000806 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000807}
808
Martin v. Löwiscd353062001-05-24 16:56:35 +0000809static PyObject*
810string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000812 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000813 Py_ssize_t len_a, len_b;
814 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000815 PyObject *result;
816
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000817 /* Make sure both arguments are strings. */
818 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000819 if (Py_BytesWarningFlag && (op == Py_EQ) &&
820 (PyObject_IsInstance((PyObject*)a,
821 (PyObject*)&PyUnicode_Type) ||
822 PyObject_IsInstance((PyObject*)b,
823 (PyObject*)&PyUnicode_Type))) {
824 if (PyErr_WarnEx(PyExc_BytesWarning,
825 "Comparsion between bytes and string", 1))
826 return NULL;
827 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000828 result = Py_NotImplemented;
829 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000830 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 if (a == b) {
832 switch (op) {
833 case Py_EQ:case Py_LE:case Py_GE:
834 result = Py_True;
835 goto out;
836 case Py_NE:case Py_LT:case Py_GT:
837 result = Py_False;
838 goto out;
839 }
840 }
841 if (op == Py_EQ) {
842 /* Supporting Py_NE here as well does not save
843 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000844 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000846 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000847 result = Py_True;
848 } else {
849 result = Py_False;
850 }
851 goto out;
852 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000853 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 min_len = (len_a < len_b) ? len_a : len_b;
855 if (min_len > 0) {
856 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
857 if (c==0)
858 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000859 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000860 c = 0;
861 if (c == 0)
862 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
863 switch (op) {
864 case Py_LT: c = c < 0; break;
865 case Py_LE: c = c <= 0; break;
866 case Py_EQ: assert(0); break; /* unreachable */
867 case Py_NE: c = c != 0; break;
868 case Py_GT: c = c > 0; break;
869 case Py_GE: c = c >= 0; break;
870 default:
871 result = Py_NotImplemented;
872 goto out;
873 }
874 result = c ? Py_True : Py_False;
875 out:
876 Py_INCREF(result);
877 return result;
878}
879
880int
881_PyString_Eq(PyObject *o1, PyObject *o2)
882{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000883 PyStringObject *a = (PyStringObject*) o1;
884 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000885 return Py_Size(a) == Py_Size(b)
886 && *a->ob_sval == *b->ob_sval
887 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888}
889
Guido van Rossum9bfef441993-03-29 10:43:31 +0000890static long
Fred Drakeba096332000-07-09 07:04:36 +0000891string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000893 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000894 register unsigned char *p;
895 register long x;
896
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000897 if (a->ob_shash != -1)
898 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000899 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000900 p = (unsigned char *) a->ob_sval;
901 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000902 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000903 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000904 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000905 if (x == -1)
906 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000907 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000908 return x;
909}
910
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000911static PyObject*
912string_subscript(PyStringObject* self, PyObject* item)
913{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000914 if (PyIndex_Check(item)) {
915 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000916 if (i == -1 && PyErr_Occurred())
917 return NULL;
918 if (i < 0)
919 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000920 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000921 PyErr_SetString(PyExc_IndexError,
922 "string index out of range");
923 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000924 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000925 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000926 }
927 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000928 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000929 char* source_buf;
930 char* result_buf;
931 PyObject* result;
932
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000933 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000934 PyString_GET_SIZE(self),
935 &start, &stop, &step, &slicelength) < 0) {
936 return NULL;
937 }
938
939 if (slicelength <= 0) {
940 return PyString_FromStringAndSize("", 0);
941 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000942 else if (start == 0 && step == 1 &&
943 slicelength == PyString_GET_SIZE(self) &&
944 PyString_CheckExact(self)) {
945 Py_INCREF(self);
946 return (PyObject *)self;
947 }
948 else if (step == 1) {
949 return PyString_FromStringAndSize(
950 PyString_AS_STRING(self) + start,
951 slicelength);
952 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000953 else {
954 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000955 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000956 if (result_buf == NULL)
957 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000958
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 cur += step, i++) {
961 result_buf[i] = source_buf[cur];
962 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000963
964 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000965 slicelength);
966 PyMem_Free(result_buf);
967 return result;
968 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000969 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000970 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000971 PyErr_Format(PyExc_TypeError,
972 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000973 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000974 return NULL;
975 }
976}
977
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000978static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000979string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000981 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
982 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000983}
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000986 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000987 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000988 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000989 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000990 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000991 0, /*sq_ass_item*/
992 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000993 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994};
995
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000996static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000997 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000998 (binaryfunc)string_subscript,
999 0,
1000};
1001
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001002static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001003 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001004 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001005};
1006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001007
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* Bare method name for selector i: the matching stripformat[] entry with
   its three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001016
Thomas Wouters477c8d52006-05-27 19:21:47 +00001017
/* True when the `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared inline before
   memcmp() is called on the bytes after the first.

   Don't call if length < 2: the memcmp() length is computed as
   length-2.

   Every parameter is parenthesized so that compound argument expressions
   (e.g. `cond ? a : b`) expand with the intended precedence.  Arguments
   are evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1023
1024
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 elements, capped at MAX_PREALLOC.
   5 splits gives 6 elements.
   The comparison is done before the addition, so a maxsplit of
   PY_SSIZE_T_MAX is never incremented.  The parameter is parenthesized so
   compound argument expressions expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1037
/* Append the bytes data[left:right] to the split result being built.

   Expands in place and relies on names from the enclosing function:
   `str` (scratch PyObject *), `list` (result list), `count` (number of
   items stored so far) and an `onError` label that is jumped to on
   failure.  While count is below MAX_PREALLOC the item is stored straight
   into the list with PyList_SET_ITEM, which takes over the reference;
   past that it is appended and the local reference is dropped. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count >= MAX_PREALLOC) {				\
		int split_add_failed_ = PyList_Append(list, str); \
		Py_DECREF(str);					\
		if (split_add_failed_)				\
			goto onError;				\
	} else {						\
		PyList_SET_ITEM(list, count, str);		\
	}							\
	count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054
/* Always force the list to the expected size.
   Uses the `count` variable of the function it is expanded in. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count

/* Cursor helpers for whitespace splitting.  `i` must be a modifiable
   lvalue; it is advanced (SKIP_*) while below `len`, or moved back
   (RSKIP_*) while >= 0, for as long as s[i] is / is not ISSPACE.
   Parameters are parenthesized so that compound argument expressions
   (e.g. a pointer sum passed as `s`) expand with the intended precedence. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[(i)])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001062
1063Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001064split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065{
Christian Heimes895627f2007-12-08 17:28:33 +00001066 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001067 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001068 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001069 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070
1071 if (list == NULL)
1072 return NULL;
1073
Thomas Wouters477c8d52006-05-27 19:21:47 +00001074 i = j = 0;
1075
1076 while (maxsplit-- > 0) {
1077 SKIP_SPACE(s, i, len);
1078 if (i==len) break;
1079 j = i; i++;
1080 SKIP_NONSPACE(s, i, len);
Christian Heimes895627f2007-12-08 17:28:33 +00001081 if (j == 0 && i == len && PyString_CheckExact(self)) {
1082 /* No whitespace in self, so just use it as list[0] */
1083 Py_INCREF(self);
1084 PyList_SET_ITEM(list, 0, (PyObject *)self);
1085 count++;
1086 break;
1087 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001088 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001090
1091 if (i < len) {
1092 /* Only occurs when maxsplit was reached */
1093 /* Skip any remaining whitespace and copy to end of string */
1094 SKIP_SPACE(s, i, len);
1095 if (i != len)
1096 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001097 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001100 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001101 Py_DECREF(list);
1102 return NULL;
1103}
1104
Thomas Wouters477c8d52006-05-27 19:21:47 +00001105Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001106split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001107{
Christian Heimes895627f2007-12-08 17:28:33 +00001108 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001109 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001110 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001111 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001112
1113 if (list == NULL)
1114 return NULL;
1115
Thomas Wouters477c8d52006-05-27 19:21:47 +00001116 i = j = 0;
1117 while ((j < len) && (maxcount-- > 0)) {
1118 for(; j<len; j++) {
1119 /* I found that using memchr makes no difference */
1120 if (s[j] == ch) {
1121 SPLIT_ADD(s, i, j);
1122 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001123 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001124 }
1125 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001126 }
Christian Heimes895627f2007-12-08 17:28:33 +00001127 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1128 /* ch not in self, so just use self as list[0] */
1129 Py_INCREF(self);
1130 PyList_SET_ITEM(list, 0, (PyObject *)self);
1131 count++;
1132 }
1133 else if (i <= len) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001134 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001135 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001136 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001137 return list;
1138
1139 onError:
1140 Py_DECREF(list);
1141 return NULL;
1142}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001144PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001145"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001147Return a list of the sections in B, using sep as the delimiter.\n\
1148If sep is not given, B is split on ASCII whitespace characters\n\
1149(space, tab, return, newline, formfeed, vertical tab).\n\
1150If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001151
1152static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001153string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001155 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001156 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001157 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001158 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001159 PyObject *list, *str, *subobj = Py_None;
1160#ifdef USE_FAST
1161 Py_ssize_t pos;
1162#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001164 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001166 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001167 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001169 return split_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001170 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001171 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001172 sub = vsub.buf;
1173 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001174
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001175 if (n == 0) {
1176 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001177 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178 return NULL;
1179 }
Christian Heimes895627f2007-12-08 17:28:33 +00001180 else if (n == 1)
1181 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001182
Thomas Wouters477c8d52006-05-27 19:21:47 +00001183 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001184 if (list == NULL) {
1185 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001187 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188
Thomas Wouters477c8d52006-05-27 19:21:47 +00001189#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001191 while (maxsplit-- > 0) {
1192 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1193 if (pos < 0)
1194 break;
1195 j = i+pos;
1196 SPLIT_ADD(s, i, j);
1197 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001199#else
1200 i = j = 0;
1201 while ((j+n <= len) && (maxsplit-- > 0)) {
1202 for (; j+n <= len; j++) {
1203 if (Py_STRING_MATCH(s, j, sub, n)) {
1204 SPLIT_ADD(s, i, j);
1205 i = j = j + n;
1206 break;
1207 }
1208 }
1209 }
1210#endif
1211 SPLIT_ADD(s, i, len);
1212 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001213 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001214 return list;
1215
Thomas Wouters477c8d52006-05-27 19:21:47 +00001216 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001218 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219 return NULL;
1220}
1221
Thomas Wouters477c8d52006-05-27 19:21:47 +00001222PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001223"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001224\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001225Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001226the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001227found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001228
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001229static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001230string_partition(PyStringObject *self, PyObject *sep_obj)
1231{
1232 const char *sep;
1233 Py_ssize_t sep_len;
1234
1235 if (PyString_Check(sep_obj)) {
1236 sep = PyString_AS_STRING(sep_obj);
1237 sep_len = PyString_GET_SIZE(sep_obj);
1238 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001239 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1240 return NULL;
1241
1242 return stringlib_partition(
1243 (PyObject*) self,
1244 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1245 sep_obj, sep, sep_len
1246 );
1247}
1248
1249PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001250"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001251\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001252Searches for the separator sep in B, starting at the end of B,\n\
1253and returns the part before it, the separator itself, and the\n\
1254part after it. If the separator is not found, returns two empty\n\
1255bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001256
1257static PyObject *
1258string_rpartition(PyStringObject *self, PyObject *sep_obj)
1259{
1260 const char *sep;
1261 Py_ssize_t sep_len;
1262
1263 if (PyString_Check(sep_obj)) {
1264 sep = PyString_AS_STRING(sep_obj);
1265 sep_len = PyString_GET_SIZE(sep_obj);
1266 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001267 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1268 return NULL;
1269
1270 return stringlib_rpartition(
1271 (PyObject*) self,
1272 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1273 sep_obj, sep, sep_len
1274 );
1275}
1276
1277Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001278rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001279{
Christian Heimes895627f2007-12-08 17:28:33 +00001280 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001281 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001282 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001283 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001284
1285 if (list == NULL)
1286 return NULL;
1287
Thomas Wouters477c8d52006-05-27 19:21:47 +00001288 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001289
Thomas Wouters477c8d52006-05-27 19:21:47 +00001290 while (maxsplit-- > 0) {
1291 RSKIP_SPACE(s, i);
1292 if (i<0) break;
1293 j = i; i--;
1294 RSKIP_NONSPACE(s, i);
Christian Heimes895627f2007-12-08 17:28:33 +00001295 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1296 /* No whitespace in self, so just use it as list[0] */
1297 Py_INCREF(self);
1298 PyList_SET_ITEM(list, 0, (PyObject *)self);
1299 count++;
1300 break;
1301 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001302 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001303 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001304 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001305 /* Only occurs when maxsplit was reached. Skip any remaining
1306 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001307 RSKIP_SPACE(s, i);
1308 if (i >= 0)
1309 SPLIT_ADD(s, 0, i + 1);
1310
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001311 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001312 FIX_PREALLOC_SIZE(list);
1313 if (PyList_Reverse(list) < 0)
1314 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001315 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001316 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001317 Py_DECREF(list);
1318 return NULL;
1319}
1320
Thomas Wouters477c8d52006-05-27 19:21:47 +00001321Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001322rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001323{
Christian Heimes895627f2007-12-08 17:28:33 +00001324 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001325 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001326 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001327 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001328
1329 if (list == NULL)
1330 return NULL;
1331
Thomas Wouters477c8d52006-05-27 19:21:47 +00001332 i = j = len - 1;
1333 while ((i >= 0) && (maxcount-- > 0)) {
1334 for (; i >= 0; i--) {
1335 if (s[i] == ch) {
1336 SPLIT_ADD(s, i + 1, j + 1);
1337 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001339 }
1340 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001341 }
Christian Heimes895627f2007-12-08 17:28:33 +00001342 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1343 /* ch not in self, so just use self as list[0] */
1344 Py_INCREF(self);
1345 PyList_SET_ITEM(list, 0, (PyObject *)self);
1346 count++;
1347 }
1348 else if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001349 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001350 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001351 FIX_PREALLOC_SIZE(list);
1352 if (PyList_Reverse(list) < 0)
1353 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001354 return list;
1355
1356 onError:
1357 Py_DECREF(list);
1358 return NULL;
1359}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001360
1361PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001362"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001363\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364Return a list of the sections in B, using sep as the delimiter,\n\
1365starting at the end of B and working to the front.\n\
1366If sep is not given, B is split on ASCII whitespace characters\n\
1367(space, tab, return, newline, formfeed, vertical tab).\n\
1368If maxsplit is given, at most maxsplit splits are done.");
1369
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001370
1371static PyObject *
1372string_rsplit(PyStringObject *self, PyObject *args)
1373{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001375 Py_ssize_t maxsplit = -1, count=0;
Christian Heimes895627f2007-12-08 17:28:33 +00001376 const char *s, *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001377 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001378 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001379
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001380 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001381 return NULL;
1382 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001383 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001384 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001385 return rsplit_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001388 sub = vsub.buf;
1389 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001390
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001391 if (n == 0) {
1392 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001393 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001394 return NULL;
1395 }
Christian Heimes895627f2007-12-08 17:28:33 +00001396 else if (n == 1)
1397 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001398
Thomas Wouters477c8d52006-05-27 19:21:47 +00001399 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001400 if (list == NULL) {
1401 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001402 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001403 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001404
1405 j = len;
1406 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001407
Christian Heimes895627f2007-12-08 17:28:33 +00001408 s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001409 while ( (i >= 0) && (maxsplit-- > 0) ) {
1410 for (; i>=0; i--) {
1411 if (Py_STRING_MATCH(s, i, sub, n)) {
1412 SPLIT_ADD(s, i + n, j);
1413 j = i;
1414 i -= n;
1415 break;
1416 }
1417 }
1418 }
1419 SPLIT_ADD(s, 0, j);
1420 FIX_PREALLOC_SIZE(list);
1421 if (PyList_Reverse(list) < 0)
1422 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001423 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001424 return list;
1425
Thomas Wouters477c8d52006-05-27 19:21:47 +00001426onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001427 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001428 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001429 return NULL;
1430}
1431
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001432#undef SPLIT_ADD
1433#undef MAX_PREALLOC
1434#undef PREALLOC_SIZE
1435
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001437PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001438"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001440Concatenates any number of bytes objects, with B in between each pair.\n\
1441Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001442
1443static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001444string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445{
1446 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001447 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001450 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001451 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001452 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001453 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454
Tim Peters19fe14e2001-01-19 03:03:47 +00001455 seq = PySequence_Fast(orig, "");
1456 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457 return NULL;
1458 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001459
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001460 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001461 if (seqlen == 0) {
1462 Py_DECREF(seq);
1463 return PyString_FromString("");
1464 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001466 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001467 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001468 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001469 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001470 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001471 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001473
Raymond Hettinger674f2412004-08-23 23:23:54 +00001474 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001475 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001476 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001477 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001478 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001479 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001480 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001481 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001482 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001483 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001484 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001485 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001486 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001487 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001488 Py_DECREF(seq);
1489 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001490 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001491 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001492 if (i != 0)
1493 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001494 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001496 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001497 Py_DECREF(seq);
1498 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001500 }
1501
1502 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001503 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001504 if (res == NULL) {
1505 Py_DECREF(seq);
1506 return NULL;
1507 }
1508
1509 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001510 /* I'm not worried about a PyBytes item growing because there's
1511 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001512 p = PyString_AS_STRING(res);
1513 for (i = 0; i < seqlen; ++i) {
1514 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001515 char *q;
1516 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001517 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001518 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001519 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001520 item = PySequence_Fast_GET_ITEM(seq, i);
1521 n = Py_Size(item);
1522 if (PyString_Check(item))
1523 q = PyString_AS_STRING(item);
1524 else
1525 q = PyBytes_AS_STRING(item);
1526 Py_MEMCPY(p, q, n);
1527 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001529
Jeremy Hylton49048292000-07-11 03:28:17 +00001530 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532}
1533
Tim Peters52e155e2001-06-16 05:42:57 +00001534PyObject *
1535_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001536{
Tim Petersa7259592001-06-16 05:11:17 +00001537 assert(sep != NULL && PyString_Check(sep));
1538 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001539 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001540}
1541
Thomas Wouters477c8d52006-05-27 19:21:47 +00001542Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001543string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001544{
1545 if (*end > len)
1546 *end = len;
1547 else if (*end < 0)
1548 *end += len;
1549 if (*end < 0)
1550 *end = 0;
1551 if (*start < 0)
1552 *start += len;
1553 if (*start < 0)
1554 *start = 0;
1555}
1556
Thomas Wouters477c8d52006-05-27 19:21:47 +00001557Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001558string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001561 const char *sub;
1562 Py_ssize_t sub_len;
1563 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001564 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565
Christian Heimes9cd17752007-11-18 19:35:23 +00001566 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1567 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001569 /* To support None in "start" and "end" arguments, meaning
1570 the same as if they were not passed.
1571 */
1572 if (obj_start != Py_None)
1573 if (!_PyEval_SliceIndex(obj_start, &start))
1574 return -2;
1575 if (obj_end != Py_None)
1576 if (!_PyEval_SliceIndex(obj_end, &end))
1577 return -2;
1578
Guido van Rossum4c08d552000-03-10 22:55:18 +00001579 if (PyString_Check(subobj)) {
1580 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001581 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001582 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001583 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001584 /* XXX - the "expected a character buffer object" is pretty
1585 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586 return -2;
1587
Thomas Wouters477c8d52006-05-27 19:21:47 +00001588 if (dir > 0)
1589 return stringlib_find_slice(
1590 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1591 sub, sub_len, start, end);
1592 else
1593 return stringlib_rfind_slice(
1594 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1595 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596}
1597
1598
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001599PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001600"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601\n\
1602Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001603such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604arguments start and end are interpreted as in slice notation.\n\
1605\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001606Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607
1608static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001609string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001611 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612 if (result == -2)
1613 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001614 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615}
1616
1617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001619"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001621Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
1623static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001624string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001626 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627 if (result == -2)
1628 return NULL;
1629 if (result == -1) {
1630 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001631 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 return NULL;
1633 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001634 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635}
1636
1637
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001638PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001639"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001641Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001642such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643arguments start and end are interpreted as in slice notation.\n\
1644\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001645Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646
1647static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001648string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001650 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 if (result == -2)
1652 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001653 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654}
1655
1656
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001657PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001658"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001660Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661
1662static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001663string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001665 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666 if (result == -2)
1667 return NULL;
1668 if (result == -1) {
1669 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001670 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 return NULL;
1672 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001673 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674}
1675
1676
Thomas Wouters477c8d52006-05-27 19:21:47 +00001677Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001678do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1679{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001680 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001681 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001682 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001683 char *sep;
1684 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001685 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001686
Guido van Rossum98297ee2007-11-06 21:34:58 +00001687 if (_getbuffer(sepobj, &vsep) < 0)
1688 return NULL;
1689 sep = vsep.buf;
1690 seplen = vsep.len;
1691
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001692 i = 0;
1693 if (striptype != RIGHTSTRIP) {
1694 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1695 i++;
1696 }
1697 }
1698
1699 j = len;
1700 if (striptype != LEFTSTRIP) {
1701 do {
1702 j--;
1703 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1704 j++;
1705 }
1706
Guido van Rossum98297ee2007-11-06 21:34:58 +00001707 PyObject_ReleaseBuffer(sepobj, &vsep);
1708
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001709 if (i == 0 && j == len && PyString_CheckExact(self)) {
1710 Py_INCREF(self);
1711 return (PyObject*)self;
1712 }
1713 else
1714 return PyString_FromStringAndSize(s+i, j-i);
1715}
1716
1717
Thomas Wouters477c8d52006-05-27 19:21:47 +00001718Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001719do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720{
1721 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001722 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 i = 0;
1725 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001726 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 i++;
1728 }
1729 }
1730
1731 j = len;
1732 if (striptype != LEFTSTRIP) {
1733 do {
1734 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001735 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736 j++;
1737 }
1738
Tim Peters8fa5dd02001-09-12 02:18:30 +00001739 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 Py_INCREF(self);
1741 return (PyObject*)self;
1742 }
1743 else
1744 return PyString_FromStringAndSize(s+i, j-i);
1745}
1746
1747
Thomas Wouters477c8d52006-05-27 19:21:47 +00001748Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001749do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1750{
1751 PyObject *sep = NULL;
1752
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001753 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001754 return NULL;
1755
1756 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001757 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001758 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001759 return do_strip(self, striptype);
1760}
1761
1762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001763PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001764"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001766Strip leading and trailing bytes contained in the argument.\n\
1767If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001769string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001771 if (PyTuple_GET_SIZE(args) == 0)
1772 return do_strip(self, BOTHSTRIP); /* Common case */
1773 else
1774 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775}
1776
1777
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001779"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001781Strip leading bytes contained in the argument.\n\
1782If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001784string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001786 if (PyTuple_GET_SIZE(args) == 0)
1787 return do_strip(self, LEFTSTRIP); /* Common case */
1788 else
1789 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790}
1791
1792
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001793PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001794"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001796Strip trailing bytes contained in the argument.\n\
1797If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001799string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001801 if (PyTuple_GET_SIZE(args) == 0)
1802 return do_strip(self, RIGHTSTRIP); /* Common case */
1803 else
1804 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805}
1806
1807
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001809"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001811Return the number of non-overlapping occurrences of substring sub in\n\
1812string S[start:end]. Optional arguments start and end are interpreted\n\
1813as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001818 PyObject *sub_obj;
1819 const char *str = PyString_AS_STRING(self), *sub;
1820 Py_ssize_t sub_len;
1821 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822
Thomas Wouters477c8d52006-05-27 19:21:47 +00001823 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1824 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001826
Thomas Wouters477c8d52006-05-27 19:21:47 +00001827 if (PyString_Check(sub_obj)) {
1828 sub = PyString_AS_STRING(sub_obj);
1829 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001830 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001831 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001832 return NULL;
1833
Thomas Wouters477c8d52006-05-27 19:21:47 +00001834 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001835
Christian Heimes217cfd12007-12-02 14:31:20 +00001836 return PyLong_FromSsize_t(
Thomas Wouters477c8d52006-05-27 19:21:47 +00001837 stringlib_count(str + start, end - start, sub, sub_len)
1838 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839}
1840
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001842PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001843"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001845Return a copy of B, where all characters occurring in the\n\
1846optional argument deletechars are removed, and the remaining\n\
1847characters have been mapped through the given translation\n\
1848table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849
1850static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001851string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001853 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001854 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001855 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001857 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859 PyObject *result;
1860 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001863 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866
1867 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001868 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869 tablen = PyString_GET_SIZE(tableobj);
1870 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001871 else if (tableobj == Py_None) {
1872 table = NULL;
1873 tablen = 256;
1874 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001875 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877
Martin v. Löwis00b61272002-12-12 20:03:19 +00001878 if (tablen != 256) {
1879 PyErr_SetString(PyExc_ValueError,
1880 "translation table must be 256 characters long");
1881 return NULL;
1882 }
1883
Guido van Rossum4c08d552000-03-10 22:55:18 +00001884 if (delobj != NULL) {
1885 if (PyString_Check(delobj)) {
1886 del_table = PyString_AS_STRING(delobj);
1887 dellen = PyString_GET_SIZE(delobj);
1888 }
1889 else if (PyUnicode_Check(delobj)) {
1890 PyErr_SetString(PyExc_TypeError,
1891 "deletions are implemented differently for unicode");
1892 return NULL;
1893 }
1894 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1895 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001896 }
1897 else {
1898 del_table = NULL;
1899 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 }
1901
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001902 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 result = PyString_FromStringAndSize((char *)NULL, inlen);
1904 if (result == NULL)
1905 return NULL;
1906 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001907 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908
Guido van Rossumd8faa362007-04-27 19:54:29 +00001909 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910 /* If no deletions are required, use faster code */
1911 for (i = inlen; --i >= 0; ) {
1912 c = Py_CHARMASK(*input++);
1913 if (Py_CHARMASK((*output++ = table[c])) != c)
1914 changed = 1;
1915 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001916 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917 return result;
1918 Py_DECREF(result);
1919 Py_INCREF(input_obj);
1920 return input_obj;
1921 }
1922
Guido van Rossumd8faa362007-04-27 19:54:29 +00001923 if (table == NULL) {
1924 for (i = 0; i < 256; i++)
1925 trans_table[i] = Py_CHARMASK(i);
1926 } else {
1927 for (i = 0; i < 256; i++)
1928 trans_table[i] = Py_CHARMASK(table[i]);
1929 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930
1931 for (i = 0; i < dellen; i++)
1932 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1933
1934 for (i = inlen; --i >= 0; ) {
1935 c = Py_CHARMASK(*input++);
1936 if (trans_table[c] != -1)
1937 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1938 continue;
1939 changed = 1;
1940 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001941 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 Py_DECREF(result);
1943 Py_INCREF(input_obj);
1944 return input_obj;
1945 }
1946 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001947 if (inlen > 0)
1948 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949 return result;
1950}
1951
1952
Thomas Wouters477c8d52006-05-27 19:21:47 +00001953#define FORWARD 1
1954#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955
Thomas Wouters477c8d52006-05-27 19:21:47 +00001956/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957
Thomas Wouters477c8d52006-05-27 19:21:47 +00001958#define findchar(target, target_len, c) \
1959 ((char *)memchr((const void *)(target), c, target_len))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960
Thomas Wouters477c8d52006-05-27 19:21:47 +00001961/* String ops must return a string. */
1962/* If the object is subclass of string, create a copy */
1963Py_LOCAL(PyStringObject *)
1964return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001966 if (PyString_CheckExact(self)) {
1967 Py_INCREF(self);
1968 return self;
1969 }
1970 return (PyStringObject *)PyString_FromStringAndSize(
1971 PyString_AS_STRING(self),
1972 PyString_GET_SIZE(self));
1973}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974
Thomas Wouters477c8d52006-05-27 19:21:47 +00001975Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001976countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001977{
1978 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001979 const char *start=target;
1980 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981
Thomas Wouters477c8d52006-05-27 19:21:47 +00001982 while ( (start=findchar(start, end-start, c)) != NULL ) {
1983 count++;
1984 if (count >= maxcount)
1985 break;
1986 start += 1;
1987 }
1988 return count;
1989}
1990
1991Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992findstring(const char *target, Py_ssize_t target_len,
1993 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001994 Py_ssize_t start,
1995 Py_ssize_t end,
1996 int direction)
1997{
1998 if (start < 0) {
1999 start += target_len;
2000 if (start < 0)
2001 start = 0;
2002 }
2003 if (end > target_len) {
2004 end = target_len;
2005 } else if (end < 0) {
2006 end += target_len;
2007 if (end < 0)
2008 end = 0;
2009 }
2010
2011 /* zero-length substrings always match at the first attempt */
2012 if (pattern_len == 0)
2013 return (direction > 0) ? start : end;
2014
2015 end -= pattern_len;
2016
2017 if (direction < 0) {
2018 for (; end >= start; end--)
2019 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2020 return end;
2021 } else {
2022 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002023 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002024 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 }
2026 return -1;
2027}
2028
Thomas Wouters477c8d52006-05-27 19:21:47 +00002029Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002030countstring(const char *target, Py_ssize_t target_len,
2031 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002032 Py_ssize_t start,
2033 Py_ssize_t end,
2034 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002036 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037
Thomas Wouters477c8d52006-05-27 19:21:47 +00002038 if (start < 0) {
2039 start += target_len;
2040 if (start < 0)
2041 start = 0;
2042 }
2043 if (end > target_len) {
2044 end = target_len;
2045 } else if (end < 0) {
2046 end += target_len;
2047 if (end < 0)
2048 end = 0;
2049 }
2050
2051 /* zero-length substrings match everywhere */
2052 if (pattern_len == 0 || maxcount == 0) {
2053 if (target_len+1 < maxcount)
2054 return target_len+1;
2055 return maxcount;
2056 }
2057
2058 end -= pattern_len;
2059 if (direction < 0) {
2060 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002061 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002062 count++;
2063 if (--maxcount <= 0) break;
2064 end -= pattern_len-1;
2065 }
2066 } else {
2067 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002068 if (Py_STRING_MATCH(target, start,
2069 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002070 count++;
2071 if (--maxcount <= 0)
2072 break;
2073 start += pattern_len-1;
2074 }
2075 }
2076 return count;
2077}
2078
2079
2080/* Algorithms for different cases of string replacement */
2081
2082/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2083Py_LOCAL(PyStringObject *)
2084replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002085 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002086 Py_ssize_t maxcount)
2087{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002088 char *self_s, *result_s;
2089 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002090 Py_ssize_t count, i, product;
2091 PyStringObject *result;
2092
2093 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002094
Thomas Wouters477c8d52006-05-27 19:21:47 +00002095 /* 1 at the end plus 1 after every character */
2096 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002097 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002098 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002099
Thomas Wouters477c8d52006-05-27 19:21:47 +00002100 /* Check for overflow */
2101 /* result_len = count * to_len + self_len; */
2102 product = count * to_len;
2103 if (product / to_len != count) {
2104 PyErr_SetString(PyExc_OverflowError,
2105 "replace string is too long");
2106 return NULL;
2107 }
2108 result_len = product + self_len;
2109 if (result_len < 0) {
2110 PyErr_SetString(PyExc_OverflowError,
2111 "replace string is too long");
2112 return NULL;
2113 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002114
Thomas Wouters477c8d52006-05-27 19:21:47 +00002115 if (! (result = (PyStringObject *)
2116 PyString_FromStringAndSize(NULL, result_len)) )
2117 return NULL;
2118
2119 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002120 result_s = PyString_AS_STRING(result);
2121
2122 /* TODO: special case single character, which doesn't need memcpy */
2123
2124 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002125 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002126 result_s += to_len;
2127 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002128
Thomas Wouters477c8d52006-05-27 19:21:47 +00002129 for (i=0; i<count; i++) {
2130 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002131 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002132 result_s += to_len;
2133 }
2134
2135 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002136 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002137
2138 return result;
2139}
2140
2141/* Special case for deleting a single character */
2142/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2143Py_LOCAL(PyStringObject *)
2144replace_delete_single_character(PyStringObject *self,
2145 char from_c, Py_ssize_t maxcount)
2146{
2147 char *self_s, *result_s;
2148 char *start, *next, *end;
2149 Py_ssize_t self_len, result_len;
2150 Py_ssize_t count;
2151 PyStringObject *result;
2152
2153 self_len = PyString_GET_SIZE(self);
2154 self_s = PyString_AS_STRING(self);
2155
2156 count = countchar(self_s, self_len, from_c, maxcount);
2157 if (count == 0) {
2158 return return_self(self);
2159 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002160
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161 result_len = self_len - count; /* from_len == 1 */
2162 assert(result_len>=0);
2163
2164 if ( (result = (PyStringObject *)
2165 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2166 return NULL;
2167 result_s = PyString_AS_STRING(result);
2168
2169 start = self_s;
2170 end = self_s + self_len;
2171 while (count-- > 0) {
2172 next = findchar(start, end-start, from_c);
2173 if (next == NULL)
2174 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002175 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002176 result_s += (next-start);
2177 start = next+1;
2178 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002179 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002180
Thomas Wouters477c8d52006-05-27 19:21:47 +00002181 return result;
2182}
2183
2184/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2185
2186Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002187replace_delete_substring(PyStringObject *self,
2188 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002189 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002190 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002191 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002192 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002193 Py_ssize_t count, offset;
2194 PyStringObject *result;
2195
2196 self_len = PyString_GET_SIZE(self);
2197 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002198
2199 count = countstring(self_s, self_len,
2200 from_s, from_len,
2201 0, self_len, 1,
2202 maxcount);
2203
2204 if (count == 0) {
2205 /* no matches */
2206 return return_self(self);
2207 }
2208
2209 result_len = self_len - (count * from_len);
2210 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002211
Thomas Wouters477c8d52006-05-27 19:21:47 +00002212 if ( (result = (PyStringObject *)
2213 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2214 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002215
Thomas Wouters477c8d52006-05-27 19:21:47 +00002216 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002217
Thomas Wouters477c8d52006-05-27 19:21:47 +00002218 start = self_s;
2219 end = self_s + self_len;
2220 while (count-- > 0) {
2221 offset = findstring(start, end-start,
2222 from_s, from_len,
2223 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 if (offset == -1)
2225 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002226 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002227
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002228 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002229
Thomas Wouters477c8d52006-05-27 19:21:47 +00002230 result_s += (next-start);
2231 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002233 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002234 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235}
2236
Thomas Wouters477c8d52006-05-27 19:21:47 +00002237/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2238Py_LOCAL(PyStringObject *)
2239replace_single_character_in_place(PyStringObject *self,
2240 char from_c, char to_c,
2241 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002243 char *self_s, *result_s, *start, *end, *next;
2244 Py_ssize_t self_len;
2245 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002246
Thomas Wouters477c8d52006-05-27 19:21:47 +00002247 /* The result string will be the same size */
2248 self_s = PyString_AS_STRING(self);
2249 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002250
Thomas Wouters477c8d52006-05-27 19:21:47 +00002251 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002252
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253 if (next == NULL) {
2254 /* No matches; return the original string */
2255 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002257
Thomas Wouters477c8d52006-05-27 19:21:47 +00002258 /* Need to make a new string */
2259 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2260 if (result == NULL)
2261 return NULL;
2262 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002263 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002264
Thomas Wouters477c8d52006-05-27 19:21:47 +00002265 /* change everything in-place, starting with this one */
2266 start = result_s + (next-self_s);
2267 *start = to_c;
2268 start++;
2269 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002270
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271 while (--maxcount > 0) {
2272 next = findchar(start, end-start, from_c);
2273 if (next == NULL)
2274 break;
2275 *next = to_c;
2276 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002277 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002278
Thomas Wouters477c8d52006-05-27 19:21:47 +00002279 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002280}
2281
Thomas Wouters477c8d52006-05-27 19:21:47 +00002282/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2283Py_LOCAL(PyStringObject *)
2284replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002285 const char *from_s, Py_ssize_t from_len,
2286 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002287 Py_ssize_t maxcount)
2288{
2289 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002290 char *self_s;
2291 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002292 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002293
Thomas Wouters477c8d52006-05-27 19:21:47 +00002294 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002295
Thomas Wouters477c8d52006-05-27 19:21:47 +00002296 self_s = PyString_AS_STRING(self);
2297 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002298
Thomas Wouters477c8d52006-05-27 19:21:47 +00002299 offset = findstring(self_s, self_len,
2300 from_s, from_len,
2301 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002302 if (offset == -1) {
2303 /* No matches; return the original string */
2304 return return_self(self);
2305 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002306
Thomas Wouters477c8d52006-05-27 19:21:47 +00002307 /* Need to make a new string */
2308 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2309 if (result == NULL)
2310 return NULL;
2311 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002312 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002313
Thomas Wouters477c8d52006-05-27 19:21:47 +00002314 /* change everything in-place, starting with this one */
2315 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002316 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002317 start += from_len;
2318 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002319
Thomas Wouters477c8d52006-05-27 19:21:47 +00002320 while ( --maxcount > 0) {
2321 offset = findstring(start, end-start,
2322 from_s, from_len,
2323 0, end-start, FORWARD);
2324 if (offset==-1)
2325 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002326 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002327 start += offset+from_len;
2328 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329
Thomas Wouters477c8d52006-05-27 19:21:47 +00002330 return result;
2331}
2332
2333/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2334Py_LOCAL(PyStringObject *)
2335replace_single_character(PyStringObject *self,
2336 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002337 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002338 Py_ssize_t maxcount)
2339{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002340 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002341 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002342 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002343 Py_ssize_t count, product;
2344 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002345
Thomas Wouters477c8d52006-05-27 19:21:47 +00002346 self_s = PyString_AS_STRING(self);
2347 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002348
Thomas Wouters477c8d52006-05-27 19:21:47 +00002349 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002350 if (count == 0) {
2351 /* no matches, return unchanged */
2352 return return_self(self);
2353 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002354
Thomas Wouters477c8d52006-05-27 19:21:47 +00002355 /* use the difference between current and new, hence the "-1" */
2356 /* result_len = self_len + count * (to_len-1) */
2357 product = count * (to_len-1);
2358 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002359 PyErr_SetString(PyExc_OverflowError,
2360 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002361 return NULL;
2362 }
2363 result_len = self_len + product;
2364 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002365 PyErr_SetString(PyExc_OverflowError,
2366 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002367 return NULL;
2368 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002369
Thomas Wouters477c8d52006-05-27 19:21:47 +00002370 if ( (result = (PyStringObject *)
2371 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2372 return NULL;
2373 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002374
Thomas Wouters477c8d52006-05-27 19:21:47 +00002375 start = self_s;
2376 end = self_s + self_len;
2377 while (count-- > 0) {
2378 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002379 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002380 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002381
Thomas Wouters477c8d52006-05-27 19:21:47 +00002382 if (next == start) {
2383 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002384 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002385 result_s += to_len;
2386 start += 1;
2387 } else {
2388 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002389 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002390 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002391 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002392 result_s += to_len;
2393 start = next+1;
2394 }
2395 }
2396 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002397 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002398
Thomas Wouters477c8d52006-05-27 19:21:47 +00002399 return result;
2400}
2401
2402/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2403Py_LOCAL(PyStringObject *)
2404replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002405 const char *from_s, Py_ssize_t from_len,
2406 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002407 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002408 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002409 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002410 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002411 Py_ssize_t count, offset, product;
2412 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002413
Thomas Wouters477c8d52006-05-27 19:21:47 +00002414 self_s = PyString_AS_STRING(self);
2415 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002416
Thomas Wouters477c8d52006-05-27 19:21:47 +00002417 count = countstring(self_s, self_len,
2418 from_s, from_len,
2419 0, self_len, FORWARD, maxcount);
2420 if (count == 0) {
2421 /* no matches, return unchanged */
2422 return return_self(self);
2423 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002424
Thomas Wouters477c8d52006-05-27 19:21:47 +00002425 /* Check for overflow */
2426 /* result_len = self_len + count * (to_len-from_len) */
2427 product = count * (to_len-from_len);
2428 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002429 PyErr_SetString(PyExc_OverflowError,
2430 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002431 return NULL;
2432 }
2433 result_len = self_len + product;
2434 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002435 PyErr_SetString(PyExc_OverflowError,
2436 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002437 return NULL;
2438 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002439
Thomas Wouters477c8d52006-05-27 19:21:47 +00002440 if ( (result = (PyStringObject *)
2441 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2442 return NULL;
2443 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002444
Thomas Wouters477c8d52006-05-27 19:21:47 +00002445 start = self_s;
2446 end = self_s + self_len;
2447 while (count-- > 0) {
2448 offset = findstring(start, end-start,
2449 from_s, from_len,
2450 0, end-start, FORWARD);
2451 if (offset == -1)
2452 break;
2453 next = start+offset;
2454 if (next == start) {
2455 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002456 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002457 result_s += to_len;
2458 start += from_len;
2459 } else {
2460 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002461 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002462 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002463 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002464 result_s += to_len;
2465 start = next+from_len;
2466 }
2467 }
2468 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002469 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002470
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471 return result;
2472}
2473
2474
2475Py_LOCAL(PyStringObject *)
2476replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002477 const char *from_s, Py_ssize_t from_len,
2478 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002479 Py_ssize_t maxcount)
2480{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002481 if (maxcount < 0) {
2482 maxcount = PY_SSIZE_T_MAX;
2483 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2484 /* nothing to do; return the original string */
2485 return return_self(self);
2486 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002487
Thomas Wouters477c8d52006-05-27 19:21:47 +00002488 if (maxcount == 0 ||
2489 (from_len == 0 && to_len == 0)) {
2490 /* nothing to do; return the original string */
2491 return return_self(self);
2492 }
2493
2494 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002495
Thomas Wouters477c8d52006-05-27 19:21:47 +00002496 if (from_len == 0) {
2497 /* insert the 'to' string everywhere. */
2498 /* >>> "Python".replace("", ".") */
2499 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002500 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002501 }
2502
2503 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2504 /* point for an empty self string to generate a non-empty string */
2505 /* Special case so the remaining code always gets a non-empty string */
2506 if (PyString_GET_SIZE(self) == 0) {
2507 return return_self(self);
2508 }
2509
2510 if (to_len == 0) {
2511 /* delete all occurances of 'from' string */
2512 if (from_len == 1) {
2513 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002514 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002515 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002516 return replace_delete_substring(self, from_s,
2517 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002518 }
2519 }
2520
2521 /* Handle special case where both strings have the same length */
2522
2523 if (from_len == to_len) {
2524 if (from_len == 1) {
2525 return replace_single_character_in_place(
2526 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002527 from_s[0],
2528 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002529 maxcount);
2530 } else {
2531 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002532 self, from_s, from_len, to_s, to_len,
2533 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 }
2535 }
2536
2537 /* Otherwise use the more generic algorithms */
2538 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002539 return replace_single_character(self, from_s[0],
2540 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002541 } else {
2542 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002543 return replace_substring(self, from_s, from_len, to_s, to_len,
2544 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002545 }
2546}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002548PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002549"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002550\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002551Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002552old replaced by new. If the optional argument count is\n\
2553given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002554
2555static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002556string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002557{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002558 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002559 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002560 const char *from_s, *to_s;
2561 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562
Thomas Wouters477c8d52006-05-27 19:21:47 +00002563 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002564 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565
Thomas Wouters477c8d52006-05-27 19:21:47 +00002566 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002567 from_s = PyString_AS_STRING(from);
2568 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002570 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 return NULL;
2572
Thomas Wouters477c8d52006-05-27 19:21:47 +00002573 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002574 to_s = PyString_AS_STRING(to);
2575 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002576 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002577 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002578 return NULL;
2579
Thomas Wouters477c8d52006-05-27 19:21:47 +00002580 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002581 from_s, from_len,
2582 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002583}
2584
Thomas Wouters477c8d52006-05-27 19:21:47 +00002585/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002586
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002587/* Matches the end (direction >= 0) or start (direction < 0) of self
2588 * against substr, using the start and end arguments. Returns
2589 * -1 on error, 0 if not found and 1 if found.
2590 */
2591Py_LOCAL(int)
2592_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2593 Py_ssize_t end, int direction)
2594{
2595 Py_ssize_t len = PyString_GET_SIZE(self);
2596 Py_ssize_t slen;
2597 const char* sub;
2598 const char* str;
2599
2600 if (PyString_Check(substr)) {
2601 sub = PyString_AS_STRING(substr);
2602 slen = PyString_GET_SIZE(substr);
2603 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002604 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2605 return -1;
2606 str = PyString_AS_STRING(self);
2607
2608 string_adjust_indices(&start, &end, len);
2609
2610 if (direction < 0) {
2611 /* startswith */
2612 if (start+slen > len)
2613 return 0;
2614 } else {
2615 /* endswith */
2616 if (end-start < slen || start > len)
2617 return 0;
2618
2619 if (end-slen > start)
2620 start = end - slen;
2621 }
2622 if (end-start >= slen)
2623 return ! memcmp(str+start, sub, slen);
2624 return 0;
2625}
2626
2627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002628PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002629"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002630\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002631Return True if B starts with the specified prefix, False otherwise.\n\
2632With optional start, test B beginning at that position.\n\
2633With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002634prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002635
2636static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002637string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002639 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002640 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002642 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002643
Guido van Rossumc6821402000-05-08 14:08:05 +00002644 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2645 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002647 if (PyTuple_Check(subobj)) {
2648 Py_ssize_t i;
2649 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2650 result = _string_tailmatch(self,
2651 PyTuple_GET_ITEM(subobj, i),
2652 start, end, -1);
2653 if (result == -1)
2654 return NULL;
2655 else if (result) {
2656 Py_RETURN_TRUE;
2657 }
2658 }
2659 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002661 result = _string_tailmatch(self, subobj, start, end, -1);
2662 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002663 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002664 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666}
2667
2668
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002669PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002670"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002672Return True if B ends with the specified suffix, False otherwise.\n\
2673With optional start, test B beginning at that position.\n\
2674With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002675suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002676
2677static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002678string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002680 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002681 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002683 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002684
Guido van Rossumc6821402000-05-08 14:08:05 +00002685 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2686 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002687 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002688 if (PyTuple_Check(subobj)) {
2689 Py_ssize_t i;
2690 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2691 result = _string_tailmatch(self,
2692 PyTuple_GET_ITEM(subobj, i),
2693 start, end, +1);
2694 if (result == -1)
2695 return NULL;
2696 else if (result) {
2697 Py_RETURN_TRUE;
2698 }
2699 }
2700 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002701 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002702 result = _string_tailmatch(self, subobj, start, end, +1);
2703 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002704 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002705 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002706 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002707}
2708
2709
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002710PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002711"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712\n\
2713Decodes S using the codec registered for encoding. encoding defaults\n\
2714to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002715handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2716a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002717as well as any other name registerd with codecs.register_error that is\n\
2718able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002719
2720static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002721string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002722{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002723 const char *encoding = NULL;
2724 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002725
Guido van Rossum98297ee2007-11-06 21:34:58 +00002726 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2727 return NULL;
2728 if (encoding == NULL)
2729 encoding = PyUnicode_GetDefaultEncoding();
2730 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002731}
2732
2733
Guido van Rossumae404e22007-10-26 21:46:44 +00002734PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002735"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002736\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002737Create a bytes object from a string of hexadecimal numbers.\n\
2738Spaces between two numbers are accepted.\n\
2739Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002740
2741static int
2742hex_digit_to_int(Py_UNICODE c)
2743{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002744 if (c >= 128)
2745 return -1;
2746 if (ISDIGIT(c))
2747 return c - '0';
2748 else {
2749 if (ISUPPER(c))
2750 c = TOLOWER(c);
2751 if (c >= 'a' && c <= 'f')
2752 return c - 'a' + 10;
2753 }
2754 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002755}
2756
2757static PyObject *
2758string_fromhex(PyObject *cls, PyObject *args)
2759{
2760 PyObject *newstring, *hexobj;
2761 char *buf;
2762 Py_UNICODE *hex;
2763 Py_ssize_t hexlen, byteslen, i, j;
2764 int top, bot;
2765
2766 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2767 return NULL;
2768 assert(PyUnicode_Check(hexobj));
2769 hexlen = PyUnicode_GET_SIZE(hexobj);
2770 hex = PyUnicode_AS_UNICODE(hexobj);
2771 byteslen = hexlen/2; /* This overestimates if there are spaces */
2772 newstring = PyString_FromStringAndSize(NULL, byteslen);
2773 if (!newstring)
2774 return NULL;
2775 buf = PyString_AS_STRING(newstring);
2776 for (i = j = 0; i < hexlen; i += 2) {
2777 /* skip over spaces in the input */
2778 while (hex[i] == ' ')
2779 i++;
2780 if (i >= hexlen)
2781 break;
2782 top = hex_digit_to_int(hex[i]);
2783 bot = hex_digit_to_int(hex[i+1]);
2784 if (top == -1 || bot == -1) {
2785 PyErr_Format(PyExc_ValueError,
2786 "non-hexadecimal number found in "
2787 "fromhex() arg at position %zd", i);
2788 goto error;
2789 }
2790 buf[j++] = (top << 4) + bot;
2791 }
2792 if (_PyString_Resize(&newstring, j) < 0)
2793 goto error;
2794 return newstring;
2795
2796 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002797 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002798 return NULL;
2799}
2800
2801
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002802static PyObject *
2803string_getnewargs(PyStringObject *v)
2804{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002805 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002806}
2807
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002808
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002809static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002810string_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00002811 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002812 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2813 _Py_capitalize__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002814 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002815 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002816 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002817 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002818 endswith__doc__},
2819 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2820 expandtabs__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002821 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002822 {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
2823 fromhex_doc},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002824 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002825 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2826 _Py_isalnum__doc__},
2827 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2828 _Py_isalpha__doc__},
2829 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2830 _Py_isdigit__doc__},
2831 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2832 _Py_islower__doc__},
2833 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2834 _Py_isspace__doc__},
2835 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2836 _Py_istitle__doc__},
2837 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2838 _Py_isupper__doc__},
2839 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2840 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2841 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002842 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002843 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002844 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2845 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2846 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002847 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00002848 {"rpartition", (PyCFunction)string_rpartition, METH_O,
2849 rpartition__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002850 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
2851 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2852 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2853 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2854 splitlines__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002855 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
Guido van Rossumae404e22007-10-26 21:46:44 +00002856 startswith__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002857 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002858 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2859 _Py_swapcase__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002860 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002861 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2862 translate__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00002863 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002864 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002865 {NULL, NULL} /* sentinel */
2866};
2867
Jeremy Hylton938ace62002-07-17 16:30:39 +00002868static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002869str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2870
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002871static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002872string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002873{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002874 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002875 const char *encoding = NULL;
2876 const char *errors = NULL;
2877 PyObject *new = NULL;
2878 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002879 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002880
Guido van Rossumae960af2001-08-30 03:11:59 +00002881 if (type != &PyString_Type)
2882 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002883 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002884 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002885 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002886 if (x == NULL) {
2887 if (encoding != NULL || errors != NULL) {
2888 PyErr_SetString(PyExc_TypeError,
2889 "encoding or errors without sequence "
2890 "argument");
2891 return NULL;
2892 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002893 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002894 }
2895
2896 if (PyUnicode_Check(x)) {
2897 /* Encode via the codec registry */
2898 if (encoding == NULL) {
2899 PyErr_SetString(PyExc_TypeError,
2900 "string argument without an encoding");
2901 return NULL;
2902 }
2903 new = PyCodec_Encode(x, encoding, errors);
2904 if (new == NULL)
2905 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002906 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002907 return new;
2908 }
2909
2910 /* If it's not unicode, there can't be encoding or errors */
2911 if (encoding != NULL || errors != NULL) {
2912 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002913 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002914 return NULL;
2915 }
2916
Guido van Rossum98297ee2007-11-06 21:34:58 +00002917 /* Is it an int? */
2918 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2919 if (size == -1 && PyErr_Occurred()) {
2920 PyErr_Clear();
2921 }
2922 else {
2923 if (size < 0) {
2924 PyErr_SetString(PyExc_ValueError, "negative count");
2925 return NULL;
2926 }
2927 new = PyString_FromStringAndSize(NULL, size);
2928 if (new == NULL) {
2929 return NULL;
2930 }
2931 if (size > 0) {
2932 memset(((PyStringObject*)new)->ob_sval, 0, size);
2933 }
2934 return new;
2935 }
2936
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002937 /* Use the modern buffer interface */
2938 if (PyObject_CheckBuffer(x)) {
2939 Py_buffer view;
2940 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2941 return NULL;
2942 new = PyString_FromStringAndSize(NULL, view.len);
2943 if (!new)
2944 goto fail;
2945 // XXX(brett.cannon): Better way to get to internal buffer?
2946 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2947 &view, view.len, 'C') < 0)
2948 goto fail;
2949 PyObject_ReleaseBuffer(x, &view);
2950 return new;
2951 fail:
2952 Py_XDECREF(new);
2953 PyObject_ReleaseBuffer(x, &view);
2954 return NULL;
2955 }
2956
Guido van Rossum98297ee2007-11-06 21:34:58 +00002957 /* For iterator version, create a string object and resize as needed */
2958 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2959 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2960 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002961 size = 64;
2962 new = PyString_FromStringAndSize(NULL, size);
2963 if (new == NULL)
2964 return NULL;
2965
2966 /* XXX Optimize this if the arguments is a list, tuple */
2967
2968 /* Get the iterator */
2969 it = PyObject_GetIter(x);
2970 if (it == NULL)
2971 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002972
2973 /* Run the iterator to exhaustion */
2974 for (i = 0; ; i++) {
2975 PyObject *item;
2976 Py_ssize_t value;
2977
2978 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002979 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002980 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002981 if (PyErr_Occurred())
2982 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002983 break;
2984 }
2985
2986 /* Interpret it as an int (__index__) */
2987 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2988 Py_DECREF(item);
2989 if (value == -1 && PyErr_Occurred())
2990 goto error;
2991
2992 /* Range check */
2993 if (value < 0 || value >= 256) {
2994 PyErr_SetString(PyExc_ValueError,
2995 "bytes must be in range(0, 256)");
2996 goto error;
2997 }
2998
2999 /* Append the byte */
3000 if (i >= size) {
3001 size *= 2;
3002 if (_PyString_Resize(&new, size) < 0)
3003 goto error;
3004 }
3005 ((PyStringObject *)new)->ob_sval[i] = value;
3006 }
3007 _PyString_Resize(&new, i);
3008
3009 /* Clean up and return success */
3010 Py_DECREF(it);
3011 return new;
3012
3013 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003014 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003015 Py_XDECREF(it);
3016 Py_DECREF(new);
3017 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003018}
3019
Guido van Rossumae960af2001-08-30 03:11:59 +00003020static PyObject *
3021str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3022{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003023 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003024 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003025
3026 assert(PyType_IsSubtype(type, &PyString_Type));
3027 tmp = string_new(&PyString_Type, args, kwds);
3028 if (tmp == NULL)
3029 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003030 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003031 n = PyString_GET_SIZE(tmp);
3032 pnew = type->tp_alloc(type, n);
3033 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003034 Py_MEMCPY(PyString_AS_STRING(pnew),
3035 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003036 ((PyStringObject *)pnew)->ob_shash =
3037 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003038 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003039 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003040 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003041}
3042
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003043PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003044"bytes(iterable_of_ints) -> bytes.\n\
3045bytes(string, encoding[, errors]) -> bytes\n\
3046bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3047bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003048\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003049Construct an immutable array of bytes from:\n\
3050 - an iterable yielding integers in range(256)\n\
3051 - a text string encoded using the specified encoding\n\
3052 - a bytes or a buffer object\n\
3053 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003055static PyObject *str_iter(PyObject *seq);
3056
/* Type object for bytes.  Instances are variable-size: the basic size is
   sizeof(PyStringObject) and each item is one char.  Construction goes
   through string_new, iteration through str_iter, and the method table
   is string_methods. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_STRING_SUBCLASS,             /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    str_iter,                                   /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3099
3100void
Fred Drakeba096332000-07-09 07:04:36 +00003101PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003102{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003103 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003104 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003105 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003106 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003107 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003108 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003109 *pv = NULL;
3110 return;
3111 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003112 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003113 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003114 *pv = v;
3115}
3116
/* Same as PyString_Concat(pv, w), but additionally releases the
   reference to w afterwards.  w may be NULL (Py_XDECREF is used). */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
3123
3124
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003125/* The following function breaks the notion that strings are immutable:
3126 it changes the size of a string. We get away with this only if there
3127 is only one module referencing the object. You can also think of it
3128 as creating a new string object and destroying the old one, only
3129 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003130 already be known to some other part of the code...
3131 Note that if there's not enough memory to resize the string, the original
3132 string object at *pv is deallocated, *pv is set to NULL, an "out of
3133 memory" exception is set, and -1 is returned. Else (on success) 0 is
3134 returned, and the value in *pv may or may not be the same as on input.
3135 As always, an extra byte is allocated for a trailing \0 byte (newsize
3136 does *not* include that), and a trailing \0 byte is stored.
3137*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003138
3139int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003140_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003141{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003142 register PyObject *v;
3143 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003144 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003145 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003146 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003147 Py_DECREF(v);
3148 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003149 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003150 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003151 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003152 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003153 _Py_ForgetReference(v);
3154 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003155 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003156 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003157 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003158 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003159 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003160 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003161 _Py_NewReference(*pv);
3162 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003163 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003164 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003165 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003166 return 0;
3167}
Guido van Rossume5372401993-03-16 12:15:04 +00003168
Tim Peters38fd5b62000-09-21 05:43:11 +00003169/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3170 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3171 * Python's regular ints.
3172 * Return value: a new PyString*, or NULL if error.
3173 * . *pbuf is set to point into it,
3174 * *plen set to the # of chars following that.
3175 * Caller must decref it when done using pbuf.
3176 * The string starting at *pbuf is of the form
3177 * "-"? ("0x" | "0X")? digit+
3178 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003179 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003180 * There will be at least prec digits, zero-filled on the left if
3181 * necessary to get that many.
3182 * val object to be converted
3183 * flags bitmask of format flags; only F_ALT is looked at
3184 * prec minimum number of digits; 0-fill on left if needed
3185 * type a character in [duoxX]; u acts the same as d
3186 *
3187 * CAUTION: o, x and X conversions on regular ints can never
3188 * produce a '-' sign, but can for Python's unbounded ints.
3189 */
3190PyObject*
3191_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3192 char **pbuf, int *plen)
3193{
3194 PyObject *result = NULL;
3195 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003196 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003197 int sign; /* 1 if '-', else 0 */
3198 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003199 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003200 int numdigits; /* len == numnondigits + numdigits */
3201 int numnondigits = 0;
3202
Guido van Rossumddefaf32007-01-14 03:31:43 +00003203 /* Avoid exceeding SSIZE_T_MAX */
3204 if (prec > PY_SSIZE_T_MAX-3) {
3205 PyErr_SetString(PyExc_OverflowError,
3206 "precision too large");
3207 return NULL;
3208 }
3209
Tim Peters38fd5b62000-09-21 05:43:11 +00003210 switch (type) {
3211 case 'd':
3212 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003213 /* Special-case boolean: we want 0/1 */
3214 if (PyBool_Check(val))
3215 result = PyNumber_ToBase(val, 10);
3216 else
3217 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003218 break;
3219 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003220 numnondigits = 2;
3221 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003222 break;
3223 case 'x':
3224 case 'X':
3225 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003226 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003227 break;
3228 default:
3229 assert(!"'type' not in [duoxX]");
3230 }
3231 if (!result)
3232 return NULL;
3233
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003234 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003235 if (!buf) {
3236 Py_DECREF(result);
3237 return NULL;
3238 }
3239
Tim Peters38fd5b62000-09-21 05:43:11 +00003240 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003241 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003242 PyErr_BadInternalCall();
3243 return NULL;
3244 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003245 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003246 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003247 PyErr_SetString(PyExc_ValueError,
3248 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003249 return NULL;
3250 }
3251 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003252 if (buf[len-1] == 'L') {
3253 --len;
3254 buf[len] = '\0';
3255 }
3256 sign = buf[0] == '-';
3257 numnondigits += sign;
3258 numdigits = len - numnondigits;
3259 assert(numdigits > 0);
3260
Tim Petersfff53252001-04-12 18:38:48 +00003261 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003262 if (((flags & F_ALT) == 0 &&
3263 (type == 'o' || type == 'x' || type == 'X'))) {
3264 assert(buf[sign] == '0');
3265 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003266 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003267 numnondigits -= 2;
3268 buf += 2;
3269 len -= 2;
3270 if (sign)
3271 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003272 assert(len == numnondigits + numdigits);
3273 assert(numdigits > 0);
3274 }
3275
3276 /* Fill with leading zeroes to meet minimum width. */
3277 if (prec > numdigits) {
3278 PyObject *r1 = PyString_FromStringAndSize(NULL,
3279 numnondigits + prec);
3280 char *b1;
3281 if (!r1) {
3282 Py_DECREF(result);
3283 return NULL;
3284 }
3285 b1 = PyString_AS_STRING(r1);
3286 for (i = 0; i < numnondigits; ++i)
3287 *b1++ = *buf++;
3288 for (i = 0; i < prec - numdigits; i++)
3289 *b1++ = '0';
3290 for (i = 0; i < numdigits; i++)
3291 *b1++ = *buf++;
3292 *b1 = '\0';
3293 Py_DECREF(result);
3294 result = r1;
3295 buf = PyString_AS_STRING(result);
3296 len = numnondigits + prec;
3297 }
3298
3299 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003300 if (type == 'X') {
3301 /* Need to convert all lower case letters to upper case.
3302 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003303 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003304 if (buf[i] >= 'a' && buf[i] <= 'x')
3305 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003306 }
3307 *pbuf = buf;
3308 *plen = len;
3309 return result;
3310}
3311
Guido van Rossum8cf04761997-08-02 02:57:45 +00003312void
Fred Drakeba096332000-07-09 07:04:36 +00003313PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003314{
3315 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003316 for (i = 0; i < UCHAR_MAX + 1; i++) {
3317 Py_XDECREF(characters[i]);
3318 characters[i] = NULL;
3319 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003320 Py_XDECREF(nullstring);
3321 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003322}
Barry Warsawa903ad982001-02-23 16:40:48 +00003323
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003324/*********************** Str Iterator ****************************/
3325
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte striter_next() reads */
    PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3331
/* Destructor for the bytes iterator: stop GC tracking, drop the
   reference to the underlying sequence (it_seq may already be NULL)
   and free the iterator object. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3339
/* GC traversal hook: report the sequence reference held by the
   iterator to the visitor.  Returns 0 unless Py_VISIT returns early. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3346
3347static PyObject *
3348striter_next(striterobject *it)
3349{
3350 PyStringObject *seq;
3351 PyObject *item;
3352
3353 assert(it != NULL);
3354 seq = it->it_seq;
3355 if (seq == NULL)
3356 return NULL;
3357 assert(PyString_Check(seq));
3358
3359 if (it->it_index < PyString_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003360 item = PyLong_FromLong(
Guido van Rossum75a902d2007-10-19 22:06:24 +00003361 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003362 if (item != NULL)
3363 ++it->it_index;
3364 return item;
3365 }
3366
3367 Py_DECREF(seq);
3368 it->it_seq = NULL;
3369 return NULL;
3370}
3371
3372static PyObject *
3373striter_len(striterobject *it)
3374{
3375 Py_ssize_t len = 0;
3376 if (it->it_seq)
3377 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003378 return PyLong_FromSsize_t(len);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003379}
3380
/* Docstring for the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003383
3384static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00003385 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3386 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003387 {NULL, NULL} /* sentinel */
3388};
3389
/* Type object for the iterator returned by str_iter().  It is a
   fixed-size, GC-enabled object whose __iter__ returns itself and whose
   __next__ is striter_next. */
PyTypeObject PyStringIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3422
3423static PyObject *
3424str_iter(PyObject *seq)
3425{
3426 striterobject *it;
3427
3428 if (!PyString_Check(seq)) {
3429 PyErr_BadInternalCall();
3430 return NULL;
3431 }
3432 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3433 if (it == NULL)
3434 return NULL;
3435 it->it_index = 0;
3436 Py_INCREF(seq);
3437 it->it_seq = (PyStringObject *)seq;
3438 _PyObject_GC_TRACK(it);
3439 return (PyObject *)it;
3440}