blob: 2e729ea8219ff281e5df8bd4440ad919f93261d8 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
15 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
16
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
21 Py_Type(obj)->tp_name);
22 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
37/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000038 For both PyString_FromString() and PyString_FromStringAndSize(), the
39 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000040 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000041
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000042 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000043 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000044
   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000055
   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000067 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 if (size == 1 && str != NULL &&
76 (op = characters[*str & UCHAR_MAX]) != NULL)
77 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
82 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000084
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000085 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000086 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000089 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000092 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000094 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 if (size == 0) {
96 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 } else if (size == 1 && str != NULL) {
99 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103}
104
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000106PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Tim Peters62de65b2001-12-06 20:29:32 +0000108 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000109 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000110
111 assert(str != NULL);
112 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 PyErr_SetString(PyExc_OverflowError,
115 "string is too long for a Python string");
116 return NULL;
117 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 if (size == 0 && (op = nullstring) != NULL) {
119#ifdef COUNT_ALLOCS
120 null_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126#ifdef COUNT_ALLOCS
127 one_strings++;
128#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
130 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000133 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000134 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000139 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000140 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Barry Warsawdadace02001-08-24 18:32:06 +0000151PyObject *
152PyString_FromFormatV(const char *format, va_list vargs)
153{
Tim Petersc15c4f12001-10-02 21:32:07 +0000154 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000155 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000156 const char* f;
157 char *s;
158 PyObject* string;
159
Tim Petersc15c4f12001-10-02 21:32:07 +0000160#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000161 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000163#ifdef __va_copy
164 __va_copy(count, vargs);
165#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000166 count = vargs;
167#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000173 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000174 ;
175
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
177 * they don't affect the amount of space we reserve.
178 */
179 if ((*f == 'l' || *f == 'z') &&
180 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000181 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000182
Barry Warsawdadace02001-08-24 18:32:06 +0000183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000190 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000216 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000230
Barry Warsawdadace02001-08-24 18:32:06 +0000231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000236 Py_ssize_t i;
237 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000238 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000242 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000247 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000248 n = (n*10) + *f++ - '0';
249 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000250 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000251 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000252 /* handle the long flag, but only for %ld and %lu.
253 others can be added when necessary. */
254 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000255 longflag = 1;
256 ++f;
257 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000258 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000260 size_tflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000271 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000272 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
273 va_arg(vargs, Py_ssize_t));
274 else
275 sprintf(s, "%d", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 'u':
279 if (longflag)
280 sprintf(s, "%lu",
281 va_arg(vargs, unsigned long));
282 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000283 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
284 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000285 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000286 sprintf(s, "%u",
287 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 s += strlen(s);
289 break;
290 case 'i':
291 sprintf(s, "%i", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'x':
295 sprintf(s, "%x", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 's':
299 p = va_arg(vargs, char*);
300 i = strlen(p);
301 if (n > 0 && i > n)
302 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000303 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000304 s += i;
305 break;
306 case 'p':
307 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000308 /* %p is ill-defined: ensure leading 0x. */
309 if (s[1] == 'X')
310 s[1] = 'x';
311 else if (s[1] != 'x') {
312 memmove(s+2, s, strlen(s)+1);
313 s[0] = '0';
314 s[1] = 'x';
315 }
Barry Warsawdadace02001-08-24 18:32:06 +0000316 s += strlen(s);
317 break;
318 case '%':
319 *s++ = '%';
320 break;
321 default:
322 strcpy(s, p);
323 s += strlen(s);
324 goto end;
325 }
326 } else
327 *s++ = *f;
328 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329
Barry Warsawdadace02001-08-24 18:32:06 +0000330 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000332 return string;
333}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334
Barry Warsawdadace02001-08-24 18:32:06 +0000335PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000337{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000338 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000339 va_list vargs;
340
341#ifdef HAVE_STDARG_PROTOTYPES
342 va_start(vargs, format);
343#else
344 va_start(vargs);
345#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 ret = PyString_FromFormatV(format, vargs);
347 va_end(vargs);
348 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000349}
350
Guido van Rossum234f9421993-06-17 12:35:49 +0000351static void
Fred Drakeba096332000-07-09 07:04:36 +0000352string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000353{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000355}
356
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000357/* Unescape a backslash-escaped string. If unicode is non-zero,
358 the string is a u-literal. If recode_encoding is non-zero,
359 the string is UTF-8 encoded and should be re-encoded in the
360 specified encoding. */
361
362PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000363 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000364 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000365 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000366 const char *recode_encoding)
367{
368 int c;
369 char *p, *buf;
370 const char *end;
371 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000372 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000373 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000374 if (v == NULL)
375 return NULL;
376 p = buf = PyString_AsString(v);
377 end = s + len;
378 while (s < end) {
379 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000380 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000381 if (recode_encoding && (*s & 0x80)) {
382 PyObject *u, *w;
383 char *r;
384 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000385 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000386 t = s;
387 /* Decode non-ASCII bytes as UTF-8. */
388 while (t < end && (*t & 0x80)) t++;
389 u = PyUnicode_DecodeUTF8(s, t - s, errors);
390 if(!u) goto failed;
391
392 /* Recode them in target encoding. */
393 w = PyUnicode_AsEncodedString(
394 u, recode_encoding, errors);
395 Py_DECREF(u);
396 if (!w) goto failed;
397
398 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000399 assert(PyString_Check(w));
400 r = PyString_AS_STRING(w);
401 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000402 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000403 p += rn;
404 Py_DECREF(w);
405 s = t;
406 } else {
407 *p++ = *s++;
408 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000409 continue;
410 }
411 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000412 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000413 PyErr_SetString(PyExc_ValueError,
414 "Trailing \\ in string");
415 goto failed;
416 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000417 switch (*s++) {
418 /* XXX This assumes ASCII! */
419 case '\n': break;
420 case '\\': *p++ = '\\'; break;
421 case '\'': *p++ = '\''; break;
422 case '\"': *p++ = '\"'; break;
423 case 'b': *p++ = '\b'; break;
424 case 'f': *p++ = '\014'; break; /* FF */
425 case 't': *p++ = '\t'; break;
426 case 'n': *p++ = '\n'; break;
427 case 'r': *p++ = '\r'; break;
428 case 'v': *p++ = '\013'; break; /* VT */
429 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
430 case '0': case '1': case '2': case '3':
431 case '4': case '5': case '6': case '7':
432 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000433 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000434 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000435 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000436 c = (c<<3) + *s++ - '0';
437 }
438 *p++ = c;
439 break;
440 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000441 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000442 unsigned int x = 0;
443 c = Py_CHARMASK(*s);
444 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000445 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000447 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000448 x = 10 + c - 'a';
449 else
450 x = 10 + c - 'A';
451 x = x << 4;
452 c = Py_CHARMASK(*s);
453 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000454 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000455 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000456 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000457 x += 10 + c - 'a';
458 else
459 x += 10 + c - 'A';
460 *p++ = x;
461 break;
462 }
463 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000465 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000466 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000474 "decoding error; unknown "
475 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000476 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000477 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000478 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000479 default:
480 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000481 s--;
482 goto non_esc; /* an arbitry number of unescaped
483 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000484 }
485 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000486 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000487 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 return v;
489 failed:
490 Py_DECREF(v);
491 return NULL;
492}
493
Thomas Wouters477c8d52006-05-27 19:21:47 +0000494/* -------------------------------------------------------------------- */
495/* object api */
496
Martin v. Löwis18e16552006-02-15 17:27:45 +0000497static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000498string_getsize(register PyObject *op)
499{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000500 char *s;
501 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000502 if (PyString_AsStringAndSize(op, &s, &len))
503 return -1;
504 return len;
505}
506
507static /*const*/ char *
508string_getbuffer(register PyObject *op)
509{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000510 char *s;
511 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000512 if (PyString_AsStringAndSize(op, &s, &len))
513 return NULL;
514 return s;
515}
516
Martin v. Löwis18e16552006-02-15 17:27:45 +0000517Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000518PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000520 if (PyUnicode_Check(op)) {
521 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
522 if (!op)
523 return -1;
524 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000525 if (!PyString_Check(op))
526 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000527 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528}
529
530/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000531PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000532{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000533 if (PyUnicode_Check(op)) {
534 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
535 if (!op)
536 return NULL;
537 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 if (!PyString_Check(op))
539 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000540 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000541}
542
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543int
544PyString_AsStringAndSize(register PyObject *obj,
545 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000547{
548 if (s == NULL) {
549 PyErr_BadInternalCall();
550 return -1;
551 }
552
553 if (!PyString_Check(obj)) {
554 if (PyUnicode_Check(obj)) {
555 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
556 if (obj == NULL)
557 return -1;
558 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000559 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000560 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000561 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000562 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000563 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000564 return -1;
565 }
566 }
567
568 *s = PyString_AS_STRING(obj);
569 if (len != NULL)
570 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000571 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000572 PyErr_SetString(PyExc_TypeError,
573 "expected string without null bytes");
574 return -1;
575 }
576 return 0;
577}
578
Thomas Wouters477c8d52006-05-27 19:21:47 +0000579/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000580/* Methods */
581
Thomas Wouters477c8d52006-05-27 19:21:47 +0000582#define STRINGLIB_CHAR char
583
584#define STRINGLIB_CMP memcmp
585#define STRINGLIB_LEN PyString_GET_SIZE
586#define STRINGLIB_NEW PyString_FromStringAndSize
587#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000588/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000589
590#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000591#define STRINGLIB_CHECK_EXACT PyString_CheckExact
592#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000593
594#include "stringlib/fastsearch.h"
595
596#include "stringlib/count.h"
597#include "stringlib/find.h"
598#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000599#include "stringlib/ctype.h"
600#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000601
602
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603PyObject *
604PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000606 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000607 register PyStringObject* op = (PyStringObject*) obj;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000608 Py_ssize_t length = Py_Size(op);
609 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000610 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000611 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000612 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000613 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000614 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000615 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000616 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000617 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000618 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619 }
620 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000621 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000622 register Py_UNICODE c;
623 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000624 int quote;
625
Guido van Rossum98297ee2007-11-06 21:34:58 +0000626 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000627 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000628 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000629 char *test, *start;
630 start = PyString_AS_STRING(op);
631 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000632 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000633 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000634 goto decided;
635 }
636 else if (*test == '\'')
637 quote = '"';
638 }
639 decided:
640 ;
641 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000642
Guido van Rossum98297ee2007-11-06 21:34:58 +0000643 *p++ = 'b', *p++ = quote;
644 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000645 /* There's at least enough room for a hex escape
646 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000647 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000649 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000651 else if (c == '\t')
652 *p++ = '\\', *p++ = 't';
653 else if (c == '\n')
654 *p++ = '\\', *p++ = 'n';
655 else if (c == '\r')
656 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000657 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000658 *p++ = '\\';
659 *p++ = 'x';
660 *p++ = hexdigits[(c & 0xf0) >> 4];
661 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000662 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000663 else
664 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000666 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000667 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000669 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
670 Py_DECREF(v);
671 return NULL;
672 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000673 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675}
676
Guido van Rossum189f1df2001-05-01 16:51:53 +0000677static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000678string_repr(PyObject *op)
679{
680 return PyString_Repr(op, 1);
681}
682
683static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000684string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000686 if (Py_BytesWarningFlag) {
687 if (PyErr_WarnEx(PyExc_BytesWarning,
688 "str() on a bytes instance", 1))
689 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000690 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000691 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000692}
693
Martin v. Löwis18e16552006-02-15 17:27:45 +0000694static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000695string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000697 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698}
699
Guido van Rossum98297ee2007-11-06 21:34:58 +0000700/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000702string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000704 Py_ssize_t size;
705 Py_buffer va, vb;
706 PyObject *result = NULL;
707
708 va.len = -1;
709 vb.len = -1;
710 if (_getbuffer(a, &va) < 0 ||
711 _getbuffer(b, &vb) < 0) {
712 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
713 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
714 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000715 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000716
Guido van Rossum98297ee2007-11-06 21:34:58 +0000717 /* Optimize end cases */
718 if (va.len == 0 && PyString_CheckExact(b)) {
719 result = b;
720 Py_INCREF(result);
721 goto done;
722 }
723 if (vb.len == 0 && PyString_CheckExact(a)) {
724 result = a;
725 Py_INCREF(result);
726 goto done;
727 }
728
729 size = va.len + vb.len;
730 if (size < 0) {
731 PyErr_NoMemory();
732 goto done;
733 }
734
735 result = PyString_FromStringAndSize(NULL, size);
736 if (result != NULL) {
737 memcpy(PyString_AS_STRING(result), va.buf, va.len);
738 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
739 }
740
741 done:
742 if (va.len != -1)
743 PyObject_ReleaseBuffer(a, &va);
744 if (vb.len != -1)
745 PyObject_ReleaseBuffer(b, &vb);
746 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747}
748
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000749static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000750string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000752 register Py_ssize_t i;
753 register Py_ssize_t j;
754 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000755 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000756 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000757 if (n < 0)
758 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000759 /* watch out for overflows: the size can overflow int,
760 * and the # of bytes needed can overflow size_t
761 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000762 size = Py_Size(a) * n;
763 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000764 PyErr_SetString(PyExc_OverflowError,
765 "repeated string is too long");
766 return NULL;
767 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000768 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000769 Py_INCREF(a);
770 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771 }
Tim Peterse7c05322004-06-27 17:24:49 +0000772 nbytes = (size_t)size;
773 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000774 PyErr_SetString(PyExc_OverflowError,
775 "repeated string is too long");
776 return NULL;
777 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000778 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000779 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000780 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000781 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000782 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000783 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000784 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000785 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000786 memset(op->ob_sval, a->ob_sval[0] , n);
787 return (PyObject *) op;
788 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000789 i = 0;
790 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000791 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
792 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000793 }
794 while (i < size) {
795 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000796 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000797 i += j;
798 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000799 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Guido van Rossum9284a572000-03-07 15:53:43 +0000802static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000804{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000805 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
806 if (ival == -1 && PyErr_Occurred()) {
807 Py_buffer varg;
808 int pos;
809 PyErr_Clear();
810 if (_getbuffer(arg, &varg) < 0)
811 return -1;
812 pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
813 varg.buf, varg.len, 0);
814 PyObject_ReleaseBuffer(arg, &varg);
815 return pos >= 0;
816 }
817 if (ival < 0 || ival >= 256) {
818 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
819 return -1;
820 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000821
Guido van Rossum98297ee2007-11-06 21:34:58 +0000822 return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
823}
824
825static PyObject *
826string_item(PyStringObject *a, register Py_ssize_t i)
827{
828 if (i < 0 || i >= Py_Size(a)) {
829 PyErr_SetString(PyExc_IndexError, "string index out of range");
830 return NULL;
831 }
832 return PyInt_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000833}
834
Martin v. Löwiscd353062001-05-24 16:56:35 +0000835static PyObject*
836string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000838 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000839 Py_ssize_t len_a, len_b;
840 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000841 PyObject *result;
842
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000843 /* Make sure both arguments are strings. */
844 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000845 if (Py_BytesWarningFlag && (op == Py_EQ) &&
846 (PyObject_IsInstance((PyObject*)a,
847 (PyObject*)&PyUnicode_Type) ||
848 PyObject_IsInstance((PyObject*)b,
849 (PyObject*)&PyUnicode_Type))) {
850 if (PyErr_WarnEx(PyExc_BytesWarning,
851 "Comparsion between bytes and string", 1))
852 return NULL;
853 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 result = Py_NotImplemented;
855 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000856 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000857 if (a == b) {
858 switch (op) {
859 case Py_EQ:case Py_LE:case Py_GE:
860 result = Py_True;
861 goto out;
862 case Py_NE:case Py_LT:case Py_GT:
863 result = Py_False;
864 goto out;
865 }
866 }
867 if (op == Py_EQ) {
868 /* Supporting Py_NE here as well does not save
869 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000870 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000871 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000872 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000873 result = Py_True;
874 } else {
875 result = Py_False;
876 }
877 goto out;
878 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000879 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000880 min_len = (len_a < len_b) ? len_a : len_b;
881 if (min_len > 0) {
882 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
883 if (c==0)
884 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000885 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000886 c = 0;
887 if (c == 0)
888 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
889 switch (op) {
890 case Py_LT: c = c < 0; break;
891 case Py_LE: c = c <= 0; break;
892 case Py_EQ: assert(0); break; /* unreachable */
893 case Py_NE: c = c != 0; break;
894 case Py_GT: c = c > 0; break;
895 case Py_GE: c = c >= 0; break;
896 default:
897 result = Py_NotImplemented;
898 goto out;
899 }
900 result = c ? Py_True : Py_False;
901 out:
902 Py_INCREF(result);
903 return result;
904}
905
906int
907_PyString_Eq(PyObject *o1, PyObject *o2)
908{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000909 PyStringObject *a = (PyStringObject*) o1;
910 PyStringObject *b = (PyStringObject*) o2;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000911 return Py_Size(a) == Py_Size(b)
912 && *a->ob_sval == *b->ob_sval
913 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914}
915
Guido van Rossum9bfef441993-03-29 10:43:31 +0000916static long
Fred Drakeba096332000-07-09 07:04:36 +0000917string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000918{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000919 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000920 register unsigned char *p;
921 register long x;
922
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 if (a->ob_shash != -1)
924 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000925 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 p = (unsigned char *) a->ob_sval;
927 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000928 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000929 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000930 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000931 if (x == -1)
932 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000933 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 return x;
935}
936
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000937static PyObject*
938string_subscript(PyStringObject* self, PyObject* item)
939{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000940 if (PyIndex_Check(item)) {
941 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000942 if (i == -1 && PyErr_Occurred())
943 return NULL;
944 if (i < 0)
945 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000946 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000947 PyErr_SetString(PyExc_IndexError,
948 "string index out of range");
949 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000950 }
951 return PyInt_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000952 }
953 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000954 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000955 char* source_buf;
956 char* result_buf;
957 PyObject* result;
958
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000959 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000960 PyString_GET_SIZE(self),
961 &start, &stop, &step, &slicelength) < 0) {
962 return NULL;
963 }
964
965 if (slicelength <= 0) {
966 return PyString_FromStringAndSize("", 0);
967 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000968 else if (start == 0 && step == 1 &&
969 slicelength == PyString_GET_SIZE(self) &&
970 PyString_CheckExact(self)) {
971 Py_INCREF(self);
972 return (PyObject *)self;
973 }
974 else if (step == 1) {
975 return PyString_FromStringAndSize(
976 PyString_AS_STRING(self) + start,
977 slicelength);
978 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000979 else {
980 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000981 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000982 if (result_buf == NULL)
983 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000984
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000985 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000986 cur += step, i++) {
987 result_buf[i] = source_buf[cur];
988 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000989
990 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000991 slicelength);
992 PyMem_Free(result_buf);
993 return result;
994 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000996 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000997 PyErr_Format(PyExc_TypeError,
998 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000999 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001000 return NULL;
1001 }
1002}
1003
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001004static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001005string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001006{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001007 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
1008 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001009}
1010
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001012 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001013 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +00001015 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +00001016 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001017 0, /*sq_ass_item*/
1018 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001019 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020};
1021
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001022static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001023 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001024 (binaryfunc)string_subscript,
1025 0,
1026};
1027
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001028static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001029 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001030 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001031};
1032
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033
/* Strip-mode selectors; they double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, one per strip mode, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for a mode: the format string minus its 3-char "|O:"
   prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001042
Thomas Wouters477c8d52006-05-27 19:21:47 +00001043
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() checks the rest.

   Don't call if length < 2: the memcmp() count is length-2, which would be
   negative.  Arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1049
1050
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more than
   the split count, capped at MAX_PREALLOC.  5 splits gives 6 elements.
   The argument is parenthesized so expression arguments expand correctly;
   it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1063
/* Append the substring data[left:right] to the result list.

   Relies on names in the expanding function: `str` (scratch PyObject *),
   `list` (the result list), `count` (number of items stored so far) and the
   label `onError`, which is jumped to on failure.  The first MAX_PREALLOC
   items are stored with PyList_SET_ITEM (which takes over the reference);
   later ones go through PyList_Append, after which our own reference is
   dropped.

   Wrapped in do { } while (0) so the macro behaves as a single statement
   and must be followed by a semicolon. */
#define SPLIT_ADD(data, left, right) do {                               \
        str = PyString_FromStringAndSize((data) + (left),               \
                                         (right) - (left));             \
        if (str == NULL)                                                \
            goto onError;                                               \
        if (count < MAX_PREALLOC) {                                     \
            PyList_SET_ITEM(list, count, str);                          \
        } else {                                                        \
            if (PyList_Append(list, str)) {                             \
                Py_DECREF(str);                                         \
                goto onError;                                           \
            }                                                           \
            else                                                        \
                Py_DECREF(str);                                         \
        }                                                               \
        count++;                                                        \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080
/* Set the list's size field to the number of items actually stored.  Reads
   the local variable `count` of the expanding function. */
#define FIX_PREALLOC_SIZE(list) (Py_Size(list) = count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083
/* Cursor helpers for the whitespace splitters.  Each advances (or, for the
   R* variants, retreats) the index variable `i` in place while the byte at
   s[i] satisfies the condition and `i` stays in bounds. */
#define SKIP_SPACE(s, i, len) \
    { while (i<len && ISSPACE(s[i])) i++; }
#define SKIP_NONSPACE(s, i, len) \
    { while (i<len && !ISSPACE(s[i])) i++; }
#define RSKIP_SPACE(s, i) \
    { while (i>=0 && ISSPACE(s[i])) i--; }
#define RSKIP_NONSPACE(s, i) \
    { while (i>=0 && !ISSPACE(s[i])) i--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001088
1089Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001090split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001091{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001092 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001093 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001094 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095
1096 if (list == NULL)
1097 return NULL;
1098
Thomas Wouters477c8d52006-05-27 19:21:47 +00001099 i = j = 0;
1100
1101 while (maxsplit-- > 0) {
1102 SKIP_SPACE(s, i, len);
1103 if (i==len) break;
1104 j = i; i++;
1105 SKIP_NONSPACE(s, i, len);
1106 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001107 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001108
1109 if (i < len) {
1110 /* Only occurs when maxsplit was reached */
1111 /* Skip any remaining whitespace and copy to end of string */
1112 SKIP_SPACE(s, i, len);
1113 if (i != len)
1114 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001115 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001116 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001118 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119 Py_DECREF(list);
1120 return NULL;
1121}
1122
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001124split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001125{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001126 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001127 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001128 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001129
1130 if (list == NULL)
1131 return NULL;
1132
Thomas Wouters477c8d52006-05-27 19:21:47 +00001133 i = j = 0;
1134 while ((j < len) && (maxcount-- > 0)) {
1135 for(; j<len; j++) {
1136 /* I found that using memchr makes no difference */
1137 if (s[j] == ch) {
1138 SPLIT_ADD(s, i, j);
1139 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001140 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001141 }
1142 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001143 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001144 if (i <= len) {
1145 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001146 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001147 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001148 return list;
1149
1150 onError:
1151 Py_DECREF(list);
1152 return NULL;
1153}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001155PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001156"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001158Return a list of the sections in B, using sep as the delimiter.\n\
1159If sep is not given, B is split on ASCII whitespace characters\n\
1160(space, tab, return, newline, formfeed, vertical tab).\n\
1161If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001162
1163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001164string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001166 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001167 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001169 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001170 PyObject *list, *str, *subobj = Py_None;
1171#ifdef USE_FAST
1172 Py_ssize_t pos;
1173#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001175 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001177 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001178 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 return split_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001181 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001182 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001183 sub = vsub.buf;
1184 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001185
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 if (n == 0) {
1187 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001188 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001189 return NULL;
1190 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001191 else if (n == 1) {
1192 char ch = sub[0];
1193 PyObject_ReleaseBuffer(subobj, &vsub);
1194 return split_char(s, len, ch, maxsplit);
1195 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196
Thomas Wouters477c8d52006-05-27 19:21:47 +00001197 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001198 if (list == NULL) {
1199 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001201 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202
Thomas Wouters477c8d52006-05-27 19:21:47 +00001203#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001204 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001205 while (maxsplit-- > 0) {
1206 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1207 if (pos < 0)
1208 break;
1209 j = i+pos;
1210 SPLIT_ADD(s, i, j);
1211 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001212 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001213#else
1214 i = j = 0;
1215 while ((j+n <= len) && (maxsplit-- > 0)) {
1216 for (; j+n <= len; j++) {
1217 if (Py_STRING_MATCH(s, j, sub, n)) {
1218 SPLIT_ADD(s, i, j);
1219 i = j = j + n;
1220 break;
1221 }
1222 }
1223 }
1224#endif
1225 SPLIT_ADD(s, i, len);
1226 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001227 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228 return list;
1229
Thomas Wouters477c8d52006-05-27 19:21:47 +00001230 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001232 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233 return NULL;
1234}
1235
Thomas Wouters477c8d52006-05-27 19:21:47 +00001236PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001237"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001238\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001239Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001240the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001241found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001242
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001243static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001244string_partition(PyStringObject *self, PyObject *sep_obj)
1245{
1246 const char *sep;
1247 Py_ssize_t sep_len;
1248
1249 if (PyString_Check(sep_obj)) {
1250 sep = PyString_AS_STRING(sep_obj);
1251 sep_len = PyString_GET_SIZE(sep_obj);
1252 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001253 else if (PyUnicode_Check(sep_obj))
1254 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001255 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1256 return NULL;
1257
1258 return stringlib_partition(
1259 (PyObject*) self,
1260 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1261 sep_obj, sep, sep_len
1262 );
1263}
1264
1265PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001266"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001267\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001268Searches for the separator sep in B, starting at the end of B,\n\
1269and returns the part before it, the separator itself, and the\n\
1270part after it. If the separator is not found, returns two empty\n\
1271bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001272
1273static PyObject *
1274string_rpartition(PyStringObject *self, PyObject *sep_obj)
1275{
1276 const char *sep;
1277 Py_ssize_t sep_len;
1278
1279 if (PyString_Check(sep_obj)) {
1280 sep = PyString_AS_STRING(sep_obj);
1281 sep_len = PyString_GET_SIZE(sep_obj);
1282 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001283 else if (PyUnicode_Check(sep_obj))
1284 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1286 return NULL;
1287
1288 return stringlib_rpartition(
1289 (PyObject*) self,
1290 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1291 sep_obj, sep, sep_len
1292 );
1293}
1294
1295Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001296rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001297{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001298 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001299 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001300 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001301
1302 if (list == NULL)
1303 return NULL;
1304
Thomas Wouters477c8d52006-05-27 19:21:47 +00001305 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001306
Thomas Wouters477c8d52006-05-27 19:21:47 +00001307 while (maxsplit-- > 0) {
1308 RSKIP_SPACE(s, i);
1309 if (i<0) break;
1310 j = i; i--;
1311 RSKIP_NONSPACE(s, i);
1312 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001313 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001315 /* Only occurs when maxsplit was reached. Skip any remaining
1316 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001317 RSKIP_SPACE(s, i);
1318 if (i >= 0)
1319 SPLIT_ADD(s, 0, i + 1);
1320
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001321 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001322 FIX_PREALLOC_SIZE(list);
1323 if (PyList_Reverse(list) < 0)
1324 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001325 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001326 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001327 Py_DECREF(list);
1328 return NULL;
1329}
1330
Thomas Wouters477c8d52006-05-27 19:21:47 +00001331Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001332rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001333{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001334 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001335 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001336 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337
1338 if (list == NULL)
1339 return NULL;
1340
Thomas Wouters477c8d52006-05-27 19:21:47 +00001341 i = j = len - 1;
1342 while ((i >= 0) && (maxcount-- > 0)) {
1343 for (; i >= 0; i--) {
1344 if (s[i] == ch) {
1345 SPLIT_ADD(s, i + 1, j + 1);
1346 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001348 }
1349 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001350 }
1351 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001352 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001353 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001354 FIX_PREALLOC_SIZE(list);
1355 if (PyList_Reverse(list) < 0)
1356 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001357 return list;
1358
1359 onError:
1360 Py_DECREF(list);
1361 return NULL;
1362}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001363
1364PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001366\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367Return a list of the sections in B, using sep as the delimiter,\n\
1368starting at the end of B and working to the front.\n\
1369If sep is not given, B is split on ASCII whitespace characters\n\
1370(space, tab, return, newline, formfeed, vertical tab).\n\
1371If maxsplit is given, at most maxsplit splits are done.");
1372
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001373
1374static PyObject *
1375string_rsplit(PyStringObject *self, PyObject *args)
1376{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001377 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001378 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001379 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001380 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001381 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001382
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001383 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001384 return NULL;
1385 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001386 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001387 if (subobj == Py_None)
1388 return rsplit_whitespace(s, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001389 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001390 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001391 sub = vsub.buf;
1392 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001393
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001394 if (n == 0) {
1395 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001396 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001397 return NULL;
1398 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001399 else if (n == 1) {
1400 char ch = sub[0];
1401 PyObject_ReleaseBuffer(subobj, &vsub);
1402 return rsplit_char(s, len, ch, maxsplit);
1403 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001404
Thomas Wouters477c8d52006-05-27 19:21:47 +00001405 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001406 if (list == NULL) {
1407 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001408 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001409 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001410
1411 j = len;
1412 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001413
Thomas Wouters477c8d52006-05-27 19:21:47 +00001414 while ( (i >= 0) && (maxsplit-- > 0) ) {
1415 for (; i>=0; i--) {
1416 if (Py_STRING_MATCH(s, i, sub, n)) {
1417 SPLIT_ADD(s, i + n, j);
1418 j = i;
1419 i -= n;
1420 break;
1421 }
1422 }
1423 }
1424 SPLIT_ADD(s, 0, j);
1425 FIX_PREALLOC_SIZE(list);
1426 if (PyList_Reverse(list) < 0)
1427 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001428 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001429 return list;
1430
Thomas Wouters477c8d52006-05-27 19:21:47 +00001431onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001432 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001433 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001434 return NULL;
1435}
1436
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001437#undef SPLIT_ADD
1438#undef MAX_PREALLOC
1439#undef PREALLOC_SIZE
1440
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001442PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001443"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001445Concatenates any number of bytes objects, with B in between each pair.\n\
1446Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001449string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
1451 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001452 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001455 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001456 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001457 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001458 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459
Tim Peters19fe14e2001-01-19 03:03:47 +00001460 seq = PySequence_Fast(orig, "");
1461 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 return NULL;
1463 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001464
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001465 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 if (seqlen == 0) {
1467 Py_DECREF(seq);
1468 return PyString_FromString("");
1469 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001471 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001472 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001473 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001474 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001475 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001476 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001478
Raymond Hettinger674f2412004-08-23 23:23:54 +00001479 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001481 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001482 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001484 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001485 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001487 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001488 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001489 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001490 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001491 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001492 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001493 Py_DECREF(seq);
1494 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001495 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001496 sz += Py_Size(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001497 if (i != 0)
1498 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001499 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001500 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001502 Py_DECREF(seq);
1503 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001505 }
1506
1507 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001508 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001509 if (res == NULL) {
1510 Py_DECREF(seq);
1511 return NULL;
1512 }
1513
1514 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001515 /* I'm not worried about a PyBytes item growing because there's
1516 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001517 p = PyString_AS_STRING(res);
1518 for (i = 0; i < seqlen; ++i) {
1519 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001520 char *q;
1521 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001522 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001523 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001524 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001525 item = PySequence_Fast_GET_ITEM(seq, i);
1526 n = Py_Size(item);
1527 if (PyString_Check(item))
1528 q = PyString_AS_STRING(item);
1529 else
1530 q = PyBytes_AS_STRING(item);
1531 Py_MEMCPY(p, q, n);
1532 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001534
Jeremy Hylton49048292000-07-11 03:28:17 +00001535 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537}
1538
Tim Peters52e155e2001-06-16 05:42:57 +00001539PyObject *
1540_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001541{
Tim Petersa7259592001-06-16 05:11:17 +00001542 assert(sep != NULL && PyString_Check(sep));
1543 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001544 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001545}
1546
Thomas Wouters477c8d52006-05-27 19:21:47 +00001547Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001548string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001549{
1550 if (*end > len)
1551 *end = len;
1552 else if (*end < 0)
1553 *end += len;
1554 if (*end < 0)
1555 *end = 0;
1556 if (*start < 0)
1557 *start += len;
1558 if (*start < 0)
1559 *start = 0;
1560}
1561
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001563string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566 const char *sub;
1567 Py_ssize_t sub_len;
1568 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569
Thomas Wouters477c8d52006-05-27 19:21:47 +00001570 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1571 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 return -2;
1573 if (PyString_Check(subobj)) {
1574 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001575 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576 }
1577 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001578 return PyUnicode_Find(
1579 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001580 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001581 /* XXX - the "expected a character buffer object" is pretty
1582 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 return -2;
1584
Thomas Wouters477c8d52006-05-27 19:21:47 +00001585 if (dir > 0)
1586 return stringlib_find_slice(
1587 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1588 sub, sub_len, start, end);
1589 else
1590 return stringlib_rfind_slice(
1591 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1592 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593}
1594
1595
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001596PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001597"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598\n\
1599Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001600such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601arguments start and end are interpreted as in slice notation.\n\
1602\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001603Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
1605static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001606string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001608 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 if (result == -2)
1610 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001611 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612}
1613
1614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001615PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001616"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001618Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
1620static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001621string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001623 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 if (result == -2)
1625 return NULL;
1626 if (result == -1) {
1627 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001628 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 return NULL;
1630 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632}
1633
1634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001636"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001638Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001639such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640arguments start and end are interpreted as in slice notation.\n\
1641\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001642Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643
1644static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001645string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001647 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648 if (result == -2)
1649 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001650 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651}
1652
1653
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001654PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001655"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001657Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658
1659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001660string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001662 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 if (result == -2)
1664 return NULL;
1665 if (result == -1) {
1666 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001667 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 return NULL;
1669 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001670 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671}
1672
1673
Thomas Wouters477c8d52006-05-27 19:21:47 +00001674Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001675do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1676{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001677 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001678 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001679 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001680 char *sep;
1681 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001682 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001683
Guido van Rossum98297ee2007-11-06 21:34:58 +00001684 if (_getbuffer(sepobj, &vsep) < 0)
1685 return NULL;
1686 sep = vsep.buf;
1687 seplen = vsep.len;
1688
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001689 i = 0;
1690 if (striptype != RIGHTSTRIP) {
1691 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1692 i++;
1693 }
1694 }
1695
1696 j = len;
1697 if (striptype != LEFTSTRIP) {
1698 do {
1699 j--;
1700 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1701 j++;
1702 }
1703
Guido van Rossum98297ee2007-11-06 21:34:58 +00001704 PyObject_ReleaseBuffer(sepobj, &vsep);
1705
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001706 if (i == 0 && j == len && PyString_CheckExact(self)) {
1707 Py_INCREF(self);
1708 return (PyObject*)self;
1709 }
1710 else
1711 return PyString_FromStringAndSize(s+i, j-i);
1712}
1713
1714
Thomas Wouters477c8d52006-05-27 19:21:47 +00001715Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001716do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717{
1718 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001719 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721 i = 0;
1722 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001723 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 i++;
1725 }
1726 }
1727
1728 j = len;
1729 if (striptype != LEFTSTRIP) {
1730 do {
1731 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001732 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733 j++;
1734 }
1735
Tim Peters8fa5dd02001-09-12 02:18:30 +00001736 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 Py_INCREF(self);
1738 return (PyObject*)self;
1739 }
1740 else
1741 return PyString_FromStringAndSize(s+i, j-i);
1742}
1743
1744
Thomas Wouters477c8d52006-05-27 19:21:47 +00001745Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001746do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1747{
1748 PyObject *sep = NULL;
1749
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001750 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001751 return NULL;
1752
1753 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001754 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001755 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756 return do_strip(self, striptype);
1757}
1758
1759
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001760PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001761"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001763Strip leading and trailing bytes contained in the argument.\n\
1764If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001766string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001768 if (PyTuple_GET_SIZE(args) == 0)
1769 return do_strip(self, BOTHSTRIP); /* Common case */
1770 else
1771 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772}
1773
1774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001776"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001778Strip leading bytes contained in the argument.\n\
1779If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, LEFTSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001791"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001793Strip trailing bytes contained in the argument.\n\
1794If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001796string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798 if (PyTuple_GET_SIZE(args) == 0)
1799 return do_strip(self, RIGHTSTRIP); /* Common case */
1800 else
1801 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802}
1803
1804
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001805PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001806"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001808Return the number of non-overlapping occurrences of substring sub in\n\
1809string S[start:end]. Optional arguments start and end are interpreted\n\
1810as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
1812static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001813string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001815 PyObject *sub_obj;
1816 const char *str = PyString_AS_STRING(self), *sub;
1817 Py_ssize_t sub_len;
1818 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819
Thomas Wouters477c8d52006-05-27 19:21:47 +00001820 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1821 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001823
Thomas Wouters477c8d52006-05-27 19:21:47 +00001824 if (PyString_Check(sub_obj)) {
1825 sub = PyString_AS_STRING(sub_obj);
1826 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001827 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001828 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001829 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001830 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001831 if (count == -1)
1832 return NULL;
1833 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00001834 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001835 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001836 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001837 return NULL;
1838
Thomas Wouters477c8d52006-05-27 19:21:47 +00001839 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001840
Thomas Wouters477c8d52006-05-27 19:21:47 +00001841 return PyInt_FromSsize_t(
1842 stringlib_count(str + start, end - start, sub, sub_len)
1843 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844}
1845
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001847PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001848"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001850Return a copy of B, where all characters occurring in the\n\
1851optional argument deletechars are removed, and the remaining\n\
1852characters have been mapped through the given translation\n\
1853table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854
1855static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001856string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001859 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001862 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001863 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 PyObject *result;
1865 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001868 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001870 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871
1872 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001873 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 tablen = PyString_GET_SIZE(tableobj);
1875 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001876 else if (tableobj == Py_None) {
1877 table = NULL;
1878 tablen = 256;
1879 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001880 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001881 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001882 parameter; instead a mapping to None will cause characters
1883 to be deleted. */
1884 if (delobj != NULL) {
1885 PyErr_SetString(PyExc_TypeError,
1886 "deletions are implemented differently for unicode");
1887 return NULL;
1888 }
1889 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1890 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001891 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893
Martin v. Löwis00b61272002-12-12 20:03:19 +00001894 if (tablen != 256) {
1895 PyErr_SetString(PyExc_ValueError,
1896 "translation table must be 256 characters long");
1897 return NULL;
1898 }
1899
Guido van Rossum4c08d552000-03-10 22:55:18 +00001900 if (delobj != NULL) {
1901 if (PyString_Check(delobj)) {
1902 del_table = PyString_AS_STRING(delobj);
1903 dellen = PyString_GET_SIZE(delobj);
1904 }
1905 else if (PyUnicode_Check(delobj)) {
1906 PyErr_SetString(PyExc_TypeError,
1907 "deletions are implemented differently for unicode");
1908 return NULL;
1909 }
1910 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1911 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001912 }
1913 else {
1914 del_table = NULL;
1915 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 }
1917
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001918 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919 result = PyString_FromStringAndSize((char *)NULL, inlen);
1920 if (result == NULL)
1921 return NULL;
1922 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001923 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
Guido van Rossumd8faa362007-04-27 19:54:29 +00001925 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001926 /* If no deletions are required, use faster code */
1927 for (i = inlen; --i >= 0; ) {
1928 c = Py_CHARMASK(*input++);
1929 if (Py_CHARMASK((*output++ = table[c])) != c)
1930 changed = 1;
1931 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001932 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 return result;
1934 Py_DECREF(result);
1935 Py_INCREF(input_obj);
1936 return input_obj;
1937 }
1938
Guido van Rossumd8faa362007-04-27 19:54:29 +00001939 if (table == NULL) {
1940 for (i = 0; i < 256; i++)
1941 trans_table[i] = Py_CHARMASK(i);
1942 } else {
1943 for (i = 0; i < 256; i++)
1944 trans_table[i] = Py_CHARMASK(table[i]);
1945 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946
1947 for (i = 0; i < dellen; i++)
1948 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1949
1950 for (i = inlen; --i >= 0; ) {
1951 c = Py_CHARMASK(*input++);
1952 if (trans_table[c] != -1)
1953 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1954 continue;
1955 changed = 1;
1956 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001957 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 Py_DECREF(result);
1959 Py_INCREF(input_obj);
1960 return input_obj;
1961 }
1962 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001963 if (inlen > 0)
1964 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965 return result;
1966}
1967
1968
/* Search directions.  REVERSE is parenthesized so the minus sign cannot
   bind to a neighbouring token at the point of use. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte c in the first target_len bytes of target; evaluates to a
   char * to the match or NULL.  Every argument is parenthesized so that
   expression arguments expand safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976
Thomas Wouters477c8d52006-05-27 19:21:47 +00001977/* String ops must return a string. */
1978/* If the object is subclass of string, create a copy */
1979Py_LOCAL(PyStringObject *)
1980return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001982 if (PyString_CheckExact(self)) {
1983 Py_INCREF(self);
1984 return self;
1985 }
1986 return (PyStringObject *)PyString_FromStringAndSize(
1987 PyString_AS_STRING(self),
1988 PyString_GET_SIZE(self));
1989}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990
Thomas Wouters477c8d52006-05-27 19:21:47 +00001991Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001993{
1994 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001995 const char *start=target;
1996 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
Thomas Wouters477c8d52006-05-27 19:21:47 +00001998 while ( (start=findchar(start, end-start, c)) != NULL ) {
1999 count++;
2000 if (count >= maxcount)
2001 break;
2002 start += 1;
2003 }
2004 return count;
2005}
2006
2007Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002008findstring(const char *target, Py_ssize_t target_len,
2009 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002010 Py_ssize_t start,
2011 Py_ssize_t end,
2012 int direction)
2013{
2014 if (start < 0) {
2015 start += target_len;
2016 if (start < 0)
2017 start = 0;
2018 }
2019 if (end > target_len) {
2020 end = target_len;
2021 } else if (end < 0) {
2022 end += target_len;
2023 if (end < 0)
2024 end = 0;
2025 }
2026
2027 /* zero-length substrings always match at the first attempt */
2028 if (pattern_len == 0)
2029 return (direction > 0) ? start : end;
2030
2031 end -= pattern_len;
2032
2033 if (direction < 0) {
2034 for (; end >= start; end--)
2035 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2036 return end;
2037 } else {
2038 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002039 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002040 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041 }
2042 return -1;
2043}
2044
Thomas Wouters477c8d52006-05-27 19:21:47 +00002045Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002046countstring(const char *target, Py_ssize_t target_len,
2047 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002048 Py_ssize_t start,
2049 Py_ssize_t end,
2050 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002052 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053
Thomas Wouters477c8d52006-05-27 19:21:47 +00002054 if (start < 0) {
2055 start += target_len;
2056 if (start < 0)
2057 start = 0;
2058 }
2059 if (end > target_len) {
2060 end = target_len;
2061 } else if (end < 0) {
2062 end += target_len;
2063 if (end < 0)
2064 end = 0;
2065 }
2066
2067 /* zero-length substrings match everywhere */
2068 if (pattern_len == 0 || maxcount == 0) {
2069 if (target_len+1 < maxcount)
2070 return target_len+1;
2071 return maxcount;
2072 }
2073
2074 end -= pattern_len;
2075 if (direction < 0) {
2076 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002077 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002078 count++;
2079 if (--maxcount <= 0) break;
2080 end -= pattern_len-1;
2081 }
2082 } else {
2083 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002084 if (Py_STRING_MATCH(target, start,
2085 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002086 count++;
2087 if (--maxcount <= 0)
2088 break;
2089 start += pattern_len-1;
2090 }
2091 }
2092 return count;
2093}
2094
2095
2096/* Algorithms for different cases of string replacement */
2097
2098/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2099Py_LOCAL(PyStringObject *)
2100replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002101 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002102 Py_ssize_t maxcount)
2103{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002104 char *self_s, *result_s;
2105 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002106 Py_ssize_t count, i, product;
2107 PyStringObject *result;
2108
2109 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002110
Thomas Wouters477c8d52006-05-27 19:21:47 +00002111 /* 1 at the end plus 1 after every character */
2112 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002113 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002114 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002115
Thomas Wouters477c8d52006-05-27 19:21:47 +00002116 /* Check for overflow */
2117 /* result_len = count * to_len + self_len; */
2118 product = count * to_len;
2119 if (product / to_len != count) {
2120 PyErr_SetString(PyExc_OverflowError,
2121 "replace string is too long");
2122 return NULL;
2123 }
2124 result_len = product + self_len;
2125 if (result_len < 0) {
2126 PyErr_SetString(PyExc_OverflowError,
2127 "replace string is too long");
2128 return NULL;
2129 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002130
Thomas Wouters477c8d52006-05-27 19:21:47 +00002131 if (! (result = (PyStringObject *)
2132 PyString_FromStringAndSize(NULL, result_len)) )
2133 return NULL;
2134
2135 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002136 result_s = PyString_AS_STRING(result);
2137
2138 /* TODO: special case single character, which doesn't need memcpy */
2139
2140 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002141 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142 result_s += to_len;
2143 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002144
Thomas Wouters477c8d52006-05-27 19:21:47 +00002145 for (i=0; i<count; i++) {
2146 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002147 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002148 result_s += to_len;
2149 }
2150
2151 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002152 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002153
2154 return result;
2155}
2156
2157/* Special case for deleting a single character */
2158/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2159Py_LOCAL(PyStringObject *)
2160replace_delete_single_character(PyStringObject *self,
2161 char from_c, Py_ssize_t maxcount)
2162{
2163 char *self_s, *result_s;
2164 char *start, *next, *end;
2165 Py_ssize_t self_len, result_len;
2166 Py_ssize_t count;
2167 PyStringObject *result;
2168
2169 self_len = PyString_GET_SIZE(self);
2170 self_s = PyString_AS_STRING(self);
2171
2172 count = countchar(self_s, self_len, from_c, maxcount);
2173 if (count == 0) {
2174 return return_self(self);
2175 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002176
Thomas Wouters477c8d52006-05-27 19:21:47 +00002177 result_len = self_len - count; /* from_len == 1 */
2178 assert(result_len>=0);
2179
2180 if ( (result = (PyStringObject *)
2181 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2182 return NULL;
2183 result_s = PyString_AS_STRING(result);
2184
2185 start = self_s;
2186 end = self_s + self_len;
2187 while (count-- > 0) {
2188 next = findchar(start, end-start, from_c);
2189 if (next == NULL)
2190 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002191 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002192 result_s += (next-start);
2193 start = next+1;
2194 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002195 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002196
Thomas Wouters477c8d52006-05-27 19:21:47 +00002197 return result;
2198}
2199
2200/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2201
2202Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002203replace_delete_substring(PyStringObject *self,
2204 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002205 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002206 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002207 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002208 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002209 Py_ssize_t count, offset;
2210 PyStringObject *result;
2211
2212 self_len = PyString_GET_SIZE(self);
2213 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002214
2215 count = countstring(self_s, self_len,
2216 from_s, from_len,
2217 0, self_len, 1,
2218 maxcount);
2219
2220 if (count == 0) {
2221 /* no matches */
2222 return return_self(self);
2223 }
2224
2225 result_len = self_len - (count * from_len);
2226 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002227
Thomas Wouters477c8d52006-05-27 19:21:47 +00002228 if ( (result = (PyStringObject *)
2229 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2230 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002231
Thomas Wouters477c8d52006-05-27 19:21:47 +00002232 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002233
Thomas Wouters477c8d52006-05-27 19:21:47 +00002234 start = self_s;
2235 end = self_s + self_len;
2236 while (count-- > 0) {
2237 offset = findstring(start, end-start,
2238 from_s, from_len,
2239 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 if (offset == -1)
2241 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002242 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002243
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002244 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002245
Thomas Wouters477c8d52006-05-27 19:21:47 +00002246 result_s += (next-start);
2247 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002249 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002250 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251}
2252
Thomas Wouters477c8d52006-05-27 19:21:47 +00002253/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2254Py_LOCAL(PyStringObject *)
2255replace_single_character_in_place(PyStringObject *self,
2256 char from_c, char to_c,
2257 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002259 char *self_s, *result_s, *start, *end, *next;
2260 Py_ssize_t self_len;
2261 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002262
Thomas Wouters477c8d52006-05-27 19:21:47 +00002263 /* The result string will be the same size */
2264 self_s = PyString_AS_STRING(self);
2265 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002266
Thomas Wouters477c8d52006-05-27 19:21:47 +00002267 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002268
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269 if (next == NULL) {
2270 /* No matches; return the original string */
2271 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002273
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 /* Need to make a new string */
2275 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2276 if (result == NULL)
2277 return NULL;
2278 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002279 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280
Thomas Wouters477c8d52006-05-27 19:21:47 +00002281 /* change everything in-place, starting with this one */
2282 start = result_s + (next-self_s);
2283 *start = to_c;
2284 start++;
2285 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002286
Thomas Wouters477c8d52006-05-27 19:21:47 +00002287 while (--maxcount > 0) {
2288 next = findchar(start, end-start, from_c);
2289 if (next == NULL)
2290 break;
2291 *next = to_c;
2292 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002293 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002294
Thomas Wouters477c8d52006-05-27 19:21:47 +00002295 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296}
2297
Thomas Wouters477c8d52006-05-27 19:21:47 +00002298/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2299Py_LOCAL(PyStringObject *)
2300replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002301 const char *from_s, Py_ssize_t from_len,
2302 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002303 Py_ssize_t maxcount)
2304{
2305 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002306 char *self_s;
2307 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002308 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002309
Thomas Wouters477c8d52006-05-27 19:21:47 +00002310 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002311
Thomas Wouters477c8d52006-05-27 19:21:47 +00002312 self_s = PyString_AS_STRING(self);
2313 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002314
Thomas Wouters477c8d52006-05-27 19:21:47 +00002315 offset = findstring(self_s, self_len,
2316 from_s, from_len,
2317 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002318 if (offset == -1) {
2319 /* No matches; return the original string */
2320 return return_self(self);
2321 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002322
Thomas Wouters477c8d52006-05-27 19:21:47 +00002323 /* Need to make a new string */
2324 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2325 if (result == NULL)
2326 return NULL;
2327 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002328 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002329
Thomas Wouters477c8d52006-05-27 19:21:47 +00002330 /* change everything in-place, starting with this one */
2331 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002332 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002333 start += from_len;
2334 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002335
Thomas Wouters477c8d52006-05-27 19:21:47 +00002336 while ( --maxcount > 0) {
2337 offset = findstring(start, end-start,
2338 from_s, from_len,
2339 0, end-start, FORWARD);
2340 if (offset==-1)
2341 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002342 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002343 start += offset+from_len;
2344 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002345
Thomas Wouters477c8d52006-05-27 19:21:47 +00002346 return result;
2347}
2348
2349/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2350Py_LOCAL(PyStringObject *)
2351replace_single_character(PyStringObject *self,
2352 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002353 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002354 Py_ssize_t maxcount)
2355{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002358 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002359 Py_ssize_t count, product;
2360 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002361
Thomas Wouters477c8d52006-05-27 19:21:47 +00002362 self_s = PyString_AS_STRING(self);
2363 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002364
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002366 if (count == 0) {
2367 /* no matches, return unchanged */
2368 return return_self(self);
2369 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002370
Thomas Wouters477c8d52006-05-27 19:21:47 +00002371 /* use the difference between current and new, hence the "-1" */
2372 /* result_len = self_len + count * (to_len-1) */
2373 product = count * (to_len-1);
2374 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002375 PyErr_SetString(PyExc_OverflowError,
2376 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002377 return NULL;
2378 }
2379 result_len = self_len + product;
2380 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002381 PyErr_SetString(PyExc_OverflowError,
2382 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002383 return NULL;
2384 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002385
Thomas Wouters477c8d52006-05-27 19:21:47 +00002386 if ( (result = (PyStringObject *)
2387 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2388 return NULL;
2389 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002390
Thomas Wouters477c8d52006-05-27 19:21:47 +00002391 start = self_s;
2392 end = self_s + self_len;
2393 while (count-- > 0) {
2394 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002395 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002396 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002397
Thomas Wouters477c8d52006-05-27 19:21:47 +00002398 if (next == start) {
2399 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002400 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002401 result_s += to_len;
2402 start += 1;
2403 } else {
2404 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002405 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002406 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002407 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002408 result_s += to_len;
2409 start = next+1;
2410 }
2411 }
2412 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002413 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002414
Thomas Wouters477c8d52006-05-27 19:21:47 +00002415 return result;
2416}
2417
2418/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2419Py_LOCAL(PyStringObject *)
2420replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002421 const char *from_s, Py_ssize_t from_len,
2422 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002423 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002424 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002425 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002426 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002427 Py_ssize_t count, offset, product;
2428 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002429
Thomas Wouters477c8d52006-05-27 19:21:47 +00002430 self_s = PyString_AS_STRING(self);
2431 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002432
Thomas Wouters477c8d52006-05-27 19:21:47 +00002433 count = countstring(self_s, self_len,
2434 from_s, from_len,
2435 0, self_len, FORWARD, maxcount);
2436 if (count == 0) {
2437 /* no matches, return unchanged */
2438 return return_self(self);
2439 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002440
Thomas Wouters477c8d52006-05-27 19:21:47 +00002441 /* Check for overflow */
2442 /* result_len = self_len + count * (to_len-from_len) */
2443 product = count * (to_len-from_len);
2444 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002445 PyErr_SetString(PyExc_OverflowError,
2446 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002447 return NULL;
2448 }
2449 result_len = self_len + product;
2450 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002451 PyErr_SetString(PyExc_OverflowError,
2452 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002453 return NULL;
2454 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002455
Thomas Wouters477c8d52006-05-27 19:21:47 +00002456 if ( (result = (PyStringObject *)
2457 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2458 return NULL;
2459 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002460
Thomas Wouters477c8d52006-05-27 19:21:47 +00002461 start = self_s;
2462 end = self_s + self_len;
2463 while (count-- > 0) {
2464 offset = findstring(start, end-start,
2465 from_s, from_len,
2466 0, end-start, FORWARD);
2467 if (offset == -1)
2468 break;
2469 next = start+offset;
2470 if (next == start) {
2471 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002472 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002473 result_s += to_len;
2474 start += from_len;
2475 } else {
2476 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002477 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002478 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002479 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002480 result_s += to_len;
2481 start = next+from_len;
2482 }
2483 }
2484 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002485 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002486
Thomas Wouters477c8d52006-05-27 19:21:47 +00002487 return result;
2488}
2489
2490
2491Py_LOCAL(PyStringObject *)
2492replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002493 const char *from_s, Py_ssize_t from_len,
2494 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002495 Py_ssize_t maxcount)
2496{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002497 if (maxcount < 0) {
2498 maxcount = PY_SSIZE_T_MAX;
2499 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2500 /* nothing to do; return the original string */
2501 return return_self(self);
2502 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002503
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 if (maxcount == 0 ||
2505 (from_len == 0 && to_len == 0)) {
2506 /* nothing to do; return the original string */
2507 return return_self(self);
2508 }
2509
2510 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002511
Thomas Wouters477c8d52006-05-27 19:21:47 +00002512 if (from_len == 0) {
2513 /* insert the 'to' string everywhere. */
2514 /* >>> "Python".replace("", ".") */
2515 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002516 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002517 }
2518
2519 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2520 /* point for an empty self string to generate a non-empty string */
2521 /* Special case so the remaining code always gets a non-empty string */
2522 if (PyString_GET_SIZE(self) == 0) {
2523 return return_self(self);
2524 }
2525
2526 if (to_len == 0) {
2527 /* delete all occurances of 'from' string */
2528 if (from_len == 1) {
2529 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002530 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002531 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002532 return replace_delete_substring(self, from_s,
2533 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002534 }
2535 }
2536
2537 /* Handle special case where both strings have the same length */
2538
2539 if (from_len == to_len) {
2540 if (from_len == 1) {
2541 return replace_single_character_in_place(
2542 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002543 from_s[0],
2544 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002545 maxcount);
2546 } else {
2547 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002548 self, from_s, from_len, to_s, to_len,
2549 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002550 }
2551 }
2552
2553 /* Otherwise use the more generic algorithms */
2554 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002555 return replace_single_character(self, from_s[0],
2556 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002557 } else {
2558 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002559 return replace_substring(self, from_s, from_len, to_s, to_len,
2560 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002561 }
2562}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002564PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002565"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002567Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002568old replaced by new. If the optional argument count is\n\
2569given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570
2571static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002572string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002573{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002574 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002575 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002576 const char *from_s, *to_s;
2577 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002578
Thomas Wouters477c8d52006-05-27 19:21:47 +00002579 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002581
Thomas Wouters477c8d52006-05-27 19:21:47 +00002582 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002583 from_s = PyString_AS_STRING(from);
2584 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002586 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002587 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002588 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002589 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 return NULL;
2591
Thomas Wouters477c8d52006-05-27 19:21:47 +00002592 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002593 to_s = PyString_AS_STRING(to);
2594 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002596 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002597 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002598 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002599 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600 return NULL;
2601
Thomas Wouters477c8d52006-05-27 19:21:47 +00002602 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002603 from_s, from_len,
2604 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
Thomas Wouters477c8d52006-05-27 19:21:47 +00002607/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002608
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002609/* Matches the end (direction >= 0) or start (direction < 0) of self
2610 * against substr, using the start and end arguments. Returns
2611 * -1 on error, 0 if not found and 1 if found.
2612 */
2613Py_LOCAL(int)
2614_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2615 Py_ssize_t end, int direction)
2616{
2617 Py_ssize_t len = PyString_GET_SIZE(self);
2618 Py_ssize_t slen;
2619 const char* sub;
2620 const char* str;
2621
2622 if (PyString_Check(substr)) {
2623 sub = PyString_AS_STRING(substr);
2624 slen = PyString_GET_SIZE(substr);
2625 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002626 else if (PyUnicode_Check(substr))
2627 return PyUnicode_Tailmatch((PyObject *)self,
2628 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002629 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2630 return -1;
2631 str = PyString_AS_STRING(self);
2632
2633 string_adjust_indices(&start, &end, len);
2634
2635 if (direction < 0) {
2636 /* startswith */
2637 if (start+slen > len)
2638 return 0;
2639 } else {
2640 /* endswith */
2641 if (end-start < slen || start > len)
2642 return 0;
2643
2644 if (end-slen > start)
2645 start = end - slen;
2646 }
2647 if (end-start >= slen)
2648 return ! memcmp(str+start, sub, slen);
2649 return 0;
2650}
2651
2652
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002653PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002654"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002655\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002656Return True if B starts with the specified prefix, False otherwise.\n\
2657With optional start, test B beginning at that position.\n\
2658With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002659prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660
2661static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002662string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002663{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002664 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002665 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002666 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002667 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002668
Guido van Rossumc6821402000-05-08 14:08:05 +00002669 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2670 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002672 if (PyTuple_Check(subobj)) {
2673 Py_ssize_t i;
2674 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2675 result = _string_tailmatch(self,
2676 PyTuple_GET_ITEM(subobj, i),
2677 start, end, -1);
2678 if (result == -1)
2679 return NULL;
2680 else if (result) {
2681 Py_RETURN_TRUE;
2682 }
2683 }
2684 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002685 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002686 result = _string_tailmatch(self, subobj, start, end, -1);
2687 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002688 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002689 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002690 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002691}
2692
2693
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002694PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002695"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002696\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002697Return True if B ends with the specified suffix, False otherwise.\n\
2698With optional start, test B beginning at that position.\n\
2699With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002700suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002701
2702static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002703string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002704{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002705 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002706 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002707 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002708 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002709
Guido van Rossumc6821402000-05-08 14:08:05 +00002710 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2711 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002713 if (PyTuple_Check(subobj)) {
2714 Py_ssize_t i;
2715 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2716 result = _string_tailmatch(self,
2717 PyTuple_GET_ITEM(subobj, i),
2718 start, end, +1);
2719 if (result == -1)
2720 return NULL;
2721 else if (result) {
2722 Py_RETURN_TRUE;
2723 }
2724 }
2725 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002726 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002727 result = _string_tailmatch(self, subobj, start, end, +1);
2728 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002729 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002730 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002731 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002732}
2733
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002736"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002737\n\
2738Decodes S using the codec registered for encoding. encoding defaults\n\
2739to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002740handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2741a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002742as well as any other name registerd with codecs.register_error that is\n\
2743able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002744
2745static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002746string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002747{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002748 const char *encoding = NULL;
2749 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002750
Guido van Rossum98297ee2007-11-06 21:34:58 +00002751 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2752 return NULL;
2753 if (encoding == NULL)
2754 encoding = PyUnicode_GetDefaultEncoding();
2755 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002756}
2757
2758
/* Docstring for the "fromhex" class method (see string_fromhex). */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002765
2766static int
2767hex_digit_to_int(Py_UNICODE c)
2768{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002769 if (c >= 128)
2770 return -1;
2771 if (ISDIGIT(c))
2772 return c - '0';
2773 else {
2774 if (ISUPPER(c))
2775 c = TOLOWER(c);
2776 if (c >= 'a' && c <= 'f')
2777 return c - 'a' + 10;
2778 }
2779 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002780}
2781
2782static PyObject *
2783string_fromhex(PyObject *cls, PyObject *args)
2784{
2785 PyObject *newstring, *hexobj;
2786 char *buf;
2787 Py_UNICODE *hex;
2788 Py_ssize_t hexlen, byteslen, i, j;
2789 int top, bot;
2790
2791 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2792 return NULL;
2793 assert(PyUnicode_Check(hexobj));
2794 hexlen = PyUnicode_GET_SIZE(hexobj);
2795 hex = PyUnicode_AS_UNICODE(hexobj);
2796 byteslen = hexlen/2; /* This overestimates if there are spaces */
2797 newstring = PyString_FromStringAndSize(NULL, byteslen);
2798 if (!newstring)
2799 return NULL;
2800 buf = PyString_AS_STRING(newstring);
2801 for (i = j = 0; i < hexlen; i += 2) {
2802 /* skip over spaces in the input */
2803 while (hex[i] == ' ')
2804 i++;
2805 if (i >= hexlen)
2806 break;
2807 top = hex_digit_to_int(hex[i]);
2808 bot = hex_digit_to_int(hex[i+1]);
2809 if (top == -1 || bot == -1) {
2810 PyErr_Format(PyExc_ValueError,
2811 "non-hexadecimal number found in "
2812 "fromhex() arg at position %zd", i);
2813 goto error;
2814 }
2815 buf[j++] = (top << 4) + bot;
2816 }
2817 if (_PyString_Resize(&newstring, j) < 0)
2818 goto error;
2819 return newstring;
2820
2821 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002822 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002823 return NULL;
2824}
2825
2826
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002827static PyObject *
2828string_getnewargs(PyStringObject *v)
2829{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002830 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002831}
2832
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002833
/* Method table for the bytes type; installed through the tp_methods slot
 * of PyString_Type.  Each entry gives the Python-level name, the C
 * implementation, the calling-convention flags and the docstring.
 * Entries are kept in alphabetical order and the table ends with a
 * {NULL, NULL} sentinel.
 */
static PyMethodDef
string_methods[] = {
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    /* fromhex is the only class method in the table (METH_CLASS). */
    {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL,     NULL}		     /* sentinel */
};
2892
Jeremy Hylton938ace62002-07-17 16:30:39 +00002893static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002894str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2895
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002896static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002897string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002898{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002899 PyObject *x = NULL, *it;
2900 PyObject *(*iternext)(PyObject *);
2901 const char *encoding = NULL;
2902 const char *errors = NULL;
2903 PyObject *new = NULL;
2904 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002905 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002906
Guido van Rossumae960af2001-08-30 03:11:59 +00002907 if (type != &PyString_Type)
2908 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002909 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002910 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002911 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002912 if (x == NULL) {
2913 if (encoding != NULL || errors != NULL) {
2914 PyErr_SetString(PyExc_TypeError,
2915 "encoding or errors without sequence "
2916 "argument");
2917 return NULL;
2918 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002919 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002920 }
2921
2922 if (PyUnicode_Check(x)) {
2923 /* Encode via the codec registry */
2924 if (encoding == NULL) {
2925 PyErr_SetString(PyExc_TypeError,
2926 "string argument without an encoding");
2927 return NULL;
2928 }
2929 new = PyCodec_Encode(x, encoding, errors);
2930 if (new == NULL)
2931 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002932 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002933 return new;
2934 }
2935
2936 /* If it's not unicode, there can't be encoding or errors */
2937 if (encoding != NULL || errors != NULL) {
2938 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002939 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002940 return NULL;
2941 }
2942
Guido van Rossum98297ee2007-11-06 21:34:58 +00002943 /* Is it an int? */
2944 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2945 if (size == -1 && PyErr_Occurred()) {
2946 PyErr_Clear();
2947 }
2948 else {
2949 if (size < 0) {
2950 PyErr_SetString(PyExc_ValueError, "negative count");
2951 return NULL;
2952 }
2953 new = PyString_FromStringAndSize(NULL, size);
2954 if (new == NULL) {
2955 return NULL;
2956 }
2957 if (size > 0) {
2958 memset(((PyStringObject*)new)->ob_sval, 0, size);
2959 }
2960 return new;
2961 }
2962
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002963 /* Use the modern buffer interface */
2964 if (PyObject_CheckBuffer(x)) {
2965 Py_buffer view;
2966 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2967 return NULL;
2968 new = PyString_FromStringAndSize(NULL, view.len);
2969 if (!new)
2970 goto fail;
2971 // XXX(brett.cannon): Better way to get to internal buffer?
2972 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2973 &view, view.len, 'C') < 0)
2974 goto fail;
2975 PyObject_ReleaseBuffer(x, &view);
2976 return new;
2977 fail:
2978 Py_XDECREF(new);
2979 PyObject_ReleaseBuffer(x, &view);
2980 return NULL;
2981 }
2982
Guido van Rossum98297ee2007-11-06 21:34:58 +00002983 /* For iterator version, create a string object and resize as needed */
2984 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2985 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2986 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002987 size = 64;
2988 new = PyString_FromStringAndSize(NULL, size);
2989 if (new == NULL)
2990 return NULL;
2991
2992 /* XXX Optimize this if the arguments is a list, tuple */
2993
2994 /* Get the iterator */
2995 it = PyObject_GetIter(x);
2996 if (it == NULL)
2997 goto error;
2998 // XXX(brett.cannon): No API for this?
2999 iternext = *Py_Type(it)->tp_iternext;
3000
3001 /* Run the iterator to exhaustion */
3002 for (i = 0; ; i++) {
3003 PyObject *item;
3004 Py_ssize_t value;
3005
3006 /* Get the next item */
3007 item = iternext(it);
3008 if (item == NULL) {
3009 if (PyErr_Occurred()) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003010 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
3011 goto error;
3012 PyErr_Clear();
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003013 }
3014 break;
3015 }
3016
3017 /* Interpret it as an int (__index__) */
3018 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3019 Py_DECREF(item);
3020 if (value == -1 && PyErr_Occurred())
3021 goto error;
3022
3023 /* Range check */
3024 if (value < 0 || value >= 256) {
3025 PyErr_SetString(PyExc_ValueError,
3026 "bytes must be in range(0, 256)");
3027 goto error;
3028 }
3029
3030 /* Append the byte */
3031 if (i >= size) {
3032 size *= 2;
3033 if (_PyString_Resize(&new, size) < 0)
3034 goto error;
3035 }
3036 ((PyStringObject *)new)->ob_sval[i] = value;
3037 }
3038 _PyString_Resize(&new, i);
3039
3040 /* Clean up and return success */
3041 Py_DECREF(it);
3042 return new;
3043
3044 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003045 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003046 Py_XDECREF(it);
3047 Py_DECREF(new);
3048 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049}
3050
Guido van Rossumae960af2001-08-30 03:11:59 +00003051static PyObject *
3052str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3053{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003054 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003055 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003056
3057 assert(PyType_IsSubtype(type, &PyString_Type));
3058 tmp = string_new(&PyString_Type, args, kwds);
3059 if (tmp == NULL)
3060 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003061 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003062 n = PyString_GET_SIZE(tmp);
3063 pnew = type->tp_alloc(type, n);
3064 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003065 Py_MEMCPY(PyString_AS_STRING(pnew),
3066 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003067 ((PyStringObject *)pnew)->ob_shash =
3068 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003069 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003070 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003071 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003072}
3073
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003074PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003075"bytes(iterable_of_ints) -> bytes.\n\
3076bytes(string, encoding[, errors]) -> bytes\n\
3077bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3078bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003079\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003080Construct an immutable array of bytes from:\n\
3081 - an iterable yielding integers in range(256)\n\
3082 - a text string encoded using the specified encoding\n\
3083 - a bytes or a buffer object\n\
3084 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003086static PyObject *str_iter(PyObject *seq);
3087
/* Type object for the bytes type (exposed to Python under the name
 * "bytes").  Slots are listed positionally; a 0 entry leaves the slot
 * unset.  Instances are variable-sized: sizeof(PyStringObject) plus one
 * char per item.
 */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_STRING_SUBCLASS,             /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    str_iter,                                   /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3130
3131void
Fred Drakeba096332000-07-09 07:04:36 +00003132PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003133{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003134 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003135 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003136 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003137 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003138 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003139 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003140 *pv = NULL;
3141 return;
3142 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003143 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003145 *pv = v;
3146}
3147
Guido van Rossum013142a1994-08-30 08:19:36 +00003148void
Fred Drakeba096332000-07-09 07:04:36 +00003149PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003150{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003151 PyString_Concat(pv, w);
3152 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003153}
3154
3155
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003156/* The following function breaks the notion that strings are immutable:
3157 it changes the size of a string. We get away with this only if there
3158 is only one module referencing the object. You can also think of it
3159 as creating a new string object and destroying the old one, only
3160 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003161 already be known to some other part of the code...
3162 Note that if there's not enough memory to resize the string, the original
3163 string object at *pv is deallocated, *pv is set to NULL, an "out of
3164 memory" exception is set, and -1 is returned. Else (on success) 0 is
3165 returned, and the value in *pv may or may not be the same as on input.
3166 As always, an extra byte is allocated for a trailing \0 byte (newsize
3167 does *not* include that), and a trailing \0 byte is stored.
3168*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003169
3170int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003171_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003172{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003173 register PyObject *v;
3174 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003175 v = *pv;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003176 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003177 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003178 Py_DECREF(v);
3179 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003180 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003181 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003182 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003183 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003184 _Py_ForgetReference(v);
3185 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003186 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003187 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003188 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003189 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003190 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003191 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003192 _Py_NewReference(*pv);
3193 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003194 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00003195 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003196 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003197 return 0;
3198}
Guido van Rossume5372401993-03-16 12:15:04 +00003199
Tim Peters38fd5b62000-09-21 05:43:11 +00003200/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3201 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3202 * Python's regular ints.
3203 * Return value: a new PyString*, or NULL if error.
3204 * . *pbuf is set to point into it,
3205 * *plen set to the # of chars following that.
3206 * Caller must decref it when done using pbuf.
3207 * The string starting at *pbuf is of the form
3208 * "-"? ("0x" | "0X")? digit+
3209 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003210 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003211 * There will be at least prec digits, zero-filled on the left if
3212 * necessary to get that many.
3213 * val object to be converted
3214 * flags bitmask of format flags; only F_ALT is looked at
3215 * prec minimum number of digits; 0-fill on left if needed
3216 * type a character in [duoxX]; u acts the same as d
3217 *
3218 * CAUTION: o, x and X conversions on regular ints can never
3219 * produce a '-' sign, but can for Python's unbounded ints.
3220 */
3221PyObject*
3222_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3223 char **pbuf, int *plen)
3224{
3225 PyObject *result = NULL;
3226 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003227 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003228 int sign; /* 1 if '-', else 0 */
3229 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003230 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003231 int numdigits; /* len == numnondigits + numdigits */
3232 int numnondigits = 0;
3233
Guido van Rossumddefaf32007-01-14 03:31:43 +00003234 /* Avoid exceeding SSIZE_T_MAX */
3235 if (prec > PY_SSIZE_T_MAX-3) {
3236 PyErr_SetString(PyExc_OverflowError,
3237 "precision too large");
3238 return NULL;
3239 }
3240
Tim Peters38fd5b62000-09-21 05:43:11 +00003241 switch (type) {
3242 case 'd':
3243 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003244 /* Special-case boolean: we want 0/1 */
3245 if (PyBool_Check(val))
3246 result = PyNumber_ToBase(val, 10);
3247 else
3248 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003249 break;
3250 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003251 numnondigits = 2;
3252 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003253 break;
3254 case 'x':
3255 case 'X':
3256 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003257 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003258 break;
3259 default:
3260 assert(!"'type' not in [duoxX]");
3261 }
3262 if (!result)
3263 return NULL;
3264
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003265 buf = PyString_AsString(result);
3266 if (!buf) {
3267 Py_DECREF(result);
3268 return NULL;
3269 }
3270
Tim Peters38fd5b62000-09-21 05:43:11 +00003271 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003272 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003273 PyErr_BadInternalCall();
3274 return NULL;
3275 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00003276 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003277 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003278 PyErr_SetString(PyExc_ValueError,
3279 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003280 return NULL;
3281 }
3282 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003283 if (buf[len-1] == 'L') {
3284 --len;
3285 buf[len] = '\0';
3286 }
3287 sign = buf[0] == '-';
3288 numnondigits += sign;
3289 numdigits = len - numnondigits;
3290 assert(numdigits > 0);
3291
Tim Petersfff53252001-04-12 18:38:48 +00003292 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003293 if (((flags & F_ALT) == 0 &&
3294 (type == 'o' || type == 'x' || type == 'X'))) {
3295 assert(buf[sign] == '0');
3296 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003297 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003298 numnondigits -= 2;
3299 buf += 2;
3300 len -= 2;
3301 if (sign)
3302 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003303 assert(len == numnondigits + numdigits);
3304 assert(numdigits > 0);
3305 }
3306
3307 /* Fill with leading zeroes to meet minimum width. */
3308 if (prec > numdigits) {
3309 PyObject *r1 = PyString_FromStringAndSize(NULL,
3310 numnondigits + prec);
3311 char *b1;
3312 if (!r1) {
3313 Py_DECREF(result);
3314 return NULL;
3315 }
3316 b1 = PyString_AS_STRING(r1);
3317 for (i = 0; i < numnondigits; ++i)
3318 *b1++ = *buf++;
3319 for (i = 0; i < prec - numdigits; i++)
3320 *b1++ = '0';
3321 for (i = 0; i < numdigits; i++)
3322 *b1++ = *buf++;
3323 *b1 = '\0';
3324 Py_DECREF(result);
3325 result = r1;
3326 buf = PyString_AS_STRING(result);
3327 len = numnondigits + prec;
3328 }
3329
3330 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003331 if (type == 'X') {
3332 /* Need to convert all lower case letters to upper case.
3333 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003334 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003335 if (buf[i] >= 'a' && buf[i] <= 'x')
3336 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003337 }
3338 *pbuf = buf;
3339 *plen = len;
3340 return result;
3341}
3342
Guido van Rossum8cf04761997-08-02 02:57:45 +00003343void
Fred Drakeba096332000-07-09 07:04:36 +00003344PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003345{
3346 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003347 for (i = 0; i < UCHAR_MAX + 1; i++) {
3348 Py_XDECREF(characters[i]);
3349 characters[i] = NULL;
3350 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003351 Py_XDECREF(nullstring);
3352 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003353}
Barry Warsawa903ad982001-02-23 16:40:48 +00003354
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003355/*********************** Str Iterator ****************************/
3356
/* State of an iterator over a string object (see striter_next). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to hand out */
    PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3362
3363static void
3364striter_dealloc(striterobject *it)
3365{
3366 _PyObject_GC_UNTRACK(it);
3367 Py_XDECREF(it->it_seq);
3368 PyObject_GC_Del(it);
3369}
3370
/* GC traversal for striterobject: the only object reference held by the
 * iterator is it_seq, so that is the only thing visited.  Returns 0
 * unless Py_VISIT returns early.
 */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3377
3378static PyObject *
3379striter_next(striterobject *it)
3380{
3381 PyStringObject *seq;
3382 PyObject *item;
3383
3384 assert(it != NULL);
3385 seq = it->it_seq;
3386 if (seq == NULL)
3387 return NULL;
3388 assert(PyString_Check(seq));
3389
3390 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +00003391 item = PyInt_FromLong(
3392 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003393 if (item != NULL)
3394 ++it->it_index;
3395 return item;
3396 }
3397
3398 Py_DECREF(seq);
3399 it->it_seq = NULL;
3400 return NULL;
3401}
3402
3403static PyObject *
3404striter_len(striterobject *it)
3405{
3406 Py_ssize_t len = 0;
3407 if (it->it_seq)
3408 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
3409 return PyInt_FromSsize_t(len);
3410}
3411
/* Docstring for the iterator's __length_hint__ method (striter_len). */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003414
/* Method table of the string iterator type; its only entry is
   __length_hint__.  Terminated by a {NULL, NULL} sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL,		NULL}		/* sentinel */
};
3420
/* Type object of the iterator created by str_iter().  It is a
 * fixed-size, GC-tracked object (Py_TPFLAGS_HAVE_GC) whose tp_iter
 * returns the iterator itself and whose tp_iternext is striter_next.
 */
PyTypeObject PyStringIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "striterator",                              /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,             /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3453
3454static PyObject *
3455str_iter(PyObject *seq)
3456{
3457 striterobject *it;
3458
3459 if (!PyString_Check(seq)) {
3460 PyErr_BadInternalCall();
3461 return NULL;
3462 }
3463 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3464 if (it == NULL)
3465 return NULL;
3466 it->it_index = 0;
3467 Py_INCREF(seq);
3468 it->it_seq = (PyStringObject *)seq;
3469 _PyObject_GC_TRACK(it);
3470 return (PyObject *)it;
3471}