blob: 370ac41f3d01c8889a7889f92b6822a1b55169b3 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossum98297ee2007-11-06 21:34:58 +00003/* XXX This is now called 'bytes' as far as the user is concerned.
4 Many docstrings and error messages need to be cleaned up. */
5
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00006#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00007
Guido van Rossumc0b618a1997-05-02 03:12:38 +00008#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Gregory P. Smith60d241f2007-10-16 06:31:30 +000010#include "bytes_methods.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000011
Guido van Rossum98297ee2007-11-06 21:34:58 +000012static Py_ssize_t
13_getbuffer(PyObject *obj, Py_buffer *view)
14{
Christian Heimes90aa7642007-12-19 02:45:37 +000015 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossum98297ee2007-11-06 21:34:58 +000016
17 if (buffer == NULL || buffer->bf_getbuffer == NULL)
18 {
19 PyErr_Format(PyExc_TypeError,
20 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000021 Py_TYPE(obj)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +000022 return -1;
23 }
24
25 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
26 return -1;
27 return view->len;
28}
29
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000030#ifdef COUNT_ALLOCS
31int null_strings, one_strings;
32#endif
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000035static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000064PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065{
Tim Peters9e897f42001-05-09 07:37:07 +000066 register PyStringObject *op;
Christian Heimes33fe8092008-04-13 13:53:33 +000067 if (size < 0) {
68 PyErr_SetString(PyExc_SystemError,
69 "Negative size passed to PyString_FromStringAndSize");
70 return NULL;
71 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 if (size == 0 && (op = nullstring) != NULL) {
73#ifdef COUNT_ALLOCS
74 null_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 if (size == 1 && str != NULL &&
80 (op = characters[*str & UCHAR_MAX]) != NULL)
81 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082#ifdef COUNT_ALLOCS
83 one_strings++;
84#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 Py_INCREF(op);
86 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000088
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000089 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000090 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000091 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000093 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 op->ob_shash = -1;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000095 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000096 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000098 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0) {
100 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000105 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000106 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107}
108
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000109PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000110PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111{
Tim Peters62de65b2001-12-06 20:29:32 +0000112 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000113 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000114
115 assert(str != NULL);
116 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000117 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 if (size == 0 && (op = nullstring) != NULL) {
123#ifdef COUNT_ALLOCS
124 null_strings++;
125#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
134 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000136
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000137 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000140 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000141 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 op->ob_shash = -1;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000143 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000144 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 if (size == 0) {
146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
149 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000159 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000165 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000166#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000167#ifdef __va_copy
168 __va_copy(count, vargs);
169#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000170 count = vargs;
171#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000172#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000177 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000178 ;
179
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000185 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000186
Barry Warsawdadace02001-08-24 18:32:06 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000194 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000195 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000210 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000220 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
227 expand:
228 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000231 string = PyString_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000234
Barry Warsawdadace02001-08-24 18:32:06 +0000235 s = PyString_AsString(string);
236
237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000240 Py_ssize_t i;
241 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000242 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000246 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000251 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000252 n = (n*10) + *f++ - '0';
253 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000254 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000255 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000259 longflag = 1;
260 ++f;
261 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000262 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000267
Barry Warsawdadace02001-08-24 18:32:06 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000275 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000289 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000307 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
Barry Warsawdadace02001-08-24 18:32:06 +0000320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000335 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000336 return string;
337}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338
Barry Warsawdadace02001-08-24 18:32:06 +0000339PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000340PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000341{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000342 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343 va_list vargs;
344
345#ifdef HAVE_STDARG_PROTOTYPES
346 va_start(vargs, format);
347#else
348 va_start(vargs);
349#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000350 ret = PyString_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000353}
354
Guido van Rossum234f9421993-06-17 12:35:49 +0000355static void
Fred Drakeba096332000-07-09 07:04:36 +0000356string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000357{
Christian Heimes90aa7642007-12-19 02:45:37 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000359}
360
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000367 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000368 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000369 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000370 const char *recode_encoding)
371{
372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000377 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000378 if (v == NULL)
379 return NULL;
380 p = buf = PyString_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000384 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000389 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
395
396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
401
402 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000403 assert(PyString_Check(w));
404 r = PyString_AS_STRING(w);
405 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000406 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000413 continue;
414 }
415 s++;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000416 if (s==end) {
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000437 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000438 c = (c<<3) + *s++ - '0';
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000439 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
Guido van Rossum8ce8a782007-11-01 19:42:39 +0000445 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000449 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000450 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000451 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000458 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000459 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000460 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000468 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000469 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000470 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000478 "decoding error; unknown "
479 "error handling code: %.400s",
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000480 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000481 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000482 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000483 default:
484 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000485 s--;
486 goto non_esc; /* an arbitry number of unescaped
487 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000488 }
489 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000490 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000491 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000492 return v;
493 failed:
494 Py_DECREF(v);
495 return NULL;
496}
497
Thomas Wouters477c8d52006-05-27 19:21:47 +0000498/* -------------------------------------------------------------------- */
499/* object api */
500
Martin v. Löwis18e16552006-02-15 17:27:45 +0000501Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000502PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503{
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000504 if (!PyString_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000509 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510}
511
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000512char *
Fred Drakeba096332000-07-09 07:04:36 +0000513PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514{
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000515 if (!PyString_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000518 return NULL;
519 }
Alexandre Vassalottiad433db2008-01-07 02:06:10 +0000520 return ((PyStringObject *)op)->ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521}
522
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523int
524PyString_AsStringAndSize(register PyObject *obj,
525 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000526 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527{
528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
532
533 if (!PyString_Check(obj)) {
Christian Heimesf3863112007-11-22 07:46:41 +0000534 PyErr_Format(PyExc_TypeError,
Christian Heimes90aa7642007-12-19 02:45:37 +0000535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
Christian Heimesf3863112007-11-22 07:46:41 +0000536 return -1;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000537 }
538
539 *s = PyString_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000542 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000543 PyErr_SetString(PyExc_TypeError,
Amaury Forgeot d'Arce1657272007-11-22 21:42:04 +0000544 "expected bytes with no null");
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000545 return -1;
546 }
547 return 0;
548}
549
Thomas Wouters477c8d52006-05-27 19:21:47 +0000550/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000551/* Methods */
552
Thomas Wouters477c8d52006-05-27 19:21:47 +0000553#define STRINGLIB_CHAR char
554
555#define STRINGLIB_CMP memcmp
556#define STRINGLIB_LEN PyString_GET_SIZE
557#define STRINGLIB_NEW PyString_FromStringAndSize
558#define STRINGLIB_STR PyString_AS_STRING
Guido van Rossum98297ee2007-11-06 21:34:58 +0000559/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000560
561#define STRINGLIB_EMPTY nullstring
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000562#define STRINGLIB_CHECK_EXACT PyString_CheckExact
563#define STRINGLIB_MUTABLE 0
Thomas Wouters477c8d52006-05-27 19:21:47 +0000564
565#include "stringlib/fastsearch.h"
566
567#include "stringlib/count.h"
568#include "stringlib/find.h"
569#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000570#include "stringlib/ctype.h"
571#include "stringlib/transmogrify.h"
Thomas Wouters477c8d52006-05-27 19:21:47 +0000572
573
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000574PyObject *
575PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000577 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000578 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimes90aa7642007-12-19 02:45:37 +0000579 Py_ssize_t length = Py_SIZE(op);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000580 size_t newsize = 3 + 4 * length;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000581 PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000582 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000583 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000584 "bytes object is too large to make repr");
Guido van Rossum58da9312007-11-10 23:39:45 +0000585 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000586 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000587 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000589 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
591 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000592 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000593 register Py_UNICODE c;
594 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 int quote;
596
Guido van Rossum98297ee2007-11-06 21:34:58 +0000597 /* Figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000598 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000599 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +0000600 char *test, *start;
601 start = PyString_AS_STRING(op);
602 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000603 if (*test == '"') {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000604 quote = '\''; /* back to single */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000605 goto decided;
606 }
607 else if (*test == '\'')
608 quote = '"';
609 }
610 decided:
611 ;
612 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000613
Guido van Rossum98297ee2007-11-06 21:34:58 +0000614 *p++ = 'b', *p++ = quote;
615 for (i = 0; i < length; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000616 /* There's at least enough room for a hex escape
617 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000618 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000622 else if (c == '\t')
623 *p++ = '\\', *p++ = 't';
624 else if (c == '\n')
625 *p++ = '\\', *p++ = 'n';
626 else if (c == '\r')
627 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000628 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000629 *p++ = '\\';
630 *p++ = 'x';
631 *p++ = hexdigits[(c & 0xf0) >> 4];
632 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000633 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000634 else
635 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000636 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000637 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000640 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
641 Py_DECREF(v);
642 return NULL;
643 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000644 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646}
647
Guido van Rossum189f1df2001-05-01 16:51:53 +0000648static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649string_repr(PyObject *op)
650{
651 return PyString_Repr(op, 1);
652}
653
654static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000655string_str(PyObject *op)
Guido van Rossum189f1df2001-05-01 16:51:53 +0000656{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000657 if (Py_BytesWarningFlag) {
658 if (PyErr_WarnEx(PyExc_BytesWarning,
659 "str() on a bytes instance", 1))
660 return NULL;
Tim Petersc9933152001-10-16 20:18:24 +0000661 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000662 return string_repr(op);
Guido van Rossum189f1df2001-05-01 16:51:53 +0000663}
664
Martin v. Löwis18e16552006-02-15 17:27:45 +0000665static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000666string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667{
Christian Heimes90aa7642007-12-19 02:45:37 +0000668 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669}
670
Guido van Rossum98297ee2007-11-06 21:34:58 +0000671/* This is also used by PyString_Concat() */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000672static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000673string_concat(PyObject *a, PyObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000675 Py_ssize_t size;
676 Py_buffer va, vb;
677 PyObject *result = NULL;
678
679 va.len = -1;
680 vb.len = -1;
681 if (_getbuffer(a, &va) < 0 ||
682 _getbuffer(b, &vb) < 0) {
683 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000684 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000685 goto done;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000686 }
Guido van Rossumae404e22007-10-26 21:46:44 +0000687
Guido van Rossum98297ee2007-11-06 21:34:58 +0000688 /* Optimize end cases */
689 if (va.len == 0 && PyString_CheckExact(b)) {
690 result = b;
691 Py_INCREF(result);
692 goto done;
693 }
694 if (vb.len == 0 && PyString_CheckExact(a)) {
695 result = a;
696 Py_INCREF(result);
697 goto done;
698 }
699
700 size = va.len + vb.len;
701 if (size < 0) {
702 PyErr_NoMemory();
703 goto done;
704 }
705
706 result = PyString_FromStringAndSize(NULL, size);
707 if (result != NULL) {
708 memcpy(PyString_AS_STRING(result), va.buf, va.len);
709 memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
710 }
711
712 done:
713 if (va.len != -1)
714 PyObject_ReleaseBuffer(a, &va);
715 if (vb.len != -1)
716 PyObject_ReleaseBuffer(b, &vb);
717 return result;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718}
719
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000721string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000723 register Py_ssize_t i;
724 register Py_ssize_t j;
725 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000726 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000727 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728 if (n < 0)
729 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000730 /* watch out for overflows: the size can overflow int,
731 * and the # of bytes needed can overflow size_t
732 */
Christian Heimes90aa7642007-12-19 02:45:37 +0000733 size = Py_SIZE(a) * n;
734 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +0000735 PyErr_SetString(PyExc_OverflowError,
736 "repeated string is too long");
737 return NULL;
738 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000739 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 Py_INCREF(a);
741 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 }
Tim Peterse7c05322004-06-27 17:24:49 +0000743 nbytes = (size_t)size;
744 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000745 PyErr_SetString(PyExc_OverflowError,
746 "repeated string is too long");
747 return NULL;
748 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000749 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000750 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000751 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000753 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000754 op->ob_shash = -1;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000755 op->ob_sval[size] = '\0';
Christian Heimes90aa7642007-12-19 02:45:37 +0000756 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000757 memset(op->ob_sval, a->ob_sval[0] , n);
758 return (PyObject *) op;
759 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000760 i = 0;
761 if (i < size) {
Christian Heimes90aa7642007-12-19 02:45:37 +0000762 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
763 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000764 }
765 while (i < size) {
766 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000767 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +0000768 i += j;
769 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000771}
772
Guido van Rossum9284a572000-03-07 15:53:43 +0000773static int
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774string_contains(PyObject *self, PyObject *arg)
Guido van Rossum9284a572000-03-07 15:53:43 +0000775{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
777 if (ival == -1 && PyErr_Occurred()) {
778 Py_buffer varg;
779 int pos;
780 PyErr_Clear();
781 if (_getbuffer(arg, &varg) < 0)
782 return -1;
Christian Heimes90aa7642007-12-19 02:45:37 +0000783 pos = stringlib_find(PyString_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784 varg.buf, varg.len, 0);
785 PyObject_ReleaseBuffer(arg, &varg);
786 return pos >= 0;
787 }
788 if (ival < 0 || ival >= 256) {
789 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
790 return -1;
791 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000792
Christian Heimes90aa7642007-12-19 02:45:37 +0000793 return memchr(PyString_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000794}
795
796static PyObject *
797string_item(PyStringObject *a, register Py_ssize_t i)
798{
Christian Heimes90aa7642007-12-19 02:45:37 +0000799 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000800 PyErr_SetString(PyExc_IndexError, "string index out of range");
801 return NULL;
802 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000803 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Guido van Rossum9284a572000-03-07 15:53:43 +0000804}
805
Martin v. Löwiscd353062001-05-24 16:56:35 +0000806static PyObject*
807string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000809 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000810 Py_ssize_t len_a, len_b;
811 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +0000812 PyObject *result;
813
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000814 /* Make sure both arguments are strings. */
815 if (!(PyString_Check(a) && PyString_Check(b))) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000816 if (Py_BytesWarningFlag && (op == Py_EQ) &&
817 (PyObject_IsInstance((PyObject*)a,
818 (PyObject*)&PyUnicode_Type) ||
819 PyObject_IsInstance((PyObject*)b,
820 (PyObject*)&PyUnicode_Type))) {
821 if (PyErr_WarnEx(PyExc_BytesWarning,
822 "Comparsion between bytes and string", 1))
823 return NULL;
824 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000825 result = Py_NotImplemented;
826 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000827 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000828 if (a == b) {
829 switch (op) {
830 case Py_EQ:case Py_LE:case Py_GE:
831 result = Py_True;
832 goto out;
833 case Py_NE:case Py_LT:case Py_GT:
834 result = Py_False;
835 goto out;
836 }
837 }
838 if (op == Py_EQ) {
839 /* Supporting Py_NE here as well does not save
840 much time, since Py_NE is rarely used. */
Christian Heimes90aa7642007-12-19 02:45:37 +0000841 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +0000842 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimes90aa7642007-12-19 02:45:37 +0000843 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000844 result = Py_True;
845 } else {
846 result = Py_False;
847 }
848 goto out;
849 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000850 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +0000851 min_len = (len_a < len_b) ? len_a : len_b;
852 if (min_len > 0) {
853 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
854 if (c==0)
855 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +0000856 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +0000857 c = 0;
858 if (c == 0)
859 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
860 switch (op) {
861 case Py_LT: c = c < 0; break;
862 case Py_LE: c = c <= 0; break;
863 case Py_EQ: assert(0); break; /* unreachable */
864 case Py_NE: c = c != 0; break;
865 case Py_GT: c = c > 0; break;
866 case Py_GE: c = c >= 0; break;
867 default:
868 result = Py_NotImplemented;
869 goto out;
870 }
871 result = c ? Py_True : Py_False;
872 out:
873 Py_INCREF(result);
874 return result;
875}
876
Guido van Rossum9bfef441993-03-29 10:43:31 +0000877static long
Fred Drakeba096332000-07-09 07:04:36 +0000878string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000879{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000880 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000881 register unsigned char *p;
882 register long x;
883
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000884 if (a->ob_shash != -1)
885 return a->ob_shash;
Christian Heimes90aa7642007-12-19 02:45:37 +0000886 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000887 p = (unsigned char *) a->ob_sval;
888 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000889 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000890 x = (1000003*x) ^ *p++;
Christian Heimes90aa7642007-12-19 02:45:37 +0000891 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892 if (x == -1)
893 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000894 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000895 return x;
896}
897
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000898static PyObject*
899string_subscript(PyStringObject* self, PyObject* item)
900{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000901 if (PyIndex_Check(item)) {
902 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000903 if (i == -1 && PyErr_Occurred())
904 return NULL;
905 if (i < 0)
906 i += PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000907 if (i < 0 || i >= PyString_GET_SIZE(self)) {
Guido van Rossum75a902d2007-10-19 22:06:24 +0000908 PyErr_SetString(PyExc_IndexError,
909 "string index out of range");
910 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000911 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000912 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000913 }
914 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000915 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000916 char* source_buf;
917 char* result_buf;
918 PyObject* result;
919
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000920 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000921 PyString_GET_SIZE(self),
922 &start, &stop, &step, &slicelength) < 0) {
923 return NULL;
924 }
925
926 if (slicelength <= 0) {
927 return PyString_FromStringAndSize("", 0);
928 }
Thomas Woutersed03b412007-08-28 21:37:11 +0000929 else if (start == 0 && step == 1 &&
930 slicelength == PyString_GET_SIZE(self) &&
931 PyString_CheckExact(self)) {
932 Py_INCREF(self);
933 return (PyObject *)self;
934 }
935 else if (step == 1) {
936 return PyString_FromStringAndSize(
937 PyString_AS_STRING(self) + start,
938 slicelength);
939 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000940 else {
941 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000942 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +0000943 if (result_buf == NULL)
944 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000945
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000946 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000947 cur += step, i++) {
948 result_buf[i] = source_buf[cur];
949 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000950
951 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000952 slicelength);
953 PyMem_Free(result_buf);
954 return result;
955 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000956 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000957 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000958 PyErr_Format(PyExc_TypeError,
959 "string indices must be integers, not %.200s",
Christian Heimes90aa7642007-12-19 02:45:37 +0000960 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000961 return NULL;
962 }
963}
964
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000965static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000966string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000967{
Christian Heimes90aa7642007-12-19 02:45:37 +0000968 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000969 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +0000970}
971
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000973 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000974 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +0000975 (ssizeargfunc)string_repeat, /*sq_repeat*/
Guido van Rossum98297ee2007-11-06 21:34:58 +0000976 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +0000977 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000978 0, /*sq_ass_item*/
979 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000980 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981};
982
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000983static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000984 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000985 (binaryfunc)string_subscript,
986 0,
987};
988
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000989static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000990 (getbufferproc)string_buffer_getbuffer,
Guido van Rossum98297ee2007-11-06 21:34:58 +0000991 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000992};
993
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000994
/* Selectors for the strip family; they index stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)


/* True if `length` bytes of `pattern` occur in `target` at `offset`.
   First and last bytes are compared inline before calling memcmp().
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to the result list.
   Relies on these names being in scope at the point of use: `str`
   (scratch PyObject *), `list`, `count`, and an `onError` label that is
   jumped to on failure.  The first MAX_PREALLOC items are stored into
   the preallocated slots; later ones go through PyList_Append(). */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers: `i` is advanced (or moved back) in place, so it must
   be a modifiable variable. */
#define SKIP_SPACE(s, i, len)    { while (i<(len) && ISSPACE((s)[i])) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<(len) && !ISSPACE((s)[i])) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && ISSPACE((s)[i])) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !ISSPACE((s)[i])) i--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001049
1050Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001051split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052{
Christian Heimes895627f2007-12-08 17:28:33 +00001053 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001054 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001055 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001056 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001057
1058 if (list == NULL)
1059 return NULL;
1060
Thomas Wouters477c8d52006-05-27 19:21:47 +00001061 i = j = 0;
1062
1063 while (maxsplit-- > 0) {
1064 SKIP_SPACE(s, i, len);
1065 if (i==len) break;
1066 j = i; i++;
1067 SKIP_NONSPACE(s, i, len);
Christian Heimes895627f2007-12-08 17:28:33 +00001068 if (j == 0 && i == len && PyString_CheckExact(self)) {
1069 /* No whitespace in self, so just use it as list[0] */
1070 Py_INCREF(self);
1071 PyList_SET_ITEM(list, 0, (PyObject *)self);
1072 count++;
1073 break;
1074 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001077
1078 if (i < len) {
1079 /* Only occurs when maxsplit was reached */
1080 /* Skip any remaining whitespace and copy to end of string */
1081 SKIP_SPACE(s, i, len);
1082 if (i != len)
1083 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001084 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001085 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001087 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 Py_DECREF(list);
1089 return NULL;
1090}
1091
Thomas Wouters477c8d52006-05-27 19:21:47 +00001092Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001093split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001094{
Christian Heimes895627f2007-12-08 17:28:33 +00001095 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001096 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001097 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001099
1100 if (list == NULL)
1101 return NULL;
1102
Thomas Wouters477c8d52006-05-27 19:21:47 +00001103 i = j = 0;
1104 while ((j < len) && (maxcount-- > 0)) {
1105 for(; j<len; j++) {
1106 /* I found that using memchr makes no difference */
1107 if (s[j] == ch) {
1108 SPLIT_ADD(s, i, j);
1109 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001110 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001111 }
1112 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001113 }
Christian Heimes895627f2007-12-08 17:28:33 +00001114 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1115 /* ch not in self, so just use self as list[0] */
1116 Py_INCREF(self);
1117 PyList_SET_ITEM(list, 0, (PyObject *)self);
1118 count++;
1119 }
1120 else if (i <= len) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001121 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001122 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001124 return list;
1125
1126 onError:
1127 Py_DECREF(list);
1128 return NULL;
1129}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001131PyDoc_STRVAR(split__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001132"B.split([sep[, maxsplit]]) -> list of bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001133\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001134Return a list of the sections in B, using sep as the delimiter.\n\
1135If sep is not given, B is split on ASCII whitespace characters\n\
1136(space, tab, return, newline, formfeed, vertical tab).\n\
1137If maxsplit is given, at most maxsplit splits are done.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138
1139static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001140string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001142 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001143 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001144 const char *s = PyString_AS_STRING(self), *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001145 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001146 PyObject *list, *str, *subobj = Py_None;
1147#ifdef USE_FAST
1148 Py_ssize_t pos;
1149#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001151 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001154 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001155 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001156 return split_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001157 if (_getbuffer(subobj, &vsub) < 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001159 sub = vsub.buf;
1160 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001161
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001162 if (n == 0) {
1163 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001164 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165 return NULL;
1166 }
Christian Heimes895627f2007-12-08 17:28:33 +00001167 else if (n == 1)
1168 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169
Thomas Wouters477c8d52006-05-27 19:21:47 +00001170 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001171 if (list == NULL) {
1172 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001173 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001174 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001175
Thomas Wouters477c8d52006-05-27 19:21:47 +00001176#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001178 while (maxsplit-- > 0) {
1179 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1180 if (pos < 0)
1181 break;
1182 j = i+pos;
1183 SPLIT_ADD(s, i, j);
1184 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001186#else
1187 i = j = 0;
1188 while ((j+n <= len) && (maxsplit-- > 0)) {
1189 for (; j+n <= len; j++) {
1190 if (Py_STRING_MATCH(s, j, sub, n)) {
1191 SPLIT_ADD(s, i, j);
1192 i = j = j + n;
1193 break;
1194 }
1195 }
1196 }
1197#endif
1198 SPLIT_ADD(s, i, len);
1199 FIX_PREALLOC_SIZE(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001200 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001201 return list;
1202
Thomas Wouters477c8d52006-05-27 19:21:47 +00001203 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001204 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001205 PyObject_ReleaseBuffer(subobj, &vsub);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206 return NULL;
1207}
1208
Thomas Wouters477c8d52006-05-27 19:21:47 +00001209PyDoc_STRVAR(partition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001210"B.partition(sep) -> (head, sep, tail)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001211\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001212Searches for the separator sep in B, and returns the part before it,\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001213the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001214found, returns B and two empty bytes objects.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001215
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001216static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001217string_partition(PyStringObject *self, PyObject *sep_obj)
1218{
1219 const char *sep;
1220 Py_ssize_t sep_len;
1221
1222 if (PyString_Check(sep_obj)) {
1223 sep = PyString_AS_STRING(sep_obj);
1224 sep_len = PyString_GET_SIZE(sep_obj);
1225 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001226 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1227 return NULL;
1228
1229 return stringlib_partition(
1230 (PyObject*) self,
1231 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1232 sep_obj, sep, sep_len
1233 );
1234}
1235
1236PyDoc_STRVAR(rpartition__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001237"B.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001238\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001239Searches for the separator sep in B, starting at the end of B,\n\
1240and returns the part before it, the separator itself, and the\n\
1241part after it. If the separator is not found, returns two empty\n\
1242bytes objects and B.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001243
1244static PyObject *
1245string_rpartition(PyStringObject *self, PyObject *sep_obj)
1246{
1247 const char *sep;
1248 Py_ssize_t sep_len;
1249
1250 if (PyString_Check(sep_obj)) {
1251 sep = PyString_AS_STRING(sep_obj);
1252 sep_len = PyString_GET_SIZE(sep_obj);
1253 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001254 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1255 return NULL;
1256
1257 return stringlib_rpartition(
1258 (PyObject*) self,
1259 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1260 sep_obj, sep, sep_len
1261 );
1262}
1263
1264Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001265rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001266{
Christian Heimes895627f2007-12-08 17:28:33 +00001267 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001268 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001269 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001270 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001271
1272 if (list == NULL)
1273 return NULL;
1274
Thomas Wouters477c8d52006-05-27 19:21:47 +00001275 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001276
Thomas Wouters477c8d52006-05-27 19:21:47 +00001277 while (maxsplit-- > 0) {
1278 RSKIP_SPACE(s, i);
1279 if (i<0) break;
1280 j = i; i--;
1281 RSKIP_NONSPACE(s, i);
Christian Heimes895627f2007-12-08 17:28:33 +00001282 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1283 /* No whitespace in self, so just use it as list[0] */
1284 Py_INCREF(self);
1285 PyList_SET_ITEM(list, 0, (PyObject *)self);
1286 count++;
1287 break;
1288 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001289 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001290 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291 if (i >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001292 /* Only occurs when maxsplit was reached. Skip any remaining
1293 whitespace and copy to beginning of string. */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001294 RSKIP_SPACE(s, i);
1295 if (i >= 0)
1296 SPLIT_ADD(s, 0, i + 1);
1297
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001298 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001299 FIX_PREALLOC_SIZE(list);
1300 if (PyList_Reverse(list) < 0)
1301 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001302 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001303 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001304 Py_DECREF(list);
1305 return NULL;
1306}
1307
Thomas Wouters477c8d52006-05-27 19:21:47 +00001308Py_LOCAL_INLINE(PyObject *)
Christian Heimes895627f2007-12-08 17:28:33 +00001309rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001310{
Christian Heimes895627f2007-12-08 17:28:33 +00001311 const char *s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001312 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001313 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001314 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001315
1316 if (list == NULL)
1317 return NULL;
1318
Thomas Wouters477c8d52006-05-27 19:21:47 +00001319 i = j = len - 1;
1320 while ((i >= 0) && (maxcount-- > 0)) {
1321 for (; i >= 0; i--) {
1322 if (s[i] == ch) {
1323 SPLIT_ADD(s, i + 1, j + 1);
1324 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001325 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001326 }
1327 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001328 }
Christian Heimes895627f2007-12-08 17:28:33 +00001329 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1330 /* ch not in self, so just use self as list[0] */
1331 Py_INCREF(self);
1332 PyList_SET_ITEM(list, 0, (PyObject *)self);
1333 count++;
1334 }
1335 else if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001336 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001338 FIX_PREALLOC_SIZE(list);
1339 if (PyList_Reverse(list) < 0)
1340 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001341 return list;
1342
1343 onError:
1344 Py_DECREF(list);
1345 return NULL;
1346}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001347
1348PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001349"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001350\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001351Return a list of the sections in B, using sep as the delimiter,\n\
1352starting at the end of B and working to the front.\n\
1353If sep is not given, B is split on ASCII whitespace characters\n\
1354(space, tab, return, newline, formfeed, vertical tab).\n\
1355If maxsplit is given, at most maxsplit splits are done.");
1356
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001357
1358static PyObject *
1359string_rsplit(PyStringObject *self, PyObject *args)
1360{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001361 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001362 Py_ssize_t maxsplit = -1, count=0;
Christian Heimes895627f2007-12-08 17:28:33 +00001363 const char *s, *sub;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364 Py_buffer vsub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001366
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001367 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001368 return NULL;
1369 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001370 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001371 if (subobj == Py_None)
Christian Heimes895627f2007-12-08 17:28:33 +00001372 return rsplit_whitespace(self, len, maxsplit);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001373 if (_getbuffer(subobj, &vsub) < 0)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001375 sub = vsub.buf;
1376 n = vsub.len;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001378 if (n == 0) {
1379 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum98297ee2007-11-06 21:34:58 +00001380 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001381 return NULL;
1382 }
Christian Heimes895627f2007-12-08 17:28:33 +00001383 else if (n == 1)
1384 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001385
Thomas Wouters477c8d52006-05-27 19:21:47 +00001386 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum98297ee2007-11-06 21:34:58 +00001387 if (list == NULL) {
1388 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001389 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001390 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001391
1392 j = len;
1393 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001394
Christian Heimes895627f2007-12-08 17:28:33 +00001395 s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001396 while ( (i >= 0) && (maxsplit-- > 0) ) {
1397 for (; i>=0; i--) {
1398 if (Py_STRING_MATCH(s, i, sub, n)) {
1399 SPLIT_ADD(s, i + n, j);
1400 j = i;
1401 i -= n;
1402 break;
1403 }
1404 }
1405 }
1406 SPLIT_ADD(s, 0, j);
1407 FIX_PREALLOC_SIZE(list);
1408 if (PyList_Reverse(list) < 0)
1409 goto onError;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001410 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001411 return list;
1412
Thomas Wouters477c8d52006-05-27 19:21:47 +00001413onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001414 Py_DECREF(list);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001415 PyObject_ReleaseBuffer(subobj, &vsub);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001416 return NULL;
1417}
1418
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001419#undef SPLIT_ADD
1420#undef MAX_PREALLOC
1421#undef PREALLOC_SIZE
1422
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001424PyDoc_STRVAR(join__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001425"B.join(iterable_of_bytes) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001427Concatenates any number of bytes objects, with B in between each pair.\n\
1428Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429
1430static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00001431string_join(PyObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432{
1433 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001434 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001437 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001438 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001439 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001440 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441
Tim Peters19fe14e2001-01-19 03:03:47 +00001442 seq = PySequence_Fast(orig, "");
1443 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 return NULL;
1445 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001446
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001447 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001448 if (seqlen == 0) {
1449 Py_DECREF(seq);
1450 return PyString_FromString("");
1451 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001453 item = PySequence_Fast_GET_ITEM(seq, 0);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001454 if (PyString_CheckExact(item)) {
Raymond Hettinger674f2412004-08-23 23:23:54 +00001455 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001456 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001457 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001460
Raymond Hettinger674f2412004-08-23 23:23:54 +00001461 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001462 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001463 * Do a pre-pass to figure out the total amount of space we'll
Guido van Rossum98297ee2007-11-06 21:34:58 +00001464 * need (sz), and see whether all argument are bytes.
Tim Peters19fe14e2001-01-19 03:03:47 +00001465 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001466 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001468 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001469 item = PySequence_Fast_GET_ITEM(seq, i);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001470 if (!PyString_Check(item) && !PyBytes_Check(item)) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001471 PyErr_Format(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001472 "sequence item %zd: expected bytes,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001473 " %.80s found",
Christian Heimes90aa7642007-12-19 02:45:37 +00001474 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 Py_DECREF(seq);
1476 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001477 }
Christian Heimes90aa7642007-12-19 02:45:37 +00001478 sz += Py_SIZE(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001479 if (i != 0)
1480 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001481 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001483 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001484 Py_DECREF(seq);
1485 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001487 }
1488
1489 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001490 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001491 if (res == NULL) {
1492 Py_DECREF(seq);
1493 return NULL;
1494 }
1495
1496 /* Catenate everything. */
Guido van Rossum98297ee2007-11-06 21:34:58 +00001497 /* I'm not worried about a PyBytes item growing because there's
1498 nowhere in this function where we release the GIL. */
Tim Peters19fe14e2001-01-19 03:03:47 +00001499 p = PyString_AS_STRING(res);
1500 for (i = 0; i < seqlen; ++i) {
1501 size_t n;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001502 char *q;
1503 if (i) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001504 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001505 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001506 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00001507 item = PySequence_Fast_GET_ITEM(seq, i);
Christian Heimes90aa7642007-12-19 02:45:37 +00001508 n = Py_SIZE(item);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001509 if (PyString_Check(item))
1510 q = PyString_AS_STRING(item);
1511 else
1512 q = PyBytes_AS_STRING(item);
1513 Py_MEMCPY(p, q, n);
1514 p += n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001516
Jeremy Hylton49048292000-07-11 03:28:17 +00001517 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519}
1520
Tim Peters52e155e2001-06-16 05:42:57 +00001521PyObject *
1522_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001523{
Tim Petersa7259592001-06-16 05:11:17 +00001524 assert(sep != NULL && PyString_Check(sep));
1525 assert(x != NULL);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001526 return string_join(sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001527}
1528
Thomas Wouters477c8d52006-05-27 19:21:47 +00001529Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001530string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001531{
1532 if (*end > len)
1533 *end = len;
1534 else if (*end < 0)
1535 *end += len;
1536 if (*end < 0)
1537 *end = 0;
1538 if (*start < 0)
1539 *start += len;
1540 if (*start < 0)
1541 *start = 0;
1542}
1543
Thomas Wouters477c8d52006-05-27 19:21:47 +00001544Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001545string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548 const char *sub;
1549 Py_ssize_t sub_len;
1550 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes9cd17752007-11-18 19:35:23 +00001551 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552
Christian Heimes9cd17752007-11-18 19:35:23 +00001553 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1554 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return -2;
Christian Heimes9cd17752007-11-18 19:35:23 +00001556 /* To support None in "start" and "end" arguments, meaning
1557 the same as if they were not passed.
1558 */
1559 if (obj_start != Py_None)
1560 if (!_PyEval_SliceIndex(obj_start, &start))
1561 return -2;
1562 if (obj_end != Py_None)
1563 if (!_PyEval_SliceIndex(obj_end, &end))
1564 return -2;
1565
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 if (PyString_Check(subobj)) {
1567 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001568 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001570 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001571 /* XXX - the "expected a character buffer object" is pretty
1572 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573 return -2;
1574
Thomas Wouters477c8d52006-05-27 19:21:47 +00001575 if (dir > 0)
1576 return stringlib_find_slice(
1577 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1578 sub, sub_len, start, end);
1579 else
1580 return stringlib_rfind_slice(
1581 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1582 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583}
1584
1585
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001586PyDoc_STRVAR(find__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001587"B.find(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588\n\
1589Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001590such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591arguments start and end are interpreted as in slice notation.\n\
1592\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594
1595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001596string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001598 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 if (result == -2)
1600 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001601 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602}
1603
1604
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001605PyDoc_STRVAR(index__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001606"B.index(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001608Like B.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609
1610static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001611string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001613 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 if (result == -2)
1615 return NULL;
1616 if (result == -1) {
1617 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001618 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 return NULL;
1620 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001621 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622}
1623
1624
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001625PyDoc_STRVAR(rfind__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001626"B.rfind(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001628Return the highest index in B where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00001629such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630arguments start and end are interpreted as in slice notation.\n\
1631\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633
1634static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001635string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001637 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 if (result == -2)
1639 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001640 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641}
1642
1643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001644PyDoc_STRVAR(rindex__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001645"B.rindex(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001647Like B.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648
1649static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001650string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001652 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653 if (result == -2)
1654 return NULL;
1655 if (result == -1) {
1656 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001657 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 return NULL;
1659 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001660 return PyLong_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661}
1662
1663
Thomas Wouters477c8d52006-05-27 19:21:47 +00001664Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001665do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1666{
Guido van Rossum98297ee2007-11-06 21:34:58 +00001667 Py_buffer vsep;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001668 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001669 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum98297ee2007-11-06 21:34:58 +00001670 char *sep;
1671 Py_ssize_t seplen;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001673
Guido van Rossum98297ee2007-11-06 21:34:58 +00001674 if (_getbuffer(sepobj, &vsep) < 0)
1675 return NULL;
1676 sep = vsep.buf;
1677 seplen = vsep.len;
1678
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001679 i = 0;
1680 if (striptype != RIGHTSTRIP) {
1681 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1682 i++;
1683 }
1684 }
1685
1686 j = len;
1687 if (striptype != LEFTSTRIP) {
1688 do {
1689 j--;
1690 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1691 j++;
1692 }
1693
Guido van Rossum98297ee2007-11-06 21:34:58 +00001694 PyObject_ReleaseBuffer(sepobj, &vsep);
1695
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001696 if (i == 0 && j == len && PyString_CheckExact(self)) {
1697 Py_INCREF(self);
1698 return (PyObject*)self;
1699 }
1700 else
1701 return PyString_FromStringAndSize(s+i, j-i);
1702}
1703
1704
Thomas Wouters477c8d52006-05-27 19:21:47 +00001705Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001706do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707{
1708 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001709 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 i = 0;
1712 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001713 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 i++;
1715 }
1716 }
1717
1718 j = len;
1719 if (striptype != LEFTSTRIP) {
1720 do {
1721 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00001722 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 j++;
1724 }
1725
Tim Peters8fa5dd02001-09-12 02:18:30 +00001726 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 Py_INCREF(self);
1728 return (PyObject*)self;
1729 }
1730 else
1731 return PyString_FromStringAndSize(s+i, j-i);
1732}
1733
1734
Thomas Wouters477c8d52006-05-27 19:21:47 +00001735Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001736do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1737{
1738 PyObject *sep = NULL;
1739
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001740 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001741 return NULL;
1742
1743 if (sep != NULL && sep != Py_None) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00001744 return do_xstrip(self, striptype, sep);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001745 }
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001746 return do_strip(self, striptype);
1747}
1748
1749
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001750PyDoc_STRVAR(strip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001751"B.strip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001753Strip leading and trailing bytes contained in the argument.\n\
1754If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001758 if (PyTuple_GET_SIZE(args) == 0)
1759 return do_strip(self, BOTHSTRIP); /* Common case */
1760 else
1761 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762}
1763
1764
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001765PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001766"B.lstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001768Strip leading bytes contained in the argument.\n\
1769If the argument is omitted, strip leading ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001771string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001773 if (PyTuple_GET_SIZE(args) == 0)
1774 return do_strip(self, LEFTSTRIP); /* Common case */
1775 else
1776 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777}
1778
1779
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001780PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001781"B.rstrip([bytes]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001783Strip trailing bytes contained in the argument.\n\
1784If the argument is omitted, strip trailing ASCII whitespace.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001786string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001788 if (PyTuple_GET_SIZE(args) == 0)
1789 return do_strip(self, RIGHTSTRIP); /* Common case */
1790 else
1791 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792}
1793
1794
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001796"B.count(sub [,start [,end]]) -> int\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001798Return the number of non-overlapping occurrences of substring sub in\n\
1799string S[start:end]. Optional arguments start and end are interpreted\n\
1800as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801
1802static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001803string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001805 PyObject *sub_obj;
1806 const char *str = PyString_AS_STRING(self), *sub;
1807 Py_ssize_t sub_len;
1808 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809
Thomas Wouters477c8d52006-05-27 19:21:47 +00001810 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1811 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001813
Thomas Wouters477c8d52006-05-27 19:21:47 +00001814 if (PyString_Check(sub_obj)) {
1815 sub = PyString_AS_STRING(sub_obj);
1816 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001818 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001819 return NULL;
1820
Thomas Wouters477c8d52006-05-27 19:21:47 +00001821 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001822
Christian Heimes217cfd12007-12-02 14:31:20 +00001823 return PyLong_FromSsize_t(
Thomas Wouters477c8d52006-05-27 19:21:47 +00001824 stringlib_count(str + start, end - start, sub, sub_len)
1825 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826}
1827
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001829PyDoc_STRVAR(translate__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001830"B.translate(table[, deletechars]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001832Return a copy of B, where all characters occurring in the\n\
1833optional argument deletechars are removed, and the remaining\n\
1834characters have been mapped through the given translation\n\
1835table, which must be a bytes object of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836
1837static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001838string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001840 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001841 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00001844 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001845 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 PyObject *result;
1847 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00001850 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001851 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001853
1854 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00001855 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 tablen = PyString_GET_SIZE(tableobj);
1857 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001858 else if (tableobj == Py_None) {
1859 table = NULL;
1860 tablen = 256;
1861 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001862 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864
Martin v. Löwis00b61272002-12-12 20:03:19 +00001865 if (tablen != 256) {
1866 PyErr_SetString(PyExc_ValueError,
1867 "translation table must be 256 characters long");
1868 return NULL;
1869 }
1870
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 if (delobj != NULL) {
1872 if (PyString_Check(delobj)) {
1873 del_table = PyString_AS_STRING(delobj);
1874 dellen = PyString_GET_SIZE(delobj);
1875 }
1876 else if (PyUnicode_Check(delobj)) {
1877 PyErr_SetString(PyExc_TypeError,
1878 "deletions are implemented differently for unicode");
1879 return NULL;
1880 }
1881 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1882 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001883 }
1884 else {
1885 del_table = NULL;
1886 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001887 }
1888
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001889 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890 result = PyString_FromStringAndSize((char *)NULL, inlen);
1891 if (result == NULL)
1892 return NULL;
1893 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001894 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895
Guido van Rossumd8faa362007-04-27 19:54:29 +00001896 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 /* If no deletions are required, use faster code */
1898 for (i = inlen; --i >= 0; ) {
1899 c = Py_CHARMASK(*input++);
1900 if (Py_CHARMASK((*output++ = table[c])) != c)
1901 changed = 1;
1902 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001903 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904 return result;
1905 Py_DECREF(result);
1906 Py_INCREF(input_obj);
1907 return input_obj;
1908 }
1909
Guido van Rossumd8faa362007-04-27 19:54:29 +00001910 if (table == NULL) {
1911 for (i = 0; i < 256; i++)
1912 trans_table[i] = Py_CHARMASK(i);
1913 } else {
1914 for (i = 0; i < 256; i++)
1915 trans_table[i] = Py_CHARMASK(table[i]);
1916 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917
1918 for (i = 0; i < dellen; i++)
1919 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1920
1921 for (i = inlen; --i >= 0; ) {
1922 c = Py_CHARMASK(*input++);
1923 if (trans_table[c] != -1)
1924 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1925 continue;
1926 changed = 1;
1927 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001928 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 Py_DECREF(result);
1930 Py_INCREF(input_obj);
1931 return input_obj;
1932 }
1933 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001934 if (inlen > 0)
1935 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936 return result;
1937}
1938
1939
/* Search direction selectors.  The negative value is parenthesized so
   the macro is safe inside any surrounding expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL when there is none (thin wrapper over
   memchr). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947
Thomas Wouters477c8d52006-05-27 19:21:47 +00001948/* String ops must return a string. */
1949/* If the object is subclass of string, create a copy */
1950Py_LOCAL(PyStringObject *)
1951return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001953 if (PyString_CheckExact(self)) {
1954 Py_INCREF(self);
1955 return self;
1956 }
1957 return (PyStringObject *)PyString_FromStringAndSize(
1958 PyString_AS_STRING(self),
1959 PyString_GET_SIZE(self));
1960}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961
Thomas Wouters477c8d52006-05-27 19:21:47 +00001962Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001963countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001964{
1965 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001966 const char *start=target;
1967 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968
Thomas Wouters477c8d52006-05-27 19:21:47 +00001969 while ( (start=findchar(start, end-start, c)) != NULL ) {
1970 count++;
1971 if (count >= maxcount)
1972 break;
1973 start += 1;
1974 }
1975 return count;
1976}
1977
1978Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001979findstring(const char *target, Py_ssize_t target_len,
1980 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00001981 Py_ssize_t start,
1982 Py_ssize_t end,
1983 int direction)
1984{
1985 if (start < 0) {
1986 start += target_len;
1987 if (start < 0)
1988 start = 0;
1989 }
1990 if (end > target_len) {
1991 end = target_len;
1992 } else if (end < 0) {
1993 end += target_len;
1994 if (end < 0)
1995 end = 0;
1996 }
1997
1998 /* zero-length substrings always match at the first attempt */
1999 if (pattern_len == 0)
2000 return (direction > 0) ? start : end;
2001
2002 end -= pattern_len;
2003
2004 if (direction < 0) {
2005 for (; end >= start; end--)
2006 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2007 return end;
2008 } else {
2009 for (; start <= end; start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002010 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
Thomas Wouters477c8d52006-05-27 19:21:47 +00002011 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 }
2013 return -1;
2014}
2015
Thomas Wouters477c8d52006-05-27 19:21:47 +00002016Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002017countstring(const char *target, Py_ssize_t target_len,
2018 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002019 Py_ssize_t start,
2020 Py_ssize_t end,
2021 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002023 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024
Thomas Wouters477c8d52006-05-27 19:21:47 +00002025 if (start < 0) {
2026 start += target_len;
2027 if (start < 0)
2028 start = 0;
2029 }
2030 if (end > target_len) {
2031 end = target_len;
2032 } else if (end < 0) {
2033 end += target_len;
2034 if (end < 0)
2035 end = 0;
2036 }
2037
2038 /* zero-length substrings match everywhere */
2039 if (pattern_len == 0 || maxcount == 0) {
2040 if (target_len+1 < maxcount)
2041 return target_len+1;
2042 return maxcount;
2043 }
2044
2045 end -= pattern_len;
2046 if (direction < 0) {
2047 for (; (end >= start); end--)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002048 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002049 count++;
2050 if (--maxcount <= 0) break;
2051 end -= pattern_len-1;
2052 }
2053 } else {
2054 for (; (start <= end); start++)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002055 if (Py_STRING_MATCH(target, start,
2056 pattern, pattern_len)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002057 count++;
2058 if (--maxcount <= 0)
2059 break;
2060 start += pattern_len-1;
2061 }
2062 }
2063 return count;
2064}
2065
2066
2067/* Algorithms for different cases of string replacement */
2068
2069/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2070Py_LOCAL(PyStringObject *)
2071replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002072 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002073 Py_ssize_t maxcount)
2074{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002075 char *self_s, *result_s;
2076 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002077 Py_ssize_t count, i, product;
2078 PyStringObject *result;
2079
2080 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002081
Thomas Wouters477c8d52006-05-27 19:21:47 +00002082 /* 1 at the end plus 1 after every character */
2083 count = self_len+1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002084 if (maxcount < count)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002085 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002086
Thomas Wouters477c8d52006-05-27 19:21:47 +00002087 /* Check for overflow */
2088 /* result_len = count * to_len + self_len; */
2089 product = count * to_len;
2090 if (product / to_len != count) {
2091 PyErr_SetString(PyExc_OverflowError,
2092 "replace string is too long");
2093 return NULL;
2094 }
2095 result_len = product + self_len;
2096 if (result_len < 0) {
2097 PyErr_SetString(PyExc_OverflowError,
2098 "replace string is too long");
2099 return NULL;
2100 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002101
Thomas Wouters477c8d52006-05-27 19:21:47 +00002102 if (! (result = (PyStringObject *)
2103 PyString_FromStringAndSize(NULL, result_len)) )
2104 return NULL;
2105
2106 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002107 result_s = PyString_AS_STRING(result);
2108
2109 /* TODO: special case single character, which doesn't need memcpy */
2110
2111 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002112 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002113 result_s += to_len;
2114 count -= 1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002115
Thomas Wouters477c8d52006-05-27 19:21:47 +00002116 for (i=0; i<count; i++) {
2117 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002118 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002119 result_s += to_len;
2120 }
2121
2122 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002123 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002124
2125 return result;
2126}
2127
2128/* Special case for deleting a single character */
2129/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2130Py_LOCAL(PyStringObject *)
2131replace_delete_single_character(PyStringObject *self,
2132 char from_c, Py_ssize_t maxcount)
2133{
2134 char *self_s, *result_s;
2135 char *start, *next, *end;
2136 Py_ssize_t self_len, result_len;
2137 Py_ssize_t count;
2138 PyStringObject *result;
2139
2140 self_len = PyString_GET_SIZE(self);
2141 self_s = PyString_AS_STRING(self);
2142
2143 count = countchar(self_s, self_len, from_c, maxcount);
2144 if (count == 0) {
2145 return return_self(self);
2146 }
Guido van Rossumae404e22007-10-26 21:46:44 +00002147
Thomas Wouters477c8d52006-05-27 19:21:47 +00002148 result_len = self_len - count; /* from_len == 1 */
2149 assert(result_len>=0);
2150
2151 if ( (result = (PyStringObject *)
2152 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2153 return NULL;
2154 result_s = PyString_AS_STRING(result);
2155
2156 start = self_s;
2157 end = self_s + self_len;
2158 while (count-- > 0) {
2159 next = findchar(start, end-start, from_c);
2160 if (next == NULL)
2161 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002162 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002163 result_s += (next-start);
2164 start = next+1;
2165 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002166 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002167
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168 return result;
2169}
2170
2171/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2172
2173Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002174replace_delete_substring(PyStringObject *self,
2175 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002176 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002177 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002178 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002179 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002180 Py_ssize_t count, offset;
2181 PyStringObject *result;
2182
2183 self_len = PyString_GET_SIZE(self);
2184 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002185
2186 count = countstring(self_s, self_len,
2187 from_s, from_len,
2188 0, self_len, 1,
2189 maxcount);
2190
2191 if (count == 0) {
2192 /* no matches */
2193 return return_self(self);
2194 }
2195
2196 result_len = self_len - (count * from_len);
2197 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002198
Thomas Wouters477c8d52006-05-27 19:21:47 +00002199 if ( (result = (PyStringObject *)
2200 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2201 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002202
Thomas Wouters477c8d52006-05-27 19:21:47 +00002203 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002204
Thomas Wouters477c8d52006-05-27 19:21:47 +00002205 start = self_s;
2206 end = self_s + self_len;
2207 while (count-- > 0) {
2208 offset = findstring(start, end-start,
2209 from_s, from_len,
2210 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211 if (offset == -1)
2212 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002213 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002214
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002215 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002216
Thomas Wouters477c8d52006-05-27 19:21:47 +00002217 result_s += (next-start);
2218 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002220 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002221 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222}
2223
Thomas Wouters477c8d52006-05-27 19:21:47 +00002224/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2225Py_LOCAL(PyStringObject *)
2226replace_single_character_in_place(PyStringObject *self,
2227 char from_c, char to_c,
2228 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002230 char *self_s, *result_s, *start, *end, *next;
2231 Py_ssize_t self_len;
2232 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002233
Thomas Wouters477c8d52006-05-27 19:21:47 +00002234 /* The result string will be the same size */
2235 self_s = PyString_AS_STRING(self);
2236 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002237
Thomas Wouters477c8d52006-05-27 19:21:47 +00002238 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002239
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240 if (next == NULL) {
2241 /* No matches; return the original string */
2242 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002244
Thomas Wouters477c8d52006-05-27 19:21:47 +00002245 /* Need to make a new string */
2246 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2247 if (result == NULL)
2248 return NULL;
2249 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002250 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002251
Thomas Wouters477c8d52006-05-27 19:21:47 +00002252 /* change everything in-place, starting with this one */
2253 start = result_s + (next-self_s);
2254 *start = to_c;
2255 start++;
2256 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002257
Thomas Wouters477c8d52006-05-27 19:21:47 +00002258 while (--maxcount > 0) {
2259 next = findchar(start, end-start, from_c);
2260 if (next == NULL)
2261 break;
2262 *next = to_c;
2263 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002264 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002265
Thomas Wouters477c8d52006-05-27 19:21:47 +00002266 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267}
2268
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2270Py_LOCAL(PyStringObject *)
2271replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272 const char *from_s, Py_ssize_t from_len,
2273 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002274 Py_ssize_t maxcount)
2275{
2276 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002277 char *self_s;
2278 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002279 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280
Thomas Wouters477c8d52006-05-27 19:21:47 +00002281 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002282
Thomas Wouters477c8d52006-05-27 19:21:47 +00002283 self_s = PyString_AS_STRING(self);
2284 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002285
Thomas Wouters477c8d52006-05-27 19:21:47 +00002286 offset = findstring(self_s, self_len,
2287 from_s, from_len,
2288 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002289 if (offset == -1) {
2290 /* No matches; return the original string */
2291 return return_self(self);
2292 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002293
Thomas Wouters477c8d52006-05-27 19:21:47 +00002294 /* Need to make a new string */
2295 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2296 if (result == NULL)
2297 return NULL;
2298 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002299 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002300
Thomas Wouters477c8d52006-05-27 19:21:47 +00002301 /* change everything in-place, starting with this one */
2302 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002303 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002304 start += from_len;
2305 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002306
Thomas Wouters477c8d52006-05-27 19:21:47 +00002307 while ( --maxcount > 0) {
2308 offset = findstring(start, end-start,
2309 from_s, from_len,
2310 0, end-start, FORWARD);
2311 if (offset==-1)
2312 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002313 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002314 start += offset+from_len;
2315 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002316
Thomas Wouters477c8d52006-05-27 19:21:47 +00002317 return result;
2318}
2319
2320/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2321Py_LOCAL(PyStringObject *)
2322replace_single_character(PyStringObject *self,
2323 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002324 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002325 Py_ssize_t maxcount)
2326{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002327 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002328 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002329 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002330 Py_ssize_t count, product;
2331 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002332
Thomas Wouters477c8d52006-05-27 19:21:47 +00002333 self_s = PyString_AS_STRING(self);
2334 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002335
Thomas Wouters477c8d52006-05-27 19:21:47 +00002336 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002337 if (count == 0) {
2338 /* no matches, return unchanged */
2339 return return_self(self);
2340 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002341
Thomas Wouters477c8d52006-05-27 19:21:47 +00002342 /* use the difference between current and new, hence the "-1" */
2343 /* result_len = self_len + count * (to_len-1) */
2344 product = count * (to_len-1);
2345 if (product / (to_len-1) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002346 PyErr_SetString(PyExc_OverflowError,
2347 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002348 return NULL;
2349 }
2350 result_len = self_len + product;
2351 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002352 PyErr_SetString(PyExc_OverflowError,
2353 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002354 return NULL;
2355 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356
Thomas Wouters477c8d52006-05-27 19:21:47 +00002357 if ( (result = (PyStringObject *)
2358 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2359 return NULL;
2360 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002361
Thomas Wouters477c8d52006-05-27 19:21:47 +00002362 start = self_s;
2363 end = self_s + self_len;
2364 while (count-- > 0) {
2365 next = findchar(start, end-start, from_c);
Guido van Rossumae404e22007-10-26 21:46:44 +00002366 if (next == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002367 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002368
Thomas Wouters477c8d52006-05-27 19:21:47 +00002369 if (next == start) {
2370 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002371 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002372 result_s += to_len;
2373 start += 1;
2374 } else {
2375 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002376 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002377 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002378 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002379 result_s += to_len;
2380 start = next+1;
2381 }
2382 }
2383 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002384 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002385
Thomas Wouters477c8d52006-05-27 19:21:47 +00002386 return result;
2387}
2388
2389/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2390Py_LOCAL(PyStringObject *)
2391replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002392 const char *from_s, Py_ssize_t from_len,
2393 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002394 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002395 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002396 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002397 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002398 Py_ssize_t count, offset, product;
2399 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002400
Thomas Wouters477c8d52006-05-27 19:21:47 +00002401 self_s = PyString_AS_STRING(self);
2402 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002403
Thomas Wouters477c8d52006-05-27 19:21:47 +00002404 count = countstring(self_s, self_len,
2405 from_s, from_len,
2406 0, self_len, FORWARD, maxcount);
2407 if (count == 0) {
2408 /* no matches, return unchanged */
2409 return return_self(self);
2410 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002411
Thomas Wouters477c8d52006-05-27 19:21:47 +00002412 /* Check for overflow */
2413 /* result_len = self_len + count * (to_len-from_len) */
2414 product = count * (to_len-from_len);
2415 if (product / (to_len-from_len) != count) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002416 PyErr_SetString(PyExc_OverflowError,
2417 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002418 return NULL;
2419 }
2420 result_len = self_len + product;
2421 if (result_len < 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002422 PyErr_SetString(PyExc_OverflowError,
2423 "replace string is too long");
Thomas Wouters477c8d52006-05-27 19:21:47 +00002424 return NULL;
2425 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002426
Thomas Wouters477c8d52006-05-27 19:21:47 +00002427 if ( (result = (PyStringObject *)
2428 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2429 return NULL;
2430 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002431
Thomas Wouters477c8d52006-05-27 19:21:47 +00002432 start = self_s;
2433 end = self_s + self_len;
2434 while (count-- > 0) {
2435 offset = findstring(start, end-start,
2436 from_s, from_len,
2437 0, end-start, FORWARD);
2438 if (offset == -1)
2439 break;
2440 next = start+offset;
2441 if (next == start) {
2442 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002443 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002444 result_s += to_len;
2445 start += from_len;
2446 } else {
2447 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002448 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002449 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002450 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002451 result_s += to_len;
2452 start = next+from_len;
2453 }
2454 }
2455 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002456 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002457
Thomas Wouters477c8d52006-05-27 19:21:47 +00002458 return result;
2459}
2460
2461
2462Py_LOCAL(PyStringObject *)
2463replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002464 const char *from_s, Py_ssize_t from_len,
2465 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002466 Py_ssize_t maxcount)
2467{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002468 if (maxcount < 0) {
2469 maxcount = PY_SSIZE_T_MAX;
2470 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2471 /* nothing to do; return the original string */
2472 return return_self(self);
2473 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002474
Thomas Wouters477c8d52006-05-27 19:21:47 +00002475 if (maxcount == 0 ||
2476 (from_len == 0 && to_len == 0)) {
2477 /* nothing to do; return the original string */
2478 return return_self(self);
2479 }
2480
2481 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002482
Thomas Wouters477c8d52006-05-27 19:21:47 +00002483 if (from_len == 0) {
2484 /* insert the 'to' string everywhere. */
2485 /* >>> "Python".replace("", ".") */
2486 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002487 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002488 }
2489
2490 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2491 /* point for an empty self string to generate a non-empty string */
2492 /* Special case so the remaining code always gets a non-empty string */
2493 if (PyString_GET_SIZE(self) == 0) {
2494 return return_self(self);
2495 }
2496
2497 if (to_len == 0) {
2498 /* delete all occurances of 'from' string */
2499 if (from_len == 1) {
2500 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002501 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002502 } else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002503 return replace_delete_substring(self, from_s,
2504 from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002505 }
2506 }
2507
2508 /* Handle special case where both strings have the same length */
2509
2510 if (from_len == to_len) {
2511 if (from_len == 1) {
2512 return replace_single_character_in_place(
2513 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002514 from_s[0],
2515 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00002516 maxcount);
2517 } else {
2518 return replace_substring_in_place(
Guido van Rossum98297ee2007-11-06 21:34:58 +00002519 self, from_s, from_len, to_s, to_len,
2520 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002521 }
2522 }
2523
2524 /* Otherwise use the more generic algorithms */
2525 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002526 return replace_single_character(self, from_s[0],
2527 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002528 } else {
2529 /* len('from')>=2, len('to')>=1 */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002530 return replace_substring(self, from_s, from_len, to_s, to_len,
2531 maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002532 }
2533}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002534
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002535PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002536"B.replace(old, new[, count]) -> bytes\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002538Return a copy of B with all occurrences of subsection\n\
Fred Draked22bb652003-10-22 02:56:40 +00002539old replaced by new. If the optional argument count is\n\
2540given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002541
2542static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002543string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002545 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002546 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002547 const char *from_s, *to_s;
2548 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002549
Thomas Wouters477c8d52006-05-27 19:21:47 +00002550 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002551 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002552
Thomas Wouters477c8d52006-05-27 19:21:47 +00002553 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002554 from_s = PyString_AS_STRING(from);
2555 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002556 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002557 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 return NULL;
2559
Thomas Wouters477c8d52006-05-27 19:21:47 +00002560 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002561 to_s = PyString_AS_STRING(to);
2562 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002564 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 return NULL;
2566
Thomas Wouters477c8d52006-05-27 19:21:47 +00002567 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002568 from_s, from_len,
2569 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570}
2571
Thomas Wouters477c8d52006-05-27 19:21:47 +00002572/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002573
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002574/* Matches the end (direction >= 0) or start (direction < 0) of self
2575 * against substr, using the start and end arguments. Returns
2576 * -1 on error, 0 if not found and 1 if found.
2577 */
2578Py_LOCAL(int)
2579_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2580 Py_ssize_t end, int direction)
2581{
2582 Py_ssize_t len = PyString_GET_SIZE(self);
2583 Py_ssize_t slen;
2584 const char* sub;
2585 const char* str;
2586
2587 if (PyString_Check(substr)) {
2588 sub = PyString_AS_STRING(substr);
2589 slen = PyString_GET_SIZE(substr);
2590 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002591 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2592 return -1;
2593 str = PyString_AS_STRING(self);
2594
2595 string_adjust_indices(&start, &end, len);
2596
2597 if (direction < 0) {
2598 /* startswith */
2599 if (start+slen > len)
2600 return 0;
2601 } else {
2602 /* endswith */
2603 if (end-start < slen || start > len)
2604 return 0;
2605
2606 if (end-slen > start)
2607 start = end - slen;
2608 }
2609 if (end-start >= slen)
2610 return ! memcmp(str+start, sub, slen);
2611 return 0;
2612}
2613
2614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002615PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002616"B.startswith(prefix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002618Return True if B starts with the specified prefix, False otherwise.\n\
2619With optional start, test B beginning at that position.\n\
2620With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002621prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622
2623static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002624string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002626 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002627 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002629 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002630
Guido van Rossumc6821402000-05-08 14:08:05 +00002631 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2632 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002634 if (PyTuple_Check(subobj)) {
2635 Py_ssize_t i;
2636 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2637 result = _string_tailmatch(self,
2638 PyTuple_GET_ITEM(subobj, i),
2639 start, end, -1);
2640 if (result == -1)
2641 return NULL;
2642 else if (result) {
2643 Py_RETURN_TRUE;
2644 }
2645 }
2646 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002648 result = _string_tailmatch(self, subobj, start, end, -1);
2649 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002650 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002651 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002652 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002653}
2654
2655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002656PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002657"B.endswith(suffix [,start [,end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002659Return True if B ends with the specified suffix, False otherwise.\n\
2660With optional start, test B beginning at that position.\n\
2661With optional end, stop comparing B at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002662suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002663
2664static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002665string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002667 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002668 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002670 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671
Guido van Rossumc6821402000-05-08 14:08:05 +00002672 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2673 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002674 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002675 if (PyTuple_Check(subobj)) {
2676 Py_ssize_t i;
2677 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2678 result = _string_tailmatch(self,
2679 PyTuple_GET_ITEM(subobj, i),
2680 start, end, +1);
2681 if (result == -1)
2682 return NULL;
2683 else if (result) {
2684 Py_RETURN_TRUE;
2685 }
2686 }
2687 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002688 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689 result = _string_tailmatch(self, subobj, start, end, +1);
2690 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002691 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002692 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002693 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002694}
2695
2696
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002697PyDoc_STRVAR(decode__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002698"B.decode([encoding[, errors]]) -> object\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002699\n\
2700Decodes S using the codec registered for encoding. encoding defaults\n\
2701to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002702handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2703a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002704as well as any other name registerd with codecs.register_error that is\n\
2705able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002706
2707static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +00002708string_decode(PyObject *self, PyObject *args)
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002709{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002710 const char *encoding = NULL;
2711 const char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002712
Guido van Rossum98297ee2007-11-06 21:34:58 +00002713 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2714 return NULL;
2715 if (encoding == NULL)
2716 encoding = PyUnicode_GetDefaultEncoding();
2717 return PyCodec_Decode(self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002718}
2719
2720
Guido van Rossumae404e22007-10-26 21:46:44 +00002721PyDoc_STRVAR(fromhex_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002722"bytes.fromhex(string) -> bytes\n\
Guido van Rossumae404e22007-10-26 21:46:44 +00002723\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002724Create a bytes object from a string of hexadecimal numbers.\n\
2725Spaces between two numbers are accepted.\n\
2726Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Guido van Rossumae404e22007-10-26 21:46:44 +00002727
2728static int
2729hex_digit_to_int(Py_UNICODE c)
2730{
Guido van Rossum98297ee2007-11-06 21:34:58 +00002731 if (c >= 128)
2732 return -1;
2733 if (ISDIGIT(c))
2734 return c - '0';
2735 else {
2736 if (ISUPPER(c))
2737 c = TOLOWER(c);
2738 if (c >= 'a' && c <= 'f')
2739 return c - 'a' + 10;
2740 }
2741 return -1;
Guido van Rossumae404e22007-10-26 21:46:44 +00002742}
2743
2744static PyObject *
2745string_fromhex(PyObject *cls, PyObject *args)
2746{
2747 PyObject *newstring, *hexobj;
2748 char *buf;
2749 Py_UNICODE *hex;
2750 Py_ssize_t hexlen, byteslen, i, j;
2751 int top, bot;
2752
2753 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2754 return NULL;
2755 assert(PyUnicode_Check(hexobj));
2756 hexlen = PyUnicode_GET_SIZE(hexobj);
2757 hex = PyUnicode_AS_UNICODE(hexobj);
2758 byteslen = hexlen/2; /* This overestimates if there are spaces */
2759 newstring = PyString_FromStringAndSize(NULL, byteslen);
2760 if (!newstring)
2761 return NULL;
2762 buf = PyString_AS_STRING(newstring);
2763 for (i = j = 0; i < hexlen; i += 2) {
2764 /* skip over spaces in the input */
2765 while (hex[i] == ' ')
2766 i++;
2767 if (i >= hexlen)
2768 break;
2769 top = hex_digit_to_int(hex[i]);
2770 bot = hex_digit_to_int(hex[i+1]);
2771 if (top == -1 || bot == -1) {
2772 PyErr_Format(PyExc_ValueError,
2773 "non-hexadecimal number found in "
2774 "fromhex() arg at position %zd", i);
2775 goto error;
2776 }
2777 buf[j++] = (top << 4) + bot;
2778 }
Christian Heimes2c4a0722008-01-30 11:28:29 +00002779 if (j != byteslen && _PyString_Resize(&newstring, j) < 0)
Guido van Rossumae404e22007-10-26 21:46:44 +00002780 goto error;
2781 return newstring;
2782
2783 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00002784 Py_XDECREF(newstring);
Guido van Rossumae404e22007-10-26 21:46:44 +00002785 return NULL;
2786}
2787
2788
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002789static PyObject *
2790string_getnewargs(PyStringObject *v)
2791{
Christian Heimes90aa7642007-12-19 02:45:37 +00002792 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00002793}
2794
Christian Heimes2c4a0722008-01-30 11:28:29 +00002795
/* Method table for the bytes type; installed through the tp_methods slot
   of PyString_Type.  Each entry is {name, C function, calling-convention
   flags, docstring}.  The first entry omits the docstring member.
   "fromhex" carries METH_CLASS in addition to METH_VARARGS.  The table is
   terminated by the {NULL, NULL} sentinel. */
static PyMethodDef
string_methods[] = {
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"fromhex", (PyCFunction)string_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2854
/* Forward declaration: string_new() hands construction over to this
   function whenever the requested type is not exactly PyString_Type. */
static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2857
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002858static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002859string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002860{
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002861 PyObject *x = NULL, *it;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002862 const char *encoding = NULL;
2863 const char *errors = NULL;
2864 PyObject *new = NULL;
2865 Py_ssize_t i, size;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002866 static char *kwlist[] = {"source", "encoding", "errors", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00002867
Guido van Rossumae960af2001-08-30 03:11:59 +00002868 if (type != &PyString_Type)
2869 return str_subtype_new(type, args, kwds);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002870 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002871 &encoding, &errors))
Tim Peters6d6c1a32001-08-02 04:15:00 +00002872 return NULL;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002873 if (x == NULL) {
2874 if (encoding != NULL || errors != NULL) {
2875 PyErr_SetString(PyExc_TypeError,
2876 "encoding or errors without sequence "
2877 "argument");
2878 return NULL;
2879 }
Tim Peters6d6c1a32001-08-02 04:15:00 +00002880 return PyString_FromString("");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002881 }
2882
2883 if (PyUnicode_Check(x)) {
2884 /* Encode via the codec registry */
2885 if (encoding == NULL) {
2886 PyErr_SetString(PyExc_TypeError,
2887 "string argument without an encoding");
2888 return NULL;
2889 }
2890 new = PyCodec_Encode(x, encoding, errors);
2891 if (new == NULL)
2892 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002893 assert(PyString_Check(new));
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002894 return new;
2895 }
2896
2897 /* If it's not unicode, there can't be encoding or errors */
2898 if (encoding != NULL || errors != NULL) {
2899 PyErr_SetString(PyExc_TypeError,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002900 "encoding or errors without a string argument");
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002901 return NULL;
2902 }
2903
Guido van Rossum98297ee2007-11-06 21:34:58 +00002904 /* Is it an int? */
2905 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2906 if (size == -1 && PyErr_Occurred()) {
2907 PyErr_Clear();
2908 }
2909 else {
2910 if (size < 0) {
2911 PyErr_SetString(PyExc_ValueError, "negative count");
2912 return NULL;
2913 }
2914 new = PyString_FromStringAndSize(NULL, size);
2915 if (new == NULL) {
2916 return NULL;
2917 }
2918 if (size > 0) {
2919 memset(((PyStringObject*)new)->ob_sval, 0, size);
2920 }
2921 return new;
2922 }
2923
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002924 /* Use the modern buffer interface */
2925 if (PyObject_CheckBuffer(x)) {
2926 Py_buffer view;
2927 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2928 return NULL;
2929 new = PyString_FromStringAndSize(NULL, view.len);
2930 if (!new)
2931 goto fail;
2932 // XXX(brett.cannon): Better way to get to internal buffer?
2933 if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
2934 &view, view.len, 'C') < 0)
2935 goto fail;
2936 PyObject_ReleaseBuffer(x, &view);
2937 return new;
2938 fail:
2939 Py_XDECREF(new);
2940 PyObject_ReleaseBuffer(x, &view);
2941 return NULL;
2942 }
2943
Guido van Rossum98297ee2007-11-06 21:34:58 +00002944 /* For iterator version, create a string object and resize as needed */
2945 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2946 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2947 input being a truly long iterator. */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002948 size = 64;
2949 new = PyString_FromStringAndSize(NULL, size);
2950 if (new == NULL)
2951 return NULL;
2952
2953 /* XXX Optimize this if the arguments is a list, tuple */
2954
2955 /* Get the iterator */
2956 it = PyObject_GetIter(x);
2957 if (it == NULL)
2958 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002959
2960 /* Run the iterator to exhaustion */
2961 for (i = 0; ; i++) {
2962 PyObject *item;
2963 Py_ssize_t value;
2964
2965 /* Get the next item */
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002966 item = PyIter_Next(it);
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002967 if (item == NULL) {
Georg Brandl5fb8eb92007-11-24 20:42:02 +00002968 if (PyErr_Occurred())
2969 goto error;
Georg Brandlbd1c68c2007-10-24 18:55:37 +00002970 break;
2971 }
2972
2973 /* Interpret it as an int (__index__) */
2974 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2975 Py_DECREF(item);
2976 if (value == -1 && PyErr_Occurred())
2977 goto error;
2978
2979 /* Range check */
2980 if (value < 0 || value >= 256) {
2981 PyErr_SetString(PyExc_ValueError,
2982 "bytes must be in range(0, 256)");
2983 goto error;
2984 }
2985
2986 /* Append the byte */
2987 if (i >= size) {
2988 size *= 2;
2989 if (_PyString_Resize(&new, size) < 0)
2990 goto error;
2991 }
2992 ((PyStringObject *)new)->ob_sval[i] = value;
2993 }
2994 _PyString_Resize(&new, i);
2995
2996 /* Clean up and return success */
2997 Py_DECREF(it);
2998 return new;
2999
3000 error:
Guido van Rossum98297ee2007-11-06 21:34:58 +00003001 /* Error handling when new != NULL */
Georg Brandlbd1c68c2007-10-24 18:55:37 +00003002 Py_XDECREF(it);
3003 Py_DECREF(new);
3004 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003005}
3006
Guido van Rossumae960af2001-08-30 03:11:59 +00003007static PyObject *
3008str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3009{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003010 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003011 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003012
3013 assert(PyType_IsSubtype(type, &PyString_Type));
3014 tmp = string_new(&PyString_Type, args, kwds);
3015 if (tmp == NULL)
3016 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003017 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003018 n = PyString_GET_SIZE(tmp);
3019 pnew = type->tp_alloc(type, n);
3020 if (pnew != NULL) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003021 Py_MEMCPY(PyString_AS_STRING(pnew),
3022 PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003023 ((PyStringObject *)pnew)->ob_shash =
3024 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003025 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003026 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003027 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003028}
3029
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003030PyDoc_STRVAR(string_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00003031"bytes(iterable_of_ints) -> bytes.\n\
3032bytes(string, encoding[, errors]) -> bytes\n\
3033bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
3034bytes(memory_view) -> bytes.\n\
Tim Peters6d6c1a32001-08-02 04:15:00 +00003035\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003036Construct an immutable array of bytes from:\n\
3037 - an iterable yielding integers in range(256)\n\
3038 - a text string encoded using the specified encoding\n\
3039 - a bytes or a buffer object\n\
3040 - any object implementing the buffer API.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003041
/* Forward declaration: used as the tp_iter slot of PyString_Type below;
   the definition follows the iterator type. */
static PyObject *str_iter(PyObject *seq);
3043
/* Type object for "bytes".  The initializer is positional, so the order
   of the entries must match the PyTypeObject layout exactly.  Instances
   are variable-sized: sizeof(PyStringObject) plus sizeof(char) per
   item. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_STRING_SUBCLASS,             /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    str_iter,                                   /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3086
3087void
Fred Drakeba096332000-07-09 07:04:36 +00003088PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003089{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003090 register PyObject *v;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003091 assert(pv != NULL);
Guido van Rossum013142a1994-08-30 08:19:36 +00003092 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003093 return;
Guido van Rossum98297ee2007-11-06 21:34:58 +00003094 if (w == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003096 *pv = NULL;
3097 return;
3098 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00003099 v = string_concat(*pv, w);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003100 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003101 *pv = v;
3102}
3103
Guido van Rossum013142a1994-08-30 08:19:36 +00003104void
Fred Drakeba096332000-07-09 07:04:36 +00003105PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003106{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003107 PyString_Concat(pv, w);
3108 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003109}
3110
3111
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003112/* The following function breaks the notion that strings are immutable:
3113 it changes the size of a string. We get away with this only if there
3114 is only one module referencing the object. You can also think of it
3115 as creating a new string object and destroying the old one, only
3116 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003117 already be known to some other part of the code...
3118 Note that if there's not enough memory to resize the string, the original
3119 string object at *pv is deallocated, *pv is set to NULL, an "out of
3120 memory" exception is set, and -1 is returned. Else (on success) 0 is
3121 returned, and the value in *pv may or may not be the same as on input.
3122 As always, an extra byte is allocated for a trailing \0 byte (newsize
3123 does *not* include that), and a trailing \0 byte is stored.
3124*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003125
/* Resize the string object at *pv in place to hold `newsize` bytes.
   Returns 0 on success and -1 on failure.  On every failure path *pv is
   left NULL and the original object has been released. */
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Refuse anything that is not a string, is referenced from more than
       one place, or asks for a negative size. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* NOTE(review): these two calls presumably undo the interpreter's
       reference bookkeeping for the old address before the block may
       move; _Py_NewReference() below re-registers it -- confirm. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    /* sizeof(PyStringObject) + newsize is the same size expression that
       also has to accommodate the terminator stored at ob_sval[newsize]
       below. */
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
    if (*pv == NULL) {
        /* realloc failed: the old block is still allocated, free it. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
Guido van Rossume5372401993-03-16 12:15:04 +00003155
Tim Peters38fd5b62000-09-21 05:43:11 +00003156/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3157 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3158 * Python's regular ints.
3159 * Return value: a new PyString*, or NULL if error.
3160 * . *pbuf is set to point into it,
3161 * *plen set to the # of chars following that.
3162 * Caller must decref it when done using pbuf.
3163 * The string starting at *pbuf is of the form
3164 * "-"? ("0x" | "0X")? digit+
3165 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003166 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003167 * There will be at least prec digits, zero-filled on the left if
3168 * necessary to get that many.
3169 * val object to be converted
3170 * flags bitmask of format flags; only F_ALT is looked at
3171 * prec minimum number of digits; 0-fill on left if needed
3172 * type a character in [duoxX]; u acts the same as d
3173 *
3174 * CAUTION: o, x and X conversions on regular ints can never
3175 * produce a '-' sign, but can for Python's unbounded ints.
3176 */
3177PyObject*
3178_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3179 char **pbuf, int *plen)
3180{
3181 PyObject *result = NULL;
3182 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003183 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003184 int sign; /* 1 if '-', else 0 */
3185 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003186 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003187 int numdigits; /* len == numnondigits + numdigits */
3188 int numnondigits = 0;
3189
Guido van Rossumddefaf32007-01-14 03:31:43 +00003190 /* Avoid exceeding SSIZE_T_MAX */
3191 if (prec > PY_SSIZE_T_MAX-3) {
3192 PyErr_SetString(PyExc_OverflowError,
3193 "precision too large");
3194 return NULL;
3195 }
3196
Tim Peters38fd5b62000-09-21 05:43:11 +00003197 switch (type) {
3198 case 'd':
3199 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00003200 /* Special-case boolean: we want 0/1 */
3201 if (PyBool_Check(val))
3202 result = PyNumber_ToBase(val, 10);
3203 else
Christian Heimes90aa7642007-12-19 02:45:37 +00003204 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00003205 break;
3206 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003207 numnondigits = 2;
3208 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00003209 break;
3210 case 'x':
3211 case 'X':
3212 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003213 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00003214 break;
3215 default:
3216 assert(!"'type' not in [duoxX]");
3217 }
3218 if (!result)
3219 return NULL;
3220
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003221 buf = PyUnicode_AsString(result);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00003222 if (!buf) {
3223 Py_DECREF(result);
3224 return NULL;
3225 }
3226
Tim Peters38fd5b62000-09-21 05:43:11 +00003227 /* To modify the string in-place, there can only be one reference. */
Christian Heimes90aa7642007-12-19 02:45:37 +00003228 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003229 PyErr_BadInternalCall();
3230 return NULL;
3231 }
Amaury Forgeot d'Arc39599dc2007-11-22 02:48:12 +00003232 llen = PyUnicode_GetSize(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00003233 if (llen > INT_MAX) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00003234 PyErr_SetString(PyExc_ValueError,
3235 "string too large in _PyString_FormatLong");
Martin v. Löwis725507b2006-03-07 12:08:51 +00003236 return NULL;
3237 }
3238 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003239 if (buf[len-1] == 'L') {
3240 --len;
3241 buf[len] = '\0';
3242 }
3243 sign = buf[0] == '-';
3244 numnondigits += sign;
3245 numdigits = len - numnondigits;
3246 assert(numdigits > 0);
3247
Tim Petersfff53252001-04-12 18:38:48 +00003248 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003249 if (((flags & F_ALT) == 0 &&
3250 (type == 'o' || type == 'x' || type == 'X'))) {
3251 assert(buf[sign] == '0');
3252 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
Guido van Rossum98297ee2007-11-06 21:34:58 +00003253 buf[sign+1] == 'o');
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003254 numnondigits -= 2;
3255 buf += 2;
3256 len -= 2;
3257 if (sign)
3258 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00003259 assert(len == numnondigits + numdigits);
3260 assert(numdigits > 0);
3261 }
3262
3263 /* Fill with leading zeroes to meet minimum width. */
3264 if (prec > numdigits) {
3265 PyObject *r1 = PyString_FromStringAndSize(NULL,
3266 numnondigits + prec);
3267 char *b1;
3268 if (!r1) {
3269 Py_DECREF(result);
3270 return NULL;
3271 }
3272 b1 = PyString_AS_STRING(r1);
3273 for (i = 0; i < numnondigits; ++i)
3274 *b1++ = *buf++;
3275 for (i = 0; i < prec - numdigits; i++)
3276 *b1++ = '0';
3277 for (i = 0; i < numdigits; i++)
3278 *b1++ = *buf++;
3279 *b1 = '\0';
3280 Py_DECREF(result);
3281 result = r1;
3282 buf = PyString_AS_STRING(result);
3283 len = numnondigits + prec;
3284 }
3285
3286 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003287 if (type == 'X') {
3288 /* Need to convert all lower case letters to upper case.
3289 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003290 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003291 if (buf[i] >= 'a' && buf[i] <= 'x')
3292 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003293 }
3294 *pbuf = buf;
3295 *plen = len;
3296 return result;
3297}
3298
Guido van Rossum8cf04761997-08-02 02:57:45 +00003299void
Fred Drakeba096332000-07-09 07:04:36 +00003300PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003301{
3302 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003303 for (i = 0; i < UCHAR_MAX + 1; i++) {
3304 Py_XDECREF(characters[i]);
3305 characters[i] = NULL;
3306 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003307 Py_XDECREF(nullstring);
3308 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003309}
Barry Warsawa903ad982001-02-23 16:40:48 +00003310
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003311/*********************** Str Iterator ****************************/
3312
/* State of an iterator over a string object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3318
/* tp_dealloc for the iterator: untrack it from the garbage collector,
   drop the reference to the underlying string (it_seq may already be
   NULL, hence Py_XDECREF) and free the iterator object. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3326
/* tp_traverse for the iterator: the only object reference it holds is
   it_seq.  Returns 0 when the visit does not return early. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3333
3334static PyObject *
3335striter_next(striterobject *it)
3336{
3337 PyStringObject *seq;
3338 PyObject *item;
3339
3340 assert(it != NULL);
3341 seq = it->it_seq;
3342 if (seq == NULL)
3343 return NULL;
3344 assert(PyString_Check(seq));
3345
3346 if (it->it_index < PyString_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003347 item = PyLong_FromLong(
Guido van Rossum75a902d2007-10-19 22:06:24 +00003348 (unsigned char)seq->ob_sval[it->it_index]);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003349 if (item != NULL)
3350 ++it->it_index;
3351 return item;
3352 }
3353
3354 Py_DECREF(seq);
3355 it->it_seq = NULL;
3356 return NULL;
3357}
3358
3359static PyObject *
3360striter_len(striterobject *it)
3361{
3362 Py_ssize_t len = 0;
3363 if (it->it_seq)
3364 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003365 return PyLong_FromSsize_t(len);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003366}
3367
/* Docstring attached to the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003370
/* Method table of the iterator type; it exposes only __length_hint__,
   implemented by striter_len(). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL,              NULL}           /* sentinel */
};
3376
/* Type object for "bytes_iterator".  The initializer is positional.
   Instances are fixed-size (tp_itemsize is 0) and GC-enabled.  The type
   is its own iterator (PyObject_SelfIter) and yields items through
   striter_next(). */
PyTypeObject PyStringIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,             /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* next slot after tp_methods */
};
3409
3410static PyObject *
3411str_iter(PyObject *seq)
3412{
3413 striterobject *it;
3414
3415 if (!PyString_Check(seq)) {
3416 PyErr_BadInternalCall();
3417 return NULL;
3418 }
3419 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
3420 if (it == NULL)
3421 return NULL;
3422 it->it_index = 0;
3423 Py_INCREF(seq);
3424 it->it_seq = (PyStringObject *)seq;
3425 _PyObject_GC_TRACK(it);
3426 return (PyObject *)it;
3427}