blob: b9056792f20e699e3f552d4c5d637e8e2a26e9cc [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000144
145 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000172
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000184
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000198
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000204 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000227
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000275extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000338 else if (c == '\t')
339 fprintf(fp, "\\t");
340 else if (c == '\n')
341 fprintf(fp, "\\n");
342 else if (c == '\r')
343 fprintf(fp, "\\r");
344 else if (c < ' ' || c >= 0x7f)
345 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000347 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000349 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000350 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351}
352
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000354string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000356 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
357 PyObject *v;
358 if (newsize > INT_MAX) {
359 PyErr_SetString(PyExc_OverflowError,
360 "string is too large to make repr");
361 }
362 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000364 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000365 }
366 else {
367 register int i;
368 register char c;
369 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000370 int quote;
371
Thomas Wouters7e474022000-07-16 12:04:32 +0000372 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000373 quote = '\'';
374 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
375 quote = '"';
376
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 for (i = 0; i < op->ob_size; i++) {
380 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000381 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000383 else if (c == '\t')
384 *p++ = '\\', *p++ = 't';
385 else if (c == '\n')
386 *p++ = '\\', *p++ = 'n';
387 else if (c == '\r')
388 *p++ = '\\', *p++ = 'r';
389 else if (c < ' ' || c >= 0x7f) {
390 sprintf(p, "\\x%02x", c & 0xff);
391 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392 }
393 else
394 *p++ = c;
395 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000396 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398 _PyString_Resize(
399 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000400 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402}
403
Guido van Rossum189f1df2001-05-01 16:51:53 +0000404static PyObject *
405string_str(PyObject *s)
406{
407 Py_INCREF(s);
408 return s;
409}
410
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000411static int
Fred Drakeba096332000-07-09 07:04:36 +0000412string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413{
414 return a->ob_size;
415}
416
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000418string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000419{
420 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 register PyStringObject *op;
422 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000423 if (PyUnicode_Check(bb))
424 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000425 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000426 "cannot add type \"%.200s\" to string",
427 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428 return NULL;
429 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431 /* Optimize cases with empty left or right operand */
432 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000434 return bb;
435 }
436 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 Py_INCREF(a);
438 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439 }
440 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000441 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000443 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000444 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000445 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000446 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000447#ifdef CACHE_HASH
448 op->ob_shash = -1;
449#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000450#ifdef INTERN_STRINGS
451 op->ob_sinterned = NULL;
452#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000453 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
454 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
455 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000457#undef b
458}
459
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000461string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000462{
463 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000464 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000466 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 if (n < 0)
468 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000469 /* watch out for overflows: the size can overflow int,
470 * and the # of bytes needed can overflow size_t
471 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000472 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000473 if (n && size / n != a->ob_size) {
474 PyErr_SetString(PyExc_OverflowError,
475 "repeated string is too long");
476 return NULL;
477 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000478 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479 Py_INCREF(a);
480 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 }
Tim Peters8f422462000-09-09 06:13:41 +0000482 nbytes = size * sizeof(char);
483 if (nbytes / sizeof(char) != (size_t)size ||
484 nbytes + sizeof(PyStringObject) <= nbytes) {
485 PyErr_SetString(PyExc_OverflowError,
486 "repeated string is too long");
487 return NULL;
488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000490 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000491 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000493 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000494#ifdef CACHE_HASH
495 op->ob_shash = -1;
496#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000497#ifdef INTERN_STRINGS
498 op->ob_sinterned = NULL;
499#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000500 for (i = 0; i < size; i += a->ob_size)
501 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
502 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000503 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504}
505
506/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
507
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000508static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000509string_slice(register PyStringObject *a, register int i, register int j)
510 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000511{
512 if (i < 0)
513 i = 0;
514 if (j < 0)
515 j = 0; /* Avoid signed/unsigned bug in next line */
516 if (j > a->ob_size)
517 j = a->ob_size;
518 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000519 Py_INCREF(a);
520 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
522 if (j < i)
523 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000524 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525}
526
Guido van Rossum9284a572000-03-07 15:53:43 +0000527static int
Fred Drakeba096332000-07-09 07:04:36 +0000528string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000529{
530 register char *s, *end;
531 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000532 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000533 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000534 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000535 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000536 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000537 return -1;
538 }
539 c = PyString_AsString(el)[0];
540 s = PyString_AsString(a);
541 end = s + PyString_Size(a);
542 while (s < end) {
543 if (c == *s++)
544 return 1;
545 }
546 return 0;
547}
548
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000549static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000550string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000551{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000555 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556 return NULL;
557 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000558 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000559 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000560#ifdef COUNT_ALLOCS
561 if (v != NULL)
562 one_strings++;
563#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000564 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000566 if (v == NULL)
567 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000568 characters[c] = (PyStringObject *) v;
569 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000570 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000571 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000572 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573}
574
575static int
Fred Drakeba096332000-07-09 07:04:36 +0000576string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000577{
Guido van Rossum253919f1991-02-13 23:18:39 +0000578 int len_a = a->ob_size, len_b = b->ob_size;
579 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000580 int cmp;
581 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000582 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000583 if (cmp == 0)
584 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
585 if (cmp != 0)
586 return cmp;
587 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000588 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000589}
590
Guido van Rossum9bfef441993-03-29 10:43:31 +0000591static long
Fred Drakeba096332000-07-09 07:04:36 +0000592string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000593{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000594 register int len;
595 register unsigned char *p;
596 register long x;
597
598#ifdef CACHE_HASH
599 if (a->ob_shash != -1)
600 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000601#ifdef INTERN_STRINGS
602 if (a->ob_sinterned != NULL)
603 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000604 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000605#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000606#endif
607 len = a->ob_size;
608 p = (unsigned char *) a->ob_sval;
609 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000610 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000611 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000612 x ^= a->ob_size;
613 if (x == -1)
614 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000615#ifdef CACHE_HASH
616 a->ob_shash = x;
617#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000618 return x;
619}
620
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000621static int
Fred Drakeba096332000-07-09 07:04:36 +0000622string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000623{
624 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000625 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000626 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000627 return -1;
628 }
629 *ptr = (void *)self->ob_sval;
630 return self->ob_size;
631}
632
633static int
Fred Drakeba096332000-07-09 07:04:36 +0000634string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000635{
Guido van Rossum045e6881997-09-08 18:30:11 +0000636 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000637 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000638 return -1;
639}
640
641static int
Fred Drakeba096332000-07-09 07:04:36 +0000642string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000643{
644 if ( lenp )
645 *lenp = self->ob_size;
646 return 1;
647}
648
Guido van Rossum1db70701998-10-08 02:18:52 +0000649static int
Fred Drakeba096332000-07-09 07:04:36 +0000650string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000651{
652 if ( index != 0 ) {
653 PyErr_SetString(PyExc_SystemError,
654 "accessing non-existent string segment");
655 return -1;
656 }
657 *ptr = self->ob_sval;
658 return self->ob_size;
659}
660
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000661static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000662 (inquiry)string_length, /*sq_length*/
663 (binaryfunc)string_concat, /*sq_concat*/
664 (intargfunc)string_repeat, /*sq_repeat*/
665 (intargfunc)string_item, /*sq_item*/
666 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000667 0, /*sq_ass_item*/
668 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000669 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670};
671
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000672static PyBufferProcs string_as_buffer = {
673 (getreadbufferproc)string_buffer_getreadbuf,
674 (getwritebufferproc)string_buffer_getwritebuf,
675 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000676 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000677};
678
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679
680
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   routines defined later in this file -- not visible from here. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
684
685
686static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000687split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000689 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690 PyObject* item;
691 PyObject *list = PyList_New(0);
692
693 if (list == NULL)
694 return NULL;
695
Guido van Rossum4c08d552000-03-10 22:55:18 +0000696 for (i = j = 0; i < len; ) {
697 while (i < len && isspace(Py_CHARMASK(s[i])))
698 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000699 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 while (i < len && !isspace(Py_CHARMASK(s[i])))
701 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000703 if (maxsplit-- <= 0)
704 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000705 item = PyString_FromStringAndSize(s+j, (int)(i-j));
706 if (item == NULL)
707 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 err = PyList_Append(list, item);
709 Py_DECREF(item);
710 if (err < 0)
711 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000712 while (i < len && isspace(Py_CHARMASK(s[i])))
713 i++;
714 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000715 }
716 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000717 if (j < len) {
718 item = PyString_FromStringAndSize(s+j, (int)(len - j));
719 if (item == NULL)
720 goto finally;
721 err = PyList_Append(list, item);
722 Py_DECREF(item);
723 if (err < 0)
724 goto finally;
725 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 return list;
727 finally:
728 Py_DECREF(list);
729 return NULL;
730}
731
732
733static char split__doc__[] =
734"S.split([sep [,maxsplit]]) -> list of strings\n\
735\n\
736Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737delimiter string. If maxsplit is given, at most maxsplit\n\
738splits are done. If sep is not specified, any whitespace string\n\
739is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740
741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000742string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000743{
744 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000745 int maxsplit = -1;
746 const char *s = PyString_AS_STRING(self), *sub;
747 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
Guido van Rossum4c08d552000-03-10 22:55:18 +0000749 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000751 if (maxsplit < 0)
752 maxsplit = INT_MAX;
753 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000754 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000755 if (PyString_Check(subobj)) {
756 sub = PyString_AS_STRING(subobj);
757 n = PyString_GET_SIZE(subobj);
758 }
759 else if (PyUnicode_Check(subobj))
760 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
761 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
762 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 if (n == 0) {
764 PyErr_SetString(PyExc_ValueError, "empty separator");
765 return NULL;
766 }
767
768 list = PyList_New(0);
769 if (list == NULL)
770 return NULL;
771
772 i = j = 0;
773 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000774 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000775 if (maxsplit-- <= 0)
776 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000777 item = PyString_FromStringAndSize(s+j, (int)(i-j));
778 if (item == NULL)
779 goto fail;
780 err = PyList_Append(list, item);
781 Py_DECREF(item);
782 if (err < 0)
783 goto fail;
784 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000785 }
786 else
787 i++;
788 }
789 item = PyString_FromStringAndSize(s+j, (int)(len-j));
790 if (item == NULL)
791 goto fail;
792 err = PyList_Append(list, item);
793 Py_DECREF(item);
794 if (err < 0)
795 goto fail;
796
797 return list;
798
799 fail:
800 Py_DECREF(list);
801 return NULL;
802}
803
804
805static char join__doc__[] =
806"S.join(sequence) -> string\n\
807\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808Return a string which is the concatenation of the strings in the\n\
809sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810
811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000812string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813{
814 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000815 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 char *p;
818 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000819 size_t sz = 0;
820 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000821 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000823 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 return NULL;
825
Tim Peters19fe14e2001-01-19 03:03:47 +0000826 seq = PySequence_Fast(orig, "");
827 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000828 if (PyErr_ExceptionMatches(PyExc_TypeError))
829 PyErr_Format(PyExc_TypeError,
830 "sequence expected, %.80s found",
831 orig->ob_type->tp_name);
832 return NULL;
833 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000834
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000835 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000836 if (seqlen == 0) {
837 Py_DECREF(seq);
838 return PyString_FromString("");
839 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000840 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000841 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000842 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
843 PyErr_Format(PyExc_TypeError,
844 "sequence item 0: expected string,"
845 " %.80s found",
846 item->ob_type->tp_name);
847 Py_DECREF(seq);
848 return NULL;
849 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000850 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000851 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000853 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000854
Tim Peters19fe14e2001-01-19 03:03:47 +0000855 /* There are at least two things to join. Do a pre-pass to figure out
856 * the total amount of space we'll need (sz), see whether any argument
857 * is absurd, and defer to the Unicode join if appropriate.
858 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000859 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000860 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000861 item = PySequence_Fast_GET_ITEM(seq, i);
862 if (!PyString_Check(item)){
863 if (PyUnicode_Check(item)) {
Barry Warsaw771d0672000-07-11 04:58:12 +0000864 Py_DECREF(seq);
Guido van Rossum2ccda8a2000-11-27 18:46:26 +0000865 return PyUnicode_Join((PyObject *)self, orig);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000866 }
867 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000868 "sequence item %i: expected string,"
869 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000870 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000871 Py_DECREF(seq);
872 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000873 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000874 sz += PyString_GET_SIZE(item);
875 if (i != 0)
876 sz += seplen;
877 if (sz < old_sz || sz > INT_MAX) {
878 PyErr_SetString(PyExc_OverflowError,
879 "join() is too long for a Python string");
880 Py_DECREF(seq);
881 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000882 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000883 }
884
885 /* Allocate result space. */
886 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
887 if (res == NULL) {
888 Py_DECREF(seq);
889 return NULL;
890 }
891
892 /* Catenate everything. */
893 p = PyString_AS_STRING(res);
894 for (i = 0; i < seqlen; ++i) {
895 size_t n;
896 item = PySequence_Fast_GET_ITEM(seq, i);
897 n = PyString_GET_SIZE(item);
898 memcpy(p, PyString_AS_STRING(item), n);
899 p += n;
900 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000901 memcpy(p, sep, seplen);
902 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000903 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000904 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000905
Jeremy Hylton49048292000-07-11 03:28:17 +0000906 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000907 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000908}
909
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000910static long
Fred Drakeba096332000-07-09 07:04:36 +0000911string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000912{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000913 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000914 int len = PyString_GET_SIZE(self);
915 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000916 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000917
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000918 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +0000919 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000920 return -2;
921 if (PyString_Check(subobj)) {
922 sub = PyString_AS_STRING(subobj);
923 n = PyString_GET_SIZE(subobj);
924 }
925 else if (PyUnicode_Check(subobj))
926 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
927 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000928 return -2;
929
930 if (last > len)
931 last = len;
932 if (last < 0)
933 last += len;
934 if (last < 0)
935 last = 0;
936 if (i < 0)
937 i += len;
938 if (i < 0)
939 i = 0;
940
Guido van Rossum4c08d552000-03-10 22:55:18 +0000941 if (dir > 0) {
942 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000943 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000944 last -= n;
945 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000946 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000947 return (long)i;
948 }
949 else {
950 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000951
Guido van Rossum4c08d552000-03-10 22:55:18 +0000952 if (n == 0 && i <= last)
953 return (long)last;
954 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000955 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000956 return (long)j;
957 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000958
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959 return -1;
960}
961
962
963static char find__doc__[] =
964"S.find(sub [,start [,end]]) -> int\n\
965\n\
966Return the lowest index in S where substring sub is found,\n\
967such that sub is contained within s[start,end]. Optional\n\
968arguments start and end are interpreted as in slice notation.\n\
969\n\
970Return -1 on failure.";
971
972static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000973string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000974{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000975 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000976 if (result == -2)
977 return NULL;
978 return PyInt_FromLong(result);
979}
980
981
982static char index__doc__[] =
983"S.index(sub [,start [,end]]) -> int\n\
984\n\
985Like S.find() but raise ValueError when the substring is not found.";
986
987static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000988string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000989{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000990 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000991 if (result == -2)
992 return NULL;
993 if (result == -1) {
994 PyErr_SetString(PyExc_ValueError,
995 "substring not found in string.index");
996 return NULL;
997 }
998 return PyInt_FromLong(result);
999}
1000
1001
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001002static char rfind__doc__[] =
1003"S.rfind(sub [,start [,end]]) -> int\n\
1004\n\
1005Return the highest index in S where substring sub is found,\n\
1006such that sub is contained within s[start,end]. Optional\n\
1007arguments start and end are interpreted as in slice notation.\n\
1008\n\
1009Return -1 on failure.";
1010
1011static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001012string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001013{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001014 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015 if (result == -2)
1016 return NULL;
1017 return PyInt_FromLong(result);
1018}
1019
1020
1021static char rindex__doc__[] =
1022"S.rindex(sub [,start [,end]]) -> int\n\
1023\n\
1024Like S.rfind() but raise ValueError when the substring is not found.";
1025
1026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001027string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001029 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 if (result == -2)
1031 return NULL;
1032 if (result == -1) {
1033 PyErr_SetString(PyExc_ValueError,
1034 "substring not found in string.rindex");
1035 return NULL;
1036 }
1037 return PyInt_FromLong(result);
1038}
1039
1040
1041static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001042do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043{
1044 char *s = PyString_AS_STRING(self);
1045 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001046
Guido van Rossum43713e52000-02-29 13:59:29 +00001047 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001048 return NULL;
1049
1050 i = 0;
1051 if (striptype != RIGHTSTRIP) {
1052 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1053 i++;
1054 }
1055 }
1056
1057 j = len;
1058 if (striptype != LEFTSTRIP) {
1059 do {
1060 j--;
1061 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1062 j++;
1063 }
1064
1065 if (i == 0 && j == len) {
1066 Py_INCREF(self);
1067 return (PyObject*)self;
1068 }
1069 else
1070 return PyString_FromStringAndSize(s+i, j-i);
1071}
1072
1073
1074static char strip__doc__[] =
1075"S.strip() -> string\n\
1076\n\
1077Return a copy of the string S with leading and trailing\n\
1078whitespace removed.";
1079
1080static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001081string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001082{
1083 return do_strip(self, args, BOTHSTRIP);
1084}
1085
1086
1087static char lstrip__doc__[] =
1088"S.lstrip() -> string\n\
1089\n\
1090Return a copy of the string S with leading whitespace removed.";
1091
1092static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001093string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001094{
1095 return do_strip(self, args, LEFTSTRIP);
1096}
1097
1098
1099static char rstrip__doc__[] =
1100"S.rstrip() -> string\n\
1101\n\
1102Return a copy of the string S with trailing whitespace removed.";
1103
1104static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001105string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106{
1107 return do_strip(self, args, RIGHTSTRIP);
1108}
1109
1110
1111static char lower__doc__[] =
1112"S.lower() -> string\n\
1113\n\
1114Return a copy of the string S converted to lowercase.";
1115
1116static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001117string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001118{
1119 char *s = PyString_AS_STRING(self), *s_new;
1120 int i, n = PyString_GET_SIZE(self);
1121 PyObject *new;
1122
Guido van Rossum43713e52000-02-29 13:59:29 +00001123 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001124 return NULL;
1125 new = PyString_FromStringAndSize(NULL, n);
1126 if (new == NULL)
1127 return NULL;
1128 s_new = PyString_AsString(new);
1129 for (i = 0; i < n; i++) {
1130 int c = Py_CHARMASK(*s++);
1131 if (isupper(c)) {
1132 *s_new = tolower(c);
1133 } else
1134 *s_new = c;
1135 s_new++;
1136 }
1137 return new;
1138}
1139
1140
1141static char upper__doc__[] =
1142"S.upper() -> string\n\
1143\n\
1144Return a copy of the string S converted to uppercase.";
1145
1146static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001147string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001148{
1149 char *s = PyString_AS_STRING(self), *s_new;
1150 int i, n = PyString_GET_SIZE(self);
1151 PyObject *new;
1152
Guido van Rossum43713e52000-02-29 13:59:29 +00001153 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 for (i = 0; i < n; i++) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c)) {
1162 *s_new = toupper(c);
1163 } else
1164 *s_new = c;
1165 s_new++;
1166 }
1167 return new;
1168}
1169
1170
Guido van Rossum4c08d552000-03-10 22:55:18 +00001171static char title__doc__[] =
1172"S.title() -> string\n\
1173\n\
1174Return a titlecased version of S, i.e. words start with uppercase\n\
1175characters, all remaining cased characters have lowercase.";
1176
1177static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001178string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179{
1180 char *s = PyString_AS_STRING(self), *s_new;
1181 int i, n = PyString_GET_SIZE(self);
1182 int previous_is_cased = 0;
1183 PyObject *new;
1184
1185 if (!PyArg_ParseTuple(args, ":title"))
1186 return NULL;
1187 new = PyString_FromStringAndSize(NULL, n);
1188 if (new == NULL)
1189 return NULL;
1190 s_new = PyString_AsString(new);
1191 for (i = 0; i < n; i++) {
1192 int c = Py_CHARMASK(*s++);
1193 if (islower(c)) {
1194 if (!previous_is_cased)
1195 c = toupper(c);
1196 previous_is_cased = 1;
1197 } else if (isupper(c)) {
1198 if (previous_is_cased)
1199 c = tolower(c);
1200 previous_is_cased = 1;
1201 } else
1202 previous_is_cased = 0;
1203 *s_new++ = c;
1204 }
1205 return new;
1206}
1207
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001208static char capitalize__doc__[] =
1209"S.capitalize() -> string\n\
1210\n\
1211Return a copy of the string S with only its first character\n\
1212capitalized.";
1213
1214static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001215string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216{
1217 char *s = PyString_AS_STRING(self), *s_new;
1218 int i, n = PyString_GET_SIZE(self);
1219 PyObject *new;
1220
Guido van Rossum43713e52000-02-29 13:59:29 +00001221 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001222 return NULL;
1223 new = PyString_FromStringAndSize(NULL, n);
1224 if (new == NULL)
1225 return NULL;
1226 s_new = PyString_AsString(new);
1227 if (0 < n) {
1228 int c = Py_CHARMASK(*s++);
1229 if (islower(c))
1230 *s_new = toupper(c);
1231 else
1232 *s_new = c;
1233 s_new++;
1234 }
1235 for (i = 1; i < n; i++) {
1236 int c = Py_CHARMASK(*s++);
1237 if (isupper(c))
1238 *s_new = tolower(c);
1239 else
1240 *s_new = c;
1241 s_new++;
1242 }
1243 return new;
1244}
1245
1246
1247static char count__doc__[] =
1248"S.count(sub[, start[, end]]) -> int\n\
1249\n\
1250Return the number of occurrences of substring sub in string\n\
1251S[start:end]. Optional arguments start and end are\n\
1252interpreted as in slice notation.";
1253
1254static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001255string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001257 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258 int len = PyString_GET_SIZE(self), n;
1259 int i = 0, last = INT_MAX;
1260 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262
Guido van Rossumc6821402000-05-08 14:08:05 +00001263 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1264 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001266
Guido van Rossum4c08d552000-03-10 22:55:18 +00001267 if (PyString_Check(subobj)) {
1268 sub = PyString_AS_STRING(subobj);
1269 n = PyString_GET_SIZE(subobj);
1270 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001271 else if (PyUnicode_Check(subobj)) {
1272 int count;
1273 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1274 if (count == -1)
1275 return NULL;
1276 else
1277 return PyInt_FromLong((long) count);
1278 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1280 return NULL;
1281
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001282 if (last > len)
1283 last = len;
1284 if (last < 0)
1285 last += len;
1286 if (last < 0)
1287 last = 0;
1288 if (i < 0)
1289 i += len;
1290 if (i < 0)
1291 i = 0;
1292 m = last + 1 - n;
1293 if (n == 0)
1294 return PyInt_FromLong((long) (m-i));
1295
1296 r = 0;
1297 while (i < m) {
1298 if (!memcmp(s+i, sub, n)) {
1299 r++;
1300 i += n;
1301 } else {
1302 i++;
1303 }
1304 }
1305 return PyInt_FromLong((long) r);
1306}
1307
1308
1309static char swapcase__doc__[] =
1310"S.swapcase() -> string\n\
1311\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313converted to lowercase and vice versa.";
1314
1315static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001316string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317{
1318 char *s = PyString_AS_STRING(self), *s_new;
1319 int i, n = PyString_GET_SIZE(self);
1320 PyObject *new;
1321
Guido van Rossum43713e52000-02-29 13:59:29 +00001322 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 return NULL;
1324 new = PyString_FromStringAndSize(NULL, n);
1325 if (new == NULL)
1326 return NULL;
1327 s_new = PyString_AsString(new);
1328 for (i = 0; i < n; i++) {
1329 int c = Py_CHARMASK(*s++);
1330 if (islower(c)) {
1331 *s_new = toupper(c);
1332 }
1333 else if (isupper(c)) {
1334 *s_new = tolower(c);
1335 }
1336 else
1337 *s_new = c;
1338 s_new++;
1339 }
1340 return new;
1341}
1342
1343
1344static char translate__doc__[] =
1345"S.translate(table [,deletechars]) -> string\n\
1346\n\
1347Return a copy of the string S, where all characters occurring\n\
1348in the optional argument deletechars are removed, and the\n\
1349remaining characters have been mapped through the given\n\
1350translation table, which must be a string of length 256.";
1351
1352static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001353string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 register char *input, *output;
1356 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357 register int i, c, changed = 0;
1358 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 int inlen, tablen, dellen = 0;
1361 PyObject *result;
1362 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364
Guido van Rossum4c08d552000-03-10 22:55:18 +00001365 if (!PyArg_ParseTuple(args, "O|O:translate",
1366 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001368
1369 if (PyString_Check(tableobj)) {
1370 table1 = PyString_AS_STRING(tableobj);
1371 tablen = PyString_GET_SIZE(tableobj);
1372 }
1373 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001374 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001375 parameter; instead a mapping to None will cause characters
1376 to be deleted. */
1377 if (delobj != NULL) {
1378 PyErr_SetString(PyExc_TypeError,
1379 "deletions are implemented differently for unicode");
1380 return NULL;
1381 }
1382 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1383 }
1384 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386
1387 if (delobj != NULL) {
1388 if (PyString_Check(delobj)) {
1389 del_table = PyString_AS_STRING(delobj);
1390 dellen = PyString_GET_SIZE(delobj);
1391 }
1392 else if (PyUnicode_Check(delobj)) {
1393 PyErr_SetString(PyExc_TypeError,
1394 "deletions are implemented differently for unicode");
1395 return NULL;
1396 }
1397 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1398 return NULL;
1399
1400 if (tablen != 256) {
1401 PyErr_SetString(PyExc_ValueError,
1402 "translation table must be 256 characters long");
1403 return NULL;
1404 }
1405 }
1406 else {
1407 del_table = NULL;
1408 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409 }
1410
1411 table = table1;
1412 inlen = PyString_Size(input_obj);
1413 result = PyString_FromStringAndSize((char *)NULL, inlen);
1414 if (result == NULL)
1415 return NULL;
1416 output_start = output = PyString_AsString(result);
1417 input = PyString_AsString(input_obj);
1418
1419 if (dellen == 0) {
1420 /* If no deletions are required, use faster code */
1421 for (i = inlen; --i >= 0; ) {
1422 c = Py_CHARMASK(*input++);
1423 if (Py_CHARMASK((*output++ = table[c])) != c)
1424 changed = 1;
1425 }
1426 if (changed)
1427 return result;
1428 Py_DECREF(result);
1429 Py_INCREF(input_obj);
1430 return input_obj;
1431 }
1432
1433 for (i = 0; i < 256; i++)
1434 trans_table[i] = Py_CHARMASK(table[i]);
1435
1436 for (i = 0; i < dellen; i++)
1437 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1438
1439 for (i = inlen; --i >= 0; ) {
1440 c = Py_CHARMASK(*input++);
1441 if (trans_table[c] != -1)
1442 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1443 continue;
1444 changed = 1;
1445 }
1446 if (!changed) {
1447 Py_DECREF(result);
1448 Py_INCREF(input_obj);
1449 return input_obj;
1450 }
1451 /* Fix the size of the resulting string */
1452 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1453 return NULL;
1454 return result;
1455}
1456
1457
1458/* What follows is used for implementing replace(). Perry Stoll. */
1459
/*
  mymemfind

  strstr()-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the offset into MEM of that occurrence, or
  -1 if there is none.  A pattern longer than MEM is never found, so
  the result is -1 in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest offset at which a whole copy of PAT still fits in MEM. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  left to right:
      mem=1111  and pat=11 gives 2
      mem=11111 and pat=11 also gives 2
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* Resume the search just past this occurrence. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "all of them".

   If STR is empty, PAT is longer than STR, or no replacement is to be
   made, then the original string is returned.  Otherwise, a new string
   is allocated here and returned; the caller must release it with
   PyMem_FREE().

   PAT must not be empty (string_replace() rejects an empty pattern
   before calling this function).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can overflow:
       when the result shrinks, nfound*pat_len never exceeds len. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;

    if (new_len == 0) {
        /* Every byte of STR was part of a replaced occurrence and SUB
           is empty.  A zero-byte allocation request may return NULL,
           which callers would mistake for a failure, so allocate one
           byte for the caller to free. */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
        *out_len = 0;
        return out_s;
    }

    new_s = (char *)PyMem_MALLOC(new_len);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset);     /* copy part of str before pat */
        str += offset + pat_len;        /* move str past pattern */
        len -= offset + pat_len;        /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;                /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len);    /* copy substring into new_s */
        new_s += sub_len;               /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1588
1589static char replace__doc__[] =
1590"S.replace (old, new[, maxsplit]) -> string\n\
1591\n\
1592Return a copy of string S with all occurrences of substring\n\
1593old replaced by new. If the optional argument maxsplit is\n\
1594given, only the first maxsplit occurrences are replaced.";
1595
1596static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001597string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001599 const char *str = PyString_AS_STRING(self), *sub, *repl;
1600 char *new_s;
1601 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1602 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001604 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 if (!PyArg_ParseTuple(args, "OO|i:replace",
1607 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001609
1610 if (PyString_Check(subobj)) {
1611 sub = PyString_AS_STRING(subobj);
1612 sub_len = PyString_GET_SIZE(subobj);
1613 }
1614 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001615 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 subobj, replobj, count);
1617 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1618 return NULL;
1619
1620 if (PyString_Check(replobj)) {
1621 repl = PyString_AS_STRING(replobj);
1622 repl_len = PyString_GET_SIZE(replobj);
1623 }
1624 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001625 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001626 subobj, replobj, count);
1627 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1628 return NULL;
1629
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001630 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001631 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 return NULL;
1633 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001634 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 if (new_s == NULL) {
1636 PyErr_NoMemory();
1637 return NULL;
1638 }
1639 if (out_len == -1) {
1640 /* we're returning another reference to self */
1641 new = (PyObject*)self;
1642 Py_INCREF(new);
1643 }
1644 else {
1645 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001646 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 }
1648 return new;
1649}
1650
1651
1652static char startswith__doc__[] =
1653"S.startswith(prefix[, start[, end]]) -> int\n\
1654\n\
1655Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1656optional start, test S beginning at that position. With optional end, stop\n\
1657comparing S at that position.";
1658
1659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001660string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001662 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 int plen;
1666 int start = 0;
1667 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669
Guido van Rossumc6821402000-05-08 14:08:05 +00001670 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1671 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672 return NULL;
1673 if (PyString_Check(subobj)) {
1674 prefix = PyString_AS_STRING(subobj);
1675 plen = PyString_GET_SIZE(subobj);
1676 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001677 else if (PyUnicode_Check(subobj)) {
1678 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001679 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001680 subobj, start, end, -1);
1681 if (rc == -1)
1682 return NULL;
1683 else
1684 return PyInt_FromLong((long) rc);
1685 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001686 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 return NULL;
1688
1689 /* adopt Java semantics for index out of range. it is legal for
1690 * offset to be == plen, but this only returns true if prefix is
1691 * the empty string.
1692 */
1693 if (start < 0 || start+plen > len)
1694 return PyInt_FromLong(0);
1695
1696 if (!memcmp(str+start, prefix, plen)) {
1697 /* did the match end after the specified end? */
1698 if (end < 0)
1699 return PyInt_FromLong(1);
1700 else if (end - start < plen)
1701 return PyInt_FromLong(0);
1702 else
1703 return PyInt_FromLong(1);
1704 }
1705 else return PyInt_FromLong(0);
1706}
1707
1708
1709static char endswith__doc__[] =
1710"S.endswith(suffix[, start[, end]]) -> int\n\
1711\n\
1712Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1713optional start, test S beginning at that position. With optional end, stop\n\
1714comparing S at that position.";
1715
1716static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001717string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721 const char* suffix;
1722 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 int start = 0;
1724 int end = -1;
1725 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001726 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727
Guido van Rossumc6821402000-05-08 14:08:05 +00001728 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1729 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730 return NULL;
1731 if (PyString_Check(subobj)) {
1732 suffix = PyString_AS_STRING(subobj);
1733 slen = PyString_GET_SIZE(subobj);
1734 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001735 else if (PyUnicode_Check(subobj)) {
1736 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001737 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001738 subobj, start, end, +1);
1739 if (rc == -1)
1740 return NULL;
1741 else
1742 return PyInt_FromLong((long) rc);
1743 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001744 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 return NULL;
1746
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748 return PyInt_FromLong(0);
1749
1750 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754 return PyInt_FromLong(1);
1755 else return PyInt_FromLong(0);
1756}
1757
1758
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001759static char encode__doc__[] =
1760"S.encode([encoding[,errors]]) -> string\n\
1761\n\
1762Return an encoded string version of S. Default encoding is the current\n\
1763default string encoding. errors may be given to set a different error\n\
1764handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1765a ValueError. Other possible values are 'ignore' and 'replace'.";
1766
1767static PyObject *
1768string_encode(PyStringObject *self, PyObject *args)
1769{
1770 char *encoding = NULL;
1771 char *errors = NULL;
1772 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1773 return NULL;
1774 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1775}
1776
1777
Guido van Rossum4c08d552000-03-10 22:55:18 +00001778static char expandtabs__doc__[] =
1779"S.expandtabs([tabsize]) -> string\n\
1780\n\
1781Return a copy of S where all tab characters are expanded using spaces.\n\
1782If tabsize is not given, a tab size of 8 characters is assumed.";
1783
1784static PyObject*
1785string_expandtabs(PyStringObject *self, PyObject *args)
1786{
1787 const char *e, *p;
1788 char *q;
1789 int i, j;
1790 PyObject *u;
1791 int tabsize = 8;
1792
1793 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1794 return NULL;
1795
Thomas Wouters7e474022000-07-16 12:04:32 +00001796 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001797 i = j = 0;
1798 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1799 for (p = PyString_AS_STRING(self); p < e; p++)
1800 if (*p == '\t') {
1801 if (tabsize > 0)
1802 j += tabsize - (j % tabsize);
1803 }
1804 else {
1805 j++;
1806 if (*p == '\n' || *p == '\r') {
1807 i += j;
1808 j = 0;
1809 }
1810 }
1811
1812 /* Second pass: create output string and fill it */
1813 u = PyString_FromStringAndSize(NULL, i + j);
1814 if (!u)
1815 return NULL;
1816
1817 j = 0;
1818 q = PyString_AS_STRING(u);
1819
1820 for (p = PyString_AS_STRING(self); p < e; p++)
1821 if (*p == '\t') {
1822 if (tabsize > 0) {
1823 i = tabsize - (j % tabsize);
1824 j += i;
1825 while (i--)
1826 *q++ = ' ';
1827 }
1828 }
1829 else {
1830 j++;
1831 *q++ = *p;
1832 if (*p == '\n' || *p == '\r')
1833 j = 0;
1834 }
1835
1836 return u;
1837}
1838
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001839static
1840PyObject *pad(PyStringObject *self,
1841 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001842 int right,
1843 char fill)
1844{
1845 PyObject *u;
1846
1847 if (left < 0)
1848 left = 0;
1849 if (right < 0)
1850 right = 0;
1851
1852 if (left == 0 && right == 0) {
1853 Py_INCREF(self);
1854 return (PyObject *)self;
1855 }
1856
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001857 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 left + PyString_GET_SIZE(self) + right);
1859 if (u) {
1860 if (left)
1861 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001862 memcpy(PyString_AS_STRING(u) + left,
1863 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 PyString_GET_SIZE(self));
1865 if (right)
1866 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1867 fill, right);
1868 }
1869
1870 return u;
1871}
1872
1873static char ljust__doc__[] =
1874"S.ljust(width) -> string\n\
1875\n\
1876Return S left justified in a string of length width. Padding is\n\
1877done using spaces.";
1878
1879static PyObject *
1880string_ljust(PyStringObject *self, PyObject *args)
1881{
1882 int width;
1883 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1884 return NULL;
1885
1886 if (PyString_GET_SIZE(self) >= width) {
1887 Py_INCREF(self);
1888 return (PyObject*) self;
1889 }
1890
1891 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1892}
1893
1894
1895static char rjust__doc__[] =
1896"S.rjust(width) -> string\n\
1897\n\
1898Return S right justified in a string of length width. Padding is\n\
1899done using spaces.";
1900
1901static PyObject *
1902string_rjust(PyStringObject *self, PyObject *args)
1903{
1904 int width;
1905 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1906 return NULL;
1907
1908 if (PyString_GET_SIZE(self) >= width) {
1909 Py_INCREF(self);
1910 return (PyObject*) self;
1911 }
1912
1913 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1914}
1915
1916
1917static char center__doc__[] =
1918"S.center(width) -> string\n\
1919\n\
1920Return S centered in a string of length width. Padding is done\n\
1921using spaces.";
1922
1923static PyObject *
1924string_center(PyStringObject *self, PyObject *args)
1925{
1926 int marg, left;
1927 int width;
1928
1929 if (!PyArg_ParseTuple(args, "i:center", &width))
1930 return NULL;
1931
1932 if (PyString_GET_SIZE(self) >= width) {
1933 Py_INCREF(self);
1934 return (PyObject*) self;
1935 }
1936
1937 marg = width - PyString_GET_SIZE(self);
1938 left = marg / 2 + (marg & width & 1);
1939
1940 return pad(self, left, marg - left, ' ');
1941}
1942
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads self with '0' up to `width` characters; if the original
 * text began with a '+' or '-' sign, the sign is moved to the very
 * front of the padded result.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int npad;
	PyObject *result;
	char *buf;
	char lead;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	npad = width - PyString_GET_SIZE(self);
	if (npad <= 0) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	result = pad(self, npad, 0, '0');
	if (result == NULL)
		return NULL;

	/* buf[npad] is the first character of the original text */
	buf = PyString_AS_STRING(result);
	lead = buf[npad];
	if (lead == '+' || lead == '-') {
		/* move sign to beginning of string */
		buf[0] = lead;
		buf[npad] = '0';
	}

	return result;
}
#endif
1982
1983static char isspace__doc__[] =
1984"S.isspace() -> int\n\
1985\n\
1986Return 1 if there are only whitespace characters in S,\n\
19870 otherwise.";
1988
1989static PyObject*
1990string_isspace(PyStringObject *self, PyObject *args)
1991{
Fred Drakeba096332000-07-09 07:04:36 +00001992 register const unsigned char *p
1993 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001994 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995
1996 if (!PyArg_NoArgs(args))
1997 return NULL;
1998
1999 /* Shortcut for single character strings */
2000 if (PyString_GET_SIZE(self) == 1 &&
2001 isspace(*p))
2002 return PyInt_FromLong(1);
2003
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002004 /* Special case for empty strings */
2005 if (PyString_GET_SIZE(self) == 0)
2006 return PyInt_FromLong(0);
2007
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008 e = p + PyString_GET_SIZE(self);
2009 for (; p < e; p++) {
2010 if (!isspace(*p))
2011 return PyInt_FromLong(0);
2012 }
2013 return PyInt_FromLong(1);
2014}
2015
2016
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002017static char isalpha__doc__[] =
2018"S.isalpha() -> int\n\
2019\n\
2020Return 1 if all characters in S are alphabetic\n\
2021and there is at least one character in S, 0 otherwise.";
2022
2023static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002024string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002025{
Fred Drakeba096332000-07-09 07:04:36 +00002026 register const unsigned char *p
2027 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002028 register const unsigned char *e;
2029
2030 if (!PyArg_NoArgs(args))
2031 return NULL;
2032
2033 /* Shortcut for single character strings */
2034 if (PyString_GET_SIZE(self) == 1 &&
2035 isalpha(*p))
2036 return PyInt_FromLong(1);
2037
2038 /* Special case for empty strings */
2039 if (PyString_GET_SIZE(self) == 0)
2040 return PyInt_FromLong(0);
2041
2042 e = p + PyString_GET_SIZE(self);
2043 for (; p < e; p++) {
2044 if (!isalpha(*p))
2045 return PyInt_FromLong(0);
2046 }
2047 return PyInt_FromLong(1);
2048}
2049
2050
2051static char isalnum__doc__[] =
2052"S.isalnum() -> int\n\
2053\n\
2054Return 1 if all characters in S are alphanumeric\n\
2055and there is at least one character in S, 0 otherwise.";
2056
2057static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002058string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002059{
Fred Drakeba096332000-07-09 07:04:36 +00002060 register const unsigned char *p
2061 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002062 register const unsigned char *e;
2063
2064 if (!PyArg_NoArgs(args))
2065 return NULL;
2066
2067 /* Shortcut for single character strings */
2068 if (PyString_GET_SIZE(self) == 1 &&
2069 isalnum(*p))
2070 return PyInt_FromLong(1);
2071
2072 /* Special case for empty strings */
2073 if (PyString_GET_SIZE(self) == 0)
2074 return PyInt_FromLong(0);
2075
2076 e = p + PyString_GET_SIZE(self);
2077 for (; p < e; p++) {
2078 if (!isalnum(*p))
2079 return PyInt_FromLong(0);
2080 }
2081 return PyInt_FromLong(1);
2082}
2083
2084
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085static char isdigit__doc__[] =
2086"S.isdigit() -> int\n\
2087\n\
2088Return 1 if there are only digit characters in S,\n\
20890 otherwise.";
2090
2091static PyObject*
2092string_isdigit(PyStringObject *self, PyObject *args)
2093{
Fred Drakeba096332000-07-09 07:04:36 +00002094 register const unsigned char *p
2095 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002096 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002097
2098 if (!PyArg_NoArgs(args))
2099 return NULL;
2100
2101 /* Shortcut for single character strings */
2102 if (PyString_GET_SIZE(self) == 1 &&
2103 isdigit(*p))
2104 return PyInt_FromLong(1);
2105
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002106 /* Special case for empty strings */
2107 if (PyString_GET_SIZE(self) == 0)
2108 return PyInt_FromLong(0);
2109
Guido van Rossum4c08d552000-03-10 22:55:18 +00002110 e = p + PyString_GET_SIZE(self);
2111 for (; p < e; p++) {
2112 if (!isdigit(*p))
2113 return PyInt_FromLong(0);
2114 }
2115 return PyInt_FromLong(1);
2116}
2117
2118
2119static char islower__doc__[] =
2120"S.islower() -> int\n\
2121\n\
2122Return 1 if all cased characters in S are lowercase and there is\n\
2123at least one cased character in S, 0 otherwise.";
2124
2125static PyObject*
2126string_islower(PyStringObject *self, PyObject *args)
2127{
Fred Drakeba096332000-07-09 07:04:36 +00002128 register const unsigned char *p
2129 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002130 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002131 int cased;
2132
2133 if (!PyArg_NoArgs(args))
2134 return NULL;
2135
2136 /* Shortcut for single character strings */
2137 if (PyString_GET_SIZE(self) == 1)
2138 return PyInt_FromLong(islower(*p) != 0);
2139
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002140 /* Special case for empty strings */
2141 if (PyString_GET_SIZE(self) == 0)
2142 return PyInt_FromLong(0);
2143
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144 e = p + PyString_GET_SIZE(self);
2145 cased = 0;
2146 for (; p < e; p++) {
2147 if (isupper(*p))
2148 return PyInt_FromLong(0);
2149 else if (!cased && islower(*p))
2150 cased = 1;
2151 }
2152 return PyInt_FromLong(cased);
2153}
2154
2155
2156static char isupper__doc__[] =
2157"S.isupper() -> int\n\
2158\n\
2159Return 1 if all cased characters in S are uppercase and there is\n\
2160at least one cased character in S, 0 otherwise.";
2161
2162static PyObject*
2163string_isupper(PyStringObject *self, PyObject *args)
2164{
Fred Drakeba096332000-07-09 07:04:36 +00002165 register const unsigned char *p
2166 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002167 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002168 int cased;
2169
2170 if (!PyArg_NoArgs(args))
2171 return NULL;
2172
2173 /* Shortcut for single character strings */
2174 if (PyString_GET_SIZE(self) == 1)
2175 return PyInt_FromLong(isupper(*p) != 0);
2176
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002177 /* Special case for empty strings */
2178 if (PyString_GET_SIZE(self) == 0)
2179 return PyInt_FromLong(0);
2180
Guido van Rossum4c08d552000-03-10 22:55:18 +00002181 e = p + PyString_GET_SIZE(self);
2182 cased = 0;
2183 for (; p < e; p++) {
2184 if (islower(*p))
2185 return PyInt_FromLong(0);
2186 else if (!cased && isupper(*p))
2187 cased = 1;
2188 }
2189 return PyInt_FromLong(cased);
2190}
2191
2192
2193static char istitle__doc__[] =
2194"S.istitle() -> int\n\
2195\n\
2196Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2197may only follow uncased characters and lowercase characters only cased\n\
2198ones. Return 0 otherwise.";
2199
2200static PyObject*
2201string_istitle(PyStringObject *self, PyObject *args)
2202{
Fred Drakeba096332000-07-09 07:04:36 +00002203 register const unsigned char *p
2204 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002205 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 int cased, previous_is_cased;
2207
2208 if (!PyArg_NoArgs(args))
2209 return NULL;
2210
2211 /* Shortcut for single character strings */
2212 if (PyString_GET_SIZE(self) == 1)
2213 return PyInt_FromLong(isupper(*p) != 0);
2214
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002215 /* Special case for empty strings */
2216 if (PyString_GET_SIZE(self) == 0)
2217 return PyInt_FromLong(0);
2218
Guido van Rossum4c08d552000-03-10 22:55:18 +00002219 e = p + PyString_GET_SIZE(self);
2220 cased = 0;
2221 previous_is_cased = 0;
2222 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002223 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224
2225 if (isupper(ch)) {
2226 if (previous_is_cased)
2227 return PyInt_FromLong(0);
2228 previous_is_cased = 1;
2229 cased = 1;
2230 }
2231 else if (islower(ch)) {
2232 if (!previous_is_cased)
2233 return PyInt_FromLong(0);
2234 previous_is_cased = 1;
2235 cased = 1;
2236 }
2237 else
2238 previous_is_cased = 0;
2239 }
2240 return PyInt_FromLong(cased);
2241}
2242
2243
2244static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002245"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246\n\
2247Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002248Line breaks are not included in the resulting list unless keepends\n\
2249is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002250
2251#define SPLIT_APPEND(data, left, right) \
2252 str = PyString_FromStringAndSize(data + left, right - left); \
2253 if (!str) \
2254 goto onError; \
2255 if (PyList_Append(list, str)) { \
2256 Py_DECREF(str); \
2257 goto onError; \
2258 } \
2259 else \
2260 Py_DECREF(str);
2261
2262static PyObject*
2263string_splitlines(PyStringObject *self, PyObject *args)
2264{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 register int i;
2266 register int j;
2267 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002268 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 PyObject *list;
2270 PyObject *str;
2271 char *data;
2272
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002273 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 return NULL;
2275
2276 data = PyString_AS_STRING(self);
2277 len = PyString_GET_SIZE(self);
2278
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 list = PyList_New(0);
2280 if (!list)
2281 goto onError;
2282
2283 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002284 int eol;
2285
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 /* Find a line and append it */
2287 while (i < len && data[i] != '\n' && data[i] != '\r')
2288 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289
2290 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002291 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292 if (i < len) {
2293 if (data[i] == '\r' && i + 1 < len &&
2294 data[i+1] == '\n')
2295 i += 2;
2296 else
2297 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002298 if (keepends)
2299 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002301 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 j = i;
2303 }
2304 if (j < len) {
2305 SPLIT_APPEND(data, j, len);
2306 }
2307
2308 return list;
2309
2310 onError:
2311 Py_DECREF(list);
2312 return NULL;
2313}
2314
2315#undef SPLIT_APPEND
2316
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002318static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 /* Counterparts of the obsolete stropmodule functions; except
2321 string.maketrans(). */
2322 {"join", (PyCFunction)string_join, 1, join__doc__},
2323 {"split", (PyCFunction)string_split, 1, split__doc__},
2324 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2325 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2326 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2327 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2328 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2329 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2330 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002331 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2332 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2334 {"count", (PyCFunction)string_count, 1, count__doc__},
2335 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2336 {"find", (PyCFunction)string_find, 1, find__doc__},
2337 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2340 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2341 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2342 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2344 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2345 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2347 {"title", (PyCFunction)string_title, 1, title__doc__},
2348 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2349 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2350 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002351 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2353 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2354#if 0
2355 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2356#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 {NULL, NULL} /* sentinel */
2358};
2359
2360static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002361string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362{
2363 return Py_FindMethod(string_methods, (PyObject*)s, name);
2364}
2365
2366
/* Type object for strings.  Slots that are 0 are not implemented by
   this type. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,			/*ob_size*/
	"string",		/*tp_name*/
	sizeof(PyStringObject),	/*tp_basicsize*/
	sizeof(char),		/*tp_itemsize*/
	(destructor)string_dealloc, /*tp_dealloc*/
	(printfunc)string_print, /*tp_print*/
	(getattrfunc)string_getattr,		/*tp_getattr*/
	0,			/*tp_setattr*/
	(cmpfunc)string_compare, /*tp_compare*/
	(reprfunc)string_repr, /*tp_repr*/
	0,			/*tp_as_number*/
	&string_as_sequence,	/*tp_as_sequence*/
	0,			/*tp_as_mapping*/
	(hashfunc)string_hash, /*tp_hash*/
	0,			/*tp_call*/
	(reprfunc)string_str,	/*tp_str*/
	0,			/*tp_getattro*/
	0,			/*tp_setattro*/
	&string_as_buffer,	/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT,	/*tp_flags*/
	0,			/*tp_doc*/
};
2391
2392void
Fred Drakeba096332000-07-09 07:04:36 +00002393PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002394{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002395 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002396 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002397 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002398 if (w == NULL || !PyString_Check(*pv)) {
2399 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002400 *pv = NULL;
2401 return;
2402 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002403 v = string_concat((PyStringObject *) *pv, w);
2404 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002405 *pv = v;
2406}
2407
/* Same as PyString_Concat(), but additionally releases the caller's
   reference to w afterwards (w may be NULL). */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
	PyString_Concat(pv, w);
	Py_XDECREF(w);
}
2414
2415
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002416/* The following function breaks the notion that strings are immutable:
2417 it changes the size of a string. We get away with this only if there
2418 is only one module referencing the object. You can also think of it
2419 as creating a new string object and destroying the old one, only
2420 more efficiently. In any case, don't use this if the string may
2421 already be known to some other part of the code... */
2422
2423int
Fred Drakeba096332000-07-09 07:04:36 +00002424_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002425{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002426 register PyObject *v;
2427 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002428 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002429 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002430 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002431 Py_DECREF(v);
2432 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002433 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002434 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002435 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002436#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002437 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002438#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002439 _Py_ForgetReference(v);
2440 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002441 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002442 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002443 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002444 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002445 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002446 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002447 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002448 _Py_NewReference(*pv);
2449 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002450 sv->ob_size = newsize;
2451 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002452 return 0;
2453}
Guido van Rossume5372401993-03-16 12:15:04 +00002454
2455/* Helpers for formatstring */
2456
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002457static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002458getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002459{
2460 int argidx = *p_argidx;
2461 if (argidx < arglen) {
2462 (*p_argidx)++;
2463 if (arglen < 0)
2464 return args;
2465 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002466 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002467 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002468 PyErr_SetString(PyExc_TypeError,
2469 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002470 return NULL;
2471}
2472
/* Bit flags recording which flag characters appeared in a conversion
 * specification:
 *
 *	flag character	macro
 *	'-'		F_LJUST
 *	'+'		F_SIGN
 *	' '		F_BLANK
 *	'#'		F_ALT
 *	'0'		F_ZERO
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2485
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002486static int
Fred Drakeba096332000-07-09 07:04:36 +00002487formatfloat(char *buf, size_t buflen, int flags,
2488 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002489{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002490 /* fmt = '%#.' + `prec` + `type`
2491 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002492 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002493 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002494 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002495 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002496 if (prec < 0)
2497 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002498 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2499 type = 'g';
2500 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002501 /* worst case length calc to ensure no buffer overrun:
2502 fmt = %#.<prec>g
2503 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002504 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002505 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2506 If prec=0 the effective precision is 1 (the leading digit is
2507 always given), therefore increase by one to 10+prec. */
2508 if (buflen <= (size_t)10 + (size_t)prec) {
2509 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002510 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002511 return -1;
2512 }
Guido van Rossume5372401993-03-16 12:15:04 +00002513 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002514 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002515}
2516
Tim Peters38fd5b62000-09-21 05:43:11 +00002517/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2518 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2519 * Python's regular ints.
2520 * Return value: a new PyString*, or NULL if error.
2521 * . *pbuf is set to point into it,
2522 * *plen set to the # of chars following that.
2523 * Caller must decref it when done using pbuf.
2524 * The string starting at *pbuf is of the form
2525 * "-"? ("0x" | "0X")? digit+
2526 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002527 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002528 * There will be at least prec digits, zero-filled on the left if
2529 * necessary to get that many.
2530 * val object to be converted
2531 * flags bitmask of format flags; only F_ALT is looked at
2532 * prec minimum number of digits; 0-fill on left if needed
2533 * type a character in [duoxX]; u acts the same as d
2534 *
2535 * CAUTION: o, x and X conversions on regular ints can never
2536 * produce a '-' sign, but can for Python's unbounded ints.
2537 */
2538PyObject*
2539_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2540 char **pbuf, int *plen)
2541{
2542 PyObject *result = NULL;
2543 char *buf;
2544 int i;
2545 int sign; /* 1 if '-', else 0 */
2546 int len; /* number of characters */
2547 int numdigits; /* len == numnondigits + numdigits */
2548 int numnondigits = 0;
2549
2550 switch (type) {
2551 case 'd':
2552 case 'u':
2553 result = val->ob_type->tp_str(val);
2554 break;
2555 case 'o':
2556 result = val->ob_type->tp_as_number->nb_oct(val);
2557 break;
2558 case 'x':
2559 case 'X':
2560 numnondigits = 2;
2561 result = val->ob_type->tp_as_number->nb_hex(val);
2562 break;
2563 default:
2564 assert(!"'type' not in [duoxX]");
2565 }
2566 if (!result)
2567 return NULL;
2568
2569 /* To modify the string in-place, there can only be one reference. */
2570 if (result->ob_refcnt != 1) {
2571 PyErr_BadInternalCall();
2572 return NULL;
2573 }
2574 buf = PyString_AsString(result);
2575 len = PyString_Size(result);
2576 if (buf[len-1] == 'L') {
2577 --len;
2578 buf[len] = '\0';
2579 }
2580 sign = buf[0] == '-';
2581 numnondigits += sign;
2582 numdigits = len - numnondigits;
2583 assert(numdigits > 0);
2584
Tim Petersfff53252001-04-12 18:38:48 +00002585 /* Get rid of base marker unless F_ALT */
2586 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002587 /* Need to skip 0x, 0X or 0. */
2588 int skipped = 0;
2589 switch (type) {
2590 case 'o':
2591 assert(buf[sign] == '0');
2592 /* If 0 is only digit, leave it alone. */
2593 if (numdigits > 1) {
2594 skipped = 1;
2595 --numdigits;
2596 }
2597 break;
2598 case 'x':
2599 case 'X':
2600 assert(buf[sign] == '0');
2601 assert(buf[sign + 1] == 'x');
2602 skipped = 2;
2603 numnondigits -= 2;
2604 break;
2605 }
2606 if (skipped) {
2607 buf += skipped;
2608 len -= skipped;
2609 if (sign)
2610 buf[0] = '-';
2611 }
2612 assert(len == numnondigits + numdigits);
2613 assert(numdigits > 0);
2614 }
2615
2616 /* Fill with leading zeroes to meet minimum width. */
2617 if (prec > numdigits) {
2618 PyObject *r1 = PyString_FromStringAndSize(NULL,
2619 numnondigits + prec);
2620 char *b1;
2621 if (!r1) {
2622 Py_DECREF(result);
2623 return NULL;
2624 }
2625 b1 = PyString_AS_STRING(r1);
2626 for (i = 0; i < numnondigits; ++i)
2627 *b1++ = *buf++;
2628 for (i = 0; i < prec - numdigits; i++)
2629 *b1++ = '0';
2630 for (i = 0; i < numdigits; i++)
2631 *b1++ = *buf++;
2632 *b1 = '\0';
2633 Py_DECREF(result);
2634 result = r1;
2635 buf = PyString_AS_STRING(result);
2636 len = numnondigits + prec;
2637 }
2638
2639 /* Fix up case for hex conversions. */
2640 switch (type) {
2641 case 'x':
2642 /* Need to convert all upper case letters to lower case. */
2643 for (i = 0; i < len; i++)
2644 if (buf[i] >= 'A' && buf[i] <= 'F')
2645 buf[i] += 'a'-'A';
2646 break;
2647 case 'X':
2648 /* Need to convert 0x to 0X (and -0x to -0X). */
2649 if (buf[sign + 1] == 'x')
2650 buf[sign + 1] = 'X';
2651 break;
2652 }
2653 *pbuf = buf;
2654 *plen = len;
2655 return result;
2656}
2657
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002658static int
Fred Drakeba096332000-07-09 07:04:36 +00002659formatint(char *buf, size_t buflen, int flags,
2660 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002661{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002662 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002663 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2664 + 1 + 1 = 24 */
2665 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002666 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002667 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002668 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002669 if (prec < 0)
2670 prec = 1;
2671 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002672 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002673 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002674 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002675 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002676 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002677 return -1;
2678 }
Guido van Rossume5372401993-03-16 12:15:04 +00002679 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002680 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2681 * but we want it (for consistency with other %#x conversions, and
2682 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002683 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2684 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2685 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002686 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002687 if (x == 0 &&
2688 (flags & F_ALT) &&
2689 (type == 'x' || type == 'X') &&
2690 buf[1] != (char)type) /* this last always true under std C */
2691 {
Tim Petersfff53252001-04-12 18:38:48 +00002692 memmove(buf+2, buf, strlen(buf) + 1);
2693 buf[0] = '0';
2694 buf[1] = (char)type;
2695 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002696 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002697}
2698
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002699static int
Fred Drakeba096332000-07-09 07:04:36 +00002700formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002701{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002702 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002703 if (PyString_Check(v)) {
2704 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002705 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002706 }
2707 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002708 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002709 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002710 }
2711 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002712 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002713}
2714
Guido van Rossum013142a1994-08-30 08:19:36 +00002715
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand in any expression (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002725
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002726PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002727PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002728{
2729 char *fmt, *res;
2730 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002731 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002732 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002733 PyObject *dict = NULL;
2734 if (format == NULL || !PyString_Check(format) || args == NULL) {
2735 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002736 return NULL;
2737 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002738 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002739 fmt = PyString_AsString(format);
2740 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002741 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002742 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002743 if (result == NULL)
2744 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002745 res = PyString_AsString(result);
2746 if (PyTuple_Check(args)) {
2747 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002748 argidx = 0;
2749 }
2750 else {
2751 arglen = -1;
2752 argidx = -2;
2753 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002754 if (args->ob_type->tp_as_mapping)
2755 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002756 while (--fmtcnt >= 0) {
2757 if (*fmt != '%') {
2758 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002759 rescnt = fmtcnt + 100;
2760 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002761 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002762 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002763 res = PyString_AsString(result)
2764 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002765 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002766 }
2767 *res++ = *fmt++;
2768 }
2769 else {
2770 /* Got a format specifier */
2771 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002772 int width = -1;
2773 int prec = -1;
2774 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002775 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002776 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002777 PyObject *v = NULL;
2778 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002779 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002780 int sign;
2781 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002782 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002783 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002784 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002785
Guido van Rossumda9c2711996-12-05 21:58:58 +00002786 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002787 if (*fmt == '(') {
2788 char *keystart;
2789 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002791 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002792
2793 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002794 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002795 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002796 goto error;
2797 }
2798 ++fmt;
2799 --fmtcnt;
2800 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002801 /* Skip over balanced parentheses */
2802 while (pcount > 0 && --fmtcnt >= 0) {
2803 if (*fmt == ')')
2804 --pcount;
2805 else if (*fmt == '(')
2806 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002807 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002808 }
2809 keylen = fmt - keystart - 1;
2810 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002812 "incomplete format key");
2813 goto error;
2814 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002815 key = PyString_FromStringAndSize(keystart,
2816 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002817 if (key == NULL)
2818 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002819 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002820 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002821 args_owned = 0;
2822 }
2823 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002825 if (args == NULL) {
2826 goto error;
2827 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002828 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002829 arglen = -1;
2830 argidx = -2;
2831 }
Guido van Rossume5372401993-03-16 12:15:04 +00002832 while (--fmtcnt >= 0) {
2833 switch (c = *fmt++) {
2834 case '-': flags |= F_LJUST; continue;
2835 case '+': flags |= F_SIGN; continue;
2836 case ' ': flags |= F_BLANK; continue;
2837 case '#': flags |= F_ALT; continue;
2838 case '0': flags |= F_ZERO; continue;
2839 }
2840 break;
2841 }
2842 if (c == '*') {
2843 v = getnextarg(args, arglen, &argidx);
2844 if (v == NULL)
2845 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002846 if (!PyInt_Check(v)) {
2847 PyErr_SetString(PyExc_TypeError,
2848 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002849 goto error;
2850 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002851 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002852 if (width < 0) {
2853 flags |= F_LJUST;
2854 width = -width;
2855 }
Guido van Rossume5372401993-03-16 12:15:04 +00002856 if (--fmtcnt >= 0)
2857 c = *fmt++;
2858 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002859 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002860 width = c - '0';
2861 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002862 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002863 if (!isdigit(c))
2864 break;
2865 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002866 PyErr_SetString(
2867 PyExc_ValueError,
2868 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002869 goto error;
2870 }
2871 width = width*10 + (c - '0');
2872 }
2873 }
2874 if (c == '.') {
2875 prec = 0;
2876 if (--fmtcnt >= 0)
2877 c = *fmt++;
2878 if (c == '*') {
2879 v = getnextarg(args, arglen, &argidx);
2880 if (v == NULL)
2881 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002882 if (!PyInt_Check(v)) {
2883 PyErr_SetString(
2884 PyExc_TypeError,
2885 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002886 goto error;
2887 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002888 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002889 if (prec < 0)
2890 prec = 0;
2891 if (--fmtcnt >= 0)
2892 c = *fmt++;
2893 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002894 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002895 prec = c - '0';
2896 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002897 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002898 if (!isdigit(c))
2899 break;
2900 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002901 PyErr_SetString(
2902 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002903 "prec too big");
2904 goto error;
2905 }
2906 prec = prec*10 + (c - '0');
2907 }
2908 }
2909 } /* prec */
2910 if (fmtcnt >= 0) {
2911 if (c == 'h' || c == 'l' || c == 'L') {
2912 size = c;
2913 if (--fmtcnt >= 0)
2914 c = *fmt++;
2915 }
2916 }
2917 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002918 PyErr_SetString(PyExc_ValueError,
2919 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002920 goto error;
2921 }
2922 if (c != '%') {
2923 v = getnextarg(args, arglen, &argidx);
2924 if (v == NULL)
2925 goto error;
2926 }
2927 sign = 0;
2928 fill = ' ';
2929 switch (c) {
2930 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002931 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002932 len = 1;
2933 break;
2934 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002935 case 'r':
2936 if (PyUnicode_Check(v)) {
2937 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002938 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00002939 goto unicode;
2940 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002941 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002942 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002943 else
2944 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002945 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002946 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002947 if (!PyString_Check(temp)) {
2948 PyErr_SetString(PyExc_TypeError,
2949 "%s argument has non-string str()");
2950 goto error;
2951 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002952 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002953 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002954 if (prec >= 0 && len > prec)
2955 len = prec;
2956 break;
2957 case 'i':
2958 case 'd':
2959 case 'u':
2960 case 'o':
2961 case 'x':
2962 case 'X':
2963 if (c == 'i')
2964 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002965 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002966 temp = _PyString_FormatLong(v, flags,
2967 prec, c, &pbuf, &len);
2968 if (!temp)
2969 goto error;
2970 /* unbounded ints can always produce
2971 a sign character! */
2972 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002973 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002974 else {
2975 pbuf = formatbuf;
2976 len = formatint(pbuf, sizeof(formatbuf),
2977 flags, prec, c, v);
2978 if (len < 0)
2979 goto error;
2980 /* only d conversion is signed */
2981 sign = c == 'd';
2982 }
2983 if (flags & F_ZERO)
2984 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002985 break;
2986 case 'e':
2987 case 'E':
2988 case 'f':
2989 case 'g':
2990 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002991 pbuf = formatbuf;
2992 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002993 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002994 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002995 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002996 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002997 fill = '0';
2998 break;
2999 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003000 pbuf = formatbuf;
3001 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003002 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003003 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003004 break;
3005 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003006 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003007 "unsupported format character '%c' (0x%x) "
3008 "at index %i",
3009 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003010 goto error;
3011 }
3012 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003013 if (*pbuf == '-' || *pbuf == '+') {
3014 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003015 len--;
3016 }
3017 else if (flags & F_SIGN)
3018 sign = '+';
3019 else if (flags & F_BLANK)
3020 sign = ' ';
3021 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003022 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003023 }
3024 if (width < len)
3025 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003026 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003027 reslen -= rescnt;
3028 rescnt = width + fmtcnt + 100;
3029 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003030 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003031 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003032 res = PyString_AsString(result)
3033 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003034 }
3035 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003036 if (fill != ' ')
3037 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003038 rescnt--;
3039 if (width > len)
3040 width--;
3041 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003042 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3043 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003044 assert(pbuf[1] == c);
3045 if (fill != ' ') {
3046 *res++ = *pbuf++;
3047 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003048 }
Tim Petersfff53252001-04-12 18:38:48 +00003049 rescnt -= 2;
3050 width -= 2;
3051 if (width < 0)
3052 width = 0;
3053 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003054 }
3055 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003056 do {
3057 --rescnt;
3058 *res++ = fill;
3059 } while (--width > len);
3060 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003061 if (fill == ' ') {
3062 if (sign)
3063 *res++ = sign;
3064 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003065 (c == 'x' || c == 'X')) {
3066 assert(pbuf[0] == '0');
3067 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003068 *res++ = *pbuf++;
3069 *res++ = *pbuf++;
3070 }
3071 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003072 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003073 res += len;
3074 rescnt -= len;
3075 while (--width >= len) {
3076 --rescnt;
3077 *res++ = ' ';
3078 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003079 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003080 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003081 "not all arguments converted");
3082 goto error;
3083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003084 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003085 } /* '%' */
3086 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003087 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003088 PyErr_SetString(PyExc_TypeError,
3089 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003090 goto error;
3091 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003092 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003094 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003096 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003097
3098 unicode:
3099 if (args_owned) {
3100 Py_DECREF(args);
3101 args_owned = 0;
3102 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003103 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003104 if (PyTuple_Check(orig_args) && argidx > 0) {
3105 PyObject *v;
3106 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3107 v = PyTuple_New(n);
3108 if (v == NULL)
3109 goto error;
3110 while (--n >= 0) {
3111 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3112 Py_INCREF(w);
3113 PyTuple_SET_ITEM(v, n, w);
3114 }
3115 args = v;
3116 } else {
3117 Py_INCREF(orig_args);
3118 args = orig_args;
3119 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003120 args_owned = 1;
3121 /* Take what we have of the result and let the Unicode formatting
3122 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003123 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003124 if (_PyString_Resize(&result, rescnt))
3125 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003126 fmtcnt = PyString_GET_SIZE(format) - \
3127 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003128 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3129 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003130 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003131 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003132 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003133 if (v == NULL)
3134 goto error;
3135 /* Paste what we have (result) to what the Unicode formatting
3136 function returned (v) and return the result (or error) */
3137 w = PyUnicode_Concat(result, v);
3138 Py_DECREF(result);
3139 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003140 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003141 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003142
Guido van Rossume5372401993-03-16 12:15:04 +00003143 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003145 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003146 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003147 }
Guido van Rossume5372401993-03-16 12:15:04 +00003148 return NULL;
3149}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003150
3151
3152#ifdef INTERN_STRINGS
3153
Barry Warsaw4df762f2000-08-16 23:41:01 +00003154/* This dictionary will leak at PyString_Fini() time. That's acceptable
3155 * because PyString_Fini() specifically frees interned strings that are
3156 * only referenced by this dictionary. The CVS log entry for revision 2.45
3157 * says:
3158 *
3159 * Change the Fini function to only remove otherwise unreferenced
3160 * strings from the interned table. There are references in
3161 * hard-to-find static variables all over the interpreter, and it's not
3162 * worth trying to get rid of all those; but "uninterning" isn't fair
3163 * either and may cause subtle failures later -- so we have to keep them
3164 * in the interned table.
3165 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003166static PyObject *interned;
3167
3168void
Fred Drakeba096332000-07-09 07:04:36 +00003169PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003170{
3171 register PyStringObject *s = (PyStringObject *)(*p);
3172 PyObject *t;
3173 if (s == NULL || !PyString_Check(s))
3174 Py_FatalError("PyString_InternInPlace: strings only please!");
3175 if ((t = s->ob_sinterned) != NULL) {
3176 if (t == (PyObject *)s)
3177 return;
3178 Py_INCREF(t);
3179 *p = t;
3180 Py_DECREF(s);
3181 return;
3182 }
3183 if (interned == NULL) {
3184 interned = PyDict_New();
3185 if (interned == NULL)
3186 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003187 }
3188 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3189 Py_INCREF(t);
3190 *p = s->ob_sinterned = t;
3191 Py_DECREF(s);
3192 return;
3193 }
3194 t = (PyObject *)s;
3195 if (PyDict_SetItem(interned, t, t) == 0) {
3196 s->ob_sinterned = t;
3197 return;
3198 }
3199 PyErr_Clear();
3200}
3201
3202
3203PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003204PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003205{
3206 PyObject *s = PyString_FromString(cp);
3207 if (s == NULL)
3208 return NULL;
3209 PyString_InternInPlace(&s);
3210 return s;
3211}
3212
3213#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003214
3215void
Fred Drakeba096332000-07-09 07:04:36 +00003216PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003217{
3218 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003219 for (i = 0; i < UCHAR_MAX + 1; i++) {
3220 Py_XDECREF(characters[i]);
3221 characters[i] = NULL;
3222 }
3223#ifndef DONT_SHARE_SHORT_STRINGS
3224 Py_XDECREF(nullstring);
3225 nullstring = NULL;
3226#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003227#ifdef INTERN_STRINGS
3228 if (interned) {
3229 int pos, changed;
3230 PyObject *key, *value;
3231 do {
3232 changed = 0;
3233 pos = 0;
3234 while (PyDict_Next(interned, &pos, &key, &value)) {
3235 if (key->ob_refcnt == 2 && key == value) {
3236 PyDict_DelItem(interned, key);
3237 changed = 1;
3238 }
3239 }
3240 } while (changed);
3241 }
3242#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003243}
Barry Warsawa903ad982001-02-23 16:40:48 +00003244
3245#ifdef INTERN_STRINGS
3246void _Py_ReleaseInternedStrings(void)
3247{
3248 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003249 fprintf(stderr, "releasing interned strings\n");
3250 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003251 Py_DECREF(interned);
3252 interned = NULL;
3253 }
3254}
3255#endif /* INTERN_STRINGS */