blob: 740cbe294e44ee6742092351d59c763fb303513b [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects. When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known. For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully. It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
144
145 if (encoding == NULL)
146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
172
173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
184
185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
198
199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
204 if (encoding == NULL)
205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
227
228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
275extern
276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000338 else if (c == '\t')
339 fprintf(fp, "\\t");
340 else if (c == '\n')
341 fprintf(fp, "\\n");
342 else if (c == '\r')
343 fprintf(fp, "\\r");
344 else if (c < ' ' || c >= 0x7f)
345 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000347 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000349 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000350 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351}
352
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000354string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000356 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
357 PyObject *v;
358 if (newsize > INT_MAX) {
359 PyErr_SetString(PyExc_OverflowError,
360 "string is too large to make repr");
361 }
362 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000364 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000365 }
366 else {
367 register int i;
368 register char c;
369 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000370 int quote;
371
Thomas Wouters7e474022000-07-16 12:04:32 +0000372 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000373 quote = '\'';
374 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
375 quote = '"';
376
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 for (i = 0; i < op->ob_size; i++) {
380 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000381 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000383 else if (c == '\t')
384 *p++ = '\\', *p++ = 't';
385 else if (c == '\n')
386 *p++ = '\\', *p++ = 'n';
387 else if (c == '\r')
388 *p++ = '\\', *p++ = 'r';
389 else if (c < ' ' || c >= 0x7f) {
390 sprintf(p, "\\x%02x", c & 0xff);
391 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392 }
393 else
394 *p++ = c;
395 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000396 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398 _PyString_Resize(
399 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000400 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402}
403
404static int
Fred Drakeba096332000-07-09 07:04:36 +0000405string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406{
407 return a->ob_size;
408}
409
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000411string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412{
413 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 register PyStringObject *op;
415 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000416 if (PyUnicode_Check(bb))
417 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000418 PyErr_Format(PyExc_TypeError,
419 "cannot add type \"%.200s\" to string",
420 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 return NULL;
422 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000424 /* Optimize cases with empty left or right operand */
425 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000426 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 return bb;
428 }
429 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 Py_INCREF(a);
431 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432 }
433 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000434 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000435 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000436 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000437 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000439 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000440#ifdef CACHE_HASH
441 op->ob_shash = -1;
442#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000443#ifdef INTERN_STRINGS
444 op->ob_sinterned = NULL;
445#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000446 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
447 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
448 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450#undef b
451}
452
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000453static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000454string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000455{
456 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000457 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000458 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000459 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 if (n < 0)
461 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000462 /* watch out for overflows: the size can overflow int,
463 * and the # of bytes needed can overflow size_t
464 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000465 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000466 if (n && size / n != a->ob_size) {
467 PyErr_SetString(PyExc_OverflowError,
468 "repeated string is too long");
469 return NULL;
470 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000471 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000472 Py_INCREF(a);
473 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 }
Tim Peters8f422462000-09-09 06:13:41 +0000475 nbytes = size * sizeof(char);
476 if (nbytes / sizeof(char) != (size_t)size ||
477 nbytes + sizeof(PyStringObject) <= nbytes) {
478 PyErr_SetString(PyExc_OverflowError,
479 "repeated string is too long");
480 return NULL;
481 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000482 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000483 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000484 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000485 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000486 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000487#ifdef CACHE_HASH
488 op->ob_shash = -1;
489#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000490#ifdef INTERN_STRINGS
491 op->ob_sinterned = NULL;
492#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000493 for (i = 0; i < size; i += a->ob_size)
494 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
495 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497}
498
499/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
500
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000502string_slice(register PyStringObject *a, register int i, register int j)
503 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
505 if (i < 0)
506 i = 0;
507 if (j < 0)
508 j = 0; /* Avoid signed/unsigned bug in next line */
509 if (j > a->ob_size)
510 j = a->ob_size;
511 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512 Py_INCREF(a);
513 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514 }
515 if (j < i)
516 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000517 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518}
519
Guido van Rossum9284a572000-03-07 15:53:43 +0000520static int
Fred Drakeba096332000-07-09 07:04:36 +0000521string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000522{
523 register char *s, *end;
524 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000525 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000526 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000527 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000528 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000529 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000530 return -1;
531 }
532 c = PyString_AsString(el)[0];
533 s = PyString_AsString(a);
534 end = s + PyString_Size(a);
535 while (s < end) {
536 if (c == *s++)
537 return 1;
538 }
539 return 0;
540}
541
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000542static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000543string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000544{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000545 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000546 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000548 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000549 return NULL;
550 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000551 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000552 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000553#ifdef COUNT_ALLOCS
554 if (v != NULL)
555 one_strings++;
556#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000557 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000558 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000559 if (v == NULL)
560 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000561 characters[c] = (PyStringObject *) v;
562 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000563 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000564 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000565 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566}
567
568static int
Fred Drakeba096332000-07-09 07:04:36 +0000569string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570{
Guido van Rossum253919f1991-02-13 23:18:39 +0000571 int len_a = a->ob_size, len_b = b->ob_size;
572 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000573 int cmp;
574 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000575 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000576 if (cmp == 0)
577 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
578 if (cmp != 0)
579 return cmp;
580 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000581 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582}
583
Guido van Rossum9bfef441993-03-29 10:43:31 +0000584static long
Fred Drakeba096332000-07-09 07:04:36 +0000585string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000586{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000587 register int len;
588 register unsigned char *p;
589 register long x;
590
591#ifdef CACHE_HASH
592 if (a->ob_shash != -1)
593 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000594#ifdef INTERN_STRINGS
595 if (a->ob_sinterned != NULL)
596 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000597 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000598#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000599#endif
600 len = a->ob_size;
601 p = (unsigned char *) a->ob_sval;
602 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000603 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000604 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000605 x ^= a->ob_size;
606 if (x == -1)
607 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000608#ifdef CACHE_HASH
609 a->ob_shash = x;
610#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000611 return x;
612}
613
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000614static int
Fred Drakeba096332000-07-09 07:04:36 +0000615string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000616{
617 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000618 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000619 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000620 return -1;
621 }
622 *ptr = (void *)self->ob_sval;
623 return self->ob_size;
624}
625
626static int
Fred Drakeba096332000-07-09 07:04:36 +0000627string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000628{
Guido van Rossum045e6881997-09-08 18:30:11 +0000629 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000630 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000631 return -1;
632}
633
634static int
Fred Drakeba096332000-07-09 07:04:36 +0000635string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000636{
637 if ( lenp )
638 *lenp = self->ob_size;
639 return 1;
640}
641
Guido van Rossum1db70701998-10-08 02:18:52 +0000642static int
Fred Drakeba096332000-07-09 07:04:36 +0000643string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000644{
645 if ( index != 0 ) {
646 PyErr_SetString(PyExc_SystemError,
647 "accessing non-existent string segment");
648 return -1;
649 }
650 *ptr = self->ob_sval;
651 return self->ob_size;
652}
653
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000654static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000655 (inquiry)string_length, /*sq_length*/
656 (binaryfunc)string_concat, /*sq_concat*/
657 (intargfunc)string_repeat, /*sq_repeat*/
658 (intargfunc)string_item, /*sq_item*/
659 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000660 0, /*sq_ass_item*/
661 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000662 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663};
664
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000665static PyBufferProcs string_as_buffer = {
666 (getreadbufferproc)string_buffer_getreadbuf,
667 (getwritebufferproc)string_buffer_getwritebuf,
668 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000669 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000670};
671
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000672
673
/* Strip-mode selectors.  NOTE(review): their users are outside this part
   of the file; presumably they choose which end(s) get stripped. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
677
678
679static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000680split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000681{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000683 PyObject* item;
684 PyObject *list = PyList_New(0);
685
686 if (list == NULL)
687 return NULL;
688
Guido van Rossum4c08d552000-03-10 22:55:18 +0000689 for (i = j = 0; i < len; ) {
690 while (i < len && isspace(Py_CHARMASK(s[i])))
691 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000692 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000693 while (i < len && !isspace(Py_CHARMASK(s[i])))
694 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000695 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000696 if (maxsplit-- <= 0)
697 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000698 item = PyString_FromStringAndSize(s+j, (int)(i-j));
699 if (item == NULL)
700 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000701 err = PyList_Append(list, item);
702 Py_DECREF(item);
703 if (err < 0)
704 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000705 while (i < len && isspace(Py_CHARMASK(s[i])))
706 i++;
707 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 }
709 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000710 if (j < len) {
711 item = PyString_FromStringAndSize(s+j, (int)(len - j));
712 if (item == NULL)
713 goto finally;
714 err = PyList_Append(list, item);
715 Py_DECREF(item);
716 if (err < 0)
717 goto finally;
718 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000719 return list;
720 finally:
721 Py_DECREF(list);
722 return NULL;
723}
724
725
/* Documentation string for S.split(). */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n"
"\n"
"Return a list of the words in the string S, using sep as the\n"
"delimiter string. If maxsplit is given, at most maxsplit\n"
"splits are done. If sep is not specified, any whitespace string\n"
"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000733
734static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000735string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736{
737 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000738 int maxsplit = -1;
739 const char *s = PyString_AS_STRING(self), *sub;
740 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000741
Guido van Rossum4c08d552000-03-10 22:55:18 +0000742 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000743 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000744 if (maxsplit < 0)
745 maxsplit = INT_MAX;
746 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000747 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000748 if (PyString_Check(subobj)) {
749 sub = PyString_AS_STRING(subobj);
750 n = PyString_GET_SIZE(subobj);
751 }
752 else if (PyUnicode_Check(subobj))
753 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
754 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
755 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000756 if (n == 0) {
757 PyErr_SetString(PyExc_ValueError, "empty separator");
758 return NULL;
759 }
760
761 list = PyList_New(0);
762 if (list == NULL)
763 return NULL;
764
765 i = j = 0;
766 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000767 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000768 if (maxsplit-- <= 0)
769 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 item = PyString_FromStringAndSize(s+j, (int)(i-j));
771 if (item == NULL)
772 goto fail;
773 err = PyList_Append(list, item);
774 Py_DECREF(item);
775 if (err < 0)
776 goto fail;
777 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000778 }
779 else
780 i++;
781 }
782 item = PyString_FromStringAndSize(s+j, (int)(len-j));
783 if (item == NULL)
784 goto fail;
785 err = PyList_Append(list, item);
786 Py_DECREF(item);
787 if (err < 0)
788 goto fail;
789
790 return list;
791
792 fail:
793 Py_DECREF(list);
794 return NULL;
795}
796
797
/* Docstring exposed for the string method join(). */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000803
804static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000805string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806{
807 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000808 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810 char *p;
811 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000812 size_t sz = 0;
813 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000814 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000815
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000816 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 return NULL;
818
Tim Peters19fe14e2001-01-19 03:03:47 +0000819 seq = PySequence_Fast(orig, "");
820 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000821 if (PyErr_ExceptionMatches(PyExc_TypeError))
822 PyErr_Format(PyExc_TypeError,
823 "sequence expected, %.80s found",
824 orig->ob_type->tp_name);
825 return NULL;
826 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000827
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000828 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000829 if (seqlen == 0) {
830 Py_DECREF(seq);
831 return PyString_FromString("");
832 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000833 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000834 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000835 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
836 PyErr_Format(PyExc_TypeError,
837 "sequence item 0: expected string,"
838 " %.80s found",
839 item->ob_type->tp_name);
840 Py_DECREF(seq);
841 return NULL;
842 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000844 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000845 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000846 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000847
Tim Peters19fe14e2001-01-19 03:03:47 +0000848 /* There are at least two things to join. Do a pre-pass to figure out
849 * the total amount of space we'll need (sz), see whether any argument
850 * is absurd, and defer to the Unicode join if appropriate.
851 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000852 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000853 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000854 item = PySequence_Fast_GET_ITEM(seq, i);
855 if (!PyString_Check(item)){
856 if (PyUnicode_Check(item)) {
Barry Warsaw771d0672000-07-11 04:58:12 +0000857 Py_DECREF(seq);
Guido van Rossum2ccda8a2000-11-27 18:46:26 +0000858 return PyUnicode_Join((PyObject *)self, orig);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000859 }
860 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000861 "sequence item %i: expected string,"
862 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000863 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000864 Py_DECREF(seq);
865 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000866 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000867 sz += PyString_GET_SIZE(item);
868 if (i != 0)
869 sz += seplen;
870 if (sz < old_sz || sz > INT_MAX) {
871 PyErr_SetString(PyExc_OverflowError,
872 "join() is too long for a Python string");
873 Py_DECREF(seq);
874 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000876 }
877
878 /* Allocate result space. */
879 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
880 if (res == NULL) {
881 Py_DECREF(seq);
882 return NULL;
883 }
884
885 /* Catenate everything. */
886 p = PyString_AS_STRING(res);
887 for (i = 0; i < seqlen; ++i) {
888 size_t n;
889 item = PySequence_Fast_GET_ITEM(seq, i);
890 n = PyString_GET_SIZE(item);
891 memcpy(p, PyString_AS_STRING(item), n);
892 p += n;
893 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000894 memcpy(p, sep, seplen);
895 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000896 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000897 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000898
Jeremy Hylton49048292000-07-11 03:28:17 +0000899 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000901}
902
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000903static long
Fred Drakeba096332000-07-09 07:04:36 +0000904string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000905{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000906 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000907 int len = PyString_GET_SIZE(self);
908 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000909 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000910
Guido van Rossumc6821402000-05-08 14:08:05 +0000911 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
912 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000913 return -2;
914 if (PyString_Check(subobj)) {
915 sub = PyString_AS_STRING(subobj);
916 n = PyString_GET_SIZE(subobj);
917 }
918 else if (PyUnicode_Check(subobj))
919 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
920 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000921 return -2;
922
923 if (last > len)
924 last = len;
925 if (last < 0)
926 last += len;
927 if (last < 0)
928 last = 0;
929 if (i < 0)
930 i += len;
931 if (i < 0)
932 i = 0;
933
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 if (dir > 0) {
935 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000936 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000937 last -= n;
938 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000939 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000940 return (long)i;
941 }
942 else {
943 int j;
944
945 if (n == 0 && i <= last)
946 return (long)last;
947 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000948 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000949 return (long)j;
950 }
951
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000952 return -1;
953}
954
955
/* Docstring exposed for the string method find(). */
static char find__doc__[] =
    "S.find(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the lowest index in S where substring sub is found,\n"
    "such that sub is contained within s[start,end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
964
965static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000966string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000967{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000968 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000969 if (result == -2)
970 return NULL;
971 return PyInt_FromLong(result);
972}
973
974
/* Docstring exposed for the string method index(). */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
979
980static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000981string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000982{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000983 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000984 if (result == -2)
985 return NULL;
986 if (result == -1) {
987 PyErr_SetString(PyExc_ValueError,
988 "substring not found in string.index");
989 return NULL;
990 }
991 return PyInt_FromLong(result);
992}
993
994
/* Docstring exposed for the string method rfind(). */
static char rfind__doc__[] =
    "S.rfind(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the highest index in S where substring sub is found,\n"
    "such that sub is contained within s[start,end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
1003
1004static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001005string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001007 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001008 if (result == -2)
1009 return NULL;
1010 return PyInt_FromLong(result);
1011}
1012
1013
/* Docstring exposed for the string method rindex(). */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
1018
1019static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001020string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001021{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001022 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001023 if (result == -2)
1024 return NULL;
1025 if (result == -1) {
1026 PyErr_SetString(PyExc_ValueError,
1027 "substring not found in string.rindex");
1028 return NULL;
1029 }
1030 return PyInt_FromLong(result);
1031}
1032
1033
1034static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001035do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001036{
1037 char *s = PyString_AS_STRING(self);
1038 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001039
Guido van Rossum43713e52000-02-29 13:59:29 +00001040 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041 return NULL;
1042
1043 i = 0;
1044 if (striptype != RIGHTSTRIP) {
1045 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1046 i++;
1047 }
1048 }
1049
1050 j = len;
1051 if (striptype != LEFTSTRIP) {
1052 do {
1053 j--;
1054 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1055 j++;
1056 }
1057
1058 if (i == 0 && j == len) {
1059 Py_INCREF(self);
1060 return (PyObject*)self;
1061 }
1062 else
1063 return PyString_FromStringAndSize(s+i, j-i);
1064}
1065
1066
/* Docstring exposed for the string method strip(). */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
1072
1073static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001074string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075{
1076 return do_strip(self, args, BOTHSTRIP);
1077}
1078
1079
/* Docstring exposed for the string method lstrip(). */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1084
1085static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001086string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001087{
1088 return do_strip(self, args, LEFTSTRIP);
1089}
1090
1091
/* Docstring exposed for the string method rstrip(). */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1096
1097static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001098string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099{
1100 return do_strip(self, args, RIGHTSTRIP);
1101}
1102
1103
/* Docstring exposed for the string method lower(). */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1108
1109static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001110string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001111{
1112 char *s = PyString_AS_STRING(self), *s_new;
1113 int i, n = PyString_GET_SIZE(self);
1114 PyObject *new;
1115
Guido van Rossum43713e52000-02-29 13:59:29 +00001116 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117 return NULL;
1118 new = PyString_FromStringAndSize(NULL, n);
1119 if (new == NULL)
1120 return NULL;
1121 s_new = PyString_AsString(new);
1122 for (i = 0; i < n; i++) {
1123 int c = Py_CHARMASK(*s++);
1124 if (isupper(c)) {
1125 *s_new = tolower(c);
1126 } else
1127 *s_new = c;
1128 s_new++;
1129 }
1130 return new;
1131}
1132
1133
/* Docstring exposed for the string method upper(). */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1138
1139static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001140string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141{
1142 char *s = PyString_AS_STRING(self), *s_new;
1143 int i, n = PyString_GET_SIZE(self);
1144 PyObject *new;
1145
Guido van Rossum43713e52000-02-29 13:59:29 +00001146 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001147 return NULL;
1148 new = PyString_FromStringAndSize(NULL, n);
1149 if (new == NULL)
1150 return NULL;
1151 s_new = PyString_AsString(new);
1152 for (i = 0; i < n; i++) {
1153 int c = Py_CHARMASK(*s++);
1154 if (islower(c)) {
1155 *s_new = toupper(c);
1156 } else
1157 *s_new = c;
1158 s_new++;
1159 }
1160 return new;
1161}
1162
1163
/* Docstring exposed for the string method title(). */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1169
1170static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001171string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001172{
1173 char *s = PyString_AS_STRING(self), *s_new;
1174 int i, n = PyString_GET_SIZE(self);
1175 int previous_is_cased = 0;
1176 PyObject *new;
1177
1178 if (!PyArg_ParseTuple(args, ":title"))
1179 return NULL;
1180 new = PyString_FromStringAndSize(NULL, n);
1181 if (new == NULL)
1182 return NULL;
1183 s_new = PyString_AsString(new);
1184 for (i = 0; i < n; i++) {
1185 int c = Py_CHARMASK(*s++);
1186 if (islower(c)) {
1187 if (!previous_is_cased)
1188 c = toupper(c);
1189 previous_is_cased = 1;
1190 } else if (isupper(c)) {
1191 if (previous_is_cased)
1192 c = tolower(c);
1193 previous_is_cased = 1;
1194 } else
1195 previous_is_cased = 0;
1196 *s_new++ = c;
1197 }
1198 return new;
1199}
1200
/* Docstring exposed for the string method capitalize(). */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1206
1207static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001208string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209{
1210 char *s = PyString_AS_STRING(self), *s_new;
1211 int i, n = PyString_GET_SIZE(self);
1212 PyObject *new;
1213
Guido van Rossum43713e52000-02-29 13:59:29 +00001214 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001215 return NULL;
1216 new = PyString_FromStringAndSize(NULL, n);
1217 if (new == NULL)
1218 return NULL;
1219 s_new = PyString_AsString(new);
1220 if (0 < n) {
1221 int c = Py_CHARMASK(*s++);
1222 if (islower(c))
1223 *s_new = toupper(c);
1224 else
1225 *s_new = c;
1226 s_new++;
1227 }
1228 for (i = 1; i < n; i++) {
1229 int c = Py_CHARMASK(*s++);
1230 if (isupper(c))
1231 *s_new = tolower(c);
1232 else
1233 *s_new = c;
1234 s_new++;
1235 }
1236 return new;
1237}
1238
1239
/* Docstring exposed for the string method count(). */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1246
1247static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001248string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001250 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251 int len = PyString_GET_SIZE(self), n;
1252 int i = 0, last = INT_MAX;
1253 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001254 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255
Guido van Rossumc6821402000-05-08 14:08:05 +00001256 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1257 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001259
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 if (PyString_Check(subobj)) {
1261 sub = PyString_AS_STRING(subobj);
1262 n = PyString_GET_SIZE(subobj);
1263 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001264 else if (PyUnicode_Check(subobj)) {
1265 int count;
1266 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1267 if (count == -1)
1268 return NULL;
1269 else
1270 return PyInt_FromLong((long) count);
1271 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1273 return NULL;
1274
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275 if (last > len)
1276 last = len;
1277 if (last < 0)
1278 last += len;
1279 if (last < 0)
1280 last = 0;
1281 if (i < 0)
1282 i += len;
1283 if (i < 0)
1284 i = 0;
1285 m = last + 1 - n;
1286 if (n == 0)
1287 return PyInt_FromLong((long) (m-i));
1288
1289 r = 0;
1290 while (i < m) {
1291 if (!memcmp(s+i, sub, n)) {
1292 r++;
1293 i += n;
1294 } else {
1295 i++;
1296 }
1297 }
1298 return PyInt_FromLong((long) r);
1299}
1300
1301
/* Docstring exposed for the string method swapcase(). */
static char swapcase__doc__[] =
    "S.swapcase() -> string\n"
    "\n"
    "Return a copy of the string S with uppercase characters\n"
    "converted to lowercase and vice versa.";
1307
1308static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001309string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310{
1311 char *s = PyString_AS_STRING(self), *s_new;
1312 int i, n = PyString_GET_SIZE(self);
1313 PyObject *new;
1314
Guido van Rossum43713e52000-02-29 13:59:29 +00001315 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316 return NULL;
1317 new = PyString_FromStringAndSize(NULL, n);
1318 if (new == NULL)
1319 return NULL;
1320 s_new = PyString_AsString(new);
1321 for (i = 0; i < n; i++) {
1322 int c = Py_CHARMASK(*s++);
1323 if (islower(c)) {
1324 *s_new = toupper(c);
1325 }
1326 else if (isupper(c)) {
1327 *s_new = tolower(c);
1328 }
1329 else
1330 *s_new = c;
1331 s_new++;
1332 }
1333 return new;
1334}
1335
1336
/* Docstring exposed for the string method translate(). */
static char translate__doc__[] =
    "S.translate(table [,deletechars]) -> string\n"
    "\n"
    "Return a copy of the string S, where all characters occurring\n"
    "in the optional argument deletechars are removed, and the\n"
    "remaining characters have been mapped through the given\n"
    "translation table, which must be a string of length 256.";
1344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 register char *input, *output;
1349 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 register int i, c, changed = 0;
1351 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353 int inlen, tablen, dellen = 0;
1354 PyObject *result;
1355 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001356 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 if (!PyArg_ParseTuple(args, "O|O:translate",
1359 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361
1362 if (PyString_Check(tableobj)) {
1363 table1 = PyString_AS_STRING(tableobj);
1364 tablen = PyString_GET_SIZE(tableobj);
1365 }
1366 else if (PyUnicode_Check(tableobj)) {
1367 /* Unicode .translate() does not support the deletechars
1368 parameter; instead a mapping to None will cause characters
1369 to be deleted. */
1370 if (delobj != NULL) {
1371 PyErr_SetString(PyExc_TypeError,
1372 "deletions are implemented differently for unicode");
1373 return NULL;
1374 }
1375 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1376 }
1377 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001379
1380 if (delobj != NULL) {
1381 if (PyString_Check(delobj)) {
1382 del_table = PyString_AS_STRING(delobj);
1383 dellen = PyString_GET_SIZE(delobj);
1384 }
1385 else if (PyUnicode_Check(delobj)) {
1386 PyErr_SetString(PyExc_TypeError,
1387 "deletions are implemented differently for unicode");
1388 return NULL;
1389 }
1390 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1391 return NULL;
1392
1393 if (tablen != 256) {
1394 PyErr_SetString(PyExc_ValueError,
1395 "translation table must be 256 characters long");
1396 return NULL;
1397 }
1398 }
1399 else {
1400 del_table = NULL;
1401 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 }
1403
1404 table = table1;
1405 inlen = PyString_Size(input_obj);
1406 result = PyString_FromStringAndSize((char *)NULL, inlen);
1407 if (result == NULL)
1408 return NULL;
1409 output_start = output = PyString_AsString(result);
1410 input = PyString_AsString(input_obj);
1411
1412 if (dellen == 0) {
1413 /* If no deletions are required, use faster code */
1414 for (i = inlen; --i >= 0; ) {
1415 c = Py_CHARMASK(*input++);
1416 if (Py_CHARMASK((*output++ = table[c])) != c)
1417 changed = 1;
1418 }
1419 if (changed)
1420 return result;
1421 Py_DECREF(result);
1422 Py_INCREF(input_obj);
1423 return input_obj;
1424 }
1425
1426 for (i = 0; i < 256; i++)
1427 trans_table[i] = Py_CHARMASK(table[i]);
1428
1429 for (i = 0; i < dellen; i++)
1430 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1431
1432 for (i = inlen; --i >= 0; ) {
1433 c = Py_CHARMASK(*input++);
1434 if (trans_table[c] != -1)
1435 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1436 continue;
1437 changed = 1;
1438 }
1439 if (!changed) {
1440 Py_DECREF(result);
1441 Py_INCREF(input_obj);
1442 return input_obj;
1443 }
1444 /* Fix the size of the resulting string */
1445 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1446 return NULL;
1447 return result;
1448}
1449
1450
1451/* What follows is used for implementing replace(). Perry Stoll. */
1452
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the index into MEM of the match, or -1 if
  there is none.  A pattern longer than MEM is never found (-1).
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start within the final pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1478
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from the
  left: mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 gives 2
  as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int total = 0;
    int hit;

    while (len >= 0 && (hit = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* Resume the search right behind the match just found. */
        const int consumed = hit + pat_len;

        mem += consumed;
        len -= consumed;
        total++;
    }
    return total;
}
1502
/*
  mymemreplace

  Return a string in which occurrences of PAT in memory STR are replaced
  with SUB.  At most COUNT occurrences are replaced; a negative COUNT
  means "replace all".

  If STR is empty, PAT is longer than STR, or no replacement takes place,
  the original string is returned.  Otherwise a new buffer is allocated
  here with PyMem_MALLOC and returned; the caller must release it.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory, or result too long).

  return value is:
      the new string allocated locally, or
      the input string STR (when out_len is -1), or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* new_len = len + nfound*delta; refuse sizes that do not fit in an
       int.  Only a growing replacement (delta > 0) can overflow. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;

    /* Always request at least one byte: a zero-byte request may yield
       NULL, which would be mistaken for an out-of-memory condition when
       the result is the empty string. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset);     /* copy part of str before pat */
        str += offset + pat_len;        /* move str past pattern */
        len -= offset + pat_len;        /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;                /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len);    /* copy substring into new_s */
        new_s += sub_len;               /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1580
1581
/* Docstring exposed for the string method replace(). */
static char replace__doc__[] =
    "S.replace (old, new[, maxsplit]) -> string\n"
    "\n"
    "Return a copy of string S with all occurrences of substring\n"
    "old replaced by new. If the optional argument maxsplit is\n"
    "given, only the first maxsplit occurrences are replaced.";
1588
1589static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001590string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001592 const char *str = PyString_AS_STRING(self), *sub, *repl;
1593 char *new_s;
1594 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1595 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598
Guido van Rossum4c08d552000-03-10 22:55:18 +00001599 if (!PyArg_ParseTuple(args, "OO|i:replace",
1600 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602
1603 if (PyString_Check(subobj)) {
1604 sub = PyString_AS_STRING(subobj);
1605 sub_len = PyString_GET_SIZE(subobj);
1606 }
1607 else if (PyUnicode_Check(subobj))
1608 return PyUnicode_Replace((PyObject *)self,
1609 subobj, replobj, count);
1610 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1611 return NULL;
1612
1613 if (PyString_Check(replobj)) {
1614 repl = PyString_AS_STRING(replobj);
1615 repl_len = PyString_GET_SIZE(replobj);
1616 }
1617 else if (PyUnicode_Check(replobj))
1618 return PyUnicode_Replace((PyObject *)self,
1619 subobj, replobj, count);
1620 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1621 return NULL;
1622
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001623 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001624 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625 return NULL;
1626 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628 if (new_s == NULL) {
1629 PyErr_NoMemory();
1630 return NULL;
1631 }
1632 if (out_len == -1) {
1633 /* we're returning another reference to self */
1634 new = (PyObject*)self;
1635 Py_INCREF(new);
1636 }
1637 else {
1638 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001639 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 }
1641 return new;
1642}
1643
1644
1645static char startswith__doc__[] =
1646"S.startswith(prefix[, start[, end]]) -> int\n\
1647\n\
1648Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1649optional start, test S beginning at that position. With optional end, stop\n\
1650comparing S at that position.";
1651
1652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001653string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001655 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 int plen;
1659 int start = 0;
1660 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001661 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662
Guido van Rossumc6821402000-05-08 14:08:05 +00001663 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1664 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 return NULL;
1666 if (PyString_Check(subobj)) {
1667 prefix = PyString_AS_STRING(subobj);
1668 plen = PyString_GET_SIZE(subobj);
1669 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001670 else if (PyUnicode_Check(subobj)) {
1671 int rc;
1672 rc = PyUnicode_Tailmatch((PyObject *)self,
1673 subobj, start, end, -1);
1674 if (rc == -1)
1675 return NULL;
1676 else
1677 return PyInt_FromLong((long) rc);
1678 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 return NULL;
1681
1682 /* adopt Java semantics for index out of range. it is legal for
1683 * offset to be == plen, but this only returns true if prefix is
1684 * the empty string.
1685 */
1686 if (start < 0 || start+plen > len)
1687 return PyInt_FromLong(0);
1688
1689 if (!memcmp(str+start, prefix, plen)) {
1690 /* did the match end after the specified end? */
1691 if (end < 0)
1692 return PyInt_FromLong(1);
1693 else if (end - start < plen)
1694 return PyInt_FromLong(0);
1695 else
1696 return PyInt_FromLong(1);
1697 }
1698 else return PyInt_FromLong(0);
1699}
1700
1701
1702static char endswith__doc__[] =
1703"S.endswith(suffix[, start[, end]]) -> int\n\
1704\n\
1705Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1706optional start, test S beginning at that position. With optional end, stop\n\
1707comparing S at that position.";
1708
1709static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001710string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001714 const char* suffix;
1715 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716 int start = 0;
1717 int end = -1;
1718 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720
Guido van Rossumc6821402000-05-08 14:08:05 +00001721 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1722 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723 return NULL;
1724 if (PyString_Check(subobj)) {
1725 suffix = PyString_AS_STRING(subobj);
1726 slen = PyString_GET_SIZE(subobj);
1727 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001728 else if (PyUnicode_Check(subobj)) {
1729 int rc;
1730 rc = PyUnicode_Tailmatch((PyObject *)self,
1731 subobj, start, end, +1);
1732 if (rc == -1)
1733 return NULL;
1734 else
1735 return PyInt_FromLong((long) rc);
1736 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738 return NULL;
1739
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 return PyInt_FromLong(0);
1742
1743 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001744 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745
Guido van Rossum4c08d552000-03-10 22:55:18 +00001746 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return PyInt_FromLong(1);
1748 else return PyInt_FromLong(0);
1749}
1750
1751
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001752static char encode__doc__[] =
1753"S.encode([encoding[,errors]]) -> string\n\
1754\n\
1755Return an encoded string version of S. Default encoding is the current\n\
1756default string encoding. errors may be given to set a different error\n\
1757handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1758a ValueError. Other possible values are 'ignore' and 'replace'.";
1759
1760static PyObject *
1761string_encode(PyStringObject *self, PyObject *args)
1762{
1763 char *encoding = NULL;
1764 char *errors = NULL;
1765 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1766 return NULL;
1767 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1768}
1769
1770
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771static char expandtabs__doc__[] =
1772"S.expandtabs([tabsize]) -> string\n\
1773\n\
1774Return a copy of S where all tab characters are expanded using spaces.\n\
1775If tabsize is not given, a tab size of 8 characters is assumed.";
1776
1777static PyObject*
1778string_expandtabs(PyStringObject *self, PyObject *args)
1779{
1780 const char *e, *p;
1781 char *q;
1782 int i, j;
1783 PyObject *u;
1784 int tabsize = 8;
1785
1786 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1787 return NULL;
1788
Thomas Wouters7e474022000-07-16 12:04:32 +00001789 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 i = j = 0;
1791 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1792 for (p = PyString_AS_STRING(self); p < e; p++)
1793 if (*p == '\t') {
1794 if (tabsize > 0)
1795 j += tabsize - (j % tabsize);
1796 }
1797 else {
1798 j++;
1799 if (*p == '\n' || *p == '\r') {
1800 i += j;
1801 j = 0;
1802 }
1803 }
1804
1805 /* Second pass: create output string and fill it */
1806 u = PyString_FromStringAndSize(NULL, i + j);
1807 if (!u)
1808 return NULL;
1809
1810 j = 0;
1811 q = PyString_AS_STRING(u);
1812
1813 for (p = PyString_AS_STRING(self); p < e; p++)
1814 if (*p == '\t') {
1815 if (tabsize > 0) {
1816 i = tabsize - (j % tabsize);
1817 j += i;
1818 while (i--)
1819 *q++ = ' ';
1820 }
1821 }
1822 else {
1823 j++;
1824 *q++ = *p;
1825 if (*p == '\n' || *p == '\r')
1826 j = 0;
1827 }
1828
1829 return u;
1830}
1831
1832static
1833PyObject *pad(PyStringObject *self,
1834 int left,
1835 int right,
1836 char fill)
1837{
1838 PyObject *u;
1839
1840 if (left < 0)
1841 left = 0;
1842 if (right < 0)
1843 right = 0;
1844
1845 if (left == 0 && right == 0) {
1846 Py_INCREF(self);
1847 return (PyObject *)self;
1848 }
1849
1850 u = PyString_FromStringAndSize(NULL,
1851 left + PyString_GET_SIZE(self) + right);
1852 if (u) {
1853 if (left)
1854 memset(PyString_AS_STRING(u), fill, left);
1855 memcpy(PyString_AS_STRING(u) + left,
1856 PyString_AS_STRING(self),
1857 PyString_GET_SIZE(self));
1858 if (right)
1859 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1860 fill, right);
1861 }
1862
1863 return u;
1864}
1865
1866static char ljust__doc__[] =
1867"S.ljust(width) -> string\n\
1868\n\
1869Return S left justified in a string of length width. Padding is\n\
1870done using spaces.";
1871
1872static PyObject *
1873string_ljust(PyStringObject *self, PyObject *args)
1874{
1875 int width;
1876 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1877 return NULL;
1878
1879 if (PyString_GET_SIZE(self) >= width) {
1880 Py_INCREF(self);
1881 return (PyObject*) self;
1882 }
1883
1884 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1885}
1886
1887
1888static char rjust__doc__[] =
1889"S.rjust(width) -> string\n\
1890\n\
1891Return S right justified in a string of length width. Padding is\n\
1892done using spaces.";
1893
1894static PyObject *
1895string_rjust(PyStringObject *self, PyObject *args)
1896{
1897 int width;
1898 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1899 return NULL;
1900
1901 if (PyString_GET_SIZE(self) >= width) {
1902 Py_INCREF(self);
1903 return (PyObject*) self;
1904 }
1905
1906 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1907}
1908
1909
1910static char center__doc__[] =
1911"S.center(width) -> string\n\
1912\n\
1913Return S centered in a string of length width. Padding is done\n\
1914using spaces.";
1915
1916static PyObject *
1917string_center(PyStringObject *self, PyObject *args)
1918{
1919 int marg, left;
1920 int width;
1921
1922 if (!PyArg_ParseTuple(args, "i:center", &width))
1923 return NULL;
1924
1925 if (PyString_GET_SIZE(self) >= width) {
1926 Py_INCREF(self);
1927 return (PyObject*) self;
1928 }
1929
1930 marg = width - PyString_GET_SIZE(self);
1931 left = marg / 2 + (marg & width & 1);
1932
1933 return pad(self, left, marg - left, ' ');
1934}
1935
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0'; a leading '+' or '-' is moved in front of the
 * zeros.  A string already at least `width` long is returned unchanged.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	PyObject *padded;
	char *chars;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* chars[zeros] is the first character of the original string */
	chars = PyString_AS_STRING(padded);
	if (chars[zeros] == '+' || chars[zeros] == '-') {
		/* move sign to beginning of string */
		chars[0] = chars[zeros];
		chars[zeros] = '0';
	}

	return padded;
}
#endif
1975
1976static char isspace__doc__[] =
1977"S.isspace() -> int\n\
1978\n\
1979Return 1 if there are only whitespace characters in S,\n\
19800 otherwise.";
1981
1982static PyObject*
1983string_isspace(PyStringObject *self, PyObject *args)
1984{
Fred Drakeba096332000-07-09 07:04:36 +00001985 register const unsigned char *p
1986 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001987 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988
1989 if (!PyArg_NoArgs(args))
1990 return NULL;
1991
1992 /* Shortcut for single character strings */
1993 if (PyString_GET_SIZE(self) == 1 &&
1994 isspace(*p))
1995 return PyInt_FromLong(1);
1996
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001997 /* Special case for empty strings */
1998 if (PyString_GET_SIZE(self) == 0)
1999 return PyInt_FromLong(0);
2000
Guido van Rossum4c08d552000-03-10 22:55:18 +00002001 e = p + PyString_GET_SIZE(self);
2002 for (; p < e; p++) {
2003 if (!isspace(*p))
2004 return PyInt_FromLong(0);
2005 }
2006 return PyInt_FromLong(1);
2007}
2008
2009
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002010static char isalpha__doc__[] =
2011"S.isalpha() -> int\n\
2012\n\
2013Return 1 if all characters in S are alphabetic\n\
2014and there is at least one character in S, 0 otherwise.";
2015
2016static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002017string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002018{
Fred Drakeba096332000-07-09 07:04:36 +00002019 register const unsigned char *p
2020 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002021 register const unsigned char *e;
2022
2023 if (!PyArg_NoArgs(args))
2024 return NULL;
2025
2026 /* Shortcut for single character strings */
2027 if (PyString_GET_SIZE(self) == 1 &&
2028 isalpha(*p))
2029 return PyInt_FromLong(1);
2030
2031 /* Special case for empty strings */
2032 if (PyString_GET_SIZE(self) == 0)
2033 return PyInt_FromLong(0);
2034
2035 e = p + PyString_GET_SIZE(self);
2036 for (; p < e; p++) {
2037 if (!isalpha(*p))
2038 return PyInt_FromLong(0);
2039 }
2040 return PyInt_FromLong(1);
2041}
2042
2043
2044static char isalnum__doc__[] =
2045"S.isalnum() -> int\n\
2046\n\
2047Return 1 if all characters in S are alphanumeric\n\
2048and there is at least one character in S, 0 otherwise.";
2049
2050static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002051string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002052{
Fred Drakeba096332000-07-09 07:04:36 +00002053 register const unsigned char *p
2054 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002055 register const unsigned char *e;
2056
2057 if (!PyArg_NoArgs(args))
2058 return NULL;
2059
2060 /* Shortcut for single character strings */
2061 if (PyString_GET_SIZE(self) == 1 &&
2062 isalnum(*p))
2063 return PyInt_FromLong(1);
2064
2065 /* Special case for empty strings */
2066 if (PyString_GET_SIZE(self) == 0)
2067 return PyInt_FromLong(0);
2068
2069 e = p + PyString_GET_SIZE(self);
2070 for (; p < e; p++) {
2071 if (!isalnum(*p))
2072 return PyInt_FromLong(0);
2073 }
2074 return PyInt_FromLong(1);
2075}
2076
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078static char isdigit__doc__[] =
2079"S.isdigit() -> int\n\
2080\n\
2081Return 1 if there are only digit characters in S,\n\
20820 otherwise.";
2083
2084static PyObject*
2085string_isdigit(PyStringObject *self, PyObject *args)
2086{
Fred Drakeba096332000-07-09 07:04:36 +00002087 register const unsigned char *p
2088 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002089 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090
2091 if (!PyArg_NoArgs(args))
2092 return NULL;
2093
2094 /* Shortcut for single character strings */
2095 if (PyString_GET_SIZE(self) == 1 &&
2096 isdigit(*p))
2097 return PyInt_FromLong(1);
2098
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002099 /* Special case for empty strings */
2100 if (PyString_GET_SIZE(self) == 0)
2101 return PyInt_FromLong(0);
2102
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 e = p + PyString_GET_SIZE(self);
2104 for (; p < e; p++) {
2105 if (!isdigit(*p))
2106 return PyInt_FromLong(0);
2107 }
2108 return PyInt_FromLong(1);
2109}
2110
2111
2112static char islower__doc__[] =
2113"S.islower() -> int\n\
2114\n\
2115Return 1 if all cased characters in S are lowercase and there is\n\
2116at least one cased character in S, 0 otherwise.";
2117
2118static PyObject*
2119string_islower(PyStringObject *self, PyObject *args)
2120{
Fred Drakeba096332000-07-09 07:04:36 +00002121 register const unsigned char *p
2122 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002123 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124 int cased;
2125
2126 if (!PyArg_NoArgs(args))
2127 return NULL;
2128
2129 /* Shortcut for single character strings */
2130 if (PyString_GET_SIZE(self) == 1)
2131 return PyInt_FromLong(islower(*p) != 0);
2132
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002133 /* Special case for empty strings */
2134 if (PyString_GET_SIZE(self) == 0)
2135 return PyInt_FromLong(0);
2136
Guido van Rossum4c08d552000-03-10 22:55:18 +00002137 e = p + PyString_GET_SIZE(self);
2138 cased = 0;
2139 for (; p < e; p++) {
2140 if (isupper(*p))
2141 return PyInt_FromLong(0);
2142 else if (!cased && islower(*p))
2143 cased = 1;
2144 }
2145 return PyInt_FromLong(cased);
2146}
2147
2148
2149static char isupper__doc__[] =
2150"S.isupper() -> int\n\
2151\n\
2152Return 1 if all cased characters in S are uppercase and there is\n\
2153at least one cased character in S, 0 otherwise.";
2154
2155static PyObject*
2156string_isupper(PyStringObject *self, PyObject *args)
2157{
Fred Drakeba096332000-07-09 07:04:36 +00002158 register const unsigned char *p
2159 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002160 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002161 int cased;
2162
2163 if (!PyArg_NoArgs(args))
2164 return NULL;
2165
2166 /* Shortcut for single character strings */
2167 if (PyString_GET_SIZE(self) == 1)
2168 return PyInt_FromLong(isupper(*p) != 0);
2169
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002170 /* Special case for empty strings */
2171 if (PyString_GET_SIZE(self) == 0)
2172 return PyInt_FromLong(0);
2173
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 e = p + PyString_GET_SIZE(self);
2175 cased = 0;
2176 for (; p < e; p++) {
2177 if (islower(*p))
2178 return PyInt_FromLong(0);
2179 else if (!cased && isupper(*p))
2180 cased = 1;
2181 }
2182 return PyInt_FromLong(cased);
2183}
2184
2185
2186static char istitle__doc__[] =
2187"S.istitle() -> int\n\
2188\n\
2189Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2190may only follow uncased characters and lowercase characters only cased\n\
2191ones. Return 0 otherwise.";
2192
2193static PyObject*
2194string_istitle(PyStringObject *self, PyObject *args)
2195{
Fred Drakeba096332000-07-09 07:04:36 +00002196 register const unsigned char *p
2197 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002198 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 int cased, previous_is_cased;
2200
2201 if (!PyArg_NoArgs(args))
2202 return NULL;
2203
2204 /* Shortcut for single character strings */
2205 if (PyString_GET_SIZE(self) == 1)
2206 return PyInt_FromLong(isupper(*p) != 0);
2207
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002208 /* Special case for empty strings */
2209 if (PyString_GET_SIZE(self) == 0)
2210 return PyInt_FromLong(0);
2211
Guido van Rossum4c08d552000-03-10 22:55:18 +00002212 e = p + PyString_GET_SIZE(self);
2213 cased = 0;
2214 previous_is_cased = 0;
2215 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002216 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217
2218 if (isupper(ch)) {
2219 if (previous_is_cased)
2220 return PyInt_FromLong(0);
2221 previous_is_cased = 1;
2222 cased = 1;
2223 }
2224 else if (islower(ch)) {
2225 if (!previous_is_cased)
2226 return PyInt_FromLong(0);
2227 previous_is_cased = 1;
2228 cased = 1;
2229 }
2230 else
2231 previous_is_cased = 0;
2232 }
2233 return PyInt_FromLong(cased);
2234}
2235
2236
2237static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002238"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239\n\
2240Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002241Line breaks are not included in the resulting list unless keepends\n\
2242is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243
2244#define SPLIT_APPEND(data, left, right) \
2245 str = PyString_FromStringAndSize(data + left, right - left); \
2246 if (!str) \
2247 goto onError; \
2248 if (PyList_Append(list, str)) { \
2249 Py_DECREF(str); \
2250 goto onError; \
2251 } \
2252 else \
2253 Py_DECREF(str);
2254
2255static PyObject*
2256string_splitlines(PyStringObject *self, PyObject *args)
2257{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258 register int i;
2259 register int j;
2260 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002261 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 PyObject *list;
2263 PyObject *str;
2264 char *data;
2265
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002266 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 return NULL;
2268
2269 data = PyString_AS_STRING(self);
2270 len = PyString_GET_SIZE(self);
2271
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 list = PyList_New(0);
2273 if (!list)
2274 goto onError;
2275
2276 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002277 int eol;
2278
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 /* Find a line and append it */
2280 while (i < len && data[i] != '\n' && data[i] != '\r')
2281 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282
2283 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002284 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285 if (i < len) {
2286 if (data[i] == '\r' && i + 1 < len &&
2287 data[i+1] == '\n')
2288 i += 2;
2289 else
2290 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002291 if (keepends)
2292 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002294 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 j = i;
2296 }
2297 if (j < len) {
2298 SPLIT_APPEND(data, j, len);
2299 }
2300
2301 return list;
2302
2303 onError:
2304 Py_DECREF(list);
2305 return NULL;
2306}
2307
2308#undef SPLIT_APPEND
2309
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310
2311static PyMethodDef
2312string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313 /* Counterparts of the obsolete stropmodule functions; except
2314 string.maketrans(). */
2315 {"join", (PyCFunction)string_join, 1, join__doc__},
2316 {"split", (PyCFunction)string_split, 1, split__doc__},
2317 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2318 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2319 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2320 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2321 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2322 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2323 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002324 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2325 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2327 {"count", (PyCFunction)string_count, 1, count__doc__},
2328 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2329 {"find", (PyCFunction)string_find, 1, find__doc__},
2330 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2333 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2334 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2335 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2337 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2338 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2340 {"title", (PyCFunction)string_title, 1, title__doc__},
2341 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2342 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2343 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002344 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2346 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2347#if 0
2348 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2349#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 {NULL, NULL} /* sentinel */
2351};
2352
2353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002354string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355{
2356 return Py_FindMethod(string_methods, (PyObject*)s, name);
2357}
2358
2359
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002360PyTypeObject PyString_Type = {
2361 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002362 0,
2363 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002365 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002366 (destructor)string_dealloc, /*tp_dealloc*/
2367 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002369 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002370 (cmpfunc)string_compare, /*tp_compare*/
2371 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002372 0, /*tp_as_number*/
2373 &string_as_sequence, /*tp_as_sequence*/
2374 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002375 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002376 0, /*tp_call*/
2377 0, /*tp_str*/
2378 0, /*tp_getattro*/
2379 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002380 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002381 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002382 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002383};
2384
2385void
Fred Drakeba096332000-07-09 07:04:36 +00002386PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002387{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002388 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002389 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002390 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002391 if (w == NULL || !PyString_Check(*pv)) {
2392 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002393 *pv = NULL;
2394 return;
2395 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002396 v = string_concat((PyStringObject *) *pv, w);
2397 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398 *pv = v;
2399}
2400
Guido van Rossum013142a1994-08-30 08:19:36 +00002401void
Fred Drakeba096332000-07-09 07:04:36 +00002402PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002403{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002404 PyString_Concat(pv, w);
2405 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002406}
2407
2408
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002409/* The following function breaks the notion that strings are immutable:
2410 it changes the size of a string. We get away with this only if there
2411 is only one module referencing the object. You can also think of it
2412 as creating a new string object and destroying the old one, only
2413 more efficiently. In any case, don't use this if the string may
2414 already be known to some other part of the code... */
2415
2416int
Fred Drakeba096332000-07-09 07:04:36 +00002417_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002418{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002419 register PyObject *v;
2420 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002421 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002422 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002423 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002424 Py_DECREF(v);
2425 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002426 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002427 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002428 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002429#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002430 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002431#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002432 _Py_ForgetReference(v);
2433 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002434 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002435 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002436 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002437 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002438 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002439 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002440 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002441 _Py_NewReference(*pv);
2442 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002443 sv->ob_size = newsize;
2444 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002445 return 0;
2446}
Guido van Rossume5372401993-03-16 12:15:04 +00002447
2448/* Helpers for formatstring */
2449
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002450static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002451getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002452{
2453 int argidx = *p_argidx;
2454 if (argidx < arglen) {
2455 (*p_argidx)++;
2456 if (arglen < 0)
2457 return args;
2458 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002459 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002460 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002461 PyErr_SetString(PyExc_TypeError,
2462 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002463 return NULL;
2464}
2465
/* Format flag bits, one per flag character of a conversion spec. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2478
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002479static int
Fred Drakeba096332000-07-09 07:04:36 +00002480formatfloat(char *buf, size_t buflen, int flags,
2481 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002482{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002483 /* fmt = '%#.' + `prec` + `type`
2484 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002485 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002486 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002487 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002488 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002489 if (prec < 0)
2490 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002491 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2492 type = 'g';
2493 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002494 /* worst case length calc to ensure no buffer overrun:
2495 fmt = %#.<prec>g
2496 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2497 for any double rep.)
2498 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2499 If prec=0 the effective precision is 1 (the leading digit is
2500 always given), therefore increase by one to 10+prec. */
2501 if (buflen <= (size_t)10 + (size_t)prec) {
2502 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002503 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002504 return -1;
2505 }
Guido van Rossume5372401993-03-16 12:15:04 +00002506 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002507 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002508}
2509
Tim Peters38fd5b62000-09-21 05:43:11 +00002510/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2511 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2512 * Python's regular ints.
2513 * Return value: a new PyString*, or NULL if error.
2514 * . *pbuf is set to point into it,
2515 * *plen set to the # of chars following that.
2516 * Caller must decref it when done using pbuf.
2517 * The string starting at *pbuf is of the form
2518 * "-"? ("0x" | "0X")? digit+
2519 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2520 * set in flags. The case of hex digits will be correct,
2521 * There will be at least prec digits, zero-filled on the left if
2522 * necessary to get that many.
2523 * val object to be converted
2524 * flags bitmask of format flags; only F_ALT is looked at
2525 * prec minimum number of digits; 0-fill on left if needed
2526 * type a character in [duoxX]; u acts the same as d
2527 *
2528 * CAUTION: o, x and X conversions on regular ints can never
2529 * produce a '-' sign, but can for Python's unbounded ints.
2530 */
2531PyObject*
2532_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2533 char **pbuf, int *plen)
2534{
2535 PyObject *result = NULL;
2536 char *buf;
2537 int i;
2538 int sign; /* 1 if '-', else 0 */
2539 int len; /* number of characters */
2540 int numdigits; /* len == numnondigits + numdigits */
2541 int numnondigits = 0;
2542
2543 switch (type) {
2544 case 'd':
2545 case 'u':
2546 result = val->ob_type->tp_str(val);
2547 break;
2548 case 'o':
2549 result = val->ob_type->tp_as_number->nb_oct(val);
2550 break;
2551 case 'x':
2552 case 'X':
2553 numnondigits = 2;
2554 result = val->ob_type->tp_as_number->nb_hex(val);
2555 break;
2556 default:
2557 assert(!"'type' not in [duoxX]");
2558 }
2559 if (!result)
2560 return NULL;
2561
2562 /* To modify the string in-place, there can only be one reference. */
2563 if (result->ob_refcnt != 1) {
2564 PyErr_BadInternalCall();
2565 return NULL;
2566 }
2567 buf = PyString_AsString(result);
2568 len = PyString_Size(result);
2569 if (buf[len-1] == 'L') {
2570 --len;
2571 buf[len] = '\0';
2572 }
2573 sign = buf[0] == '-';
2574 numnondigits += sign;
2575 numdigits = len - numnondigits;
2576 assert(numdigits > 0);
2577
2578 /* Get rid of base marker unless F_ALT */
2579 if ((flags & F_ALT) == 0) {
2580 /* Need to skip 0x, 0X or 0. */
2581 int skipped = 0;
2582 switch (type) {
2583 case 'o':
2584 assert(buf[sign] == '0');
2585 /* If 0 is only digit, leave it alone. */
2586 if (numdigits > 1) {
2587 skipped = 1;
2588 --numdigits;
2589 }
2590 break;
2591 case 'x':
2592 case 'X':
2593 assert(buf[sign] == '0');
2594 assert(buf[sign + 1] == 'x');
2595 skipped = 2;
2596 numnondigits -= 2;
2597 break;
2598 }
2599 if (skipped) {
2600 buf += skipped;
2601 len -= skipped;
2602 if (sign)
2603 buf[0] = '-';
2604 }
2605 assert(len == numnondigits + numdigits);
2606 assert(numdigits > 0);
2607 }
2608
2609 /* Fill with leading zeroes to meet minimum width. */
2610 if (prec > numdigits) {
2611 PyObject *r1 = PyString_FromStringAndSize(NULL,
2612 numnondigits + prec);
2613 char *b1;
2614 if (!r1) {
2615 Py_DECREF(result);
2616 return NULL;
2617 }
2618 b1 = PyString_AS_STRING(r1);
2619 for (i = 0; i < numnondigits; ++i)
2620 *b1++ = *buf++;
2621 for (i = 0; i < prec - numdigits; i++)
2622 *b1++ = '0';
2623 for (i = 0; i < numdigits; i++)
2624 *b1++ = *buf++;
2625 *b1 = '\0';
2626 Py_DECREF(result);
2627 result = r1;
2628 buf = PyString_AS_STRING(result);
2629 len = numnondigits + prec;
2630 }
2631
2632 /* Fix up case for hex conversions. */
2633 switch (type) {
2634 case 'x':
2635 /* Need to convert all upper case letters to lower case. */
2636 for (i = 0; i < len; i++)
2637 if (buf[i] >= 'A' && buf[i] <= 'F')
2638 buf[i] += 'a'-'A';
2639 break;
2640 case 'X':
2641 /* Need to convert 0x to 0X (and -0x to -0X). */
2642 if (buf[sign + 1] == 'x')
2643 buf[sign + 1] = 'X';
2644 break;
2645 }
2646 *pbuf = buf;
2647 *plen = len;
2648 return result;
2649}
2650
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002651static int
Fred Drakeba096332000-07-09 07:04:36 +00002652formatint(char *buf, size_t buflen, int flags,
2653 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002654{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002655 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002656 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2657 + 1 + 1 = 24 */
2658 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002659 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002660 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002661 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002662 if (prec < 0)
2663 prec = 1;
2664 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002665 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002666 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002667 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002668 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002669 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002670 return -1;
2671 }
Guido van Rossume5372401993-03-16 12:15:04 +00002672 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002673 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002674}
2675
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002676static int
Fred Drakeba096332000-07-09 07:04:36 +00002677formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002678{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002679 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002680 if (PyString_Check(v)) {
2681 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002682 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002683 }
2684 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002685 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002686 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002687 }
2688 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002689 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002690}
2691
Guido van Rossum013142a1994-08-30 08:19:36 +00002692
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002702
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002703PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002704PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002705{
2706 char *fmt, *res;
2707 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002708 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002709 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002710 PyObject *dict = NULL;
2711 if (format == NULL || !PyString_Check(format) || args == NULL) {
2712 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002713 return NULL;
2714 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002715 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002716 fmt = PyString_AsString(format);
2717 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002718 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002719 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002720 if (result == NULL)
2721 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002722 res = PyString_AsString(result);
2723 if (PyTuple_Check(args)) {
2724 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002725 argidx = 0;
2726 }
2727 else {
2728 arglen = -1;
2729 argidx = -2;
2730 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002731 if (args->ob_type->tp_as_mapping)
2732 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002733 while (--fmtcnt >= 0) {
2734 if (*fmt != '%') {
2735 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002736 rescnt = fmtcnt + 100;
2737 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002738 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002739 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002740 res = PyString_AsString(result)
2741 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002742 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002743 }
2744 *res++ = *fmt++;
2745 }
2746 else {
2747 /* Got a format specifier */
2748 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002749 int width = -1;
2750 int prec = -1;
2751 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002752 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002753 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002754 PyObject *v = NULL;
2755 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002756 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002757 int sign;
2758 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002759 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002760 char *fmt_start = fmt;
2761
Guido van Rossumda9c2711996-12-05 21:58:58 +00002762 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002763 if (*fmt == '(') {
2764 char *keystart;
2765 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002766 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002767 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002768
2769 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002770 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002771 "format requires a mapping");
2772 goto error;
2773 }
2774 ++fmt;
2775 --fmtcnt;
2776 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002777 /* Skip over balanced parentheses */
2778 while (pcount > 0 && --fmtcnt >= 0) {
2779 if (*fmt == ')')
2780 --pcount;
2781 else if (*fmt == '(')
2782 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002783 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002784 }
2785 keylen = fmt - keystart - 1;
2786 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002788 "incomplete format key");
2789 goto error;
2790 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002791 key = PyString_FromStringAndSize(keystart,
2792 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002793 if (key == NULL)
2794 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002795 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002796 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002797 args_owned = 0;
2798 }
2799 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002800 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002801 if (args == NULL) {
2802 goto error;
2803 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002804 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002805 arglen = -1;
2806 argidx = -2;
2807 }
Guido van Rossume5372401993-03-16 12:15:04 +00002808 while (--fmtcnt >= 0) {
2809 switch (c = *fmt++) {
2810 case '-': flags |= F_LJUST; continue;
2811 case '+': flags |= F_SIGN; continue;
2812 case ' ': flags |= F_BLANK; continue;
2813 case '#': flags |= F_ALT; continue;
2814 case '0': flags |= F_ZERO; continue;
2815 }
2816 break;
2817 }
2818 if (c == '*') {
2819 v = getnextarg(args, arglen, &argidx);
2820 if (v == NULL)
2821 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 if (!PyInt_Check(v)) {
2823 PyErr_SetString(PyExc_TypeError,
2824 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002825 goto error;
2826 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002827 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002828 if (width < 0) {
2829 flags |= F_LJUST;
2830 width = -width;
2831 }
Guido van Rossume5372401993-03-16 12:15:04 +00002832 if (--fmtcnt >= 0)
2833 c = *fmt++;
2834 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002835 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002836 width = c - '0';
2837 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002838 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002839 if (!isdigit(c))
2840 break;
2841 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002842 PyErr_SetString(
2843 PyExc_ValueError,
2844 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002845 goto error;
2846 }
2847 width = width*10 + (c - '0');
2848 }
2849 }
2850 if (c == '.') {
2851 prec = 0;
2852 if (--fmtcnt >= 0)
2853 c = *fmt++;
2854 if (c == '*') {
2855 v = getnextarg(args, arglen, &argidx);
2856 if (v == NULL)
2857 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002858 if (!PyInt_Check(v)) {
2859 PyErr_SetString(
2860 PyExc_TypeError,
2861 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002862 goto error;
2863 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002864 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002865 if (prec < 0)
2866 prec = 0;
2867 if (--fmtcnt >= 0)
2868 c = *fmt++;
2869 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002870 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002871 prec = c - '0';
2872 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002873 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002874 if (!isdigit(c))
2875 break;
2876 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002877 PyErr_SetString(
2878 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002879 "prec too big");
2880 goto error;
2881 }
2882 prec = prec*10 + (c - '0');
2883 }
2884 }
2885 } /* prec */
2886 if (fmtcnt >= 0) {
2887 if (c == 'h' || c == 'l' || c == 'L') {
2888 size = c;
2889 if (--fmtcnt >= 0)
2890 c = *fmt++;
2891 }
2892 }
2893 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002894 PyErr_SetString(PyExc_ValueError,
2895 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002896 goto error;
2897 }
2898 if (c != '%') {
2899 v = getnextarg(args, arglen, &argidx);
2900 if (v == NULL)
2901 goto error;
2902 }
2903 sign = 0;
2904 fill = ' ';
2905 switch (c) {
2906 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002907 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002908 len = 1;
2909 break;
2910 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002911 case 'r':
2912 if (PyUnicode_Check(v)) {
2913 fmt = fmt_start;
2914 goto unicode;
2915 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002916 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002917 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002918 else
2919 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002920 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002921 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002922 if (!PyString_Check(temp)) {
2923 PyErr_SetString(PyExc_TypeError,
2924 "%s argument has non-string str()");
2925 goto error;
2926 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002927 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002928 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002929 if (prec >= 0 && len > prec)
2930 len = prec;
2931 break;
2932 case 'i':
2933 case 'd':
2934 case 'u':
2935 case 'o':
2936 case 'x':
2937 case 'X':
2938 if (c == 'i')
2939 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002940 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002941 temp = _PyString_FormatLong(v, flags,
2942 prec, c, &pbuf, &len);
2943 if (!temp)
2944 goto error;
2945 /* unbounded ints can always produce
2946 a sign character! */
2947 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002948 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002949 else {
2950 pbuf = formatbuf;
2951 len = formatint(pbuf, sizeof(formatbuf),
2952 flags, prec, c, v);
2953 if (len < 0)
2954 goto error;
2955 /* only d conversion is signed */
2956 sign = c == 'd';
2957 }
2958 if (flags & F_ZERO)
2959 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002960 break;
2961 case 'e':
2962 case 'E':
2963 case 'f':
2964 case 'g':
2965 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002966 pbuf = formatbuf;
2967 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002968 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002969 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002970 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002971 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002972 fill = '0';
2973 break;
2974 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002975 pbuf = formatbuf;
2976 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002977 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002978 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002979 break;
2980 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002981 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00002982 "unsupported format character '%c' (0x%x) "
2983 "at index %i",
2984 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00002985 goto error;
2986 }
2987 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002988 if (*pbuf == '-' || *pbuf == '+') {
2989 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002990 len--;
2991 }
2992 else if (flags & F_SIGN)
2993 sign = '+';
2994 else if (flags & F_BLANK)
2995 sign = ' ';
2996 else
Tim Peters38fd5b62000-09-21 05:43:11 +00002997 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002998 }
2999 if (width < len)
3000 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003001 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003002 reslen -= rescnt;
3003 rescnt = width + fmtcnt + 100;
3004 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003005 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003006 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003007 res = PyString_AsString(result)
3008 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003009 }
3010 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003011 if (fill != ' ')
3012 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003013 rescnt--;
3014 if (width > len)
3015 width--;
3016 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003017 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3018 assert(pbuf[0] == '0');
3019 assert(pbuf[1] == c);
3020 if (fill != ' ') {
3021 *res++ = *pbuf++;
3022 *res++ = *pbuf++;
3023 }
3024 rescnt -= 2;
3025 width -= 2;
3026 if (width < 0)
3027 width = 0;
3028 len -= 2;
3029 }
3030 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003031 do {
3032 --rescnt;
3033 *res++ = fill;
3034 } while (--width > len);
3035 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003036 if (fill == ' ') {
3037 if (sign)
3038 *res++ = sign;
3039 if ((flags & F_ALT) &&
3040 (c == 'x' || c == 'X')) {
3041 assert(pbuf[0] == '0');
3042 assert(pbuf[1] == c);
3043 *res++ = *pbuf++;
3044 *res++ = *pbuf++;
3045 }
3046 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003047 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003048 res += len;
3049 rescnt -= len;
3050 while (--width >= len) {
3051 --rescnt;
3052 *res++ = ' ';
3053 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003054 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003055 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003056 "not all arguments converted");
3057 goto error;
3058 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003059 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003060 } /* '%' */
3061 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003062 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003063 PyErr_SetString(PyExc_TypeError,
3064 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003065 goto error;
3066 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003067 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003068 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003070 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003071 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003072
3073 unicode:
3074 if (args_owned) {
3075 Py_DECREF(args);
3076 args_owned = 0;
3077 }
3078 /* Fiddle args right (remove the first argidx-1 arguments) */
3079 --argidx;
3080 if (PyTuple_Check(orig_args) && argidx > 0) {
3081 PyObject *v;
3082 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3083 v = PyTuple_New(n);
3084 if (v == NULL)
3085 goto error;
3086 while (--n >= 0) {
3087 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3088 Py_INCREF(w);
3089 PyTuple_SET_ITEM(v, n, w);
3090 }
3091 args = v;
3092 } else {
3093 Py_INCREF(orig_args);
3094 args = orig_args;
3095 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003096 args_owned = 1;
3097 /* Take what we have of the result and let the Unicode formatting
3098 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003099 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003100 if (_PyString_Resize(&result, rescnt))
3101 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003102 fmtcnt = PyString_GET_SIZE(format) - \
3103 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003104 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3105 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003106 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003107 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003108 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003109 if (v == NULL)
3110 goto error;
3111 /* Paste what we have (result) to what the Unicode formatting
3112 function returned (v) and return the result (or error) */
3113 w = PyUnicode_Concat(result, v);
3114 Py_DECREF(result);
3115 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003116 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003117 return w;
Guido van Rossum90daa872000-04-10 13:47:21 +00003118
Guido van Rossume5372401993-03-16 12:15:04 +00003119 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003121 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003123 }
Guido van Rossume5372401993-03-16 12:15:04 +00003124 return NULL;
3125}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003126
3127
3128#ifdef INTERN_STRINGS
3129
Barry Warsaw4df762f2000-08-16 23:41:01 +00003130/* This dictionary will leak at PyString_Fini() time. That's acceptable
3131 * because PyString_Fini() specifically frees interned strings that are
3132 * only referenced by this dictionary. The CVS log entry for revision 2.45
3133 * says:
3134 *
3135 * Change the Fini function to only remove otherwise unreferenced
3136 * strings from the interned table. There are references in
3137 * hard-to-find static variables all over the interpreter, and it's not
3138 * worth trying to get rid of all those; but "uninterning" isn't fair
3139 * either and may cause subtle failures later -- so we have to keep them
3140 * in the interned table.
3141 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003142static PyObject *interned;
3143
3144void
Fred Drakeba096332000-07-09 07:04:36 +00003145PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003146{
3147 register PyStringObject *s = (PyStringObject *)(*p);
3148 PyObject *t;
3149 if (s == NULL || !PyString_Check(s))
3150 Py_FatalError("PyString_InternInPlace: strings only please!");
3151 if ((t = s->ob_sinterned) != NULL) {
3152 if (t == (PyObject *)s)
3153 return;
3154 Py_INCREF(t);
3155 *p = t;
3156 Py_DECREF(s);
3157 return;
3158 }
3159 if (interned == NULL) {
3160 interned = PyDict_New();
3161 if (interned == NULL)
3162 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003163 }
3164 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3165 Py_INCREF(t);
3166 *p = s->ob_sinterned = t;
3167 Py_DECREF(s);
3168 return;
3169 }
3170 t = (PyObject *)s;
3171 if (PyDict_SetItem(interned, t, t) == 0) {
3172 s->ob_sinterned = t;
3173 return;
3174 }
3175 PyErr_Clear();
3176}
3177
3178
3179PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003180PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003181{
3182 PyObject *s = PyString_FromString(cp);
3183 if (s == NULL)
3184 return NULL;
3185 PyString_InternInPlace(&s);
3186 return s;
3187}
3188
3189#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003190
3191void
Fred Drakeba096332000-07-09 07:04:36 +00003192PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003193{
3194 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003195 for (i = 0; i < UCHAR_MAX + 1; i++) {
3196 Py_XDECREF(characters[i]);
3197 characters[i] = NULL;
3198 }
3199#ifndef DONT_SHARE_SHORT_STRINGS
3200 Py_XDECREF(nullstring);
3201 nullstring = NULL;
3202#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003203#ifdef INTERN_STRINGS
3204 if (interned) {
3205 int pos, changed;
3206 PyObject *key, *value;
3207 do {
3208 changed = 0;
3209 pos = 0;
3210 while (PyDict_Next(interned, &pos, &key, &value)) {
3211 if (key->ob_refcnt == 2 && key == value) {
3212 PyDict_DelItem(interned, key);
3213 changed = 1;
3214 }
3215 }
3216 } while (changed);
3217 }
3218#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003219}
Barry Warsawa903ad982001-02-23 16:40:48 +00003220
#ifdef INTERN_STRINGS
/* Drop this module's reference to the interned-strings dictionary,
 * if one was ever created, and forget it.
 */
void
_Py_ReleaseInternedStrings(void)
{
    Py_XDECREF(interned);
    interned = NULL;
}
#endif /* INTERN_STRINGS */