blob: df3ab492f7b77e9e6d7c8791d87ba8d3f8bf3dd0 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
21/*
22 PyString_FromStringAndSize() and PyString_FromString() try in
23 certain cases to share string objects. When the size of the string
24 is zero, these routines always return a pointer to the same string
25 object; when the size is one, they return a pointer to an already
26 existing object if the contents of the string is known. For
27 PyString_FromString() this is always the case, for
28 PyString_FromStringAndSize() this is the case when the first
29 argument is not NULL.
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was gotten from
33 PyString_FromStringAndSize() with a NULL first argument, because in
34 the future these routines may try to do even more sharing of objects.
35*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
144
145 if (encoding == NULL)
146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
172
173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
184
185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
198
199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
204 if (encoding == NULL)
205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
227
228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
275extern
276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
338 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000339 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000340 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000341 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000344 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345}
346
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000348string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000350 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
351 PyObject *v;
352 if (newsize > INT_MAX) {
353 PyErr_SetString(PyExc_OverflowError,
354 "string is too large to make repr");
355 }
356 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000358 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 }
360 else {
361 register int i;
362 register char c;
363 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000364 int quote;
365
Thomas Wouters7e474022000-07-16 12:04:32 +0000366 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000367 quote = '\'';
368 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
369 quote = '"';
370
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000371 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000372 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373 for (i = 0; i < op->ob_size; i++) {
374 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000375 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376 *p++ = '\\', *p++ = c;
377 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 while (*p != '\0')
380 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 }
382 else
383 *p++ = c;
384 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000385 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000387 _PyString_Resize(
388 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000389 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391}
392
393static int
Fred Drakeba096332000-07-09 07:04:36 +0000394string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395{
396 return a->ob_size;
397}
398
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000400string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401{
402 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403 register PyStringObject *op;
404 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000405 if (PyUnicode_Check(bb))
406 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000407 PyErr_Format(PyExc_TypeError,
408 "cannot add type \"%.200s\" to string",
409 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 return NULL;
411 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000412#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413 /* Optimize cases with empty left or right operand */
414 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 return bb;
417 }
418 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 Py_INCREF(a);
420 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 }
422 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000423 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000425 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000426 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000428 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429#ifdef CACHE_HASH
430 op->ob_shash = -1;
431#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000432#ifdef INTERN_STRINGS
433 op->ob_sinterned = NULL;
434#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000435 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
436 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
437 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439#undef b
440}
441
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000443string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444{
445 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000446 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000447 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000448 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 if (n < 0)
450 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000451 /* watch out for overflows: the size can overflow int,
452 * and the # of bytes needed can overflow size_t
453 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000455 if (n && size / n != a->ob_size) {
456 PyErr_SetString(PyExc_OverflowError,
457 "repeated string is too long");
458 return NULL;
459 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000461 Py_INCREF(a);
462 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Tim Peters8f422462000-09-09 06:13:41 +0000464 nbytes = size * sizeof(char);
465 if (nbytes / sizeof(char) != (size_t)size ||
466 nbytes + sizeof(PyStringObject) <= nbytes) {
467 PyErr_SetString(PyExc_OverflowError,
468 "repeated string is too long");
469 return NULL;
470 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000471 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000472 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000473 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000475 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000476#ifdef CACHE_HASH
477 op->ob_shash = -1;
478#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000479#ifdef INTERN_STRINGS
480 op->ob_sinterned = NULL;
481#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000482 for (i = 0; i < size; i += a->ob_size)
483 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
484 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000485 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486}
487
488/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
489
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000491string_slice(register PyStringObject *a, register int i, register int j)
492 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493{
494 if (i < 0)
495 i = 0;
496 if (j < 0)
497 j = 0; /* Avoid signed/unsigned bug in next line */
498 if (j > a->ob_size)
499 j = a->ob_size;
500 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501 Py_INCREF(a);
502 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 }
504 if (j < i)
505 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000506 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507}
508
Guido van Rossum9284a572000-03-07 15:53:43 +0000509static int
Fred Drakeba096332000-07-09 07:04:36 +0000510string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000511{
512 register char *s, *end;
513 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000514 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000515 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000516 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000517 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000518 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000519 return -1;
520 }
521 c = PyString_AsString(el)[0];
522 s = PyString_AsString(a);
523 end = s + PyString_Size(a);
524 while (s < end) {
525 if (c == *s++)
526 return 1;
527 }
528 return 0;
529}
530
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000531static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000532string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000534 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000535 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000536 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000537 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538 return NULL;
539 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000540 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000542#ifdef COUNT_ALLOCS
543 if (v != NULL)
544 one_strings++;
545#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000546 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000547 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000548 if (v == NULL)
549 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000550 characters[c] = (PyStringObject *) v;
551 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000554 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000555}
556
557static int
Fred Drakeba096332000-07-09 07:04:36 +0000558string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559{
Guido van Rossum253919f1991-02-13 23:18:39 +0000560 int len_a = a->ob_size, len_b = b->ob_size;
561 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000562 int cmp;
563 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000564 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000565 if (cmp == 0)
566 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
567 if (cmp != 0)
568 return cmp;
569 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000570 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000571}
572
Guido van Rossum9bfef441993-03-29 10:43:31 +0000573static long
Fred Drakeba096332000-07-09 07:04:36 +0000574string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000575{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000576 register int len;
577 register unsigned char *p;
578 register long x;
579
580#ifdef CACHE_HASH
581 if (a->ob_shash != -1)
582 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000583#ifdef INTERN_STRINGS
584 if (a->ob_sinterned != NULL)
585 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000587#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000588#endif
589 len = a->ob_size;
590 p = (unsigned char *) a->ob_sval;
591 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000592 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000593 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000594 x ^= a->ob_size;
595 if (x == -1)
596 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000597#ifdef CACHE_HASH
598 a->ob_shash = x;
599#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000600 return x;
601}
602
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000603static int
Fred Drakeba096332000-07-09 07:04:36 +0000604string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000605{
606 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000607 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000608 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000609 return -1;
610 }
611 *ptr = (void *)self->ob_sval;
612 return self->ob_size;
613}
614
615static int
Fred Drakeba096332000-07-09 07:04:36 +0000616string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000617{
Guido van Rossum045e6881997-09-08 18:30:11 +0000618 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000619 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000620 return -1;
621}
622
623static int
Fred Drakeba096332000-07-09 07:04:36 +0000624string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000625{
626 if ( lenp )
627 *lenp = self->ob_size;
628 return 1;
629}
630
Guido van Rossum1db70701998-10-08 02:18:52 +0000631static int
Fred Drakeba096332000-07-09 07:04:36 +0000632string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000633{
634 if ( index != 0 ) {
635 PyErr_SetString(PyExc_SystemError,
636 "accessing non-existent string segment");
637 return -1;
638 }
639 *ptr = self->ob_sval;
640 return self->ob_size;
641}
642
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000643static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000644 (inquiry)string_length, /*sq_length*/
645 (binaryfunc)string_concat, /*sq_concat*/
646 (intargfunc)string_repeat, /*sq_repeat*/
647 (intargfunc)string_item, /*sq_item*/
648 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000649 0, /*sq_ass_item*/
650 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000651 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000652};
653
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000654static PyBufferProcs string_as_buffer = {
655 (getreadbufferproc)string_buffer_getreadbuf,
656 (getwritebufferproc)string_buffer_getwritebuf,
657 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000658 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000659};
660
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000661
662
/* Mode selectors; presumably consumed by the strip routines defined
   later in this file -- TODO confirm against their use. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
666
667
668static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000669split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000670{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000671 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000672 PyObject* item;
673 PyObject *list = PyList_New(0);
674
675 if (list == NULL)
676 return NULL;
677
Guido van Rossum4c08d552000-03-10 22:55:18 +0000678 for (i = j = 0; i < len; ) {
679 while (i < len && isspace(Py_CHARMASK(s[i])))
680 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000681 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 while (i < len && !isspace(Py_CHARMASK(s[i])))
683 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000684 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000685 if (maxsplit-- <= 0)
686 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687 item = PyString_FromStringAndSize(s+j, (int)(i-j));
688 if (item == NULL)
689 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690 err = PyList_Append(list, item);
691 Py_DECREF(item);
692 if (err < 0)
693 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000694 while (i < len && isspace(Py_CHARMASK(s[i])))
695 i++;
696 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000697 }
698 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000699 if (j < len) {
700 item = PyString_FromStringAndSize(s+j, (int)(len - j));
701 if (item == NULL)
702 goto finally;
703 err = PyList_Append(list, item);
704 Py_DECREF(item);
705 if (err < 0)
706 goto finally;
707 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 return list;
709 finally:
710 Py_DECREF(list);
711 return NULL;
712}
713
714
/* Documentation string for S.split(). */
715static char split__doc__[] =
716"S.split([sep [,maxsplit]]) -> list of strings\n\
717\n\
718Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000719delimiter string. If maxsplit is given, at most maxsplit\n\
720splits are done. If sep is not specified, any whitespace string\n\
721is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000722
723static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000724string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000725{
726 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000727 int maxsplit = -1;
728 const char *s = PyString_AS_STRING(self), *sub;
729 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000730
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000732 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733 if (maxsplit < 0)
734 maxsplit = INT_MAX;
735 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (PyString_Check(subobj)) {
738 sub = PyString_AS_STRING(subobj);
739 n = PyString_GET_SIZE(subobj);
740 }
741 else if (PyUnicode_Check(subobj))
742 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
743 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
744 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000745 if (n == 0) {
746 PyErr_SetString(PyExc_ValueError, "empty separator");
747 return NULL;
748 }
749
750 list = PyList_New(0);
751 if (list == NULL)
752 return NULL;
753
754 i = j = 0;
755 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000756 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000757 if (maxsplit-- <= 0)
758 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000759 item = PyString_FromStringAndSize(s+j, (int)(i-j));
760 if (item == NULL)
761 goto fail;
762 err = PyList_Append(list, item);
763 Py_DECREF(item);
764 if (err < 0)
765 goto fail;
766 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000767 }
768 else
769 i++;
770 }
771 item = PyString_FromStringAndSize(s+j, (int)(len-j));
772 if (item == NULL)
773 goto fail;
774 err = PyList_Append(list, item);
775 Py_DECREF(item);
776 if (err < 0)
777 goto fail;
778
779 return list;
780
781 fail:
782 Py_DECREF(list);
783 return NULL;
784}
785
786
787static char join__doc__[] =
788"S.join(sequence) -> string\n\
789\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000790Return a string which is the concatenation of the strings in the\n\
791sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000792
793static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000794string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795{
796 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000797 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000798 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000799 char *p;
800 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000801 size_t sz = 0;
802 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000803 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000804
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000805 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806 return NULL;
807
Tim Peters19fe14e2001-01-19 03:03:47 +0000808 seq = PySequence_Fast(orig, "");
809 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "sequence expected, %.80s found",
813 orig->ob_type->tp_name);
814 return NULL;
815 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000816
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000817 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000818 if (seqlen == 0) {
819 Py_DECREF(seq);
820 return PyString_FromString("");
821 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000823 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000824 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
825 PyErr_Format(PyExc_TypeError,
826 "sequence item 0: expected string,"
827 " %.80s found",
828 item->ob_type->tp_name);
829 Py_DECREF(seq);
830 return NULL;
831 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000832 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000833 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000834 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000836
Tim Peters19fe14e2001-01-19 03:03:47 +0000837 /* There are at least two things to join. Do a pre-pass to figure out
838 * the total amount of space we'll need (sz), see whether any argument
839 * is absurd, and defer to the Unicode join if appropriate.
840 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000841 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000842 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 item = PySequence_Fast_GET_ITEM(seq, i);
844 if (!PyString_Check(item)){
845 if (PyUnicode_Check(item)) {
Barry Warsaw771d0672000-07-11 04:58:12 +0000846 Py_DECREF(seq);
Guido van Rossum2ccda8a2000-11-27 18:46:26 +0000847 return PyUnicode_Join((PyObject *)self, orig);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000848 }
849 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000850 "sequence item %i: expected string,"
851 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000852 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000853 Py_DECREF(seq);
854 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000855 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000856 sz += PyString_GET_SIZE(item);
857 if (i != 0)
858 sz += seplen;
859 if (sz < old_sz || sz > INT_MAX) {
860 PyErr_SetString(PyExc_OverflowError,
861 "join() is too long for a Python string");
862 Py_DECREF(seq);
863 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000864 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000865 }
866
867 /* Allocate result space. */
868 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
869 if (res == NULL) {
870 Py_DECREF(seq);
871 return NULL;
872 }
873
874 /* Catenate everything. */
875 p = PyString_AS_STRING(res);
876 for (i = 0; i < seqlen; ++i) {
877 size_t n;
878 item = PySequence_Fast_GET_ITEM(seq, i);
879 n = PyString_GET_SIZE(item);
880 memcpy(p, PyString_AS_STRING(item), n);
881 p += n;
882 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000883 memcpy(p, sep, seplen);
884 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000885 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000886 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000887
Jeremy Hylton49048292000-07-11 03:28:17 +0000888 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000890}
891
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892static long
Fred Drakeba096332000-07-09 07:04:36 +0000893string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000894{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000895 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000896 int len = PyString_GET_SIZE(self);
897 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000898 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000899
Guido van Rossumc6821402000-05-08 14:08:05 +0000900 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
901 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000902 return -2;
903 if (PyString_Check(subobj)) {
904 sub = PyString_AS_STRING(subobj);
905 n = PyString_GET_SIZE(subobj);
906 }
907 else if (PyUnicode_Check(subobj))
908 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
909 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000910 return -2;
911
912 if (last > len)
913 last = len;
914 if (last < 0)
915 last += len;
916 if (last < 0)
917 last = 0;
918 if (i < 0)
919 i += len;
920 if (i < 0)
921 i = 0;
922
Guido van Rossum4c08d552000-03-10 22:55:18 +0000923 if (dir > 0) {
924 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000926 last -= n;
927 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000928 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000929 return (long)i;
930 }
931 else {
932 int j;
933
934 if (n == 0 && i <= last)
935 return (long)last;
936 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000937 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000938 return (long)j;
939 }
940
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000941 return -1;
942}
943
944
945static char find__doc__[] =
946"S.find(sub [,start [,end]]) -> int\n\
947\n\
948Return the lowest index in S where substring sub is found,\n\
949such that sub is contained within s[start,end]. Optional\n\
950arguments start and end are interpreted as in slice notation.\n\
951\n\
952Return -1 on failure.";
953
954static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000955string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000957 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000958 if (result == -2)
959 return NULL;
960 return PyInt_FromLong(result);
961}
962
963
964static char index__doc__[] =
965"S.index(sub [,start [,end]]) -> int\n\
966\n\
967Like S.find() but raise ValueError when the substring is not found.";
968
969static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000970string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000971{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000972 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000973 if (result == -2)
974 return NULL;
975 if (result == -1) {
976 PyErr_SetString(PyExc_ValueError,
977 "substring not found in string.index");
978 return NULL;
979 }
980 return PyInt_FromLong(result);
981}
982
983
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000984static char rfind__doc__[] =
985"S.rfind(sub [,start [,end]]) -> int\n\
986\n\
987Return the highest index in S where substring sub is found,\n\
988such that sub is contained within s[start,end]. Optional\n\
989arguments start and end are interpreted as in slice notation.\n\
990\n\
991Return -1 on failure.";
992
993static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000994string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000996 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997 if (result == -2)
998 return NULL;
999 return PyInt_FromLong(result);
1000}
1001
1002
1003static char rindex__doc__[] =
1004"S.rindex(sub [,start [,end]]) -> int\n\
1005\n\
1006Like S.rfind() but raise ValueError when the substring is not found.";
1007
1008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001009string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001010{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001011 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012 if (result == -2)
1013 return NULL;
1014 if (result == -1) {
1015 PyErr_SetString(PyExc_ValueError,
1016 "substring not found in string.rindex");
1017 return NULL;
1018 }
1019 return PyInt_FromLong(result);
1020}
1021
1022
1023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025{
1026 char *s = PyString_AS_STRING(self);
1027 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028
Guido van Rossum43713e52000-02-29 13:59:29 +00001029 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 return NULL;
1031
1032 i = 0;
1033 if (striptype != RIGHTSTRIP) {
1034 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1035 i++;
1036 }
1037 }
1038
1039 j = len;
1040 if (striptype != LEFTSTRIP) {
1041 do {
1042 j--;
1043 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1044 j++;
1045 }
1046
1047 if (i == 0 && j == len) {
1048 Py_INCREF(self);
1049 return (PyObject*)self;
1050 }
1051 else
1052 return PyString_FromStringAndSize(s+i, j-i);
1053}
1054
1055
1056static char strip__doc__[] =
1057"S.strip() -> string\n\
1058\n\
1059Return a copy of the string S with leading and trailing\n\
1060whitespace removed.";
1061
1062static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001063string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001064{
1065 return do_strip(self, args, BOTHSTRIP);
1066}
1067
1068
1069static char lstrip__doc__[] =
1070"S.lstrip() -> string\n\
1071\n\
1072Return a copy of the string S with leading whitespace removed.";
1073
1074static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001075string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076{
1077 return do_strip(self, args, LEFTSTRIP);
1078}
1079
1080
1081static char rstrip__doc__[] =
1082"S.rstrip() -> string\n\
1083\n\
1084Return a copy of the string S with trailing whitespace removed.";
1085
1086static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001087string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088{
1089 return do_strip(self, args, RIGHTSTRIP);
1090}
1091
1092
1093static char lower__doc__[] =
1094"S.lower() -> string\n\
1095\n\
1096Return a copy of the string S converted to lowercase.";
1097
1098static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001099string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001100{
1101 char *s = PyString_AS_STRING(self), *s_new;
1102 int i, n = PyString_GET_SIZE(self);
1103 PyObject *new;
1104
Guido van Rossum43713e52000-02-29 13:59:29 +00001105 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 return NULL;
1107 new = PyString_FromStringAndSize(NULL, n);
1108 if (new == NULL)
1109 return NULL;
1110 s_new = PyString_AsString(new);
1111 for (i = 0; i < n; i++) {
1112 int c = Py_CHARMASK(*s++);
1113 if (isupper(c)) {
1114 *s_new = tolower(c);
1115 } else
1116 *s_new = c;
1117 s_new++;
1118 }
1119 return new;
1120}
1121
1122
1123static char upper__doc__[] =
1124"S.upper() -> string\n\
1125\n\
1126Return a copy of the string S converted to uppercase.";
1127
1128static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001129string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001130{
1131 char *s = PyString_AS_STRING(self), *s_new;
1132 int i, n = PyString_GET_SIZE(self);
1133 PyObject *new;
1134
Guido van Rossum43713e52000-02-29 13:59:29 +00001135 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001136 return NULL;
1137 new = PyString_FromStringAndSize(NULL, n);
1138 if (new == NULL)
1139 return NULL;
1140 s_new = PyString_AsString(new);
1141 for (i = 0; i < n; i++) {
1142 int c = Py_CHARMASK(*s++);
1143 if (islower(c)) {
1144 *s_new = toupper(c);
1145 } else
1146 *s_new = c;
1147 s_new++;
1148 }
1149 return new;
1150}
1151
1152
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153static char title__doc__[] =
1154"S.title() -> string\n\
1155\n\
1156Return a titlecased version of S, i.e. words start with uppercase\n\
1157characters, all remaining cased characters have lowercase.";
1158
1159static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001160string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001161{
1162 char *s = PyString_AS_STRING(self), *s_new;
1163 int i, n = PyString_GET_SIZE(self);
1164 int previous_is_cased = 0;
1165 PyObject *new;
1166
1167 if (!PyArg_ParseTuple(args, ":title"))
1168 return NULL;
1169 new = PyString_FromStringAndSize(NULL, n);
1170 if (new == NULL)
1171 return NULL;
1172 s_new = PyString_AsString(new);
1173 for (i = 0; i < n; i++) {
1174 int c = Py_CHARMASK(*s++);
1175 if (islower(c)) {
1176 if (!previous_is_cased)
1177 c = toupper(c);
1178 previous_is_cased = 1;
1179 } else if (isupper(c)) {
1180 if (previous_is_cased)
1181 c = tolower(c);
1182 previous_is_cased = 1;
1183 } else
1184 previous_is_cased = 0;
1185 *s_new++ = c;
1186 }
1187 return new;
1188}
1189
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190static char capitalize__doc__[] =
1191"S.capitalize() -> string\n\
1192\n\
1193Return a copy of the string S with only its first character\n\
1194capitalized.";
1195
1196static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001197string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198{
1199 char *s = PyString_AS_STRING(self), *s_new;
1200 int i, n = PyString_GET_SIZE(self);
1201 PyObject *new;
1202
Guido van Rossum43713e52000-02-29 13:59:29 +00001203 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001204 return NULL;
1205 new = PyString_FromStringAndSize(NULL, n);
1206 if (new == NULL)
1207 return NULL;
1208 s_new = PyString_AsString(new);
1209 if (0 < n) {
1210 int c = Py_CHARMASK(*s++);
1211 if (islower(c))
1212 *s_new = toupper(c);
1213 else
1214 *s_new = c;
1215 s_new++;
1216 }
1217 for (i = 1; i < n; i++) {
1218 int c = Py_CHARMASK(*s++);
1219 if (isupper(c))
1220 *s_new = tolower(c);
1221 else
1222 *s_new = c;
1223 s_new++;
1224 }
1225 return new;
1226}
1227
1228
1229static char count__doc__[] =
1230"S.count(sub[, start[, end]]) -> int\n\
1231\n\
1232Return the number of occurrences of substring sub in string\n\
1233S[start:end]. Optional arguments start and end are\n\
1234interpreted as in slice notation.";
1235
1236static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001237string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001238{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240 int len = PyString_GET_SIZE(self), n;
1241 int i = 0, last = INT_MAX;
1242 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001244
Guido van Rossumc6821402000-05-08 14:08:05 +00001245 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1246 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001247 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001248
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 if (PyString_Check(subobj)) {
1250 sub = PyString_AS_STRING(subobj);
1251 n = PyString_GET_SIZE(subobj);
1252 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001253 else if (PyUnicode_Check(subobj)) {
1254 int count;
1255 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1256 if (count == -1)
1257 return NULL;
1258 else
1259 return PyInt_FromLong((long) count);
1260 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1262 return NULL;
1263
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001264 if (last > len)
1265 last = len;
1266 if (last < 0)
1267 last += len;
1268 if (last < 0)
1269 last = 0;
1270 if (i < 0)
1271 i += len;
1272 if (i < 0)
1273 i = 0;
1274 m = last + 1 - n;
1275 if (n == 0)
1276 return PyInt_FromLong((long) (m-i));
1277
1278 r = 0;
1279 while (i < m) {
1280 if (!memcmp(s+i, sub, n)) {
1281 r++;
1282 i += n;
1283 } else {
1284 i++;
1285 }
1286 }
1287 return PyInt_FromLong((long) r);
1288}
1289
1290
1291static char swapcase__doc__[] =
1292"S.swapcase() -> string\n\
1293\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295converted to lowercase and vice versa.";
1296
1297static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001298string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299{
1300 char *s = PyString_AS_STRING(self), *s_new;
1301 int i, n = PyString_GET_SIZE(self);
1302 PyObject *new;
1303
Guido van Rossum43713e52000-02-29 13:59:29 +00001304 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 return NULL;
1306 new = PyString_FromStringAndSize(NULL, n);
1307 if (new == NULL)
1308 return NULL;
1309 s_new = PyString_AsString(new);
1310 for (i = 0; i < n; i++) {
1311 int c = Py_CHARMASK(*s++);
1312 if (islower(c)) {
1313 *s_new = toupper(c);
1314 }
1315 else if (isupper(c)) {
1316 *s_new = tolower(c);
1317 }
1318 else
1319 *s_new = c;
1320 s_new++;
1321 }
1322 return new;
1323}
1324
1325
1326static char translate__doc__[] =
1327"S.translate(table [,deletechars]) -> string\n\
1328\n\
1329Return a copy of the string S, where all characters occurring\n\
1330in the optional argument deletechars are removed, and the\n\
1331remaining characters have been mapped through the given\n\
1332translation table, which must be a string of length 256.";
1333
1334static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001335string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337 register char *input, *output;
1338 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 register int i, c, changed = 0;
1340 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 int inlen, tablen, dellen = 0;
1343 PyObject *result;
1344 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 if (!PyArg_ParseTuple(args, "O|O:translate",
1348 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001350
1351 if (PyString_Check(tableobj)) {
1352 table1 = PyString_AS_STRING(tableobj);
1353 tablen = PyString_GET_SIZE(tableobj);
1354 }
1355 else if (PyUnicode_Check(tableobj)) {
1356 /* Unicode .translate() does not support the deletechars
1357 parameter; instead a mapping to None will cause characters
1358 to be deleted. */
1359 if (delobj != NULL) {
1360 PyErr_SetString(PyExc_TypeError,
1361 "deletions are implemented differently for unicode");
1362 return NULL;
1363 }
1364 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1365 }
1366 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001368
1369 if (delobj != NULL) {
1370 if (PyString_Check(delobj)) {
1371 del_table = PyString_AS_STRING(delobj);
1372 dellen = PyString_GET_SIZE(delobj);
1373 }
1374 else if (PyUnicode_Check(delobj)) {
1375 PyErr_SetString(PyExc_TypeError,
1376 "deletions are implemented differently for unicode");
1377 return NULL;
1378 }
1379 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1380 return NULL;
1381
1382 if (tablen != 256) {
1383 PyErr_SetString(PyExc_ValueError,
1384 "translation table must be 256 characters long");
1385 return NULL;
1386 }
1387 }
1388 else {
1389 del_table = NULL;
1390 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
1392
1393 table = table1;
1394 inlen = PyString_Size(input_obj);
1395 result = PyString_FromStringAndSize((char *)NULL, inlen);
1396 if (result == NULL)
1397 return NULL;
1398 output_start = output = PyString_AsString(result);
1399 input = PyString_AsString(input_obj);
1400
1401 if (dellen == 0) {
1402 /* If no deletions are required, use faster code */
1403 for (i = inlen; --i >= 0; ) {
1404 c = Py_CHARMASK(*input++);
1405 if (Py_CHARMASK((*output++ = table[c])) != c)
1406 changed = 1;
1407 }
1408 if (changed)
1409 return result;
1410 Py_DECREF(result);
1411 Py_INCREF(input_obj);
1412 return input_obj;
1413 }
1414
1415 for (i = 0; i < 256; i++)
1416 trans_table[i] = Py_CHARMASK(table[i]);
1417
1418 for (i = 0; i < dellen; i++)
1419 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1420
1421 for (i = inlen; --i >= 0; ) {
1422 c = Py_CHARMASK(*input++);
1423 if (trans_table[c] != -1)
1424 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1425 continue;
1426 changed = 1;
1427 }
1428 if (!changed) {
1429 Py_DECREF(result);
1430 Py_INCREF(input_obj);
1431 return input_obj;
1432 }
1433 /* Fix the size of the resulting string */
1434 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1435 return NULL;
1436 return result;
1437}
1438
1439
1440/* What follows is used for implementing replace(). Perry Stoll. */
1441
/*
  mymemfind

  A strstr() work-alike for arbitrary memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 when
  there is none.  A pattern longer than MEM can never match, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest offset at which a full copy of PAT still fits in MEM;
	   negative when PAT is longer than MEM, so the loop never runs. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before paying for memcmp(). */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
1467
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from the
  left and resuming right after each match.  For example,
	mem=1111  and pat=11 gives 2;
	mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;
	int pos;
	int consumed;

	/* Each completed pass through the body is one more match. */
	for (hits = 0; len >= 0; hits++) {
		pos = mymemfind(mem, len, pat, pat_len);
		if (pos == -1)
			break;
		/* Drop everything up to and including this match. */
		consumed = pos + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
1491
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  A negative COUNT replaces every occurrence;
   otherwise at most COUNT occurrences, taken from the left, are
   replaced.

   If STR is empty, if PAT is longer than STR, or if there is nothing to
   replace, the original string is returned.  Otherwise, a new buffer is
   allocated here with PyMem_MALLOC and returned; the caller must release
   it with PyMem_FREE.  The buffer is not NUL-terminated.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result whose
       length would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can push the
	   result past INT_MAX; check before multiplying so that the
	   buffer size never wraps around. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound*delta;

	/* The result may legitimately be empty (every byte replaced by
	   nothing).  Ask for at least one byte, since a zero-size request
	   may yield NULL, which would be mistaken for a failure. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL)
		return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1)
			break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0)
			break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1569
1570
1571static char replace__doc__[] =
1572"S.replace (old, new[, maxsplit]) -> string\n\
1573\n\
1574Return a copy of string S with all occurrences of substring\n\
1575old replaced by new. If the optional argument maxsplit is\n\
1576given, only the first maxsplit occurrences are replaced.";
1577
1578static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001579string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001581 const char *str = PyString_AS_STRING(self), *sub, *repl;
1582 char *new_s;
1583 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1584 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001586 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587
Guido van Rossum4c08d552000-03-10 22:55:18 +00001588 if (!PyArg_ParseTuple(args, "OO|i:replace",
1589 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591
1592 if (PyString_Check(subobj)) {
1593 sub = PyString_AS_STRING(subobj);
1594 sub_len = PyString_GET_SIZE(subobj);
1595 }
1596 else if (PyUnicode_Check(subobj))
1597 return PyUnicode_Replace((PyObject *)self,
1598 subobj, replobj, count);
1599 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1600 return NULL;
1601
1602 if (PyString_Check(replobj)) {
1603 repl = PyString_AS_STRING(replobj);
1604 repl_len = PyString_GET_SIZE(replobj);
1605 }
1606 else if (PyUnicode_Check(replobj))
1607 return PyUnicode_Replace((PyObject *)self,
1608 subobj, replobj, count);
1609 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1610 return NULL;
1611
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001612 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001613 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 return NULL;
1615 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 if (new_s == NULL) {
1618 PyErr_NoMemory();
1619 return NULL;
1620 }
1621 if (out_len == -1) {
1622 /* we're returning another reference to self */
1623 new = (PyObject*)self;
1624 Py_INCREF(new);
1625 }
1626 else {
1627 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001628 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 }
1630 return new;
1631}
1632
1633
1634static char startswith__doc__[] =
1635"S.startswith(prefix[, start[, end]]) -> int\n\
1636\n\
1637Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1638optional start, test S beginning at that position. With optional end, stop\n\
1639comparing S at that position.";
1640
1641static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001642string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001644 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001646 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 int plen;
1648 int start = 0;
1649 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001650 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651
Guido van Rossumc6821402000-05-08 14:08:05 +00001652 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1653 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 return NULL;
1655 if (PyString_Check(subobj)) {
1656 prefix = PyString_AS_STRING(subobj);
1657 plen = PyString_GET_SIZE(subobj);
1658 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001659 else if (PyUnicode_Check(subobj)) {
1660 int rc;
1661 rc = PyUnicode_Tailmatch((PyObject *)self,
1662 subobj, start, end, -1);
1663 if (rc == -1)
1664 return NULL;
1665 else
1666 return PyInt_FromLong((long) rc);
1667 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 return NULL;
1670
1671 /* adopt Java semantics for index out of range. it is legal for
1672 * offset to be == plen, but this only returns true if prefix is
1673 * the empty string.
1674 */
1675 if (start < 0 || start+plen > len)
1676 return PyInt_FromLong(0);
1677
1678 if (!memcmp(str+start, prefix, plen)) {
1679 /* did the match end after the specified end? */
1680 if (end < 0)
1681 return PyInt_FromLong(1);
1682 else if (end - start < plen)
1683 return PyInt_FromLong(0);
1684 else
1685 return PyInt_FromLong(1);
1686 }
1687 else return PyInt_FromLong(0);
1688}
1689
1690
1691static char endswith__doc__[] =
1692"S.endswith(suffix[, start[, end]]) -> int\n\
1693\n\
1694Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1695optional start, test S beginning at that position. With optional end, stop\n\
1696comparing S at that position.";
1697
1698static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001699string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 const char* suffix;
1704 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 int start = 0;
1706 int end = -1;
1707 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001708 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709
Guido van Rossumc6821402000-05-08 14:08:05 +00001710 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1711 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 return NULL;
1713 if (PyString_Check(subobj)) {
1714 suffix = PyString_AS_STRING(subobj);
1715 slen = PyString_GET_SIZE(subobj);
1716 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001717 else if (PyUnicode_Check(subobj)) {
1718 int rc;
1719 rc = PyUnicode_Tailmatch((PyObject *)self,
1720 subobj, start, end, +1);
1721 if (rc == -1)
1722 return NULL;
1723 else
1724 return PyInt_FromLong((long) rc);
1725 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001726 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 return NULL;
1728
Guido van Rossum4c08d552000-03-10 22:55:18 +00001729 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 return PyInt_FromLong(0);
1731
1732 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736 return PyInt_FromLong(1);
1737 else return PyInt_FromLong(0);
1738}
1739
1740
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001741static char encode__doc__[] =
1742"S.encode([encoding[,errors]]) -> string\n\
1743\n\
1744Return an encoded string version of S. Default encoding is the current\n\
1745default string encoding. errors may be given to set a different error\n\
1746handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1747a ValueError. Other possible values are 'ignore' and 'replace'.";
1748
1749static PyObject *
1750string_encode(PyStringObject *self, PyObject *args)
1751{
1752 char *encoding = NULL;
1753 char *errors = NULL;
1754 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1755 return NULL;
1756 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1757}
1758
1759
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760static char expandtabs__doc__[] =
1761"S.expandtabs([tabsize]) -> string\n\
1762\n\
1763Return a copy of S where all tab characters are expanded using spaces.\n\
1764If tabsize is not given, a tab size of 8 characters is assumed.";
1765
1766static PyObject*
1767string_expandtabs(PyStringObject *self, PyObject *args)
1768{
1769 const char *e, *p;
1770 char *q;
1771 int i, j;
1772 PyObject *u;
1773 int tabsize = 8;
1774
1775 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1776 return NULL;
1777
Thomas Wouters7e474022000-07-16 12:04:32 +00001778 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 i = j = 0;
1780 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1781 for (p = PyString_AS_STRING(self); p < e; p++)
1782 if (*p == '\t') {
1783 if (tabsize > 0)
1784 j += tabsize - (j % tabsize);
1785 }
1786 else {
1787 j++;
1788 if (*p == '\n' || *p == '\r') {
1789 i += j;
1790 j = 0;
1791 }
1792 }
1793
1794 /* Second pass: create output string and fill it */
1795 u = PyString_FromStringAndSize(NULL, i + j);
1796 if (!u)
1797 return NULL;
1798
1799 j = 0;
1800 q = PyString_AS_STRING(u);
1801
1802 for (p = PyString_AS_STRING(self); p < e; p++)
1803 if (*p == '\t') {
1804 if (tabsize > 0) {
1805 i = tabsize - (j % tabsize);
1806 j += i;
1807 while (i--)
1808 *q++ = ' ';
1809 }
1810 }
1811 else {
1812 j++;
1813 *q++ = *p;
1814 if (*p == '\n' || *p == '\r')
1815 j = 0;
1816 }
1817
1818 return u;
1819}
1820
1821static
1822PyObject *pad(PyStringObject *self,
1823 int left,
1824 int right,
1825 char fill)
1826{
1827 PyObject *u;
1828
1829 if (left < 0)
1830 left = 0;
1831 if (right < 0)
1832 right = 0;
1833
1834 if (left == 0 && right == 0) {
1835 Py_INCREF(self);
1836 return (PyObject *)self;
1837 }
1838
1839 u = PyString_FromStringAndSize(NULL,
1840 left + PyString_GET_SIZE(self) + right);
1841 if (u) {
1842 if (left)
1843 memset(PyString_AS_STRING(u), fill, left);
1844 memcpy(PyString_AS_STRING(u) + left,
1845 PyString_AS_STRING(self),
1846 PyString_GET_SIZE(self));
1847 if (right)
1848 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1849 fill, right);
1850 }
1851
1852 return u;
1853}
1854
1855static char ljust__doc__[] =
1856"S.ljust(width) -> string\n\
1857\n\
1858Return S left justified in a string of length width. Padding is\n\
1859done using spaces.";
1860
1861static PyObject *
1862string_ljust(PyStringObject *self, PyObject *args)
1863{
1864 int width;
1865 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1866 return NULL;
1867
1868 if (PyString_GET_SIZE(self) >= width) {
1869 Py_INCREF(self);
1870 return (PyObject*) self;
1871 }
1872
1873 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1874}
1875
1876
1877static char rjust__doc__[] =
1878"S.rjust(width) -> string\n\
1879\n\
1880Return S right justified in a string of length width. Padding is\n\
1881done using spaces.";
1882
1883static PyObject *
1884string_rjust(PyStringObject *self, PyObject *args)
1885{
1886 int width;
1887 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1888 return NULL;
1889
1890 if (PyString_GET_SIZE(self) >= width) {
1891 Py_INCREF(self);
1892 return (PyObject*) self;
1893 }
1894
1895 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1896}
1897
1898
1899static char center__doc__[] =
1900"S.center(width) -> string\n\
1901\n\
1902Return S centered in a string of length width. Padding is done\n\
1903using spaces.";
1904
1905static PyObject *
1906string_center(PyStringObject *self, PyObject *args)
1907{
1908 int marg, left;
1909 int width;
1910
1911 if (!PyArg_ParseTuple(args, "i:center", &width))
1912 return NULL;
1913
1914 if (PyString_GET_SIZE(self) >= width) {
1915 Py_INCREF(self);
1916 return (PyObject*) self;
1917 }
1918
1919 marg = width - PyString_GET_SIZE(self);
1920 left = marg / 2 + (marg & width & 1);
1921
1922 return pad(self, left, marg - left, ' ');
1923}
1924
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0' via pad(); a leading '+' or '-' of the original
 * string is moved in front of the inserted zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	PyObject *padded;
	char *chars;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* chars[zeros] is the first character of the original string */
	chars = PyString_AS_STRING(padded);
	if (chars[zeros] == '+' || chars[zeros] == '-') {
		/* move sign to beginning of string */
		chars[0] = chars[zeros];
		chars[zeros] = '0';
	}

	return padded;
}
#endif
1964
1965static char isspace__doc__[] =
1966"S.isspace() -> int\n\
1967\n\
1968Return 1 if there are only whitespace characters in S,\n\
19690 otherwise.";
1970
1971static PyObject*
1972string_isspace(PyStringObject *self, PyObject *args)
1973{
Fred Drakeba096332000-07-09 07:04:36 +00001974 register const unsigned char *p
1975 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001976 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977
1978 if (!PyArg_NoArgs(args))
1979 return NULL;
1980
1981 /* Shortcut for single character strings */
1982 if (PyString_GET_SIZE(self) == 1 &&
1983 isspace(*p))
1984 return PyInt_FromLong(1);
1985
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001986 /* Special case for empty strings */
1987 if (PyString_GET_SIZE(self) == 0)
1988 return PyInt_FromLong(0);
1989
Guido van Rossum4c08d552000-03-10 22:55:18 +00001990 e = p + PyString_GET_SIZE(self);
1991 for (; p < e; p++) {
1992 if (!isspace(*p))
1993 return PyInt_FromLong(0);
1994 }
1995 return PyInt_FromLong(1);
1996}
1997
1998
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001999static char isalpha__doc__[] =
2000"S.isalpha() -> int\n\
2001\n\
2002Return 1 if all characters in S are alphabetic\n\
2003and there is at least one character in S, 0 otherwise.";
2004
2005static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002006string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002007{
Fred Drakeba096332000-07-09 07:04:36 +00002008 register const unsigned char *p
2009 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002010 register const unsigned char *e;
2011
2012 if (!PyArg_NoArgs(args))
2013 return NULL;
2014
2015 /* Shortcut for single character strings */
2016 if (PyString_GET_SIZE(self) == 1 &&
2017 isalpha(*p))
2018 return PyInt_FromLong(1);
2019
2020 /* Special case for empty strings */
2021 if (PyString_GET_SIZE(self) == 0)
2022 return PyInt_FromLong(0);
2023
2024 e = p + PyString_GET_SIZE(self);
2025 for (; p < e; p++) {
2026 if (!isalpha(*p))
2027 return PyInt_FromLong(0);
2028 }
2029 return PyInt_FromLong(1);
2030}
2031
2032
2033static char isalnum__doc__[] =
2034"S.isalnum() -> int\n\
2035\n\
2036Return 1 if all characters in S are alphanumeric\n\
2037and there is at least one character in S, 0 otherwise.";
2038
2039static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002040string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002041{
Fred Drakeba096332000-07-09 07:04:36 +00002042 register const unsigned char *p
2043 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002044 register const unsigned char *e;
2045
2046 if (!PyArg_NoArgs(args))
2047 return NULL;
2048
2049 /* Shortcut for single character strings */
2050 if (PyString_GET_SIZE(self) == 1 &&
2051 isalnum(*p))
2052 return PyInt_FromLong(1);
2053
2054 /* Special case for empty strings */
2055 if (PyString_GET_SIZE(self) == 0)
2056 return PyInt_FromLong(0);
2057
2058 e = p + PyString_GET_SIZE(self);
2059 for (; p < e; p++) {
2060 if (!isalnum(*p))
2061 return PyInt_FromLong(0);
2062 }
2063 return PyInt_FromLong(1);
2064}
2065
2066
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067static char isdigit__doc__[] =
2068"S.isdigit() -> int\n\
2069\n\
2070Return 1 if there are only digit characters in S,\n\
20710 otherwise.";
2072
2073static PyObject*
2074string_isdigit(PyStringObject *self, PyObject *args)
2075{
Fred Drakeba096332000-07-09 07:04:36 +00002076 register const unsigned char *p
2077 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002078 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079
2080 if (!PyArg_NoArgs(args))
2081 return NULL;
2082
2083 /* Shortcut for single character strings */
2084 if (PyString_GET_SIZE(self) == 1 &&
2085 isdigit(*p))
2086 return PyInt_FromLong(1);
2087
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002088 /* Special case for empty strings */
2089 if (PyString_GET_SIZE(self) == 0)
2090 return PyInt_FromLong(0);
2091
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 e = p + PyString_GET_SIZE(self);
2093 for (; p < e; p++) {
2094 if (!isdigit(*p))
2095 return PyInt_FromLong(0);
2096 }
2097 return PyInt_FromLong(1);
2098}
2099
2100
2101static char islower__doc__[] =
2102"S.islower() -> int\n\
2103\n\
2104Return 1 if all cased characters in S are lowercase and there is\n\
2105at least one cased character in S, 0 otherwise.";
2106
2107static PyObject*
2108string_islower(PyStringObject *self, PyObject *args)
2109{
Fred Drakeba096332000-07-09 07:04:36 +00002110 register const unsigned char *p
2111 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002112 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 int cased;
2114
2115 if (!PyArg_NoArgs(args))
2116 return NULL;
2117
2118 /* Shortcut for single character strings */
2119 if (PyString_GET_SIZE(self) == 1)
2120 return PyInt_FromLong(islower(*p) != 0);
2121
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002122 /* Special case for empty strings */
2123 if (PyString_GET_SIZE(self) == 0)
2124 return PyInt_FromLong(0);
2125
Guido van Rossum4c08d552000-03-10 22:55:18 +00002126 e = p + PyString_GET_SIZE(self);
2127 cased = 0;
2128 for (; p < e; p++) {
2129 if (isupper(*p))
2130 return PyInt_FromLong(0);
2131 else if (!cased && islower(*p))
2132 cased = 1;
2133 }
2134 return PyInt_FromLong(cased);
2135}
2136
2137
2138static char isupper__doc__[] =
2139"S.isupper() -> int\n\
2140\n\
2141Return 1 if all cased characters in S are uppercase and there is\n\
2142at least one cased character in S, 0 otherwise.";
2143
2144static PyObject*
2145string_isupper(PyStringObject *self, PyObject *args)
2146{
Fred Drakeba096332000-07-09 07:04:36 +00002147 register const unsigned char *p
2148 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002149 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 int cased;
2151
2152 if (!PyArg_NoArgs(args))
2153 return NULL;
2154
2155 /* Shortcut for single character strings */
2156 if (PyString_GET_SIZE(self) == 1)
2157 return PyInt_FromLong(isupper(*p) != 0);
2158
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002159 /* Special case for empty strings */
2160 if (PyString_GET_SIZE(self) == 0)
2161 return PyInt_FromLong(0);
2162
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 e = p + PyString_GET_SIZE(self);
2164 cased = 0;
2165 for (; p < e; p++) {
2166 if (islower(*p))
2167 return PyInt_FromLong(0);
2168 else if (!cased && isupper(*p))
2169 cased = 1;
2170 }
2171 return PyInt_FromLong(cased);
2172}
2173
2174
2175static char istitle__doc__[] =
2176"S.istitle() -> int\n\
2177\n\
2178Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2179may only follow uncased characters and lowercase characters only cased\n\
2180ones. Return 0 otherwise.";
2181
2182static PyObject*
2183string_istitle(PyStringObject *self, PyObject *args)
2184{
Fred Drakeba096332000-07-09 07:04:36 +00002185 register const unsigned char *p
2186 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002187 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 int cased, previous_is_cased;
2189
2190 if (!PyArg_NoArgs(args))
2191 return NULL;
2192
2193 /* Shortcut for single character strings */
2194 if (PyString_GET_SIZE(self) == 1)
2195 return PyInt_FromLong(isupper(*p) != 0);
2196
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002197 /* Special case for empty strings */
2198 if (PyString_GET_SIZE(self) == 0)
2199 return PyInt_FromLong(0);
2200
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 e = p + PyString_GET_SIZE(self);
2202 cased = 0;
2203 previous_is_cased = 0;
2204 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002205 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206
2207 if (isupper(ch)) {
2208 if (previous_is_cased)
2209 return PyInt_FromLong(0);
2210 previous_is_cased = 1;
2211 cased = 1;
2212 }
2213 else if (islower(ch)) {
2214 if (!previous_is_cased)
2215 return PyInt_FromLong(0);
2216 previous_is_cased = 1;
2217 cased = 1;
2218 }
2219 else
2220 previous_is_cased = 0;
2221 }
2222 return PyInt_FromLong(cased);
2223}
2224
2225
2226static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002227"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228\n\
2229Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002230Line breaks are not included in the resulting list unless keepends\n\
2231is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232
2233#define SPLIT_APPEND(data, left, right) \
2234 str = PyString_FromStringAndSize(data + left, right - left); \
2235 if (!str) \
2236 goto onError; \
2237 if (PyList_Append(list, str)) { \
2238 Py_DECREF(str); \
2239 goto onError; \
2240 } \
2241 else \
2242 Py_DECREF(str);
2243
2244static PyObject*
2245string_splitlines(PyStringObject *self, PyObject *args)
2246{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 register int i;
2248 register int j;
2249 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002250 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002251 PyObject *list;
2252 PyObject *str;
2253 char *data;
2254
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002255 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 return NULL;
2257
2258 data = PyString_AS_STRING(self);
2259 len = PyString_GET_SIZE(self);
2260
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 list = PyList_New(0);
2262 if (!list)
2263 goto onError;
2264
2265 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002266 int eol;
2267
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 /* Find a line and append it */
2269 while (i < len && data[i] != '\n' && data[i] != '\r')
2270 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271
2272 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002273 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 if (i < len) {
2275 if (data[i] == '\r' && i + 1 < len &&
2276 data[i+1] == '\n')
2277 i += 2;
2278 else
2279 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002280 if (keepends)
2281 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002283 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 j = i;
2285 }
2286 if (j < len) {
2287 SPLIT_APPEND(data, j, len);
2288 }
2289
2290 return list;
2291
2292 onError:
2293 Py_DECREF(list);
2294 return NULL;
2295}
2296
2297#undef SPLIT_APPEND
2298
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299
2300static PyMethodDef
2301string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 /* Counterparts of the obsolete stropmodule functions; except
2303 string.maketrans(). */
2304 {"join", (PyCFunction)string_join, 1, join__doc__},
2305 {"split", (PyCFunction)string_split, 1, split__doc__},
2306 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2307 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2308 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2309 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2310 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2311 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2312 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002313 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2314 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2316 {"count", (PyCFunction)string_count, 1, count__doc__},
2317 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2318 {"find", (PyCFunction)string_find, 1, find__doc__},
2319 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2322 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2323 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2324 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2326 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2327 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2329 {"title", (PyCFunction)string_title, 1, title__doc__},
2330 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2331 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2332 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002333 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2335 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2336#if 0
2337 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2338#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 {NULL, NULL} /* sentinel */
2340};
2341
2342static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002343string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344{
2345 return Py_FindMethod(string_methods, (PyObject*)s, name);
2346}
2347
2348
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002349PyTypeObject PyString_Type = {
2350 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002351 0,
2352 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002353 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002354 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002355 (destructor)string_dealloc, /*tp_dealloc*/
2356 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002358 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002359 (cmpfunc)string_compare, /*tp_compare*/
2360 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002361 0, /*tp_as_number*/
2362 &string_as_sequence, /*tp_as_sequence*/
2363 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002364 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002365 0, /*tp_call*/
2366 0, /*tp_str*/
2367 0, /*tp_getattro*/
2368 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002369 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002370 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002371 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002372};
2373
2374void
Fred Drakeba096332000-07-09 07:04:36 +00002375PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002376{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002377 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002378 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002379 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002380 if (w == NULL || !PyString_Check(*pv)) {
2381 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002382 *pv = NULL;
2383 return;
2384 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002385 v = string_concat((PyStringObject *) *pv, w);
2386 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002387 *pv = v;
2388}
2389
Guido van Rossum013142a1994-08-30 08:19:36 +00002390void
Fred Drakeba096332000-07-09 07:04:36 +00002391PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002392{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002393 PyString_Concat(pv, w);
2394 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002395}
2396
2397
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398/* The following function breaks the notion that strings are immutable:
2399 it changes the size of a string. We get away with this only if there
2400 is only one module referencing the object. You can also think of it
2401 as creating a new string object and destroying the old one, only
2402 more efficiently. In any case, don't use this if the string may
2403 already be known to some other part of the code... */
2404
2405int
Fred Drakeba096332000-07-09 07:04:36 +00002406_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002407{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002408 register PyObject *v;
2409 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002410 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002411 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002412 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002413 Py_DECREF(v);
2414 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002415 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002416 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002417 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002418#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002419 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002420#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002421 _Py_ForgetReference(v);
2422 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002423 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002424 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002425 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002426 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002427 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002428 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002429 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002430 _Py_NewReference(*pv);
2431 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002432 sv->ob_size = newsize;
2433 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002434 return 0;
2435}
Guido van Rossume5372401993-03-16 12:15:04 +00002436
2437/* Helpers for formatstring */
2438
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002439static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002440getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002441{
2442 int argidx = *p_argidx;
2443 if (argidx < arglen) {
2444 (*p_argidx)++;
2445 if (arglen < 0)
2446 return args;
2447 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002448 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002449 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002450 PyErr_SetString(PyExc_TypeError,
2451 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002452 return NULL;
2453}
2454
/* Format flags, one bit each.  The character listed next to a flag is
 * the format code it stands for.
 */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
2467
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002468static int
Fred Drakeba096332000-07-09 07:04:36 +00002469formatfloat(char *buf, size_t buflen, int flags,
2470 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002471{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002472 /* fmt = '%#.' + `prec` + `type`
2473 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002474 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002475 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002476 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002477 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002478 if (prec < 0)
2479 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002480 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2481 type = 'g';
2482 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002483 /* worst case length calc to ensure no buffer overrun:
2484 fmt = %#.<prec>g
2485 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2486 for any double rep.)
2487 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2488 If prec=0 the effective precision is 1 (the leading digit is
2489 always given), therefore increase by one to 10+prec. */
2490 if (buflen <= (size_t)10 + (size_t)prec) {
2491 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002492 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002493 return -1;
2494 }
Guido van Rossume5372401993-03-16 12:15:04 +00002495 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002496 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002497}
2498
Tim Peters38fd5b62000-09-21 05:43:11 +00002499/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2500 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2501 * Python's regular ints.
2502 * Return value: a new PyString*, or NULL if error.
2503 * . *pbuf is set to point into it,
2504 * *plen set to the # of chars following that.
2505 * Caller must decref it when done using pbuf.
2506 * The string starting at *pbuf is of the form
2507 * "-"? ("0x" | "0X")? digit+
2508 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2509 * set in flags. The case of hex digits will be correct,
2510 * There will be at least prec digits, zero-filled on the left if
2511 * necessary to get that many.
2512 * val object to be converted
2513 * flags bitmask of format flags; only F_ALT is looked at
2514 * prec minimum number of digits; 0-fill on left if needed
2515 * type a character in [duoxX]; u acts the same as d
2516 *
2517 * CAUTION: o, x and X conversions on regular ints can never
2518 * produce a '-' sign, but can for Python's unbounded ints.
2519 */
2520PyObject*
2521_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2522 char **pbuf, int *plen)
2523{
2524 PyObject *result = NULL;
2525 char *buf;
2526 int i;
2527 int sign; /* 1 if '-', else 0 */
2528 int len; /* number of characters */
2529 int numdigits; /* len == numnondigits + numdigits */
2530 int numnondigits = 0;
2531
2532 switch (type) {
2533 case 'd':
2534 case 'u':
2535 result = val->ob_type->tp_str(val);
2536 break;
2537 case 'o':
2538 result = val->ob_type->tp_as_number->nb_oct(val);
2539 break;
2540 case 'x':
2541 case 'X':
2542 numnondigits = 2;
2543 result = val->ob_type->tp_as_number->nb_hex(val);
2544 break;
2545 default:
2546 assert(!"'type' not in [duoxX]");
2547 }
2548 if (!result)
2549 return NULL;
2550
2551 /* To modify the string in-place, there can only be one reference. */
2552 if (result->ob_refcnt != 1) {
2553 PyErr_BadInternalCall();
2554 return NULL;
2555 }
2556 buf = PyString_AsString(result);
2557 len = PyString_Size(result);
2558 if (buf[len-1] == 'L') {
2559 --len;
2560 buf[len] = '\0';
2561 }
2562 sign = buf[0] == '-';
2563 numnondigits += sign;
2564 numdigits = len - numnondigits;
2565 assert(numdigits > 0);
2566
2567 /* Get rid of base marker unless F_ALT */
2568 if ((flags & F_ALT) == 0) {
2569 /* Need to skip 0x, 0X or 0. */
2570 int skipped = 0;
2571 switch (type) {
2572 case 'o':
2573 assert(buf[sign] == '0');
2574 /* If 0 is only digit, leave it alone. */
2575 if (numdigits > 1) {
2576 skipped = 1;
2577 --numdigits;
2578 }
2579 break;
2580 case 'x':
2581 case 'X':
2582 assert(buf[sign] == '0');
2583 assert(buf[sign + 1] == 'x');
2584 skipped = 2;
2585 numnondigits -= 2;
2586 break;
2587 }
2588 if (skipped) {
2589 buf += skipped;
2590 len -= skipped;
2591 if (sign)
2592 buf[0] = '-';
2593 }
2594 assert(len == numnondigits + numdigits);
2595 assert(numdigits > 0);
2596 }
2597
2598 /* Fill with leading zeroes to meet minimum width. */
2599 if (prec > numdigits) {
2600 PyObject *r1 = PyString_FromStringAndSize(NULL,
2601 numnondigits + prec);
2602 char *b1;
2603 if (!r1) {
2604 Py_DECREF(result);
2605 return NULL;
2606 }
2607 b1 = PyString_AS_STRING(r1);
2608 for (i = 0; i < numnondigits; ++i)
2609 *b1++ = *buf++;
2610 for (i = 0; i < prec - numdigits; i++)
2611 *b1++ = '0';
2612 for (i = 0; i < numdigits; i++)
2613 *b1++ = *buf++;
2614 *b1 = '\0';
2615 Py_DECREF(result);
2616 result = r1;
2617 buf = PyString_AS_STRING(result);
2618 len = numnondigits + prec;
2619 }
2620
2621 /* Fix up case for hex conversions. */
2622 switch (type) {
2623 case 'x':
2624 /* Need to convert all upper case letters to lower case. */
2625 for (i = 0; i < len; i++)
2626 if (buf[i] >= 'A' && buf[i] <= 'F')
2627 buf[i] += 'a'-'A';
2628 break;
2629 case 'X':
2630 /* Need to convert 0x to 0X (and -0x to -0X). */
2631 if (buf[sign + 1] == 'x')
2632 buf[sign + 1] = 'X';
2633 break;
2634 }
2635 *pbuf = buf;
2636 *plen = len;
2637 return result;
2638}
2639
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002640static int
Fred Drakeba096332000-07-09 07:04:36 +00002641formatint(char *buf, size_t buflen, int flags,
2642 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002643{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002644 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002645 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2646 + 1 + 1 = 24 */
2647 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002648 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002649 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002650 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002651 if (prec < 0)
2652 prec = 1;
2653 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002654 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002655 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002656 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002657 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002658 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002659 return -1;
2660 }
Guido van Rossume5372401993-03-16 12:15:04 +00002661 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002662 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002663}
2664
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002665static int
Fred Drakeba096332000-07-09 07:04:36 +00002666formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002667{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002668 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002669 if (PyString_Check(v)) {
2670 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002671 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002672 }
2673 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002674 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002675 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002676 }
2677 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002678 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002679}
2680
Guido van Rossum013142a1994-08-30 08:19:36 +00002681
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002682/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2683
2684 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2685 chars are formatted. XXX This is a magic number. Each formatting
2686 routine does bounds checking to ensure no overflow, but a better
2687 solution may be to malloc a buffer of appropriate size for each
2688 format. For now, the current solution is sufficient.
2689*/
2690#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002691
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002692PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002693PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002694{
2695 char *fmt, *res;
2696 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002697 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002698 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002699 PyObject *dict = NULL;
2700 if (format == NULL || !PyString_Check(format) || args == NULL) {
2701 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002702 return NULL;
2703 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002704 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002705 fmt = PyString_AsString(format);
2706 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002707 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002708 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002709 if (result == NULL)
2710 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002711 res = PyString_AsString(result);
2712 if (PyTuple_Check(args)) {
2713 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002714 argidx = 0;
2715 }
2716 else {
2717 arglen = -1;
2718 argidx = -2;
2719 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002720 if (args->ob_type->tp_as_mapping)
2721 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002722 while (--fmtcnt >= 0) {
2723 if (*fmt != '%') {
2724 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002725 rescnt = fmtcnt + 100;
2726 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002727 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002728 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002729 res = PyString_AsString(result)
2730 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002731 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 }
2733 *res++ = *fmt++;
2734 }
2735 else {
2736 /* Got a format specifier */
2737 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002738 int width = -1;
2739 int prec = -1;
2740 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002741 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002742 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 PyObject *v = NULL;
2744 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002745 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002746 int sign;
2747 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002748 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002749 char *fmt_start = fmt;
2750
Guido van Rossumda9c2711996-12-05 21:58:58 +00002751 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002752 if (*fmt == '(') {
2753 char *keystart;
2754 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002755 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002756 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002757
2758 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002760 "format requires a mapping");
2761 goto error;
2762 }
2763 ++fmt;
2764 --fmtcnt;
2765 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002766 /* Skip over balanced parentheses */
2767 while (pcount > 0 && --fmtcnt >= 0) {
2768 if (*fmt == ')')
2769 --pcount;
2770 else if (*fmt == '(')
2771 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002772 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002773 }
2774 keylen = fmt - keystart - 1;
2775 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002776 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002777 "incomplete format key");
2778 goto error;
2779 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002780 key = PyString_FromStringAndSize(keystart,
2781 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002782 if (key == NULL)
2783 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002784 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002786 args_owned = 0;
2787 }
2788 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002789 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002790 if (args == NULL) {
2791 goto error;
2792 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002793 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002794 arglen = -1;
2795 argidx = -2;
2796 }
Guido van Rossume5372401993-03-16 12:15:04 +00002797 while (--fmtcnt >= 0) {
2798 switch (c = *fmt++) {
2799 case '-': flags |= F_LJUST; continue;
2800 case '+': flags |= F_SIGN; continue;
2801 case ' ': flags |= F_BLANK; continue;
2802 case '#': flags |= F_ALT; continue;
2803 case '0': flags |= F_ZERO; continue;
2804 }
2805 break;
2806 }
2807 if (c == '*') {
2808 v = getnextarg(args, arglen, &argidx);
2809 if (v == NULL)
2810 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 if (!PyInt_Check(v)) {
2812 PyErr_SetString(PyExc_TypeError,
2813 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002814 goto error;
2815 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002816 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002817 if (width < 0) {
2818 flags |= F_LJUST;
2819 width = -width;
2820 }
Guido van Rossume5372401993-03-16 12:15:04 +00002821 if (--fmtcnt >= 0)
2822 c = *fmt++;
2823 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002824 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002825 width = c - '0';
2826 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002827 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002828 if (!isdigit(c))
2829 break;
2830 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002831 PyErr_SetString(
2832 PyExc_ValueError,
2833 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002834 goto error;
2835 }
2836 width = width*10 + (c - '0');
2837 }
2838 }
2839 if (c == '.') {
2840 prec = 0;
2841 if (--fmtcnt >= 0)
2842 c = *fmt++;
2843 if (c == '*') {
2844 v = getnextarg(args, arglen, &argidx);
2845 if (v == NULL)
2846 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002847 if (!PyInt_Check(v)) {
2848 PyErr_SetString(
2849 PyExc_TypeError,
2850 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002851 goto error;
2852 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002853 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002854 if (prec < 0)
2855 prec = 0;
2856 if (--fmtcnt >= 0)
2857 c = *fmt++;
2858 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002859 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002860 prec = c - '0';
2861 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002862 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002863 if (!isdigit(c))
2864 break;
2865 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002866 PyErr_SetString(
2867 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002868 "prec too big");
2869 goto error;
2870 }
2871 prec = prec*10 + (c - '0');
2872 }
2873 }
2874 } /* prec */
2875 if (fmtcnt >= 0) {
2876 if (c == 'h' || c == 'l' || c == 'L') {
2877 size = c;
2878 if (--fmtcnt >= 0)
2879 c = *fmt++;
2880 }
2881 }
2882 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002883 PyErr_SetString(PyExc_ValueError,
2884 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002885 goto error;
2886 }
2887 if (c != '%') {
2888 v = getnextarg(args, arglen, &argidx);
2889 if (v == NULL)
2890 goto error;
2891 }
2892 sign = 0;
2893 fill = ' ';
2894 switch (c) {
2895 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002896 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002897 len = 1;
2898 break;
2899 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002900 case 'r':
2901 if (PyUnicode_Check(v)) {
2902 fmt = fmt_start;
2903 goto unicode;
2904 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002905 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002906 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002907 else
2908 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002909 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002910 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002911 if (!PyString_Check(temp)) {
2912 PyErr_SetString(PyExc_TypeError,
2913 "%s argument has non-string str()");
2914 goto error;
2915 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002916 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002917 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002918 if (prec >= 0 && len > prec)
2919 len = prec;
2920 break;
2921 case 'i':
2922 case 'd':
2923 case 'u':
2924 case 'o':
2925 case 'x':
2926 case 'X':
2927 if (c == 'i')
2928 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002929 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002930 temp = _PyString_FormatLong(v, flags,
2931 prec, c, &pbuf, &len);
2932 if (!temp)
2933 goto error;
2934 /* unbounded ints can always produce
2935 a sign character! */
2936 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002937 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002938 else {
2939 pbuf = formatbuf;
2940 len = formatint(pbuf, sizeof(formatbuf),
2941 flags, prec, c, v);
2942 if (len < 0)
2943 goto error;
2944 /* only d conversion is signed */
2945 sign = c == 'd';
2946 }
2947 if (flags & F_ZERO)
2948 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002949 break;
2950 case 'e':
2951 case 'E':
2952 case 'f':
2953 case 'g':
2954 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002955 pbuf = formatbuf;
2956 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002957 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002958 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002959 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002960 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002961 fill = '0';
2962 break;
2963 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002964 pbuf = formatbuf;
2965 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002966 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002967 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002968 break;
2969 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002970 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00002971 "unsupported format character '%c' (0x%x) "
2972 "at index %i",
2973 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00002974 goto error;
2975 }
2976 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002977 if (*pbuf == '-' || *pbuf == '+') {
2978 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002979 len--;
2980 }
2981 else if (flags & F_SIGN)
2982 sign = '+';
2983 else if (flags & F_BLANK)
2984 sign = ' ';
2985 else
Tim Peters38fd5b62000-09-21 05:43:11 +00002986 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002987 }
2988 if (width < len)
2989 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00002990 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002991 reslen -= rescnt;
2992 rescnt = width + fmtcnt + 100;
2993 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002994 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002995 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002996 res = PyString_AsString(result)
2997 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002998 }
2999 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003000 if (fill != ' ')
3001 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003002 rescnt--;
3003 if (width > len)
3004 width--;
3005 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003006 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3007 assert(pbuf[0] == '0');
3008 assert(pbuf[1] == c);
3009 if (fill != ' ') {
3010 *res++ = *pbuf++;
3011 *res++ = *pbuf++;
3012 }
3013 rescnt -= 2;
3014 width -= 2;
3015 if (width < 0)
3016 width = 0;
3017 len -= 2;
3018 }
3019 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003020 do {
3021 --rescnt;
3022 *res++ = fill;
3023 } while (--width > len);
3024 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003025 if (fill == ' ') {
3026 if (sign)
3027 *res++ = sign;
3028 if ((flags & F_ALT) &&
3029 (c == 'x' || c == 'X')) {
3030 assert(pbuf[0] == '0');
3031 assert(pbuf[1] == c);
3032 *res++ = *pbuf++;
3033 *res++ = *pbuf++;
3034 }
3035 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003036 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003037 res += len;
3038 rescnt -= len;
3039 while (--width >= len) {
3040 --rescnt;
3041 *res++ = ' ';
3042 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003043 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003044 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003045 "not all arguments converted");
3046 goto error;
3047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003048 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003049 } /* '%' */
3050 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003051 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003052 PyErr_SetString(PyExc_TypeError,
3053 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003054 goto error;
3055 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003056 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003057 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003058 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003059 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003060 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003061
3062 unicode:
3063 if (args_owned) {
3064 Py_DECREF(args);
3065 args_owned = 0;
3066 }
3067 /* Fiddle args right (remove the first argidx-1 arguments) */
3068 --argidx;
3069 if (PyTuple_Check(orig_args) && argidx > 0) {
3070 PyObject *v;
3071 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3072 v = PyTuple_New(n);
3073 if (v == NULL)
3074 goto error;
3075 while (--n >= 0) {
3076 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3077 Py_INCREF(w);
3078 PyTuple_SET_ITEM(v, n, w);
3079 }
3080 args = v;
3081 } else {
3082 Py_INCREF(orig_args);
3083 args = orig_args;
3084 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003085 args_owned = 1;
3086 /* Take what we have of the result and let the Unicode formatting
3087 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003088 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003089 if (_PyString_Resize(&result, rescnt))
3090 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003091 fmtcnt = PyString_GET_SIZE(format) - \
3092 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003093 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3094 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003095 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003096 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003097 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003098 if (v == NULL)
3099 goto error;
3100 /* Paste what we have (result) to what the Unicode formatting
3101 function returned (v) and return the result (or error) */
3102 w = PyUnicode_Concat(result, v);
3103 Py_DECREF(result);
3104 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003105 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003106 return w;
Guido van Rossum90daa872000-04-10 13:47:21 +00003107
Guido van Rossume5372401993-03-16 12:15:04 +00003108 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003109 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003110 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003111 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003112 }
Guido van Rossume5372401993-03-16 12:15:04 +00003113 return NULL;
3114}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003115
3116
3117#ifdef INTERN_STRINGS
3118
Barry Warsaw4df762f2000-08-16 23:41:01 +00003119/* This dictionary will leak at PyString_Fini() time. That's acceptable
3120 * because PyString_Fini() specifically frees interned strings that are
3121 * only referenced by this dictionary. The CVS log entry for revision 2.45
3122 * says:
3123 *
3124 * Change the Fini function to only remove otherwise unreferenced
3125 * strings from the interned table. There are references in
3126 * hard-to-find static variables all over the interpreter, and it's not
3127 * worth trying to get rid of all those; but "uninterning" isn't fair
3128 * either and may cause subtle failures later -- so we have to keep them
3129 * in the interned table.
3130 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003131static PyObject *interned;
3132
3133void
Fred Drakeba096332000-07-09 07:04:36 +00003134PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003135{
3136 register PyStringObject *s = (PyStringObject *)(*p);
3137 PyObject *t;
3138 if (s == NULL || !PyString_Check(s))
3139 Py_FatalError("PyString_InternInPlace: strings only please!");
3140 if ((t = s->ob_sinterned) != NULL) {
3141 if (t == (PyObject *)s)
3142 return;
3143 Py_INCREF(t);
3144 *p = t;
3145 Py_DECREF(s);
3146 return;
3147 }
3148 if (interned == NULL) {
3149 interned = PyDict_New();
3150 if (interned == NULL)
3151 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003152 }
3153 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3154 Py_INCREF(t);
3155 *p = s->ob_sinterned = t;
3156 Py_DECREF(s);
3157 return;
3158 }
3159 t = (PyObject *)s;
3160 if (PyDict_SetItem(interned, t, t) == 0) {
3161 s->ob_sinterned = t;
3162 return;
3163 }
3164 PyErr_Clear();
3165}
3166
3167
3168PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003169PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003170{
3171 PyObject *s = PyString_FromString(cp);
3172 if (s == NULL)
3173 return NULL;
3174 PyString_InternInPlace(&s);
3175 return s;
3176}
3177
3178#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003179
3180void
Fred Drakeba096332000-07-09 07:04:36 +00003181PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003182{
3183 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003184 for (i = 0; i < UCHAR_MAX + 1; i++) {
3185 Py_XDECREF(characters[i]);
3186 characters[i] = NULL;
3187 }
3188#ifndef DONT_SHARE_SHORT_STRINGS
3189 Py_XDECREF(nullstring);
3190 nullstring = NULL;
3191#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003192#ifdef INTERN_STRINGS
3193 if (interned) {
3194 int pos, changed;
3195 PyObject *key, *value;
3196 do {
3197 changed = 0;
3198 pos = 0;
3199 while (PyDict_Next(interned, &pos, &key, &value)) {
3200 if (key->ob_refcnt == 2 && key == value) {
3201 PyDict_DelItem(interned, key);
3202 changed = 1;
3203 }
3204 }
3205 } while (changed);
3206 }
3207#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003208}