blob: eed4687d369fa141f24947ef0673942c61e2ac27 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
144
145 if (encoding == NULL)
146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
172
173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
184
185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
198
199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
204 if (encoding == NULL)
205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
227
228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
275extern
276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
338 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000339 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000340 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000341 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000344 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345}
346
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000348string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000350 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
351 PyObject *v;
352 if (newsize > INT_MAX) {
353 PyErr_SetString(PyExc_OverflowError,
354 "string is too large to make repr");
355 }
356 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000358 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 }
360 else {
361 register int i;
362 register char c;
363 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000364 int quote;
365
Thomas Wouters7e474022000-07-16 12:04:32 +0000366 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000367 quote = '\'';
368 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
369 quote = '"';
370
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000371 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000372 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373 for (i = 0; i < op->ob_size; i++) {
374 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000375 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376 *p++ = '\\', *p++ = c;
377 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 while (*p != '\0')
380 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 }
382 else
383 *p++ = c;
384 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000385 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000387 _PyString_Resize(
388 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000389 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391}
392
393static int
Fred Drakeba096332000-07-09 07:04:36 +0000394string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395{
396 return a->ob_size;
397}
398
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000400string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401{
402 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403 register PyStringObject *op;
404 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000405 if (PyUnicode_Check(bb))
406 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000407 PyErr_Format(PyExc_TypeError,
408 "cannot add type \"%.200s\" to string",
409 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 return NULL;
411 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000412#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413 /* Optimize cases with empty left or right operand */
414 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 return bb;
417 }
418 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 Py_INCREF(a);
420 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 }
422 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000423 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000425 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000426 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000428 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429#ifdef CACHE_HASH
430 op->ob_shash = -1;
431#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000432#ifdef INTERN_STRINGS
433 op->ob_sinterned = NULL;
434#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000435 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
436 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
437 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439#undef b
440}
441
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000443string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444{
445 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000446 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000447 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000448 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 if (n < 0)
450 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000451 /* watch out for overflows: the size can overflow int,
452 * and the # of bytes needed can overflow size_t
453 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000455 if (n && size / n != a->ob_size) {
456 PyErr_SetString(PyExc_OverflowError,
457 "repeated string is too long");
458 return NULL;
459 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000461 Py_INCREF(a);
462 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Tim Peters8f422462000-09-09 06:13:41 +0000464 nbytes = size * sizeof(char);
465 if (nbytes / sizeof(char) != (size_t)size ||
466 nbytes + sizeof(PyStringObject) <= nbytes) {
467 PyErr_SetString(PyExc_OverflowError,
468 "repeated string is too long");
469 return NULL;
470 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000471 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000472 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000473 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000475 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000476#ifdef CACHE_HASH
477 op->ob_shash = -1;
478#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000479#ifdef INTERN_STRINGS
480 op->ob_sinterned = NULL;
481#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000482 for (i = 0; i < size; i += a->ob_size)
483 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
484 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000485 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486}
487
488/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
489
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000491string_slice(register PyStringObject *a, register int i, register int j)
492 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493{
494 if (i < 0)
495 i = 0;
496 if (j < 0)
497 j = 0; /* Avoid signed/unsigned bug in next line */
498 if (j > a->ob_size)
499 j = a->ob_size;
500 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501 Py_INCREF(a);
502 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 }
504 if (j < i)
505 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000506 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507}
508
Guido van Rossum9284a572000-03-07 15:53:43 +0000509static int
Fred Drakeba096332000-07-09 07:04:36 +0000510string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000511{
512 register char *s, *end;
513 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000514 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000515 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000516 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000517 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000518 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000519 return -1;
520 }
521 c = PyString_AsString(el)[0];
522 s = PyString_AsString(a);
523 end = s + PyString_Size(a);
524 while (s < end) {
525 if (c == *s++)
526 return 1;
527 }
528 return 0;
529}
530
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000531static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000532string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000534 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000535 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000536 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000537 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538 return NULL;
539 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000540 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000542#ifdef COUNT_ALLOCS
543 if (v != NULL)
544 one_strings++;
545#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000546 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000547 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000548 if (v == NULL)
549 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000550 characters[c] = (PyStringObject *) v;
551 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000554 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000555}
556
557static int
Fred Drakeba096332000-07-09 07:04:36 +0000558string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559{
Guido van Rossum253919f1991-02-13 23:18:39 +0000560 int len_a = a->ob_size, len_b = b->ob_size;
561 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000562 int cmp;
563 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000564 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000565 if (cmp == 0)
566 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
567 if (cmp != 0)
568 return cmp;
569 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000570 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000571}
572
Guido van Rossum9bfef441993-03-29 10:43:31 +0000573static long
Fred Drakeba096332000-07-09 07:04:36 +0000574string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000575{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000576 register int len;
577 register unsigned char *p;
578 register long x;
579
580#ifdef CACHE_HASH
581 if (a->ob_shash != -1)
582 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000583#ifdef INTERN_STRINGS
584 if (a->ob_sinterned != NULL)
585 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000587#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000588#endif
589 len = a->ob_size;
590 p = (unsigned char *) a->ob_sval;
591 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000592 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000593 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000594 x ^= a->ob_size;
595 if (x == -1)
596 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000597#ifdef CACHE_HASH
598 a->ob_shash = x;
599#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000600 return x;
601}
602
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000603static int
Fred Drakeba096332000-07-09 07:04:36 +0000604string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000605{
606 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000607 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000608 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000609 return -1;
610 }
611 *ptr = (void *)self->ob_sval;
612 return self->ob_size;
613}
614
615static int
Fred Drakeba096332000-07-09 07:04:36 +0000616string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000617{
Guido van Rossum045e6881997-09-08 18:30:11 +0000618 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000619 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000620 return -1;
621}
622
623static int
Fred Drakeba096332000-07-09 07:04:36 +0000624string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000625{
626 if ( lenp )
627 *lenp = self->ob_size;
628 return 1;
629}
630
Guido van Rossum1db70701998-10-08 02:18:52 +0000631static int
Fred Drakeba096332000-07-09 07:04:36 +0000632string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000633{
634 if ( index != 0 ) {
635 PyErr_SetString(PyExc_SystemError,
636 "accessing non-existent string segment");
637 return -1;
638 }
639 *ptr = self->ob_sval;
640 return self->ob_size;
641}
642
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000643static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000644 (inquiry)string_length, /*sq_length*/
645 (binaryfunc)string_concat, /*sq_concat*/
646 (intargfunc)string_repeat, /*sq_repeat*/
647 (intargfunc)string_item, /*sq_item*/
648 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000649 0, /*sq_ass_item*/
650 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000651 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000652};
653
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000654static PyBufferProcs string_as_buffer = {
655 (getreadbufferproc)string_buffer_getreadbuf,
656 (getwritebufferproc)string_buffer_getwritebuf,
657 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000658 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000659};
660
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000661
662
663#define LEFTSTRIP 0
664#define RIGHTSTRIP 1
665#define BOTHSTRIP 2
666
667
668static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000669split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000670{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000671 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000672 PyObject* item;
673 PyObject *list = PyList_New(0);
674
675 if (list == NULL)
676 return NULL;
677
Guido van Rossum4c08d552000-03-10 22:55:18 +0000678 for (i = j = 0; i < len; ) {
679 while (i < len && isspace(Py_CHARMASK(s[i])))
680 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000681 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 while (i < len && !isspace(Py_CHARMASK(s[i])))
683 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000684 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000685 if (maxsplit-- <= 0)
686 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687 item = PyString_FromStringAndSize(s+j, (int)(i-j));
688 if (item == NULL)
689 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690 err = PyList_Append(list, item);
691 Py_DECREF(item);
692 if (err < 0)
693 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000694 while (i < len && isspace(Py_CHARMASK(s[i])))
695 i++;
696 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000697 }
698 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000699 if (j < len) {
700 item = PyString_FromStringAndSize(s+j, (int)(len - j));
701 if (item == NULL)
702 goto finally;
703 err = PyList_Append(list, item);
704 Py_DECREF(item);
705 if (err < 0)
706 goto finally;
707 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 return list;
709 finally:
710 Py_DECREF(list);
711 return NULL;
712}
713
714
/* Documentation string for the split() method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000722
723static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000724string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000725{
726 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000727 int maxsplit = -1;
728 const char *s = PyString_AS_STRING(self), *sub;
729 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000730
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000732 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733 if (maxsplit < 0)
734 maxsplit = INT_MAX;
735 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (PyString_Check(subobj)) {
738 sub = PyString_AS_STRING(subobj);
739 n = PyString_GET_SIZE(subobj);
740 }
741 else if (PyUnicode_Check(subobj))
742 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
743 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
744 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000745 if (n == 0) {
746 PyErr_SetString(PyExc_ValueError, "empty separator");
747 return NULL;
748 }
749
750 list = PyList_New(0);
751 if (list == NULL)
752 return NULL;
753
754 i = j = 0;
755 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000756 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000757 if (maxsplit-- <= 0)
758 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000759 item = PyString_FromStringAndSize(s+j, (int)(i-j));
760 if (item == NULL)
761 goto fail;
762 err = PyList_Append(list, item);
763 Py_DECREF(item);
764 if (err < 0)
765 goto fail;
766 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000767 }
768 else
769 i++;
770 }
771 item = PyString_FromStringAndSize(s+j, (int)(len-j));
772 if (item == NULL)
773 goto fail;
774 err = PyList_Append(list, item);
775 Py_DECREF(item);
776 if (err < 0)
777 goto fail;
778
779 return list;
780
781 fail:
782 Py_DECREF(list);
783 return NULL;
784}
785
786
787static char join__doc__[] =
788"S.join(sequence) -> string\n\
789\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000790Return a string which is the concatenation of the strings in the\n\
791sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000792
793static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000794string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795{
796 char *sep = PyString_AS_STRING(self);
797 int seplen = PyString_GET_SIZE(self);
798 PyObject *res = NULL;
799 int reslen = 0;
800 char *p;
801 int seqlen = 0;
802 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000803 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000804 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000806 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807 return NULL;
808
Barry Warsaw771d0672000-07-11 04:58:12 +0000809 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "sequence expected, %.80s found",
813 orig->ob_type->tp_name);
814 return NULL;
815 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000816 /* From here on out, errors go through finally: for proper
817 * reference count manipulations.
818 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000819 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000821 item = PySequence_Fast_GET_ITEM(seq, 0);
822 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000823 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000826
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000828 goto finally;
829
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000830 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000831
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000832 for (i = 0; i < seqlen; i++) {
833 item = PySequence_Fast_GET_ITEM(seq, i);
834 if (!PyString_Check(item)){
835 if (PyUnicode_Check(item)) {
836 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000837 Py_DECREF(seq);
Guido van Rossum2ccda8a2000-11-27 18:46:26 +0000838 return PyUnicode_Join((PyObject *)self, orig);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000839 }
840 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000841 "sequence item %i: expected string,"
842 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000844 goto finally;
845 }
846 slen = PyString_GET_SIZE(item);
847 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000848 /* at least double the size of the string */
849 sz_incr = slen + seplen > sz ? slen + seplen : sz;
850 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000851 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000853 sz += sz_incr;
854 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000855 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000856 if (i > 0) {
857 memcpy(p, sep, seplen);
858 p += seplen;
859 reslen += seplen;
860 }
861 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000862 p += slen;
863 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000864 }
865 if (_PyString_Resize(&res, reslen))
866 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000867 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 return res;
869
870 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000871 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000872 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000873 return NULL;
874}
875
876
877
878static long
Fred Drakeba096332000-07-09 07:04:36 +0000879string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000880{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000881 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000882 int len = PyString_GET_SIZE(self);
883 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000884 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000885
Guido van Rossumc6821402000-05-08 14:08:05 +0000886 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
887 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 return -2;
889 if (PyString_Check(subobj)) {
890 sub = PyString_AS_STRING(subobj);
891 n = PyString_GET_SIZE(subobj);
892 }
893 else if (PyUnicode_Check(subobj))
894 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
895 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000896 return -2;
897
898 if (last > len)
899 last = len;
900 if (last < 0)
901 last += len;
902 if (last < 0)
903 last = 0;
904 if (i < 0)
905 i += len;
906 if (i < 0)
907 i = 0;
908
Guido van Rossum4c08d552000-03-10 22:55:18 +0000909 if (dir > 0) {
910 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000911 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000912 last -= n;
913 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000914 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000915 return (long)i;
916 }
917 else {
918 int j;
919
920 if (n == 0 && i <= last)
921 return (long)last;
922 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000923 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000924 return (long)j;
925 }
926
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000927 return -1;
928}
929
930
931static char find__doc__[] =
932"S.find(sub [,start [,end]]) -> int\n\
933\n\
934Return the lowest index in S where substring sub is found,\n\
935such that sub is contained within s[start,end]. Optional\n\
936arguments start and end are interpreted as in slice notation.\n\
937\n\
938Return -1 on failure.";
939
940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000941string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000942{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000943 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000944 if (result == -2)
945 return NULL;
946 return PyInt_FromLong(result);
947}
948
949
950static char index__doc__[] =
951"S.index(sub [,start [,end]]) -> int\n\
952\n\
953Like S.find() but raise ValueError when the substring is not found.";
954
955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000956string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000957{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000958 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959 if (result == -2)
960 return NULL;
961 if (result == -1) {
962 PyErr_SetString(PyExc_ValueError,
963 "substring not found in string.index");
964 return NULL;
965 }
966 return PyInt_FromLong(result);
967}
968
969
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970static char rfind__doc__[] =
971"S.rfind(sub [,start [,end]]) -> int\n\
972\n\
973Return the highest index in S where substring sub is found,\n\
974such that sub is contained within s[start,end]. Optional\n\
975arguments start and end are interpreted as in slice notation.\n\
976\n\
977Return -1 on failure.";
978
979static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000980string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000981{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000982 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000983 if (result == -2)
984 return NULL;
985 return PyInt_FromLong(result);
986}
987
988
989static char rindex__doc__[] =
990"S.rindex(sub [,start [,end]]) -> int\n\
991\n\
992Like S.rfind() but raise ValueError when the substring is not found.";
993
994static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000995string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000996{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000997 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000998 if (result == -2)
999 return NULL;
1000 if (result == -1) {
1001 PyErr_SetString(PyExc_ValueError,
1002 "substring not found in string.rindex");
1003 return NULL;
1004 }
1005 return PyInt_FromLong(result);
1006}
1007
1008
1009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001010do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001011{
1012 char *s = PyString_AS_STRING(self);
1013 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001014
Guido van Rossum43713e52000-02-29 13:59:29 +00001015 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 return NULL;
1017
1018 i = 0;
1019 if (striptype != RIGHTSTRIP) {
1020 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1021 i++;
1022 }
1023 }
1024
1025 j = len;
1026 if (striptype != LEFTSTRIP) {
1027 do {
1028 j--;
1029 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1030 j++;
1031 }
1032
1033 if (i == 0 && j == len) {
1034 Py_INCREF(self);
1035 return (PyObject*)self;
1036 }
1037 else
1038 return PyString_FromStringAndSize(s+i, j-i);
1039}
1040
1041
1042static char strip__doc__[] =
1043"S.strip() -> string\n\
1044\n\
1045Return a copy of the string S with leading and trailing\n\
1046whitespace removed.";
1047
1048static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001049string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050{
1051 return do_strip(self, args, BOTHSTRIP);
1052}
1053
1054
1055static char lstrip__doc__[] =
1056"S.lstrip() -> string\n\
1057\n\
1058Return a copy of the string S with leading whitespace removed.";
1059
1060static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001061string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001062{
1063 return do_strip(self, args, LEFTSTRIP);
1064}
1065
1066
1067static char rstrip__doc__[] =
1068"S.rstrip() -> string\n\
1069\n\
1070Return a copy of the string S with trailing whitespace removed.";
1071
1072static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001073string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001074{
1075 return do_strip(self, args, RIGHTSTRIP);
1076}
1077
1078
1079static char lower__doc__[] =
1080"S.lower() -> string\n\
1081\n\
1082Return a copy of the string S converted to lowercase.";
1083
1084static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001085string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086{
1087 char *s = PyString_AS_STRING(self), *s_new;
1088 int i, n = PyString_GET_SIZE(self);
1089 PyObject *new;
1090
Guido van Rossum43713e52000-02-29 13:59:29 +00001091 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 return NULL;
1093 new = PyString_FromStringAndSize(NULL, n);
1094 if (new == NULL)
1095 return NULL;
1096 s_new = PyString_AsString(new);
1097 for (i = 0; i < n; i++) {
1098 int c = Py_CHARMASK(*s++);
1099 if (isupper(c)) {
1100 *s_new = tolower(c);
1101 } else
1102 *s_new = c;
1103 s_new++;
1104 }
1105 return new;
1106}
1107
1108
1109static char upper__doc__[] =
1110"S.upper() -> string\n\
1111\n\
1112Return a copy of the string S converted to uppercase.";
1113
1114static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001115string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116{
1117 char *s = PyString_AS_STRING(self), *s_new;
1118 int i, n = PyString_GET_SIZE(self);
1119 PyObject *new;
1120
Guido van Rossum43713e52000-02-29 13:59:29 +00001121 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122 return NULL;
1123 new = PyString_FromStringAndSize(NULL, n);
1124 if (new == NULL)
1125 return NULL;
1126 s_new = PyString_AsString(new);
1127 for (i = 0; i < n; i++) {
1128 int c = Py_CHARMASK(*s++);
1129 if (islower(c)) {
1130 *s_new = toupper(c);
1131 } else
1132 *s_new = c;
1133 s_new++;
1134 }
1135 return new;
1136}
1137
1138
Guido van Rossum4c08d552000-03-10 22:55:18 +00001139static char title__doc__[] =
1140"S.title() -> string\n\
1141\n\
1142Return a titlecased version of S, i.e. words start with uppercase\n\
1143characters, all remaining cased characters have lowercase.";
1144
1145static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001146string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001147{
1148 char *s = PyString_AS_STRING(self), *s_new;
1149 int i, n = PyString_GET_SIZE(self);
1150 int previous_is_cased = 0;
1151 PyObject *new;
1152
1153 if (!PyArg_ParseTuple(args, ":title"))
1154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 for (i = 0; i < n; i++) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c)) {
1162 if (!previous_is_cased)
1163 c = toupper(c);
1164 previous_is_cased = 1;
1165 } else if (isupper(c)) {
1166 if (previous_is_cased)
1167 c = tolower(c);
1168 previous_is_cased = 1;
1169 } else
1170 previous_is_cased = 0;
1171 *s_new++ = c;
1172 }
1173 return new;
1174}
1175
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176static char capitalize__doc__[] =
1177"S.capitalize() -> string\n\
1178\n\
1179Return a copy of the string S with only its first character\n\
1180capitalized.";
1181
1182static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001183string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001184{
1185 char *s = PyString_AS_STRING(self), *s_new;
1186 int i, n = PyString_GET_SIZE(self);
1187 PyObject *new;
1188
Guido van Rossum43713e52000-02-29 13:59:29 +00001189 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190 return NULL;
1191 new = PyString_FromStringAndSize(NULL, n);
1192 if (new == NULL)
1193 return NULL;
1194 s_new = PyString_AsString(new);
1195 if (0 < n) {
1196 int c = Py_CHARMASK(*s++);
1197 if (islower(c))
1198 *s_new = toupper(c);
1199 else
1200 *s_new = c;
1201 s_new++;
1202 }
1203 for (i = 1; i < n; i++) {
1204 int c = Py_CHARMASK(*s++);
1205 if (isupper(c))
1206 *s_new = tolower(c);
1207 else
1208 *s_new = c;
1209 s_new++;
1210 }
1211 return new;
1212}
1213
1214
1215static char count__doc__[] =
1216"S.count(sub[, start[, end]]) -> int\n\
1217\n\
1218Return the number of occurrences of substring sub in string\n\
1219S[start:end]. Optional arguments start and end are\n\
1220interpreted as in slice notation.";
1221
1222static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001223string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001225 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 int len = PyString_GET_SIZE(self), n;
1227 int i = 0, last = INT_MAX;
1228 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230
Guido van Rossumc6821402000-05-08 14:08:05 +00001231 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1232 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001234
Guido van Rossum4c08d552000-03-10 22:55:18 +00001235 if (PyString_Check(subobj)) {
1236 sub = PyString_AS_STRING(subobj);
1237 n = PyString_GET_SIZE(subobj);
1238 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001239 else if (PyUnicode_Check(subobj)) {
1240 int count;
1241 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1242 if (count == -1)
1243 return NULL;
1244 else
1245 return PyInt_FromLong((long) count);
1246 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001247 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1248 return NULL;
1249
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250 if (last > len)
1251 last = len;
1252 if (last < 0)
1253 last += len;
1254 if (last < 0)
1255 last = 0;
1256 if (i < 0)
1257 i += len;
1258 if (i < 0)
1259 i = 0;
1260 m = last + 1 - n;
1261 if (n == 0)
1262 return PyInt_FromLong((long) (m-i));
1263
1264 r = 0;
1265 while (i < m) {
1266 if (!memcmp(s+i, sub, n)) {
1267 r++;
1268 i += n;
1269 } else {
1270 i++;
1271 }
1272 }
1273 return PyInt_FromLong((long) r);
1274}
1275
1276
1277static char swapcase__doc__[] =
1278"S.swapcase() -> string\n\
1279\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281converted to lowercase and vice versa.";
1282
1283static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001284string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285{
1286 char *s = PyString_AS_STRING(self), *s_new;
1287 int i, n = PyString_GET_SIZE(self);
1288 PyObject *new;
1289
Guido van Rossum43713e52000-02-29 13:59:29 +00001290 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 return NULL;
1292 new = PyString_FromStringAndSize(NULL, n);
1293 if (new == NULL)
1294 return NULL;
1295 s_new = PyString_AsString(new);
1296 for (i = 0; i < n; i++) {
1297 int c = Py_CHARMASK(*s++);
1298 if (islower(c)) {
1299 *s_new = toupper(c);
1300 }
1301 else if (isupper(c)) {
1302 *s_new = tolower(c);
1303 }
1304 else
1305 *s_new = c;
1306 s_new++;
1307 }
1308 return new;
1309}
1310
1311
1312static char translate__doc__[] =
1313"S.translate(table [,deletechars]) -> string\n\
1314\n\
1315Return a copy of the string S, where all characters occurring\n\
1316in the optional argument deletechars are removed, and the\n\
1317remaining characters have been mapped through the given\n\
1318translation table, which must be a string of length 256.";
1319
1320static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001321string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001322{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323 register char *input, *output;
1324 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325 register int i, c, changed = 0;
1326 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001327 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328 int inlen, tablen, dellen = 0;
1329 PyObject *result;
1330 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 if (!PyArg_ParseTuple(args, "O|O:translate",
1334 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336
1337 if (PyString_Check(tableobj)) {
1338 table1 = PyString_AS_STRING(tableobj);
1339 tablen = PyString_GET_SIZE(tableobj);
1340 }
1341 else if (PyUnicode_Check(tableobj)) {
1342 /* Unicode .translate() does not support the deletechars
1343 parameter; instead a mapping to None will cause characters
1344 to be deleted. */
1345 if (delobj != NULL) {
1346 PyErr_SetString(PyExc_TypeError,
1347 "deletions are implemented differently for unicode");
1348 return NULL;
1349 }
1350 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1351 }
1352 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354
1355 if (delobj != NULL) {
1356 if (PyString_Check(delobj)) {
1357 del_table = PyString_AS_STRING(delobj);
1358 dellen = PyString_GET_SIZE(delobj);
1359 }
1360 else if (PyUnicode_Check(delobj)) {
1361 PyErr_SetString(PyExc_TypeError,
1362 "deletions are implemented differently for unicode");
1363 return NULL;
1364 }
1365 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1366 return NULL;
1367
1368 if (tablen != 256) {
1369 PyErr_SetString(PyExc_ValueError,
1370 "translation table must be 256 characters long");
1371 return NULL;
1372 }
1373 }
1374 else {
1375 del_table = NULL;
1376 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001377 }
1378
1379 table = table1;
1380 inlen = PyString_Size(input_obj);
1381 result = PyString_FromStringAndSize((char *)NULL, inlen);
1382 if (result == NULL)
1383 return NULL;
1384 output_start = output = PyString_AsString(result);
1385 input = PyString_AsString(input_obj);
1386
1387 if (dellen == 0) {
1388 /* If no deletions are required, use faster code */
1389 for (i = inlen; --i >= 0; ) {
1390 c = Py_CHARMASK(*input++);
1391 if (Py_CHARMASK((*output++ = table[c])) != c)
1392 changed = 1;
1393 }
1394 if (changed)
1395 return result;
1396 Py_DECREF(result);
1397 Py_INCREF(input_obj);
1398 return input_obj;
1399 }
1400
1401 for (i = 0; i < 256; i++)
1402 trans_table[i] = Py_CHARMASK(table[i]);
1403
1404 for (i = 0; i < dellen; i++)
1405 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1406
1407 for (i = inlen; --i >= 0; ) {
1408 c = Py_CHARMASK(*input++);
1409 if (trans_table[c] != -1)
1410 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1411 continue;
1412 changed = 1;
1413 }
1414 if (!changed) {
1415 Py_DECREF(result);
1416 Py_INCREF(input_obj);
1417 return input_obj;
1418 }
1419 /* Fix the size of the resulting string */
1420 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1421 return NULL;
1422 return result;
1423}
1424
1425
1426/* What follows is used for implementing replace(). Perry Stoll. */
1427
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns the index of that occurrence within MEM.
  Returns -1 when there is no occurrence, which includes the case where
  PAT is longer than MEM.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot start beyond this index and still fit in MEM */
	const int last_start = len - pat_len;
	register int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1453
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the left: mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
  gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	register int where;
	int hits;

	for (hits = 0; len >= 0; hits++) {
		where = mymemfind(mem, len, pat, pat_len);
		if (where == -1)
			break;
		/* continue the scan right behind this occurrence */
		mem += where + pat_len;
		len -= where + pat_len;
	}
	return hits;
}
1477
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced, counting
   from the left; a negative COUNT means "all of them".

   If STR is empty, if PAT is longer than STR, or if no replacement
   takes place, the original string is returned.  Otherwise, a new
   string is allocated here with PyMem_MALLOC and returned; the caller
   has to release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Refuse results whose length does not fit in an int rather than
	   letting the computation below wrap and under-allocate. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result can be empty (everything replaced by nothing).  A
	   zero-byte request may yield NULL, which the caller would take
	   for an out-of-memory error, so always ask for at least a byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1555
1556
1557static char replace__doc__[] =
1558"S.replace (old, new[, maxsplit]) -> string\n\
1559\n\
1560Return a copy of string S with all occurrences of substring\n\
1561old replaced by new. If the optional argument maxsplit is\n\
1562given, only the first maxsplit occurrences are replaced.";
1563
1564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001565string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 const char *str = PyString_AS_STRING(self), *sub, *repl;
1568 char *new_s;
1569 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1570 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573
Guido van Rossum4c08d552000-03-10 22:55:18 +00001574 if (!PyArg_ParseTuple(args, "OO|i:replace",
1575 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001577
1578 if (PyString_Check(subobj)) {
1579 sub = PyString_AS_STRING(subobj);
1580 sub_len = PyString_GET_SIZE(subobj);
1581 }
1582 else if (PyUnicode_Check(subobj))
1583 return PyUnicode_Replace((PyObject *)self,
1584 subobj, replobj, count);
1585 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1586 return NULL;
1587
1588 if (PyString_Check(replobj)) {
1589 repl = PyString_AS_STRING(replobj);
1590 repl_len = PyString_GET_SIZE(replobj);
1591 }
1592 else if (PyUnicode_Check(replobj))
1593 return PyUnicode_Replace((PyObject *)self,
1594 subobj, replobj, count);
1595 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1596 return NULL;
1597
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001598 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001599 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 return NULL;
1601 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 if (new_s == NULL) {
1604 PyErr_NoMemory();
1605 return NULL;
1606 }
1607 if (out_len == -1) {
1608 /* we're returning another reference to self */
1609 new = (PyObject*)self;
1610 Py_INCREF(new);
1611 }
1612 else {
1613 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001614 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 }
1616 return new;
1617}
1618
1619
1620static char startswith__doc__[] =
1621"S.startswith(prefix[, start[, end]]) -> int\n\
1622\n\
1623Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1624optional start, test S beginning at that position. With optional end, stop\n\
1625comparing S at that position.";
1626
1627static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001628string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001630 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001632 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 int plen;
1634 int start = 0;
1635 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001636 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637
Guido van Rossumc6821402000-05-08 14:08:05 +00001638 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1639 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001640 return NULL;
1641 if (PyString_Check(subobj)) {
1642 prefix = PyString_AS_STRING(subobj);
1643 plen = PyString_GET_SIZE(subobj);
1644 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001645 else if (PyUnicode_Check(subobj)) {
1646 int rc;
1647 rc = PyUnicode_Tailmatch((PyObject *)self,
1648 subobj, start, end, -1);
1649 if (rc == -1)
1650 return NULL;
1651 else
1652 return PyInt_FromLong((long) rc);
1653 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 return NULL;
1656
1657 /* adopt Java semantics for index out of range. it is legal for
1658 * offset to be == plen, but this only returns true if prefix is
1659 * the empty string.
1660 */
1661 if (start < 0 || start+plen > len)
1662 return PyInt_FromLong(0);
1663
1664 if (!memcmp(str+start, prefix, plen)) {
1665 /* did the match end after the specified end? */
1666 if (end < 0)
1667 return PyInt_FromLong(1);
1668 else if (end - start < plen)
1669 return PyInt_FromLong(0);
1670 else
1671 return PyInt_FromLong(1);
1672 }
1673 else return PyInt_FromLong(0);
1674}
1675
1676
1677static char endswith__doc__[] =
1678"S.endswith(suffix[, start[, end]]) -> int\n\
1679\n\
1680Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1681optional start, test S beginning at that position. With optional end, stop\n\
1682comparing S at that position.";
1683
1684static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001685string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 const char* suffix;
1690 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 int start = 0;
1692 int end = -1;
1693 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695
Guido van Rossumc6821402000-05-08 14:08:05 +00001696 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1697 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 return NULL;
1699 if (PyString_Check(subobj)) {
1700 suffix = PyString_AS_STRING(subobj);
1701 slen = PyString_GET_SIZE(subobj);
1702 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001703 else if (PyUnicode_Check(subobj)) {
1704 int rc;
1705 rc = PyUnicode_Tailmatch((PyObject *)self,
1706 subobj, start, end, +1);
1707 if (rc == -1)
1708 return NULL;
1709 else
1710 return PyInt_FromLong((long) rc);
1711 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713 return NULL;
1714
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716 return PyInt_FromLong(0);
1717
1718 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001719 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 return PyInt_FromLong(1);
1723 else return PyInt_FromLong(0);
1724}
1725
1726
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001727static char encode__doc__[] =
1728"S.encode([encoding[,errors]]) -> string\n\
1729\n\
1730Return an encoded string version of S. Default encoding is the current\n\
1731default string encoding. errors may be given to set a different error\n\
1732handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1733a ValueError. Other possible values are 'ignore' and 'replace'.";
1734
1735static PyObject *
1736string_encode(PyStringObject *self, PyObject *args)
1737{
1738 char *encoding = NULL;
1739 char *errors = NULL;
1740 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1741 return NULL;
1742 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1743}
1744
1745
Guido van Rossum4c08d552000-03-10 22:55:18 +00001746static char expandtabs__doc__[] =
1747"S.expandtabs([tabsize]) -> string\n\
1748\n\
1749Return a copy of S where all tab characters are expanded using spaces.\n\
1750If tabsize is not given, a tab size of 8 characters is assumed.";
1751
1752static PyObject*
1753string_expandtabs(PyStringObject *self, PyObject *args)
1754{
1755 const char *e, *p;
1756 char *q;
1757 int i, j;
1758 PyObject *u;
1759 int tabsize = 8;
1760
1761 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1762 return NULL;
1763
Thomas Wouters7e474022000-07-16 12:04:32 +00001764 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 i = j = 0;
1766 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1767 for (p = PyString_AS_STRING(self); p < e; p++)
1768 if (*p == '\t') {
1769 if (tabsize > 0)
1770 j += tabsize - (j % tabsize);
1771 }
1772 else {
1773 j++;
1774 if (*p == '\n' || *p == '\r') {
1775 i += j;
1776 j = 0;
1777 }
1778 }
1779
1780 /* Second pass: create output string and fill it */
1781 u = PyString_FromStringAndSize(NULL, i + j);
1782 if (!u)
1783 return NULL;
1784
1785 j = 0;
1786 q = PyString_AS_STRING(u);
1787
1788 for (p = PyString_AS_STRING(self); p < e; p++)
1789 if (*p == '\t') {
1790 if (tabsize > 0) {
1791 i = tabsize - (j % tabsize);
1792 j += i;
1793 while (i--)
1794 *q++ = ' ';
1795 }
1796 }
1797 else {
1798 j++;
1799 *q++ = *p;
1800 if (*p == '\n' || *p == '\r')
1801 j = 0;
1802 }
1803
1804 return u;
1805}
1806
1807static
1808PyObject *pad(PyStringObject *self,
1809 int left,
1810 int right,
1811 char fill)
1812{
1813 PyObject *u;
1814
1815 if (left < 0)
1816 left = 0;
1817 if (right < 0)
1818 right = 0;
1819
1820 if (left == 0 && right == 0) {
1821 Py_INCREF(self);
1822 return (PyObject *)self;
1823 }
1824
1825 u = PyString_FromStringAndSize(NULL,
1826 left + PyString_GET_SIZE(self) + right);
1827 if (u) {
1828 if (left)
1829 memset(PyString_AS_STRING(u), fill, left);
1830 memcpy(PyString_AS_STRING(u) + left,
1831 PyString_AS_STRING(self),
1832 PyString_GET_SIZE(self));
1833 if (right)
1834 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1835 fill, right);
1836 }
1837
1838 return u;
1839}
1840
1841static char ljust__doc__[] =
1842"S.ljust(width) -> string\n\
1843\n\
1844Return S left justified in a string of length width. Padding is\n\
1845done using spaces.";
1846
1847static PyObject *
1848string_ljust(PyStringObject *self, PyObject *args)
1849{
1850 int width;
1851 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1852 return NULL;
1853
1854 if (PyString_GET_SIZE(self) >= width) {
1855 Py_INCREF(self);
1856 return (PyObject*) self;
1857 }
1858
1859 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1860}
1861
1862
1863static char rjust__doc__[] =
1864"S.rjust(width) -> string\n\
1865\n\
1866Return S right justified in a string of length width. Padding is\n\
1867done using spaces.";
1868
1869static PyObject *
1870string_rjust(PyStringObject *self, PyObject *args)
1871{
1872 int width;
1873 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1874 return NULL;
1875
1876 if (PyString_GET_SIZE(self) >= width) {
1877 Py_INCREF(self);
1878 return (PyObject*) self;
1879 }
1880
1881 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1882}
1883
1884
1885static char center__doc__[] =
1886"S.center(width) -> string\n\
1887\n\
1888Return S centered in a string of length width. Padding is done\n\
1889using spaces.";
1890
1891static PyObject *
1892string_center(PyStringObject *self, PyObject *args)
1893{
1894 int marg, left;
1895 int width;
1896
1897 if (!PyArg_ParseTuple(args, "i:center", &width))
1898 return NULL;
1899
1900 if (PyString_GET_SIZE(self) >= width) {
1901 Py_INCREF(self);
1902 return (PyObject*) self;
1903 }
1904
1905 marg = width - PyString_GET_SIZE(self);
1906 left = marg / 2 + (marg & width & 1);
1907
1908 return pad(self, left, marg - left, ' ');
1909}
1910
1911#if 0
1912static char zfill__doc__[] =
1913"S.zfill(width) -> string\n\
1914\n\
1915Pad a numeric string x with zeros on the left, to fill a field\n\
1916of the specified width. The string x is never truncated.";
1917
1918static PyObject *
1919string_zfill(PyStringObject *self, PyObject *args)
1920{
1921 int fill;
1922 PyObject *u;
1923 char *str;
1924
1925 int width;
1926 if (!PyArg_ParseTuple(args, "i:zfill", &width))
1927 return NULL;
1928
1929 if (PyString_GET_SIZE(self) >= width) {
1930 Py_INCREF(self);
1931 return (PyObject*) self;
1932 }
1933
1934 fill = width - PyString_GET_SIZE(self);
1935
1936 u = pad(self, fill, 0, '0');
1937 if (u == NULL)
1938 return NULL;
1939
1940 str = PyString_AS_STRING(u);
1941 if (str[fill] == '+' || str[fill] == '-') {
1942 /* move sign to beginning of string */
1943 str[0] = str[fill];
1944 str[fill] = '0';
1945 }
1946
1947 return u;
1948}
1949#endif
1950
1951static char isspace__doc__[] =
1952"S.isspace() -> int\n\
1953\n\
1954Return 1 if there are only whitespace characters in S,\n\
19550 otherwise.";
1956
1957static PyObject*
1958string_isspace(PyStringObject *self, PyObject *args)
1959{
Fred Drakeba096332000-07-09 07:04:36 +00001960 register const unsigned char *p
1961 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001962 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001963
1964 if (!PyArg_NoArgs(args))
1965 return NULL;
1966
1967 /* Shortcut for single character strings */
1968 if (PyString_GET_SIZE(self) == 1 &&
1969 isspace(*p))
1970 return PyInt_FromLong(1);
1971
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001972 /* Special case for empty strings */
1973 if (PyString_GET_SIZE(self) == 0)
1974 return PyInt_FromLong(0);
1975
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 e = p + PyString_GET_SIZE(self);
1977 for (; p < e; p++) {
1978 if (!isspace(*p))
1979 return PyInt_FromLong(0);
1980 }
1981 return PyInt_FromLong(1);
1982}
1983
1984
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001985static char isalpha__doc__[] =
1986"S.isalpha() -> int\n\
1987\n\
1988Return 1 if all characters in S are alphabetic\n\
1989and there is at least one character in S, 0 otherwise.";
1990
1991static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001992string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001993{
Fred Drakeba096332000-07-09 07:04:36 +00001994 register const unsigned char *p
1995 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001996 register const unsigned char *e;
1997
1998 if (!PyArg_NoArgs(args))
1999 return NULL;
2000
2001 /* Shortcut for single character strings */
2002 if (PyString_GET_SIZE(self) == 1 &&
2003 isalpha(*p))
2004 return PyInt_FromLong(1);
2005
2006 /* Special case for empty strings */
2007 if (PyString_GET_SIZE(self) == 0)
2008 return PyInt_FromLong(0);
2009
2010 e = p + PyString_GET_SIZE(self);
2011 for (; p < e; p++) {
2012 if (!isalpha(*p))
2013 return PyInt_FromLong(0);
2014 }
2015 return PyInt_FromLong(1);
2016}
2017
2018
2019static char isalnum__doc__[] =
2020"S.isalnum() -> int\n\
2021\n\
2022Return 1 if all characters in S are alphanumeric\n\
2023and there is at least one character in S, 0 otherwise.";
2024
2025static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002026string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002027{
Fred Drakeba096332000-07-09 07:04:36 +00002028 register const unsigned char *p
2029 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002030 register const unsigned char *e;
2031
2032 if (!PyArg_NoArgs(args))
2033 return NULL;
2034
2035 /* Shortcut for single character strings */
2036 if (PyString_GET_SIZE(self) == 1 &&
2037 isalnum(*p))
2038 return PyInt_FromLong(1);
2039
2040 /* Special case for empty strings */
2041 if (PyString_GET_SIZE(self) == 0)
2042 return PyInt_FromLong(0);
2043
2044 e = p + PyString_GET_SIZE(self);
2045 for (; p < e; p++) {
2046 if (!isalnum(*p))
2047 return PyInt_FromLong(0);
2048 }
2049 return PyInt_FromLong(1);
2050}
2051
2052
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053static char isdigit__doc__[] =
2054"S.isdigit() -> int\n\
2055\n\
2056Return 1 if there are only digit characters in S,\n\
20570 otherwise.";
2058
2059static PyObject*
2060string_isdigit(PyStringObject *self, PyObject *args)
2061{
Fred Drakeba096332000-07-09 07:04:36 +00002062 register const unsigned char *p
2063 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002064 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (!PyArg_NoArgs(args))
2067 return NULL;
2068
2069 /* Shortcut for single character strings */
2070 if (PyString_GET_SIZE(self) == 1 &&
2071 isdigit(*p))
2072 return PyInt_FromLong(1);
2073
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002074 /* Special case for empty strings */
2075 if (PyString_GET_SIZE(self) == 0)
2076 return PyInt_FromLong(0);
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 e = p + PyString_GET_SIZE(self);
2079 for (; p < e; p++) {
2080 if (!isdigit(*p))
2081 return PyInt_FromLong(0);
2082 }
2083 return PyInt_FromLong(1);
2084}
2085
2086
2087static char islower__doc__[] =
2088"S.islower() -> int\n\
2089\n\
2090Return 1 if all cased characters in S are lowercase and there is\n\
2091at least one cased character in S, 0 otherwise.";
2092
2093static PyObject*
2094string_islower(PyStringObject *self, PyObject *args)
2095{
Fred Drakeba096332000-07-09 07:04:36 +00002096 register const unsigned char *p
2097 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002098 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 int cased;
2100
2101 if (!PyArg_NoArgs(args))
2102 return NULL;
2103
2104 /* Shortcut for single character strings */
2105 if (PyString_GET_SIZE(self) == 1)
2106 return PyInt_FromLong(islower(*p) != 0);
2107
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002108 /* Special case for empty strings */
2109 if (PyString_GET_SIZE(self) == 0)
2110 return PyInt_FromLong(0);
2111
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112 e = p + PyString_GET_SIZE(self);
2113 cased = 0;
2114 for (; p < e; p++) {
2115 if (isupper(*p))
2116 return PyInt_FromLong(0);
2117 else if (!cased && islower(*p))
2118 cased = 1;
2119 }
2120 return PyInt_FromLong(cased);
2121}
2122
2123
2124static char isupper__doc__[] =
2125"S.isupper() -> int\n\
2126\n\
2127Return 1 if all cased characters in S are uppercase and there is\n\
2128at least one cased character in S, 0 otherwise.";
2129
2130static PyObject*
2131string_isupper(PyStringObject *self, PyObject *args)
2132{
Fred Drakeba096332000-07-09 07:04:36 +00002133 register const unsigned char *p
2134 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002135 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136 int cased;
2137
2138 if (!PyArg_NoArgs(args))
2139 return NULL;
2140
2141 /* Shortcut for single character strings */
2142 if (PyString_GET_SIZE(self) == 1)
2143 return PyInt_FromLong(isupper(*p) != 0);
2144
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002145 /* Special case for empty strings */
2146 if (PyString_GET_SIZE(self) == 0)
2147 return PyInt_FromLong(0);
2148
Guido van Rossum4c08d552000-03-10 22:55:18 +00002149 e = p + PyString_GET_SIZE(self);
2150 cased = 0;
2151 for (; p < e; p++) {
2152 if (islower(*p))
2153 return PyInt_FromLong(0);
2154 else if (!cased && isupper(*p))
2155 cased = 1;
2156 }
2157 return PyInt_FromLong(cased);
2158}
2159
2160
2161static char istitle__doc__[] =
2162"S.istitle() -> int\n\
2163\n\
2164Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2165may only follow uncased characters and lowercase characters only cased\n\
2166ones. Return 0 otherwise.";
2167
2168static PyObject*
2169string_istitle(PyStringObject *self, PyObject *args)
2170{
Fred Drakeba096332000-07-09 07:04:36 +00002171 register const unsigned char *p
2172 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002173 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 int cased, previous_is_cased;
2175
2176 if (!PyArg_NoArgs(args))
2177 return NULL;
2178
2179 /* Shortcut for single character strings */
2180 if (PyString_GET_SIZE(self) == 1)
2181 return PyInt_FromLong(isupper(*p) != 0);
2182
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002183 /* Special case for empty strings */
2184 if (PyString_GET_SIZE(self) == 0)
2185 return PyInt_FromLong(0);
2186
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187 e = p + PyString_GET_SIZE(self);
2188 cased = 0;
2189 previous_is_cased = 0;
2190 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002191 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192
2193 if (isupper(ch)) {
2194 if (previous_is_cased)
2195 return PyInt_FromLong(0);
2196 previous_is_cased = 1;
2197 cased = 1;
2198 }
2199 else if (islower(ch)) {
2200 if (!previous_is_cased)
2201 return PyInt_FromLong(0);
2202 previous_is_cased = 1;
2203 cased = 1;
2204 }
2205 else
2206 previous_is_cased = 0;
2207 }
2208 return PyInt_FromLong(cased);
2209}
2210
2211
2212static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002213"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214\n\
2215Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002216Line breaks are not included in the resulting list unless keepends\n\
2217is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218
2219#define SPLIT_APPEND(data, left, right) \
2220 str = PyString_FromStringAndSize(data + left, right - left); \
2221 if (!str) \
2222 goto onError; \
2223 if (PyList_Append(list, str)) { \
2224 Py_DECREF(str); \
2225 goto onError; \
2226 } \
2227 else \
2228 Py_DECREF(str);
2229
2230static PyObject*
2231string_splitlines(PyStringObject *self, PyObject *args)
2232{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 register int i;
2234 register int j;
2235 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002236 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237 PyObject *list;
2238 PyObject *str;
2239 char *data;
2240
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002241 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 return NULL;
2243
2244 data = PyString_AS_STRING(self);
2245 len = PyString_GET_SIZE(self);
2246
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 list = PyList_New(0);
2248 if (!list)
2249 goto onError;
2250
2251 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002252 int eol;
2253
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254 /* Find a line and append it */
2255 while (i < len && data[i] != '\n' && data[i] != '\r')
2256 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257
2258 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002259 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 if (i < len) {
2261 if (data[i] == '\r' && i + 1 < len &&
2262 data[i+1] == '\n')
2263 i += 2;
2264 else
2265 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002266 if (keepends)
2267 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002269 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270 j = i;
2271 }
2272 if (j < len) {
2273 SPLIT_APPEND(data, j, len);
2274 }
2275
2276 return list;
2277
2278 onError:
2279 Py_DECREF(list);
2280 return NULL;
2281}
2282
2283#undef SPLIT_APPEND
2284
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002285
/* Method table for string objects; string_getattr() hands it to
 * Py_FindMethod() to resolve attribute names.
 * Each entry is {name, C function, flags, doc string}.  The third
 * field is the calling-convention flag: the is*() predicates are
 * registered with 0 and validate their arguments with PyArg_NoArgs(),
 * every other method is registered with 1 (the ones visible nearby
 * parse an argument tuple with PyArg_ParseTuple()).
 */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, 1, join__doc__},
    {"split", (PyCFunction)string_split, 1, split__doc__},
    {"lower", (PyCFunction)string_lower, 1, lower__doc__},
    {"upper", (PyCFunction)string_upper, 1, upper__doc__},
    {"islower", (PyCFunction)string_islower, 0, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
    {"count", (PyCFunction)string_count, 1, count__doc__},
    {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
    {"find", (PyCFunction)string_find, 1, find__doc__},
    {"index", (PyCFunction)string_index, 1, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, 1, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
    {"strip", (PyCFunction)string_strip, 1, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
    {"translate", (PyCFunction)string_translate, 1, translate__doc__},
    {"title", (PyCFunction)string_title, 1, title__doc__},
    {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
    {"center", (PyCFunction)string_center, 1, center__doc__},
    {"encode", (PyCFunction)string_encode, 1, encode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
#if 0
    /* disabled together with the string_zfill() implementation */
    {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
#endif
    {NULL, NULL} /* sentinel */
};
2327
2328static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002329string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330{
2331 return Py_FindMethod(string_methods, (PyObject*)s, name);
2332}
2333
2334
/* Type object for strings.  The initializer is positional: each value
 * fills the slot named in the trailing comment on its line, so the
 * order of the entries must not be changed.
 */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,
    "string",
    sizeof(PyStringObject),
    sizeof(char),
    (destructor)string_dealloc, /*tp_dealloc*/
    (printfunc)string_print, /*tp_print*/
    (getattrfunc)string_getattr, /*tp_getattr*/
    0, /*tp_setattr*/
    (cmpfunc)string_compare, /*tp_compare*/
    (reprfunc)string_repr, /*tp_repr*/
    0, /*tp_as_number*/
    &string_as_sequence, /*tp_as_sequence*/
    0, /*tp_as_mapping*/
    (hashfunc)string_hash, /*tp_hash*/
    0, /*tp_call*/
    0, /*tp_str*/
    0, /*tp_getattro*/
    0, /*tp_setattro*/
    &string_as_buffer, /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT, /*tp_flags*/
    0, /*tp_doc*/
};
2359
2360void
Fred Drakeba096332000-07-09 07:04:36 +00002361PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002362{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002363 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002364 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002365 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002366 if (w == NULL || !PyString_Check(*pv)) {
2367 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002368 *pv = NULL;
2369 return;
2370 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002371 v = string_concat((PyStringObject *) *pv, w);
2372 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002373 *pv = v;
2374}
2375
Guido van Rossum013142a1994-08-30 08:19:36 +00002376void
Fred Drakeba096332000-07-09 07:04:36 +00002377PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002378{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002379 PyString_Concat(pv, w);
2380 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002381}
2382
2383
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002384/* The following function breaks the notion that strings are immutable:
2385 it changes the size of a string. We get away with this only if there
2386 is only one module referencing the object. You can also think of it
2387 as creating a new string object and destroying the old one, only
2388 more efficiently. In any case, don't use this if the string may
2389 already be known to some other part of the code... */
2390
2391int
Fred Drakeba096332000-07-09 07:04:36 +00002392_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002393{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002394 register PyObject *v;
2395 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002396 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002397 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 Py_DECREF(v);
2400 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002401 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002402 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002403 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002404#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002405 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002406#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002407 _Py_ForgetReference(v);
2408 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002409 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002410 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002411 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002412 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002413 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002414 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002415 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002416 _Py_NewReference(*pv);
2417 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002418 sv->ob_size = newsize;
2419 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002420 return 0;
2421}
Guido van Rossume5372401993-03-16 12:15:04 +00002422
2423/* Helpers for formatstring */
2424
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002425static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002426getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002427{
2428 int argidx = *p_argidx;
2429 if (argidx < arglen) {
2430 (*p_argidx)++;
2431 if (arglen < 0)
2432 return args;
2433 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002434 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002435 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 PyErr_SetString(PyExc_TypeError,
2437 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002438 return NULL;
2439}
2440
/* Format codes
 * F_LJUST '-'
 * F_SIGN '+'
 * F_BLANK ' '
 * F_ALT '#'
 * F_ZERO '0'
 *
 * Each flag is a distinct bit, so several can be OR-ed together into
 * one `flags` word and tested individually with `&`.
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
2453
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002454static int
Fred Drakeba096332000-07-09 07:04:36 +00002455formatfloat(char *buf, size_t buflen, int flags,
2456 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002457{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002458 /* fmt = '%#.' + `prec` + `type`
2459 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002460 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002461 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002462 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002463 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002464 if (prec < 0)
2465 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002466 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2467 type = 'g';
2468 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002469 /* worst case length calc to ensure no buffer overrun:
2470 fmt = %#.<prec>g
2471 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2472 for any double rep.)
2473 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2474 If prec=0 the effective precision is 1 (the leading digit is
2475 always given), therefore increase by one to 10+prec. */
2476 if (buflen <= (size_t)10 + (size_t)prec) {
2477 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002478 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002479 return -1;
2480 }
Guido van Rossume5372401993-03-16 12:15:04 +00002481 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002482 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002483}
2484
Tim Peters38fd5b62000-09-21 05:43:11 +00002485/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2486 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2487 * Python's regular ints.
2488 * Return value: a new PyString*, or NULL if error.
2489 * . *pbuf is set to point into it,
2490 * *plen set to the # of chars following that.
2491 * Caller must decref it when done using pbuf.
2492 * The string starting at *pbuf is of the form
2493 * "-"? ("0x" | "0X")? digit+
2494 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2495 * set in flags. The case of hex digits will be correct,
2496 * There will be at least prec digits, zero-filled on the left if
2497 * necessary to get that many.
2498 * val object to be converted
2499 * flags bitmask of format flags; only F_ALT is looked at
2500 * prec minimum number of digits; 0-fill on left if needed
2501 * type a character in [duoxX]; u acts the same as d
2502 *
2503 * CAUTION: o, x and X conversions on regular ints can never
2504 * produce a '-' sign, but can for Python's unbounded ints.
2505 */
2506PyObject*
2507_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2508 char **pbuf, int *plen)
2509{
2510 PyObject *result = NULL;
2511 char *buf;
2512 int i;
2513 int sign; /* 1 if '-', else 0 */
2514 int len; /* number of characters */
2515 int numdigits; /* len == numnondigits + numdigits */
2516 int numnondigits = 0;
2517
2518 switch (type) {
2519 case 'd':
2520 case 'u':
2521 result = val->ob_type->tp_str(val);
2522 break;
2523 case 'o':
2524 result = val->ob_type->tp_as_number->nb_oct(val);
2525 break;
2526 case 'x':
2527 case 'X':
2528 numnondigits = 2;
2529 result = val->ob_type->tp_as_number->nb_hex(val);
2530 break;
2531 default:
2532 assert(!"'type' not in [duoxX]");
2533 }
2534 if (!result)
2535 return NULL;
2536
2537 /* To modify the string in-place, there can only be one reference. */
2538 if (result->ob_refcnt != 1) {
2539 PyErr_BadInternalCall();
2540 return NULL;
2541 }
2542 buf = PyString_AsString(result);
2543 len = PyString_Size(result);
2544 if (buf[len-1] == 'L') {
2545 --len;
2546 buf[len] = '\0';
2547 }
2548 sign = buf[0] == '-';
2549 numnondigits += sign;
2550 numdigits = len - numnondigits;
2551 assert(numdigits > 0);
2552
2553 /* Get rid of base marker unless F_ALT */
2554 if ((flags & F_ALT) == 0) {
2555 /* Need to skip 0x, 0X or 0. */
2556 int skipped = 0;
2557 switch (type) {
2558 case 'o':
2559 assert(buf[sign] == '0');
2560 /* If 0 is only digit, leave it alone. */
2561 if (numdigits > 1) {
2562 skipped = 1;
2563 --numdigits;
2564 }
2565 break;
2566 case 'x':
2567 case 'X':
2568 assert(buf[sign] == '0');
2569 assert(buf[sign + 1] == 'x');
2570 skipped = 2;
2571 numnondigits -= 2;
2572 break;
2573 }
2574 if (skipped) {
2575 buf += skipped;
2576 len -= skipped;
2577 if (sign)
2578 buf[0] = '-';
2579 }
2580 assert(len == numnondigits + numdigits);
2581 assert(numdigits > 0);
2582 }
2583
2584 /* Fill with leading zeroes to meet minimum width. */
2585 if (prec > numdigits) {
2586 PyObject *r1 = PyString_FromStringAndSize(NULL,
2587 numnondigits + prec);
2588 char *b1;
2589 if (!r1) {
2590 Py_DECREF(result);
2591 return NULL;
2592 }
2593 b1 = PyString_AS_STRING(r1);
2594 for (i = 0; i < numnondigits; ++i)
2595 *b1++ = *buf++;
2596 for (i = 0; i < prec - numdigits; i++)
2597 *b1++ = '0';
2598 for (i = 0; i < numdigits; i++)
2599 *b1++ = *buf++;
2600 *b1 = '\0';
2601 Py_DECREF(result);
2602 result = r1;
2603 buf = PyString_AS_STRING(result);
2604 len = numnondigits + prec;
2605 }
2606
2607 /* Fix up case for hex conversions. */
2608 switch (type) {
2609 case 'x':
2610 /* Need to convert all upper case letters to lower case. */
2611 for (i = 0; i < len; i++)
2612 if (buf[i] >= 'A' && buf[i] <= 'F')
2613 buf[i] += 'a'-'A';
2614 break;
2615 case 'X':
2616 /* Need to convert 0x to 0X (and -0x to -0X). */
2617 if (buf[sign + 1] == 'x')
2618 buf[sign + 1] = 'X';
2619 break;
2620 }
2621 *pbuf = buf;
2622 *plen = len;
2623 return result;
2624}
2625
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002626static int
Fred Drakeba096332000-07-09 07:04:36 +00002627formatint(char *buf, size_t buflen, int flags,
2628 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002629{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002630 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002631 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2632 + 1 + 1 = 24 */
2633 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002634 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002635 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002636 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002637 if (prec < 0)
2638 prec = 1;
2639 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002640 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002641 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002642 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002643 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002644 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002645 return -1;
2646 }
Guido van Rossume5372401993-03-16 12:15:04 +00002647 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002648 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002649}
2650
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002651static int
Fred Drakeba096332000-07-09 07:04:36 +00002652formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002653{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002654 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002655 if (PyString_Check(v)) {
2656 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002657 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002658 }
2659 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002660 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002661 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002662 }
2663 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002664 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002665}
2666
Guido van Rossum013142a1994-08-30 08:19:36 +00002667
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002677
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002678PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002679PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002680{
2681 char *fmt, *res;
2682 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002683 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002684 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002685 PyObject *dict = NULL;
2686 if (format == NULL || !PyString_Check(format) || args == NULL) {
2687 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002688 return NULL;
2689 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002690 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002691 fmt = PyString_AsString(format);
2692 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002693 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002694 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002695 if (result == NULL)
2696 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002697 res = PyString_AsString(result);
2698 if (PyTuple_Check(args)) {
2699 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002700 argidx = 0;
2701 }
2702 else {
2703 arglen = -1;
2704 argidx = -2;
2705 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002706 if (args->ob_type->tp_as_mapping)
2707 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002708 while (--fmtcnt >= 0) {
2709 if (*fmt != '%') {
2710 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002711 rescnt = fmtcnt + 100;
2712 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002713 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002714 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002715 res = PyString_AsString(result)
2716 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002717 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002718 }
2719 *res++ = *fmt++;
2720 }
2721 else {
2722 /* Got a format specifier */
2723 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002724 int width = -1;
2725 int prec = -1;
2726 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002727 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002728 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002729 PyObject *v = NULL;
2730 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002731 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 int sign;
2733 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002734 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002735 char *fmt_start = fmt;
2736
Guido van Rossumda9c2711996-12-05 21:58:58 +00002737 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002738 if (*fmt == '(') {
2739 char *keystart;
2740 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002741 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002742 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002743
2744 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002745 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002746 "format requires a mapping");
2747 goto error;
2748 }
2749 ++fmt;
2750 --fmtcnt;
2751 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002752 /* Skip over balanced parentheses */
2753 while (pcount > 0 && --fmtcnt >= 0) {
2754 if (*fmt == ')')
2755 --pcount;
2756 else if (*fmt == '(')
2757 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002758 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002759 }
2760 keylen = fmt - keystart - 1;
2761 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002762 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002763 "incomplete format key");
2764 goto error;
2765 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002766 key = PyString_FromStringAndSize(keystart,
2767 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002768 if (key == NULL)
2769 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002770 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002772 args_owned = 0;
2773 }
2774 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002775 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002776 if (args == NULL) {
2777 goto error;
2778 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002779 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002780 arglen = -1;
2781 argidx = -2;
2782 }
Guido van Rossume5372401993-03-16 12:15:04 +00002783 while (--fmtcnt >= 0) {
2784 switch (c = *fmt++) {
2785 case '-': flags |= F_LJUST; continue;
2786 case '+': flags |= F_SIGN; continue;
2787 case ' ': flags |= F_BLANK; continue;
2788 case '#': flags |= F_ALT; continue;
2789 case '0': flags |= F_ZERO; continue;
2790 }
2791 break;
2792 }
2793 if (c == '*') {
2794 v = getnextarg(args, arglen, &argidx);
2795 if (v == NULL)
2796 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002797 if (!PyInt_Check(v)) {
2798 PyErr_SetString(PyExc_TypeError,
2799 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002800 goto error;
2801 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002803 if (width < 0) {
2804 flags |= F_LJUST;
2805 width = -width;
2806 }
Guido van Rossume5372401993-03-16 12:15:04 +00002807 if (--fmtcnt >= 0)
2808 c = *fmt++;
2809 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002810 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002811 width = c - '0';
2812 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002813 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002814 if (!isdigit(c))
2815 break;
2816 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817 PyErr_SetString(
2818 PyExc_ValueError,
2819 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002820 goto error;
2821 }
2822 width = width*10 + (c - '0');
2823 }
2824 }
2825 if (c == '.') {
2826 prec = 0;
2827 if (--fmtcnt >= 0)
2828 c = *fmt++;
2829 if (c == '*') {
2830 v = getnextarg(args, arglen, &argidx);
2831 if (v == NULL)
2832 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002833 if (!PyInt_Check(v)) {
2834 PyErr_SetString(
2835 PyExc_TypeError,
2836 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002837 goto error;
2838 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002839 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002840 if (prec < 0)
2841 prec = 0;
2842 if (--fmtcnt >= 0)
2843 c = *fmt++;
2844 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002845 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002846 prec = c - '0';
2847 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002848 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002849 if (!isdigit(c))
2850 break;
2851 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002852 PyErr_SetString(
2853 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002854 "prec too big");
2855 goto error;
2856 }
2857 prec = prec*10 + (c - '0');
2858 }
2859 }
2860 } /* prec */
2861 if (fmtcnt >= 0) {
2862 if (c == 'h' || c == 'l' || c == 'L') {
2863 size = c;
2864 if (--fmtcnt >= 0)
2865 c = *fmt++;
2866 }
2867 }
2868 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002869 PyErr_SetString(PyExc_ValueError,
2870 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002871 goto error;
2872 }
2873 if (c != '%') {
2874 v = getnextarg(args, arglen, &argidx);
2875 if (v == NULL)
2876 goto error;
2877 }
2878 sign = 0;
2879 fill = ' ';
2880 switch (c) {
2881 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002882 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002883 len = 1;
2884 break;
2885 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002886 case 'r':
2887 if (PyUnicode_Check(v)) {
2888 fmt = fmt_start;
2889 goto unicode;
2890 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002891 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002892 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002893 else
2894 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002895 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002896 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002897 if (!PyString_Check(temp)) {
2898 PyErr_SetString(PyExc_TypeError,
2899 "%s argument has non-string str()");
2900 goto error;
2901 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002902 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002903 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002904 if (prec >= 0 && len > prec)
2905 len = prec;
2906 break;
2907 case 'i':
2908 case 'd':
2909 case 'u':
2910 case 'o':
2911 case 'x':
2912 case 'X':
2913 if (c == 'i')
2914 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002915 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002916 temp = _PyString_FormatLong(v, flags,
2917 prec, c, &pbuf, &len);
2918 if (!temp)
2919 goto error;
2920 /* unbounded ints can always produce
2921 a sign character! */
2922 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002923 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002924 else {
2925 pbuf = formatbuf;
2926 len = formatint(pbuf, sizeof(formatbuf),
2927 flags, prec, c, v);
2928 if (len < 0)
2929 goto error;
2930 /* only d conversion is signed */
2931 sign = c == 'd';
2932 }
2933 if (flags & F_ZERO)
2934 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002935 break;
2936 case 'e':
2937 case 'E':
2938 case 'f':
2939 case 'g':
2940 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002941 pbuf = formatbuf;
2942 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002943 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002944 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002945 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002946 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002947 fill = '0';
2948 break;
2949 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002950 pbuf = formatbuf;
2951 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002952 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002953 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002954 break;
2955 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002956 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00002957 "unsupported format character '%c' (0x%x) "
2958 "at index %i",
2959 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00002960 goto error;
2961 }
2962 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002963 if (*pbuf == '-' || *pbuf == '+') {
2964 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002965 len--;
2966 }
2967 else if (flags & F_SIGN)
2968 sign = '+';
2969 else if (flags & F_BLANK)
2970 sign = ' ';
2971 else
Tim Peters38fd5b62000-09-21 05:43:11 +00002972 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002973 }
2974 if (width < len)
2975 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00002976 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002977 reslen -= rescnt;
2978 rescnt = width + fmtcnt + 100;
2979 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002980 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002981 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002982 res = PyString_AsString(result)
2983 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002984 }
2985 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002986 if (fill != ' ')
2987 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002988 rescnt--;
2989 if (width > len)
2990 width--;
2991 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002992 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2993 assert(pbuf[0] == '0');
2994 assert(pbuf[1] == c);
2995 if (fill != ' ') {
2996 *res++ = *pbuf++;
2997 *res++ = *pbuf++;
2998 }
2999 rescnt -= 2;
3000 width -= 2;
3001 if (width < 0)
3002 width = 0;
3003 len -= 2;
3004 }
3005 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003006 do {
3007 --rescnt;
3008 *res++ = fill;
3009 } while (--width > len);
3010 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003011 if (fill == ' ') {
3012 if (sign)
3013 *res++ = sign;
3014 if ((flags & F_ALT) &&
3015 (c == 'x' || c == 'X')) {
3016 assert(pbuf[0] == '0');
3017 assert(pbuf[1] == c);
3018 *res++ = *pbuf++;
3019 *res++ = *pbuf++;
3020 }
3021 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003022 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003023 res += len;
3024 rescnt -= len;
3025 while (--width >= len) {
3026 --rescnt;
3027 *res++ = ' ';
3028 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003029 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003030 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003031 "not all arguments converted");
3032 goto error;
3033 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003034 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003035 } /* '%' */
3036 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003037 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003038 PyErr_SetString(PyExc_TypeError,
3039 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003040 goto error;
3041 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003042 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003043 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003044 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003045 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003046 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003047
3048 unicode:
3049 if (args_owned) {
3050 Py_DECREF(args);
3051 args_owned = 0;
3052 }
3053 /* Fiddle args right (remove the first argidx-1 arguments) */
3054 --argidx;
3055 if (PyTuple_Check(orig_args) && argidx > 0) {
3056 PyObject *v;
3057 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3058 v = PyTuple_New(n);
3059 if (v == NULL)
3060 goto error;
3061 while (--n >= 0) {
3062 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3063 Py_INCREF(w);
3064 PyTuple_SET_ITEM(v, n, w);
3065 }
3066 args = v;
3067 } else {
3068 Py_INCREF(orig_args);
3069 args = orig_args;
3070 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003071 args_owned = 1;
3072 /* Take what we have of the result and let the Unicode formatting
3073 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003074 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003075 if (_PyString_Resize(&result, rescnt))
3076 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003077 fmtcnt = PyString_GET_SIZE(format) - \
3078 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003079 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3080 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003081 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003082 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003083 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003084 if (v == NULL)
3085 goto error;
3086 /* Paste what we have (result) to what the Unicode formatting
3087 function returned (v) and return the result (or error) */
3088 w = PyUnicode_Concat(result, v);
3089 Py_DECREF(result);
3090 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003091 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003092 return w;
Guido van Rossum90daa872000-04-10 13:47:21 +00003093
Guido van Rossume5372401993-03-16 12:15:04 +00003094 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003096 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003097 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003098 }
Guido van Rossume5372401993-03-16 12:15:04 +00003099 return NULL;
3100}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003101
3102
3103#ifdef INTERN_STRINGS
3104
/* This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 *
 * The dictionary is created on first use by PyString_InternInPlace(),
 * which stores each interned string as both key and value.
 */
static PyObject *interned;
3118
3119void
Fred Drakeba096332000-07-09 07:04:36 +00003120PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003121{
3122 register PyStringObject *s = (PyStringObject *)(*p);
3123 PyObject *t;
3124 if (s == NULL || !PyString_Check(s))
3125 Py_FatalError("PyString_InternInPlace: strings only please!");
3126 if ((t = s->ob_sinterned) != NULL) {
3127 if (t == (PyObject *)s)
3128 return;
3129 Py_INCREF(t);
3130 *p = t;
3131 Py_DECREF(s);
3132 return;
3133 }
3134 if (interned == NULL) {
3135 interned = PyDict_New();
3136 if (interned == NULL)
3137 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003138 }
3139 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3140 Py_INCREF(t);
3141 *p = s->ob_sinterned = t;
3142 Py_DECREF(s);
3143 return;
3144 }
3145 t = (PyObject *)s;
3146 if (PyDict_SetItem(interned, t, t) == 0) {
3147 s->ob_sinterned = t;
3148 return;
3149 }
3150 PyErr_Clear();
3151}
3152
3153
3154PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003155PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003156{
3157 PyObject *s = PyString_FromString(cp);
3158 if (s == NULL)
3159 return NULL;
3160 PyString_InternInPlace(&s);
3161 return s;
3162}
3163
3164#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003165
3166void
Fred Drakeba096332000-07-09 07:04:36 +00003167PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003168{
3169 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003170 for (i = 0; i < UCHAR_MAX + 1; i++) {
3171 Py_XDECREF(characters[i]);
3172 characters[i] = NULL;
3173 }
3174#ifndef DONT_SHARE_SHORT_STRINGS
3175 Py_XDECREF(nullstring);
3176 nullstring = NULL;
3177#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003178#ifdef INTERN_STRINGS
3179 if (interned) {
3180 int pos, changed;
3181 PyObject *key, *value;
3182 do {
3183 changed = 0;
3184 pos = 0;
3185 while (PyDict_Next(interned, &pos, &key, &value)) {
3186 if (key->ob_refcnt == 2 && key == value) {
3187 PyDict_DelItem(interned, key);
3188 changed = 1;
3189 }
3190 }
3191 } while (changed);
3192 }
3193#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003194}