blob: dbade8ca795cbd02dc7b79079161bfbb6b08a809 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.

   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
144
145 if (encoding == NULL)
146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
172
173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
184
185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
198
199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
204 if (encoding == NULL)
205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
227
228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
275extern
276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
338 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000339 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000340 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000341 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000344 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345}
346
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000348string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000350 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
351 PyObject *v;
352 if (newsize > INT_MAX) {
353 PyErr_SetString(PyExc_OverflowError,
354 "string is too large to make repr");
355 }
356 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000358 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 }
360 else {
361 register int i;
362 register char c;
363 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000364 int quote;
365
Thomas Wouters7e474022000-07-16 12:04:32 +0000366 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000367 quote = '\'';
368 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
369 quote = '"';
370
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000371 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000372 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373 for (i = 0; i < op->ob_size; i++) {
374 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000375 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376 *p++ = '\\', *p++ = c;
377 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 while (*p != '\0')
380 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 }
382 else
383 *p++ = c;
384 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000385 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000387 _PyString_Resize(
388 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000389 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391}
392
393static int
Fred Drakeba096332000-07-09 07:04:36 +0000394string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395{
396 return a->ob_size;
397}
398
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000400string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401{
402 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403 register PyStringObject *op;
404 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000405 if (PyUnicode_Check(bb))
406 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000407 PyErr_Format(PyExc_TypeError,
408 "cannot add type \"%.200s\" to string",
409 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 return NULL;
411 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000412#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413 /* Optimize cases with empty left or right operand */
414 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 return bb;
417 }
418 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 Py_INCREF(a);
420 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 }
422 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000423 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000425 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000426 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000428 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429#ifdef CACHE_HASH
430 op->ob_shash = -1;
431#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000432#ifdef INTERN_STRINGS
433 op->ob_sinterned = NULL;
434#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000435 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
436 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
437 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439#undef b
440}
441
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000443string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444{
445 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000446 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000447 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000448 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 if (n < 0)
450 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000451 /* watch out for overflows: the size can overflow int,
452 * and the # of bytes needed can overflow size_t
453 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000455 if (n && size / n != a->ob_size) {
456 PyErr_SetString(PyExc_OverflowError,
457 "repeated string is too long");
458 return NULL;
459 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000461 Py_INCREF(a);
462 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Tim Peters8f422462000-09-09 06:13:41 +0000464 nbytes = size * sizeof(char);
465 if (nbytes / sizeof(char) != (size_t)size ||
466 nbytes + sizeof(PyStringObject) <= nbytes) {
467 PyErr_SetString(PyExc_OverflowError,
468 "repeated string is too long");
469 return NULL;
470 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000471 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000472 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000473 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000475 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000476#ifdef CACHE_HASH
477 op->ob_shash = -1;
478#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000479#ifdef INTERN_STRINGS
480 op->ob_sinterned = NULL;
481#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000482 for (i = 0; i < size; i += a->ob_size)
483 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
484 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000485 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486}
487
488/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
489
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000491string_slice(register PyStringObject *a, register int i, register int j)
492 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493{
494 if (i < 0)
495 i = 0;
496 if (j < 0)
497 j = 0; /* Avoid signed/unsigned bug in next line */
498 if (j > a->ob_size)
499 j = a->ob_size;
500 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501 Py_INCREF(a);
502 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 }
504 if (j < i)
505 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000506 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507}
508
Guido van Rossum9284a572000-03-07 15:53:43 +0000509static int
Fred Drakeba096332000-07-09 07:04:36 +0000510string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000511{
512 register char *s, *end;
513 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000514 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000515 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000516 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000517 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000518 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000519 return -1;
520 }
521 c = PyString_AsString(el)[0];
522 s = PyString_AsString(a);
523 end = s + PyString_Size(a);
524 while (s < end) {
525 if (c == *s++)
526 return 1;
527 }
528 return 0;
529}
530
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000531static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000532string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000534 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000535 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000536 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000537 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538 return NULL;
539 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000540 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000542#ifdef COUNT_ALLOCS
543 if (v != NULL)
544 one_strings++;
545#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000546 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000547 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000548 if (v == NULL)
549 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000550 characters[c] = (PyStringObject *) v;
551 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000554 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000555}
556
557static int
Fred Drakeba096332000-07-09 07:04:36 +0000558string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559{
Guido van Rossum253919f1991-02-13 23:18:39 +0000560 int len_a = a->ob_size, len_b = b->ob_size;
561 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000562 int cmp;
563 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000564 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000565 if (cmp == 0)
566 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
567 if (cmp != 0)
568 return cmp;
569 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000570 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000571}
572
Guido van Rossum9bfef441993-03-29 10:43:31 +0000573static long
Fred Drakeba096332000-07-09 07:04:36 +0000574string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000575{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000576 register int len;
577 register unsigned char *p;
578 register long x;
579
580#ifdef CACHE_HASH
581 if (a->ob_shash != -1)
582 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000583#ifdef INTERN_STRINGS
584 if (a->ob_sinterned != NULL)
585 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000587#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000588#endif
589 len = a->ob_size;
590 p = (unsigned char *) a->ob_sval;
591 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000592 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000593 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000594 x ^= a->ob_size;
595 if (x == -1)
596 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000597#ifdef CACHE_HASH
598 a->ob_shash = x;
599#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000600 return x;
601}
602
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000603static int
Fred Drakeba096332000-07-09 07:04:36 +0000604string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000605{
606 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000607 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000608 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000609 return -1;
610 }
611 *ptr = (void *)self->ob_sval;
612 return self->ob_size;
613}
614
615static int
Fred Drakeba096332000-07-09 07:04:36 +0000616string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000617{
Guido van Rossum045e6881997-09-08 18:30:11 +0000618 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000619 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000620 return -1;
621}
622
623static int
Fred Drakeba096332000-07-09 07:04:36 +0000624string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000625{
626 if ( lenp )
627 *lenp = self->ob_size;
628 return 1;
629}
630
Guido van Rossum1db70701998-10-08 02:18:52 +0000631static int
Fred Drakeba096332000-07-09 07:04:36 +0000632string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000633{
634 if ( index != 0 ) {
635 PyErr_SetString(PyExc_SystemError,
636 "accessing non-existent string segment");
637 return -1;
638 }
639 *ptr = self->ob_sval;
640 return self->ob_size;
641}
642
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000643static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000644 (inquiry)string_length, /*sq_length*/
645 (binaryfunc)string_concat, /*sq_concat*/
646 (intargfunc)string_repeat, /*sq_repeat*/
647 (intargfunc)string_item, /*sq_item*/
648 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000649 0, /*sq_ass_item*/
650 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000651 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000652};
653
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000654static PyBufferProcs string_as_buffer = {
655 (getreadbufferproc)string_buffer_getreadbuf,
656 (getwritebufferproc)string_buffer_getwritebuf,
657 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000658 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000659};
660
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000661
662
/* Mode constants.  NOTE(review): presumably these select which end(s) a
   strip helper trims; their use lies outside this section -- confirm. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
666
667
668static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000669split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000670{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000671 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000672 PyObject* item;
673 PyObject *list = PyList_New(0);
674
675 if (list == NULL)
676 return NULL;
677
Guido van Rossum4c08d552000-03-10 22:55:18 +0000678 for (i = j = 0; i < len; ) {
679 while (i < len && isspace(Py_CHARMASK(s[i])))
680 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000681 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 while (i < len && !isspace(Py_CHARMASK(s[i])))
683 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000684 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000685 if (maxsplit-- <= 0)
686 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687 item = PyString_FromStringAndSize(s+j, (int)(i-j));
688 if (item == NULL)
689 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690 err = PyList_Append(list, item);
691 Py_DECREF(item);
692 if (err < 0)
693 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000694 while (i < len && isspace(Py_CHARMASK(s[i])))
695 i++;
696 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000697 }
698 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000699 if (j < len) {
700 item = PyString_FromStringAndSize(s+j, (int)(len - j));
701 if (item == NULL)
702 goto finally;
703 err = PyList_Append(list, item);
704 Py_DECREF(item);
705 if (err < 0)
706 goto finally;
707 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 return list;
709 finally:
710 Py_DECREF(list);
711 return NULL;
712}
713
714
715static char split__doc__[] =
716"S.split([sep [,maxsplit]]) -> list of strings\n\
717\n\
718Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000719delimiter string. If maxsplit is given, at most maxsplit\n\
720splits are done. If sep is not specified, any whitespace string\n\
721is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000722
723static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000724string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000725{
726 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000727 int maxsplit = -1;
728 const char *s = PyString_AS_STRING(self), *sub;
729 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000730
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000732 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733 if (maxsplit < 0)
734 maxsplit = INT_MAX;
735 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (PyString_Check(subobj)) {
738 sub = PyString_AS_STRING(subobj);
739 n = PyString_GET_SIZE(subobj);
740 }
741 else if (PyUnicode_Check(subobj))
742 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
743 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
744 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000745 if (n == 0) {
746 PyErr_SetString(PyExc_ValueError, "empty separator");
747 return NULL;
748 }
749
750 list = PyList_New(0);
751 if (list == NULL)
752 return NULL;
753
754 i = j = 0;
755 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000756 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000757 if (maxsplit-- <= 0)
758 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000759 item = PyString_FromStringAndSize(s+j, (int)(i-j));
760 if (item == NULL)
761 goto fail;
762 err = PyList_Append(list, item);
763 Py_DECREF(item);
764 if (err < 0)
765 goto fail;
766 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000767 }
768 else
769 i++;
770 }
771 item = PyString_FromStringAndSize(s+j, (int)(len-j));
772 if (item == NULL)
773 goto fail;
774 err = PyList_Append(list, item);
775 Py_DECREF(item);
776 if (err < 0)
777 goto fail;
778
779 return list;
780
781 fail:
782 Py_DECREF(list);
783 return NULL;
784}
785
786
787static char join__doc__[] =
788"S.join(sequence) -> string\n\
789\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000790Return a string which is the concatenation of the strings in the\n\
791sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000792
793static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000794string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795{
796 char *sep = PyString_AS_STRING(self);
797 int seplen = PyString_GET_SIZE(self);
798 PyObject *res = NULL;
799 int reslen = 0;
800 char *p;
801 int seqlen = 0;
802 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000803 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000804 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000806 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807 return NULL;
808
Barry Warsaw771d0672000-07-11 04:58:12 +0000809 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "sequence expected, %.80s found",
813 orig->ob_type->tp_name);
814 return NULL;
815 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000816 /* From here on out, errors go through finally: for proper
817 * reference count manipulations.
818 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000819 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000821 item = PySequence_Fast_GET_ITEM(seq, 0);
822 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000823 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000826
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000828 goto finally;
829
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000830 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000831
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000832 for (i = 0; i < seqlen; i++) {
833 item = PySequence_Fast_GET_ITEM(seq, i);
834 if (!PyString_Check(item)){
835 if (PyUnicode_Check(item)) {
836 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000837 Py_DECREF(seq);
838 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000839 }
840 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000841 "sequence item %i: expected string,"
842 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000844 goto finally;
845 }
846 slen = PyString_GET_SIZE(item);
847 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000848 /* at least double the size of the string */
849 sz_incr = slen + seplen > sz ? slen + seplen : sz;
850 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000851 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000853 sz += sz_incr;
854 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000855 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000856 if (i > 0) {
857 memcpy(p, sep, seplen);
858 p += seplen;
859 reslen += seplen;
860 }
861 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000862 p += slen;
863 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000864 }
865 if (_PyString_Resize(&res, reslen))
866 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000867 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 return res;
869
870 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000871 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000872 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000873 return NULL;
874}
875
876
877
878static long
Fred Drakeba096332000-07-09 07:04:36 +0000879string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000880{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000881 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000882 int len = PyString_GET_SIZE(self);
883 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000884 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000885
Guido van Rossumc6821402000-05-08 14:08:05 +0000886 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
887 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 return -2;
889 if (PyString_Check(subobj)) {
890 sub = PyString_AS_STRING(subobj);
891 n = PyString_GET_SIZE(subobj);
892 }
893 else if (PyUnicode_Check(subobj))
894 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
895 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000896 return -2;
897
898 if (last > len)
899 last = len;
900 if (last < 0)
901 last += len;
902 if (last < 0)
903 last = 0;
904 if (i < 0)
905 i += len;
906 if (i < 0)
907 i = 0;
908
Guido van Rossum4c08d552000-03-10 22:55:18 +0000909 if (dir > 0) {
910 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000911 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000912 last -= n;
913 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000914 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000915 return (long)i;
916 }
917 else {
918 int j;
919
920 if (n == 0 && i <= last)
921 return (long)last;
922 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000923 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000924 return (long)j;
925 }
926
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000927 return -1;
928}
929
930
931static char find__doc__[] =
932"S.find(sub [,start [,end]]) -> int\n\
933\n\
934Return the lowest index in S where substring sub is found,\n\
935such that sub is contained within s[start,end]. Optional\n\
936arguments start and end are interpreted as in slice notation.\n\
937\n\
938Return -1 on failure.";
939
940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000941string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000942{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000943 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000944 if (result == -2)
945 return NULL;
946 return PyInt_FromLong(result);
947}
948
949
950static char index__doc__[] =
951"S.index(sub [,start [,end]]) -> int\n\
952\n\
953Like S.find() but raise ValueError when the substring is not found.";
954
955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000956string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000957{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000958 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959 if (result == -2)
960 return NULL;
961 if (result == -1) {
962 PyErr_SetString(PyExc_ValueError,
963 "substring not found in string.index");
964 return NULL;
965 }
966 return PyInt_FromLong(result);
967}
968
969
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970static char rfind__doc__[] =
971"S.rfind(sub [,start [,end]]) -> int\n\
972\n\
973Return the highest index in S where substring sub is found,\n\
974such that sub is contained within s[start,end]. Optional\n\
975arguments start and end are interpreted as in slice notation.\n\
976\n\
977Return -1 on failure.";
978
979static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000980string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000981{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000982 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000983 if (result == -2)
984 return NULL;
985 return PyInt_FromLong(result);
986}
987
988
989static char rindex__doc__[] =
990"S.rindex(sub [,start [,end]]) -> int\n\
991\n\
992Like S.rfind() but raise ValueError when the substring is not found.";
993
994static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000995string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000996{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000997 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000998 if (result == -2)
999 return NULL;
1000 if (result == -1) {
1001 PyErr_SetString(PyExc_ValueError,
1002 "substring not found in string.rindex");
1003 return NULL;
1004 }
1005 return PyInt_FromLong(result);
1006}
1007
1008
1009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001010do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001011{
1012 char *s = PyString_AS_STRING(self);
1013 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001014
Guido van Rossum43713e52000-02-29 13:59:29 +00001015 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 return NULL;
1017
1018 i = 0;
1019 if (striptype != RIGHTSTRIP) {
1020 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1021 i++;
1022 }
1023 }
1024
1025 j = len;
1026 if (striptype != LEFTSTRIP) {
1027 do {
1028 j--;
1029 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1030 j++;
1031 }
1032
1033 if (i == 0 && j == len) {
1034 Py_INCREF(self);
1035 return (PyObject*)self;
1036 }
1037 else
1038 return PyString_FromStringAndSize(s+i, j-i);
1039}
1040
1041
1042static char strip__doc__[] =
1043"S.strip() -> string\n\
1044\n\
1045Return a copy of the string S with leading and trailing\n\
1046whitespace removed.";
1047
1048static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001049string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050{
1051 return do_strip(self, args, BOTHSTRIP);
1052}
1053
1054
1055static char lstrip__doc__[] =
1056"S.lstrip() -> string\n\
1057\n\
1058Return a copy of the string S with leading whitespace removed.";
1059
1060static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001061string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001062{
1063 return do_strip(self, args, LEFTSTRIP);
1064}
1065
1066
1067static char rstrip__doc__[] =
1068"S.rstrip() -> string\n\
1069\n\
1070Return a copy of the string S with trailing whitespace removed.";
1071
1072static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001073string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001074{
1075 return do_strip(self, args, RIGHTSTRIP);
1076}
1077
1078
1079static char lower__doc__[] =
1080"S.lower() -> string\n\
1081\n\
1082Return a copy of the string S converted to lowercase.";
1083
1084static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001085string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086{
1087 char *s = PyString_AS_STRING(self), *s_new;
1088 int i, n = PyString_GET_SIZE(self);
1089 PyObject *new;
1090
Guido van Rossum43713e52000-02-29 13:59:29 +00001091 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 return NULL;
1093 new = PyString_FromStringAndSize(NULL, n);
1094 if (new == NULL)
1095 return NULL;
1096 s_new = PyString_AsString(new);
1097 for (i = 0; i < n; i++) {
1098 int c = Py_CHARMASK(*s++);
1099 if (isupper(c)) {
1100 *s_new = tolower(c);
1101 } else
1102 *s_new = c;
1103 s_new++;
1104 }
1105 return new;
1106}
1107
1108
1109static char upper__doc__[] =
1110"S.upper() -> string\n\
1111\n\
1112Return a copy of the string S converted to uppercase.";
1113
1114static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001115string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116{
1117 char *s = PyString_AS_STRING(self), *s_new;
1118 int i, n = PyString_GET_SIZE(self);
1119 PyObject *new;
1120
Guido van Rossum43713e52000-02-29 13:59:29 +00001121 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122 return NULL;
1123 new = PyString_FromStringAndSize(NULL, n);
1124 if (new == NULL)
1125 return NULL;
1126 s_new = PyString_AsString(new);
1127 for (i = 0; i < n; i++) {
1128 int c = Py_CHARMASK(*s++);
1129 if (islower(c)) {
1130 *s_new = toupper(c);
1131 } else
1132 *s_new = c;
1133 s_new++;
1134 }
1135 return new;
1136}
1137
1138
Guido van Rossum4c08d552000-03-10 22:55:18 +00001139static char title__doc__[] =
1140"S.title() -> string\n\
1141\n\
1142Return a titlecased version of S, i.e. words start with uppercase\n\
1143characters, all remaining cased characters have lowercase.";
1144
1145static PyObject*
1146string_title(PyUnicodeObject *self, PyObject *args)
1147{
1148 char *s = PyString_AS_STRING(self), *s_new;
1149 int i, n = PyString_GET_SIZE(self);
1150 int previous_is_cased = 0;
1151 PyObject *new;
1152
1153 if (!PyArg_ParseTuple(args, ":title"))
1154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 for (i = 0; i < n; i++) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c)) {
1162 if (!previous_is_cased)
1163 c = toupper(c);
1164 previous_is_cased = 1;
1165 } else if (isupper(c)) {
1166 if (previous_is_cased)
1167 c = tolower(c);
1168 previous_is_cased = 1;
1169 } else
1170 previous_is_cased = 0;
1171 *s_new++ = c;
1172 }
1173 return new;
1174}
1175
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176static char capitalize__doc__[] =
1177"S.capitalize() -> string\n\
1178\n\
1179Return a copy of the string S with only its first character\n\
1180capitalized.";
1181
1182static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001183string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001184{
1185 char *s = PyString_AS_STRING(self), *s_new;
1186 int i, n = PyString_GET_SIZE(self);
1187 PyObject *new;
1188
Guido van Rossum43713e52000-02-29 13:59:29 +00001189 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190 return NULL;
1191 new = PyString_FromStringAndSize(NULL, n);
1192 if (new == NULL)
1193 return NULL;
1194 s_new = PyString_AsString(new);
1195 if (0 < n) {
1196 int c = Py_CHARMASK(*s++);
1197 if (islower(c))
1198 *s_new = toupper(c);
1199 else
1200 *s_new = c;
1201 s_new++;
1202 }
1203 for (i = 1; i < n; i++) {
1204 int c = Py_CHARMASK(*s++);
1205 if (isupper(c))
1206 *s_new = tolower(c);
1207 else
1208 *s_new = c;
1209 s_new++;
1210 }
1211 return new;
1212}
1213
1214
1215static char count__doc__[] =
1216"S.count(sub[, start[, end]]) -> int\n\
1217\n\
1218Return the number of occurrences of substring sub in string\n\
1219S[start:end]. Optional arguments start and end are\n\
1220interpreted as in slice notation.";
1221
1222static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001223string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001225 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 int len = PyString_GET_SIZE(self), n;
1227 int i = 0, last = INT_MAX;
1228 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230
Guido van Rossumc6821402000-05-08 14:08:05 +00001231 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1232 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001233 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001234
Guido van Rossum4c08d552000-03-10 22:55:18 +00001235 if (PyString_Check(subobj)) {
1236 sub = PyString_AS_STRING(subobj);
1237 n = PyString_GET_SIZE(subobj);
1238 }
1239 else if (PyUnicode_Check(subobj))
1240 return PyInt_FromLong(
1241 PyUnicode_Count((PyObject *)self, subobj, i, last));
1242 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1243 return NULL;
1244
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001245 if (last > len)
1246 last = len;
1247 if (last < 0)
1248 last += len;
1249 if (last < 0)
1250 last = 0;
1251 if (i < 0)
1252 i += len;
1253 if (i < 0)
1254 i = 0;
1255 m = last + 1 - n;
1256 if (n == 0)
1257 return PyInt_FromLong((long) (m-i));
1258
1259 r = 0;
1260 while (i < m) {
1261 if (!memcmp(s+i, sub, n)) {
1262 r++;
1263 i += n;
1264 } else {
1265 i++;
1266 }
1267 }
1268 return PyInt_FromLong((long) r);
1269}
1270
1271
1272static char swapcase__doc__[] =
1273"S.swapcase() -> string\n\
1274\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001275Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001276converted to lowercase and vice versa.";
1277
1278static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001279string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280{
1281 char *s = PyString_AS_STRING(self), *s_new;
1282 int i, n = PyString_GET_SIZE(self);
1283 PyObject *new;
1284
Guido van Rossum43713e52000-02-29 13:59:29 +00001285 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 return NULL;
1287 new = PyString_FromStringAndSize(NULL, n);
1288 if (new == NULL)
1289 return NULL;
1290 s_new = PyString_AsString(new);
1291 for (i = 0; i < n; i++) {
1292 int c = Py_CHARMASK(*s++);
1293 if (islower(c)) {
1294 *s_new = toupper(c);
1295 }
1296 else if (isupper(c)) {
1297 *s_new = tolower(c);
1298 }
1299 else
1300 *s_new = c;
1301 s_new++;
1302 }
1303 return new;
1304}
1305
1306
1307static char translate__doc__[] =
1308"S.translate(table [,deletechars]) -> string\n\
1309\n\
1310Return a copy of the string S, where all characters occurring\n\
1311in the optional argument deletechars are removed, and the\n\
1312remaining characters have been mapped through the given\n\
1313translation table, which must be a string of length 256.";
1314
1315static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001316string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001318 register char *input, *output;
1319 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320 register int i, c, changed = 0;
1321 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 int inlen, tablen, dellen = 0;
1324 PyObject *result;
1325 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328 if (!PyArg_ParseTuple(args, "O|O:translate",
1329 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331
1332 if (PyString_Check(tableobj)) {
1333 table1 = PyString_AS_STRING(tableobj);
1334 tablen = PyString_GET_SIZE(tableobj);
1335 }
1336 else if (PyUnicode_Check(tableobj)) {
1337 /* Unicode .translate() does not support the deletechars
1338 parameter; instead a mapping to None will cause characters
1339 to be deleted. */
1340 if (delobj != NULL) {
1341 PyErr_SetString(PyExc_TypeError,
1342 "deletions are implemented differently for unicode");
1343 return NULL;
1344 }
1345 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1346 }
1347 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349
1350 if (delobj != NULL) {
1351 if (PyString_Check(delobj)) {
1352 del_table = PyString_AS_STRING(delobj);
1353 dellen = PyString_GET_SIZE(delobj);
1354 }
1355 else if (PyUnicode_Check(delobj)) {
1356 PyErr_SetString(PyExc_TypeError,
1357 "deletions are implemented differently for unicode");
1358 return NULL;
1359 }
1360 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1361 return NULL;
1362
1363 if (tablen != 256) {
1364 PyErr_SetString(PyExc_ValueError,
1365 "translation table must be 256 characters long");
1366 return NULL;
1367 }
1368 }
1369 else {
1370 del_table = NULL;
1371 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 }
1373
1374 table = table1;
1375 inlen = PyString_Size(input_obj);
1376 result = PyString_FromStringAndSize((char *)NULL, inlen);
1377 if (result == NULL)
1378 return NULL;
1379 output_start = output = PyString_AsString(result);
1380 input = PyString_AsString(input_obj);
1381
1382 if (dellen == 0) {
1383 /* If no deletions are required, use faster code */
1384 for (i = inlen; --i >= 0; ) {
1385 c = Py_CHARMASK(*input++);
1386 if (Py_CHARMASK((*output++ = table[c])) != c)
1387 changed = 1;
1388 }
1389 if (changed)
1390 return result;
1391 Py_DECREF(result);
1392 Py_INCREF(input_obj);
1393 return input_obj;
1394 }
1395
1396 for (i = 0; i < 256; i++)
1397 trans_table[i] = Py_CHARMASK(table[i]);
1398
1399 for (i = 0; i < dellen; i++)
1400 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1401
1402 for (i = inlen; --i >= 0; ) {
1403 c = Py_CHARMASK(*input++);
1404 if (trans_table[c] != -1)
1405 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1406 continue;
1407 changed = 1;
1408 }
1409 if (!changed) {
1410 Py_DECREF(result);
1411 Py_INCREF(input_obj);
1412 return input_obj;
1413 }
1414 /* Fix the size of the resulting string */
1415 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1416 return NULL;
1417 return result;
1418}
1419
1420
1421/* What follows is used for implementing replace(). Perry Stoll. */
1422
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT is longer than MEM the result is -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot begin in the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1448
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  found by scanning left to right with mymemfind().
  For example mem=1111 and pat==11 gives 2,
  and mem=11111 and pat==11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;

    while (len >= 0 && (at = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* resume the scan just past the match */
        const int consumed = at + pat_len;

        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1472
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If STR is empty, if length of PAT is greater than length of STR, or
   if no replacement is to be made (no occurrences of PAT in STR, or
   COUNT is 0), then the original string is returned.  Otherwise, a new
   string is allocated here with PyMem_MALLOC and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result too
       long to be measured by an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* The result only grows when SUB is longer than PAT; refuse a
       length that does not fit in an int instead of overflowing. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* A zero-byte request may legitimately yield NULL, which would be
       mistaken for a memory error; always ask for at least one byte so
       the caller gets something it can free. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1550
1551
1552static char replace__doc__[] =
1553"S.replace (old, new[, maxsplit]) -> string\n\
1554\n\
1555Return a copy of string S with all occurrences of substring\n\
1556old replaced by new. If the optional argument maxsplit is\n\
1557given, only the first maxsplit occurrences are replaced.";
1558
1559static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001560string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 const char *str = PyString_AS_STRING(self), *sub, *repl;
1563 char *new_s;
1564 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1565 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (!PyArg_ParseTuple(args, "OO|i:replace",
1570 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572
1573 if (PyString_Check(subobj)) {
1574 sub = PyString_AS_STRING(subobj);
1575 sub_len = PyString_GET_SIZE(subobj);
1576 }
1577 else if (PyUnicode_Check(subobj))
1578 return PyUnicode_Replace((PyObject *)self,
1579 subobj, replobj, count);
1580 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1581 return NULL;
1582
1583 if (PyString_Check(replobj)) {
1584 repl = PyString_AS_STRING(replobj);
1585 repl_len = PyString_GET_SIZE(replobj);
1586 }
1587 else if (PyUnicode_Check(replobj))
1588 return PyUnicode_Replace((PyObject *)self,
1589 subobj, replobj, count);
1590 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1591 return NULL;
1592
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001593 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001594 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595 return NULL;
1596 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598 if (new_s == NULL) {
1599 PyErr_NoMemory();
1600 return NULL;
1601 }
1602 if (out_len == -1) {
1603 /* we're returning another reference to self */
1604 new = (PyObject*)self;
1605 Py_INCREF(new);
1606 }
1607 else {
1608 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001609 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610 }
1611 return new;
1612}
1613
1614
1615static char startswith__doc__[] =
1616"S.startswith(prefix[, start[, end]]) -> int\n\
1617\n\
1618Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1619optional start, test S beginning at that position. With optional end, stop\n\
1620comparing S at that position.";
1621
1622static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001623string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001625 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628 int plen;
1629 int start = 0;
1630 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632
Guido van Rossumc6821402000-05-08 14:08:05 +00001633 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1634 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001635 return NULL;
1636 if (PyString_Check(subobj)) {
1637 prefix = PyString_AS_STRING(subobj);
1638 plen = PyString_GET_SIZE(subobj);
1639 }
1640 else if (PyUnicode_Check(subobj))
1641 return PyInt_FromLong(
1642 PyUnicode_Tailmatch((PyObject *)self,
1643 subobj, start, end, -1));
1644 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645 return NULL;
1646
1647 /* adopt Java semantics for index out of range. it is legal for
1648 * offset to be == plen, but this only returns true if prefix is
1649 * the empty string.
1650 */
1651 if (start < 0 || start+plen > len)
1652 return PyInt_FromLong(0);
1653
1654 if (!memcmp(str+start, prefix, plen)) {
1655 /* did the match end after the specified end? */
1656 if (end < 0)
1657 return PyInt_FromLong(1);
1658 else if (end - start < plen)
1659 return PyInt_FromLong(0);
1660 else
1661 return PyInt_FromLong(1);
1662 }
1663 else return PyInt_FromLong(0);
1664}
1665
1666
1667static char endswith__doc__[] =
1668"S.endswith(suffix[, start[, end]]) -> int\n\
1669\n\
1670Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1671optional start, test S beginning at that position. With optional end, stop\n\
1672comparing S at that position.";
1673
1674static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001675string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 const char* suffix;
1680 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681 int start = 0;
1682 int end = -1;
1683 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685
Guido van Rossumc6821402000-05-08 14:08:05 +00001686 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1687 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 return NULL;
1689 if (PyString_Check(subobj)) {
1690 suffix = PyString_AS_STRING(subobj);
1691 slen = PyString_GET_SIZE(subobj);
1692 }
1693 else if (PyUnicode_Check(subobj))
1694 return PyInt_FromLong(
1695 PyUnicode_Tailmatch((PyObject *)self,
1696 subobj, start, end, +1));
1697 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 return NULL;
1699
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701 return PyInt_FromLong(0);
1702
1703 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705
Guido van Rossum4c08d552000-03-10 22:55:18 +00001706 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707 return PyInt_FromLong(1);
1708 else return PyInt_FromLong(0);
1709}
1710
1711
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001712static char encode__doc__[] =
1713"S.encode([encoding[,errors]]) -> string\n\
1714\n\
1715Return an encoded string version of S. Default encoding is the current\n\
1716default string encoding. errors may be given to set a different error\n\
1717handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1718a ValueError. Other possible values are 'ignore' and 'replace'.";
1719
1720static PyObject *
1721string_encode(PyStringObject *self, PyObject *args)
1722{
1723 char *encoding = NULL;
1724 char *errors = NULL;
1725 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1726 return NULL;
1727 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1728}
1729
1730
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731static char expandtabs__doc__[] =
1732"S.expandtabs([tabsize]) -> string\n\
1733\n\
1734Return a copy of S where all tab characters are expanded using spaces.\n\
1735If tabsize is not given, a tab size of 8 characters is assumed.";
1736
1737static PyObject*
1738string_expandtabs(PyStringObject *self, PyObject *args)
1739{
1740 const char *e, *p;
1741 char *q;
1742 int i, j;
1743 PyObject *u;
1744 int tabsize = 8;
1745
1746 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1747 return NULL;
1748
Thomas Wouters7e474022000-07-16 12:04:32 +00001749 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 i = j = 0;
1751 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1752 for (p = PyString_AS_STRING(self); p < e; p++)
1753 if (*p == '\t') {
1754 if (tabsize > 0)
1755 j += tabsize - (j % tabsize);
1756 }
1757 else {
1758 j++;
1759 if (*p == '\n' || *p == '\r') {
1760 i += j;
1761 j = 0;
1762 }
1763 }
1764
1765 /* Second pass: create output string and fill it */
1766 u = PyString_FromStringAndSize(NULL, i + j);
1767 if (!u)
1768 return NULL;
1769
1770 j = 0;
1771 q = PyString_AS_STRING(u);
1772
1773 for (p = PyString_AS_STRING(self); p < e; p++)
1774 if (*p == '\t') {
1775 if (tabsize > 0) {
1776 i = tabsize - (j % tabsize);
1777 j += i;
1778 while (i--)
1779 *q++ = ' ';
1780 }
1781 }
1782 else {
1783 j++;
1784 *q++ = *p;
1785 if (*p == '\n' || *p == '\r')
1786 j = 0;
1787 }
1788
1789 return u;
1790}
1791
1792static
1793PyObject *pad(PyStringObject *self,
1794 int left,
1795 int right,
1796 char fill)
1797{
1798 PyObject *u;
1799
1800 if (left < 0)
1801 left = 0;
1802 if (right < 0)
1803 right = 0;
1804
1805 if (left == 0 && right == 0) {
1806 Py_INCREF(self);
1807 return (PyObject *)self;
1808 }
1809
1810 u = PyString_FromStringAndSize(NULL,
1811 left + PyString_GET_SIZE(self) + right);
1812 if (u) {
1813 if (left)
1814 memset(PyString_AS_STRING(u), fill, left);
1815 memcpy(PyString_AS_STRING(u) + left,
1816 PyString_AS_STRING(self),
1817 PyString_GET_SIZE(self));
1818 if (right)
1819 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1820 fill, right);
1821 }
1822
1823 return u;
1824}
1825
1826static char ljust__doc__[] =
1827"S.ljust(width) -> string\n\
1828\n\
1829Return S left justified in a string of length width. Padding is\n\
1830done using spaces.";
1831
1832static PyObject *
1833string_ljust(PyStringObject *self, PyObject *args)
1834{
1835 int width;
1836 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1837 return NULL;
1838
1839 if (PyString_GET_SIZE(self) >= width) {
1840 Py_INCREF(self);
1841 return (PyObject*) self;
1842 }
1843
1844 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1845}
1846
1847
1848static char rjust__doc__[] =
1849"S.rjust(width) -> string\n\
1850\n\
1851Return S right justified in a string of length width. Padding is\n\
1852done using spaces.";
1853
1854static PyObject *
1855string_rjust(PyStringObject *self, PyObject *args)
1856{
1857 int width;
1858 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1859 return NULL;
1860
1861 if (PyString_GET_SIZE(self) >= width) {
1862 Py_INCREF(self);
1863 return (PyObject*) self;
1864 }
1865
1866 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1867}
1868
1869
1870static char center__doc__[] =
1871"S.center(width) -> string\n\
1872\n\
1873Return S centered in a string of length width. Padding is done\n\
1874using spaces.";
1875
1876static PyObject *
1877string_center(PyStringObject *self, PyObject *args)
1878{
1879 int marg, left;
1880 int width;
1881
1882 if (!PyArg_ParseTuple(args, "i:center", &width))
1883 return NULL;
1884
1885 if (PyString_GET_SIZE(self) >= width) {
1886 Py_INCREF(self);
1887 return (PyObject*) self;
1888 }
1889
1890 marg = width - PyString_GET_SIZE(self);
1891 left = marg / 2 + (marg & width & 1);
1892
1893 return pad(self, left, marg - left, ' ');
1894}
1895
#if 0
static char zfill__doc__[] =
    "S.zfill(width) -> string\n"
    "\n"
    "Pad a numeric string x with zeros on the left, to fill a field\n"
    "of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to width characters, keeping a
 * leading '+' or '-' in front of the zeros.  Compiled out by the
 * enclosing #if 0.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    PyObject *padded;
    char *text;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[zeros] is the first character of the original string. */
    text = PyString_AS_STRING(padded);
    if (text[zeros] == '+' || text[zeros] == '-') {
        /* move sign to beginning of string */
        text[0] = text[zeros];
        text[zeros] = '0';
    }

    return padded;
}
#endif
1935
1936static char isspace__doc__[] =
1937"S.isspace() -> int\n\
1938\n\
1939Return 1 if there are only whitespace characters in S,\n\
19400 otherwise.";
1941
1942static PyObject*
1943string_isspace(PyStringObject *self, PyObject *args)
1944{
Fred Drakeba096332000-07-09 07:04:36 +00001945 register const unsigned char *p
1946 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001947 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948
1949 if (!PyArg_NoArgs(args))
1950 return NULL;
1951
1952 /* Shortcut for single character strings */
1953 if (PyString_GET_SIZE(self) == 1 &&
1954 isspace(*p))
1955 return PyInt_FromLong(1);
1956
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001957 /* Special case for empty strings */
1958 if (PyString_GET_SIZE(self) == 0)
1959 return PyInt_FromLong(0);
1960
Guido van Rossum4c08d552000-03-10 22:55:18 +00001961 e = p + PyString_GET_SIZE(self);
1962 for (; p < e; p++) {
1963 if (!isspace(*p))
1964 return PyInt_FromLong(0);
1965 }
1966 return PyInt_FromLong(1);
1967}
1968
1969
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001970static char isalpha__doc__[] =
1971"S.isalpha() -> int\n\
1972\n\
1973Return 1 if all characters in S are alphabetic\n\
1974and there is at least one character in S, 0 otherwise.";
1975
1976static PyObject*
1977string_isalpha(PyUnicodeObject *self, PyObject *args)
1978{
Fred Drakeba096332000-07-09 07:04:36 +00001979 register const unsigned char *p
1980 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001981 register const unsigned char *e;
1982
1983 if (!PyArg_NoArgs(args))
1984 return NULL;
1985
1986 /* Shortcut for single character strings */
1987 if (PyString_GET_SIZE(self) == 1 &&
1988 isalpha(*p))
1989 return PyInt_FromLong(1);
1990
1991 /* Special case for empty strings */
1992 if (PyString_GET_SIZE(self) == 0)
1993 return PyInt_FromLong(0);
1994
1995 e = p + PyString_GET_SIZE(self);
1996 for (; p < e; p++) {
1997 if (!isalpha(*p))
1998 return PyInt_FromLong(0);
1999 }
2000 return PyInt_FromLong(1);
2001}
2002
2003
2004static char isalnum__doc__[] =
2005"S.isalnum() -> int\n\
2006\n\
2007Return 1 if all characters in S are alphanumeric\n\
2008and there is at least one character in S, 0 otherwise.";
2009
2010static PyObject*
2011string_isalnum(PyUnicodeObject *self, PyObject *args)
2012{
Fred Drakeba096332000-07-09 07:04:36 +00002013 register const unsigned char *p
2014 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002015 register const unsigned char *e;
2016
2017 if (!PyArg_NoArgs(args))
2018 return NULL;
2019
2020 /* Shortcut for single character strings */
2021 if (PyString_GET_SIZE(self) == 1 &&
2022 isalnum(*p))
2023 return PyInt_FromLong(1);
2024
2025 /* Special case for empty strings */
2026 if (PyString_GET_SIZE(self) == 0)
2027 return PyInt_FromLong(0);
2028
2029 e = p + PyString_GET_SIZE(self);
2030 for (; p < e; p++) {
2031 if (!isalnum(*p))
2032 return PyInt_FromLong(0);
2033 }
2034 return PyInt_FromLong(1);
2035}
2036
2037
Guido van Rossum4c08d552000-03-10 22:55:18 +00002038static char isdigit__doc__[] =
2039"S.isdigit() -> int\n\
2040\n\
2041Return 1 if there are only digit characters in S,\n\
20420 otherwise.";
2043
2044static PyObject*
2045string_isdigit(PyStringObject *self, PyObject *args)
2046{
Fred Drakeba096332000-07-09 07:04:36 +00002047 register const unsigned char *p
2048 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002049 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050
2051 if (!PyArg_NoArgs(args))
2052 return NULL;
2053
2054 /* Shortcut for single character strings */
2055 if (PyString_GET_SIZE(self) == 1 &&
2056 isdigit(*p))
2057 return PyInt_FromLong(1);
2058
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002059 /* Special case for empty strings */
2060 if (PyString_GET_SIZE(self) == 0)
2061 return PyInt_FromLong(0);
2062
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 e = p + PyString_GET_SIZE(self);
2064 for (; p < e; p++) {
2065 if (!isdigit(*p))
2066 return PyInt_FromLong(0);
2067 }
2068 return PyInt_FromLong(1);
2069}
2070
2071
2072static char islower__doc__[] =
2073"S.islower() -> int\n\
2074\n\
2075Return 1 if all cased characters in S are lowercase and there is\n\
2076at least one cased character in S, 0 otherwise.";
2077
2078static PyObject*
2079string_islower(PyStringObject *self, PyObject *args)
2080{
Fred Drakeba096332000-07-09 07:04:36 +00002081 register const unsigned char *p
2082 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002083 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002084 int cased;
2085
2086 if (!PyArg_NoArgs(args))
2087 return NULL;
2088
2089 /* Shortcut for single character strings */
2090 if (PyString_GET_SIZE(self) == 1)
2091 return PyInt_FromLong(islower(*p) != 0);
2092
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002093 /* Special case for empty strings */
2094 if (PyString_GET_SIZE(self) == 0)
2095 return PyInt_FromLong(0);
2096
Guido van Rossum4c08d552000-03-10 22:55:18 +00002097 e = p + PyString_GET_SIZE(self);
2098 cased = 0;
2099 for (; p < e; p++) {
2100 if (isupper(*p))
2101 return PyInt_FromLong(0);
2102 else if (!cased && islower(*p))
2103 cased = 1;
2104 }
2105 return PyInt_FromLong(cased);
2106}
2107
2108
2109static char isupper__doc__[] =
2110"S.isupper() -> int\n\
2111\n\
2112Return 1 if all cased characters in S are uppercase and there is\n\
2113at least one cased character in S, 0 otherwise.";
2114
2115static PyObject*
2116string_isupper(PyStringObject *self, PyObject *args)
2117{
Fred Drakeba096332000-07-09 07:04:36 +00002118 register const unsigned char *p
2119 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002120 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121 int cased;
2122
2123 if (!PyArg_NoArgs(args))
2124 return NULL;
2125
2126 /* Shortcut for single character strings */
2127 if (PyString_GET_SIZE(self) == 1)
2128 return PyInt_FromLong(isupper(*p) != 0);
2129
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002130 /* Special case for empty strings */
2131 if (PyString_GET_SIZE(self) == 0)
2132 return PyInt_FromLong(0);
2133
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 e = p + PyString_GET_SIZE(self);
2135 cased = 0;
2136 for (; p < e; p++) {
2137 if (islower(*p))
2138 return PyInt_FromLong(0);
2139 else if (!cased && isupper(*p))
2140 cased = 1;
2141 }
2142 return PyInt_FromLong(cased);
2143}
2144
2145
2146static char istitle__doc__[] =
2147"S.istitle() -> int\n\
2148\n\
2149Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2150may only follow uncased characters and lowercase characters only cased\n\
2151ones. Return 0 otherwise.";
2152
2153static PyObject*
2154string_istitle(PyStringObject *self, PyObject *args)
2155{
Fred Drakeba096332000-07-09 07:04:36 +00002156 register const unsigned char *p
2157 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002158 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159 int cased, previous_is_cased;
2160
2161 if (!PyArg_NoArgs(args))
2162 return NULL;
2163
2164 /* Shortcut for single character strings */
2165 if (PyString_GET_SIZE(self) == 1)
2166 return PyInt_FromLong(isupper(*p) != 0);
2167
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002168 /* Special case for empty strings */
2169 if (PyString_GET_SIZE(self) == 0)
2170 return PyInt_FromLong(0);
2171
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 e = p + PyString_GET_SIZE(self);
2173 cased = 0;
2174 previous_is_cased = 0;
2175 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002176 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002177
2178 if (isupper(ch)) {
2179 if (previous_is_cased)
2180 return PyInt_FromLong(0);
2181 previous_is_cased = 1;
2182 cased = 1;
2183 }
2184 else if (islower(ch)) {
2185 if (!previous_is_cased)
2186 return PyInt_FromLong(0);
2187 previous_is_cased = 1;
2188 cased = 1;
2189 }
2190 else
2191 previous_is_cased = 0;
2192 }
2193 return PyInt_FromLong(cased);
2194}
2195
2196
2197static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002198"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199\n\
2200Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002201Line breaks are not included in the resulting list unless keepends\n\
2202is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203
2204#define SPLIT_APPEND(data, left, right) \
2205 str = PyString_FromStringAndSize(data + left, right - left); \
2206 if (!str) \
2207 goto onError; \
2208 if (PyList_Append(list, str)) { \
2209 Py_DECREF(str); \
2210 goto onError; \
2211 } \
2212 else \
2213 Py_DECREF(str);
2214
2215static PyObject*
2216string_splitlines(PyStringObject *self, PyObject *args)
2217{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218 register int i;
2219 register int j;
2220 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002221 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 PyObject *list;
2223 PyObject *str;
2224 char *data;
2225
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002226 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227 return NULL;
2228
2229 data = PyString_AS_STRING(self);
2230 len = PyString_GET_SIZE(self);
2231
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 list = PyList_New(0);
2233 if (!list)
2234 goto onError;
2235
2236 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002237 int eol;
2238
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 /* Find a line and append it */
2240 while (i < len && data[i] != '\n' && data[i] != '\r')
2241 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242
2243 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002244 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 if (i < len) {
2246 if (data[i] == '\r' && i + 1 < len &&
2247 data[i+1] == '\n')
2248 i += 2;
2249 else
2250 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002251 if (keepends)
2252 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002254 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002255 j = i;
2256 }
2257 if (j < len) {
2258 SPLIT_APPEND(data, j, len);
2259 }
2260
2261 return list;
2262
2263 onError:
2264 Py_DECREF(list);
2265 return NULL;
2266}
2267
2268#undef SPLIT_APPEND
2269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270
2271static PyMethodDef
2272string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 /* Counterparts of the obsolete stropmodule functions; except
2274 string.maketrans(). */
2275 {"join", (PyCFunction)string_join, 1, join__doc__},
2276 {"split", (PyCFunction)string_split, 1, split__doc__},
2277 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2278 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2279 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2280 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2281 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2282 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2283 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002284 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2285 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2287 {"count", (PyCFunction)string_count, 1, count__doc__},
2288 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2289 {"find", (PyCFunction)string_find, 1, find__doc__},
2290 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002292 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2293 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2294 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2295 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2297 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2298 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2300 {"title", (PyCFunction)string_title, 1, title__doc__},
2301 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2302 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2303 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002304 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2306 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2307#if 0
2308 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2309#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 {NULL, NULL} /* sentinel */
2311};
2312
2313static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002314string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315{
2316 return Py_FindMethod(string_methods, (PyObject*)s, name);
2317}
2318
2319
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002320PyTypeObject PyString_Type = {
2321 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002322 0,
2323 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002325 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002326 (destructor)string_dealloc, /*tp_dealloc*/
2327 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002329 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002330 (cmpfunc)string_compare, /*tp_compare*/
2331 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002332 0, /*tp_as_number*/
2333 &string_as_sequence, /*tp_as_sequence*/
2334 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002335 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002336 0, /*tp_call*/
2337 0, /*tp_str*/
2338 0, /*tp_getattro*/
2339 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002340 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002341 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002342 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002343};
2344
2345void
Fred Drakeba096332000-07-09 07:04:36 +00002346PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002347{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002348 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002349 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002350 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002351 if (w == NULL || !PyString_Check(*pv)) {
2352 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002353 *pv = NULL;
2354 return;
2355 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002356 v = string_concat((PyStringObject *) *pv, w);
2357 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002358 *pv = v;
2359}
2360
Guido van Rossum013142a1994-08-30 08:19:36 +00002361void
Fred Drakeba096332000-07-09 07:04:36 +00002362PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002363{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 PyString_Concat(pv, w);
2365 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002366}
2367
2368
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002369/* The following function breaks the notion that strings are immutable:
2370 it changes the size of a string. We get away with this only if there
2371 is only one module referencing the object. You can also think of it
2372 as creating a new string object and destroying the old one, only
2373 more efficiently. In any case, don't use this if the string may
2374 already be known to some other part of the code... */
2375
2376int
Fred Drakeba096332000-07-09 07:04:36 +00002377_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002378{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002379 register PyObject *v;
2380 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002381 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002382 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002383 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002384 Py_DECREF(v);
2385 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002386 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002387 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002388 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002389#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002390 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002391#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002392 _Py_ForgetReference(v);
2393 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002394 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002395 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002396 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002397 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002398 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002399 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002400 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002401 _Py_NewReference(*pv);
2402 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002403 sv->ob_size = newsize;
2404 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002405 return 0;
2406}
Guido van Rossume5372401993-03-16 12:15:04 +00002407
2408/* Helpers for formatstring */
2409
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002410static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002411getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002412{
2413 int argidx = *p_argidx;
2414 if (argidx < arglen) {
2415 (*p_argidx)++;
2416 if (arglen < 0)
2417 return args;
2418 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002419 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002420 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002421 PyErr_SetString(PyExc_TypeError,
2422 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002423 return NULL;
2424}
2425
/* Format flag bits, one per flag character of a conversion:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2438
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002439static int
Fred Drakeba096332000-07-09 07:04:36 +00002440formatfloat(char *buf, size_t buflen, int flags,
2441 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002442{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002443 /* fmt = '%#.' + `prec` + `type`
2444 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002445 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002446 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002447 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002448 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002449 if (prec < 0)
2450 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002451 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2452 type = 'g';
2453 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002454 /* worst case length calc to ensure no buffer overrun:
2455 fmt = %#.<prec>g
2456 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2457 for any double rep.)
2458 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2459 If prec=0 the effective precision is 1 (the leading digit is
2460 always given), therefore increase by one to 10+prec. */
2461 if (buflen <= (size_t)10 + (size_t)prec) {
2462 PyErr_SetString(PyExc_OverflowError,
2463 "formatted float is too long (precision too long?)");
2464 return -1;
2465 }
Guido van Rossume5372401993-03-16 12:15:04 +00002466 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002467 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002468}
2469
Tim Peters38fd5b62000-09-21 05:43:11 +00002470/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2471 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2472 * Python's regular ints.
2473 * Return value: a new PyString*, or NULL if error.
2474 * . *pbuf is set to point into it,
2475 * *plen set to the # of chars following that.
2476 * Caller must decref it when done using pbuf.
2477 * The string starting at *pbuf is of the form
2478 * "-"? ("0x" | "0X")? digit+
2479 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2480 * set in flags. The case of hex digits will be correct,
2481 * There will be at least prec digits, zero-filled on the left if
2482 * necessary to get that many.
2483 * val object to be converted
2484 * flags bitmask of format flags; only F_ALT is looked at
2485 * prec minimum number of digits; 0-fill on left if needed
2486 * type a character in [duoxX]; u acts the same as d
2487 *
2488 * CAUTION: o, x and X conversions on regular ints can never
2489 * produce a '-' sign, but can for Python's unbounded ints.
2490 */
2491PyObject*
2492_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2493 char **pbuf, int *plen)
2494{
2495 PyObject *result = NULL;
2496 char *buf;
2497 int i;
2498 int sign; /* 1 if '-', else 0 */
2499 int len; /* number of characters */
2500 int numdigits; /* len == numnondigits + numdigits */
2501 int numnondigits = 0;
2502
2503 switch (type) {
2504 case 'd':
2505 case 'u':
2506 result = val->ob_type->tp_str(val);
2507 break;
2508 case 'o':
2509 result = val->ob_type->tp_as_number->nb_oct(val);
2510 break;
2511 case 'x':
2512 case 'X':
2513 numnondigits = 2;
2514 result = val->ob_type->tp_as_number->nb_hex(val);
2515 break;
2516 default:
2517 assert(!"'type' not in [duoxX]");
2518 }
2519 if (!result)
2520 return NULL;
2521
2522 /* To modify the string in-place, there can only be one reference. */
2523 if (result->ob_refcnt != 1) {
2524 PyErr_BadInternalCall();
2525 return NULL;
2526 }
2527 buf = PyString_AsString(result);
2528 len = PyString_Size(result);
2529 if (buf[len-1] == 'L') {
2530 --len;
2531 buf[len] = '\0';
2532 }
2533 sign = buf[0] == '-';
2534 numnondigits += sign;
2535 numdigits = len - numnondigits;
2536 assert(numdigits > 0);
2537
2538 /* Get rid of base marker unless F_ALT */
2539 if ((flags & F_ALT) == 0) {
2540 /* Need to skip 0x, 0X or 0. */
2541 int skipped = 0;
2542 switch (type) {
2543 case 'o':
2544 assert(buf[sign] == '0');
2545 /* If 0 is only digit, leave it alone. */
2546 if (numdigits > 1) {
2547 skipped = 1;
2548 --numdigits;
2549 }
2550 break;
2551 case 'x':
2552 case 'X':
2553 assert(buf[sign] == '0');
2554 assert(buf[sign + 1] == 'x');
2555 skipped = 2;
2556 numnondigits -= 2;
2557 break;
2558 }
2559 if (skipped) {
2560 buf += skipped;
2561 len -= skipped;
2562 if (sign)
2563 buf[0] = '-';
2564 }
2565 assert(len == numnondigits + numdigits);
2566 assert(numdigits > 0);
2567 }
2568
2569 /* Fill with leading zeroes to meet minimum width. */
2570 if (prec > numdigits) {
2571 PyObject *r1 = PyString_FromStringAndSize(NULL,
2572 numnondigits + prec);
2573 char *b1;
2574 if (!r1) {
2575 Py_DECREF(result);
2576 return NULL;
2577 }
2578 b1 = PyString_AS_STRING(r1);
2579 for (i = 0; i < numnondigits; ++i)
2580 *b1++ = *buf++;
2581 for (i = 0; i < prec - numdigits; i++)
2582 *b1++ = '0';
2583 for (i = 0; i < numdigits; i++)
2584 *b1++ = *buf++;
2585 *b1 = '\0';
2586 Py_DECREF(result);
2587 result = r1;
2588 buf = PyString_AS_STRING(result);
2589 len = numnondigits + prec;
2590 }
2591
2592 /* Fix up case for hex conversions. */
2593 switch (type) {
2594 case 'x':
2595 /* Need to convert all upper case letters to lower case. */
2596 for (i = 0; i < len; i++)
2597 if (buf[i] >= 'A' && buf[i] <= 'F')
2598 buf[i] += 'a'-'A';
2599 break;
2600 case 'X':
2601 /* Need to convert 0x to 0X (and -0x to -0X). */
2602 if (buf[sign + 1] == 'x')
2603 buf[sign + 1] = 'X';
2604 break;
2605 }
2606 *pbuf = buf;
2607 *plen = len;
2608 return result;
2609}
2610
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002611static int
Fred Drakeba096332000-07-09 07:04:36 +00002612formatint(char *buf, size_t buflen, int flags,
2613 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002614{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002615 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002616 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2617 + 1 + 1 = 24 */
2618 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002619 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002620 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002621 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002622 if (prec < 0)
2623 prec = 1;
2624 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002625 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002626 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002627 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002628 PyErr_SetString(PyExc_OverflowError,
2629 "formatted integer is too long (precision too long?)");
2630 return -1;
2631 }
Guido van Rossume5372401993-03-16 12:15:04 +00002632 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002633 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002634}
2635
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002636static int
Fred Drakeba096332000-07-09 07:04:36 +00002637formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002638{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002639 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002640 if (PyString_Check(v)) {
2641 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002642 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002643 }
2644 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002645 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002646 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002647 }
2648 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002649 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002650}
2651
Guido van Rossum013142a1994-08-30 08:19:36 +00002652
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast binds as one
   unit wherever the macro is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002662
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002663PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002664PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002665{
2666 char *fmt, *res;
2667 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002668 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002669 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002670 PyObject *dict = NULL;
2671 if (format == NULL || !PyString_Check(format) || args == NULL) {
2672 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002673 return NULL;
2674 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002675 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002676 fmt = PyString_AsString(format);
2677 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002678 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002679 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002680 if (result == NULL)
2681 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002682 res = PyString_AsString(result);
2683 if (PyTuple_Check(args)) {
2684 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002685 argidx = 0;
2686 }
2687 else {
2688 arglen = -1;
2689 argidx = -2;
2690 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002691 if (args->ob_type->tp_as_mapping)
2692 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002693 while (--fmtcnt >= 0) {
2694 if (*fmt != '%') {
2695 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002696 rescnt = fmtcnt + 100;
2697 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002698 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002699 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002700 res = PyString_AsString(result)
2701 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002702 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002703 }
2704 *res++ = *fmt++;
2705 }
2706 else {
2707 /* Got a format specifier */
2708 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002709 int width = -1;
2710 int prec = -1;
2711 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002712 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002713 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002714 PyObject *v = NULL;
2715 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002716 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002717 int sign;
2718 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002719 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002720 char *fmt_start = fmt;
2721
Guido van Rossumda9c2711996-12-05 21:58:58 +00002722 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002723 if (*fmt == '(') {
2724 char *keystart;
2725 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002726 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002727 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002728
2729 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002731 "format requires a mapping");
2732 goto error;
2733 }
2734 ++fmt;
2735 --fmtcnt;
2736 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002737 /* Skip over balanced parentheses */
2738 while (pcount > 0 && --fmtcnt >= 0) {
2739 if (*fmt == ')')
2740 --pcount;
2741 else if (*fmt == '(')
2742 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002743 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002744 }
2745 keylen = fmt - keystart - 1;
2746 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002747 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002748 "incomplete format key");
2749 goto error;
2750 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 key = PyString_FromStringAndSize(keystart,
2752 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002753 if (key == NULL)
2754 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002755 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002757 args_owned = 0;
2758 }
2759 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002760 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002761 if (args == NULL) {
2762 goto error;
2763 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002764 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002765 arglen = -1;
2766 argidx = -2;
2767 }
Guido van Rossume5372401993-03-16 12:15:04 +00002768 while (--fmtcnt >= 0) {
2769 switch (c = *fmt++) {
2770 case '-': flags |= F_LJUST; continue;
2771 case '+': flags |= F_SIGN; continue;
2772 case ' ': flags |= F_BLANK; continue;
2773 case '#': flags |= F_ALT; continue;
2774 case '0': flags |= F_ZERO; continue;
2775 }
2776 break;
2777 }
2778 if (c == '*') {
2779 v = getnextarg(args, arglen, &argidx);
2780 if (v == NULL)
2781 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 if (!PyInt_Check(v)) {
2783 PyErr_SetString(PyExc_TypeError,
2784 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002785 goto error;
2786 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002788 if (width < 0) {
2789 flags |= F_LJUST;
2790 width = -width;
2791 }
Guido van Rossume5372401993-03-16 12:15:04 +00002792 if (--fmtcnt >= 0)
2793 c = *fmt++;
2794 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002795 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002796 width = c - '0';
2797 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002798 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002799 if (!isdigit(c))
2800 break;
2801 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002802 PyErr_SetString(
2803 PyExc_ValueError,
2804 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002805 goto error;
2806 }
2807 width = width*10 + (c - '0');
2808 }
2809 }
2810 if (c == '.') {
2811 prec = 0;
2812 if (--fmtcnt >= 0)
2813 c = *fmt++;
2814 if (c == '*') {
2815 v = getnextarg(args, arglen, &argidx);
2816 if (v == NULL)
2817 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 if (!PyInt_Check(v)) {
2819 PyErr_SetString(
2820 PyExc_TypeError,
2821 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002822 goto error;
2823 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002825 if (prec < 0)
2826 prec = 0;
2827 if (--fmtcnt >= 0)
2828 c = *fmt++;
2829 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002830 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002831 prec = c - '0';
2832 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002833 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002834 if (!isdigit(c))
2835 break;
2836 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 PyErr_SetString(
2838 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002839 "prec too big");
2840 goto error;
2841 }
2842 prec = prec*10 + (c - '0');
2843 }
2844 }
2845 } /* prec */
2846 if (fmtcnt >= 0) {
2847 if (c == 'h' || c == 'l' || c == 'L') {
2848 size = c;
2849 if (--fmtcnt >= 0)
2850 c = *fmt++;
2851 }
2852 }
2853 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002854 PyErr_SetString(PyExc_ValueError,
2855 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002856 goto error;
2857 }
2858 if (c != '%') {
2859 v = getnextarg(args, arglen, &argidx);
2860 if (v == NULL)
2861 goto error;
2862 }
2863 sign = 0;
2864 fill = ' ';
2865 switch (c) {
2866 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002868 len = 1;
2869 break;
2870 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002871 case 'r':
2872 if (PyUnicode_Check(v)) {
2873 fmt = fmt_start;
2874 goto unicode;
2875 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002876 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002877 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002878 else
2879 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002880 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002881 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002882 if (!PyString_Check(temp)) {
2883 PyErr_SetString(PyExc_TypeError,
2884 "%s argument has non-string str()");
2885 goto error;
2886 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002887 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002888 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002889 if (prec >= 0 && len > prec)
2890 len = prec;
2891 break;
2892 case 'i':
2893 case 'd':
2894 case 'u':
2895 case 'o':
2896 case 'x':
2897 case 'X':
2898 if (c == 'i')
2899 c = 'd';
Tim Peters38fd5b62000-09-21 05:43:11 +00002900 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2901 && PyErr_Occurred()) {
2902 /* Too big for a C long. */
2903 PyErr_Clear();
2904 temp = _PyString_FormatLong(v, flags,
2905 prec, c, &pbuf, &len);
2906 if (!temp)
2907 goto error;
2908 /* unbounded ints can always produce
2909 a sign character! */
2910 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002911 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002912 else {
2913 pbuf = formatbuf;
2914 len = formatint(pbuf, sizeof(formatbuf),
2915 flags, prec, c, v);
2916 if (len < 0)
2917 goto error;
2918 /* only d conversion is signed */
2919 sign = c == 'd';
2920 }
2921 if (flags & F_ZERO)
2922 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002923 break;
2924 case 'e':
2925 case 'E':
2926 case 'f':
2927 case 'g':
2928 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002929 pbuf = formatbuf;
2930 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002931 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002932 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002933 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002934 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002935 fill = '0';
2936 break;
2937 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002938 pbuf = formatbuf;
2939 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002940 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002941 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002942 break;
2943 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002944 PyErr_Format(PyExc_ValueError,
2945 "unsupported format character '%c' (0x%x)",
2946 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002947 goto error;
2948 }
2949 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002950 if (*pbuf == '-' || *pbuf == '+') {
2951 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002952 len--;
2953 }
2954 else if (flags & F_SIGN)
2955 sign = '+';
2956 else if (flags & F_BLANK)
2957 sign = ' ';
2958 else
Tim Peters38fd5b62000-09-21 05:43:11 +00002959 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002960 }
2961 if (width < len)
2962 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00002963 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002964 reslen -= rescnt;
2965 rescnt = width + fmtcnt + 100;
2966 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002967 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002968 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002969 res = PyString_AsString(result)
2970 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002971 }
2972 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002973 if (fill != ' ')
2974 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002975 rescnt--;
2976 if (width > len)
2977 width--;
2978 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002979 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2980 assert(pbuf[0] == '0');
2981 assert(pbuf[1] == c);
2982 if (fill != ' ') {
2983 *res++ = *pbuf++;
2984 *res++ = *pbuf++;
2985 }
2986 rescnt -= 2;
2987 width -= 2;
2988 if (width < 0)
2989 width = 0;
2990 len -= 2;
2991 }
2992 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002993 do {
2994 --rescnt;
2995 *res++ = fill;
2996 } while (--width > len);
2997 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002998 if (fill == ' ') {
2999 if (sign)
3000 *res++ = sign;
3001 if ((flags & F_ALT) &&
3002 (c == 'x' || c == 'X')) {
3003 assert(pbuf[0] == '0');
3004 assert(pbuf[1] == c);
3005 *res++ = *pbuf++;
3006 *res++ = *pbuf++;
3007 }
3008 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003009 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003010 res += len;
3011 rescnt -= len;
3012 while (--width >= len) {
3013 --rescnt;
3014 *res++ = ' ';
3015 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003016 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003017 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003018 "not all arguments converted");
3019 goto error;
3020 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003021 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003022 } /* '%' */
3023 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003024 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003025 PyErr_SetString(PyExc_TypeError,
3026 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003027 goto error;
3028 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003029 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003030 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003031 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003032 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003033 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003034
3035 unicode:
3036 if (args_owned) {
3037 Py_DECREF(args);
3038 args_owned = 0;
3039 }
3040 /* Fiddle args right (remove the first argidx-1 arguments) */
3041 --argidx;
3042 if (PyTuple_Check(orig_args) && argidx > 0) {
3043 PyObject *v;
3044 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3045 v = PyTuple_New(n);
3046 if (v == NULL)
3047 goto error;
3048 while (--n >= 0) {
3049 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3050 Py_INCREF(w);
3051 PyTuple_SET_ITEM(v, n, w);
3052 }
3053 args = v;
3054 } else {
3055 Py_INCREF(orig_args);
3056 args = orig_args;
3057 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003058 args_owned = 1;
3059 /* Take what we have of the result and let the Unicode formatting
3060 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003061 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003062 if (_PyString_Resize(&result, rescnt))
3063 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003064 fmtcnt = PyString_GET_SIZE(format) - \
3065 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003066 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3067 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003068 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003069 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003070 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003071 if (v == NULL)
3072 goto error;
3073 /* Paste what we have (result) to what the Unicode formatting
3074 function returned (v) and return the result (or error) */
3075 w = PyUnicode_Concat(result, v);
3076 Py_DECREF(result);
3077 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003078 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003079 return w;
Guido van Rossum90daa872000-04-10 13:47:21 +00003080
Guido van Rossume5372401993-03-16 12:15:04 +00003081 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003083 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003084 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003085 }
Guido van Rossume5372401993-03-16 12:15:04 +00003086 return NULL;
3087}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003088
3089
3090#ifdef INTERN_STRINGS
3091
/* Table of interned strings.  It is created on first use by
 * PyString_InternInPlace(), which stores each interned string as both
 * the key and the value of its entry.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3105
3106void
Fred Drakeba096332000-07-09 07:04:36 +00003107PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003108{
3109 register PyStringObject *s = (PyStringObject *)(*p);
3110 PyObject *t;
3111 if (s == NULL || !PyString_Check(s))
3112 Py_FatalError("PyString_InternInPlace: strings only please!");
3113 if ((t = s->ob_sinterned) != NULL) {
3114 if (t == (PyObject *)s)
3115 return;
3116 Py_INCREF(t);
3117 *p = t;
3118 Py_DECREF(s);
3119 return;
3120 }
3121 if (interned == NULL) {
3122 interned = PyDict_New();
3123 if (interned == NULL)
3124 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003125 }
3126 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3127 Py_INCREF(t);
3128 *p = s->ob_sinterned = t;
3129 Py_DECREF(s);
3130 return;
3131 }
3132 t = (PyObject *)s;
3133 if (PyDict_SetItem(interned, t, t) == 0) {
3134 s->ob_sinterned = t;
3135 return;
3136 }
3137 PyErr_Clear();
3138}
3139
3140
3141PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003142PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003143{
3144 PyObject *s = PyString_FromString(cp);
3145 if (s == NULL)
3146 return NULL;
3147 PyString_InternInPlace(&s);
3148 return s;
3149}
3150
3151#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003152
3153void
Fred Drakeba096332000-07-09 07:04:36 +00003154PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003155{
3156 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003157 for (i = 0; i < UCHAR_MAX + 1; i++) {
3158 Py_XDECREF(characters[i]);
3159 characters[i] = NULL;
3160 }
3161#ifndef DONT_SHARE_SHORT_STRINGS
3162 Py_XDECREF(nullstring);
3163 nullstring = NULL;
3164#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003165#ifdef INTERN_STRINGS
3166 if (interned) {
3167 int pos, changed;
3168 PyObject *key, *value;
3169 do {
3170 changed = 0;
3171 pos = 0;
3172 while (PyDict_Next(interned, &pos, &key, &value)) {
3173 if (key->ob_refcnt == 2 && key == value) {
3174 PyDict_DelItem(interned, key);
3175 changed = 1;
3176 }
3177 }
3178 } while (changed);
3179 }
3180#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003181}