blob: afaa0541aa2a01bddd1fa8360c8a2f8f60d3c4ba [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters5b4d4772001-05-08 22:33:50 +000039 PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters5b4d4772001-05-08 22:33:50 +000076 PyString_InternInPlace(&(PyObject *)op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000078 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 } else if (size == 1 && str != NULL) {
Tim Peters5b4d4772001-05-08 22:33:50 +000080 PyString_InternInPlace(&(PyObject *)op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000084#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086}
87
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000089PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 register size_t size = strlen(str);
Tim Peters5b4d4772001-05-08 22:33:50 +000092 PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000093 if (size > INT_MAX) {
94 PyErr_SetString(PyExc_OverflowError,
95 "string is too long for a Python string");
96 return NULL;
97 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0 && (op = nullstring) != NULL) {
100#ifdef COUNT_ALLOCS
101 null_strings++;
102#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000103 Py_INCREF(op);
104 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000105 }
106 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
107#ifdef COUNT_ALLOCS
108 one_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000113#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000114
115 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000117 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000118 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000119 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121#ifdef CACHE_HASH
122 op->ob_shash = -1;
123#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000124#ifdef INTERN_STRINGS
125 op->ob_sinterned = NULL;
126#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000127 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000128#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000129 if (size == 0) {
Tim Peters5b4d4772001-05-08 22:33:50 +0000130 PyString_InternInPlace(&(PyObject *)op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 } else if (size == 1) {
Tim Peters5b4d4772001-05-08 22:33:50 +0000134 PyString_InternInPlace(&(PyObject *)op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140}
141
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000142PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
146{
147 PyObject *buffer = NULL, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000148
149 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150 encoding = PyUnicode_GetDefaultEncoding();
151
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
166 }
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000169 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
173 }
174 Py_DECREF(buffer);
175 return str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000176
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
180}
181
182PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
186{
187 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000188
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
195}
196
197PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
200{
201 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000202
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
206 }
207
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000208 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000209 encoding = PyUnicode_GetDefaultEncoding();
210
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
222 }
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
229 }
230 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000231
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000232 onError:
233 return NULL;
234}
235
Guido van Rossum234f9421993-06-17 12:35:49 +0000236static void
Fred Drakeba096332000-07-09 07:04:36 +0000237string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000238{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000239 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000240}
241
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000242static int
243string_getsize(register PyObject *op)
244{
245 char *s;
246 int len;
247 if (PyString_AsStringAndSize(op, &s, &len))
248 return -1;
249 return len;
250}
251
252static /*const*/ char *
253string_getbuffer(register PyObject *op)
254{
255 char *s;
256 int len;
257 if (PyString_AsStringAndSize(op, &s, &len))
258 return NULL;
259 return s;
260}
261
Guido van Rossumd7047b31995-01-02 19:07:15 +0000262int
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000265 if (!PyString_Check(op))
266 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268}
269
270/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000271PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000273 if (!PyString_Check(op))
274 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276}
277
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000278/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000279extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000280PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
281 const char *errors);
282
283int
284PyString_AsStringAndSize(register PyObject *obj,
285 register char **s,
286 register int *len)
287{
288 if (s == NULL) {
289 PyErr_BadInternalCall();
290 return -1;
291 }
292
293 if (!PyString_Check(obj)) {
294 if (PyUnicode_Check(obj)) {
295 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
296 if (obj == NULL)
297 return -1;
298 }
299 else {
300 PyErr_Format(PyExc_TypeError,
301 "expected string or Unicode object, "
302 "%.200s found", obj->ob_type->tp_name);
303 return -1;
304 }
305 }
306
307 *s = PyString_AS_STRING(obj);
308 if (len != NULL)
309 *len = PyString_GET_SIZE(obj);
310 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
311 PyErr_SetString(PyExc_TypeError,
312 "expected string without null bytes");
313 return -1;
314 }
315 return 0;
316}
317
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318/* Methods */
319
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000320static int
Fred Drakeba096332000-07-09 07:04:36 +0000321string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
323 int i;
324 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000325 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000326 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000329 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000331
Thomas Wouters7e474022000-07-16 12:04:32 +0000332 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 quote = '\'';
334 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
335 quote = '"';
336
337 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 for (i = 0; i < op->ob_size; i++) {
339 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000340 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000342 else if (c == '\t')
343 fprintf(fp, "\\t");
344 else if (c == '\n')
345 fprintf(fp, "\\n");
346 else if (c == '\r')
347 fprintf(fp, "\\r");
348 else if (c < ' ' || c >= 0x7f)
349 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000351 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000353 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000354 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355}
356
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000358string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000360 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
361 PyObject *v;
362 if (newsize > INT_MAX) {
363 PyErr_SetString(PyExc_OverflowError,
364 "string is too large to make repr");
365 }
366 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000368 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 }
370 else {
371 register int i;
372 register char c;
373 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000374 int quote;
375
Thomas Wouters7e474022000-07-16 12:04:32 +0000376 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000377 quote = '\'';
378 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
379 quote = '"';
380
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000382 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 for (i = 0; i < op->ob_size; i++) {
384 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000385 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000387 else if (c == '\t')
388 *p++ = '\\', *p++ = 't';
389 else if (c == '\n')
390 *p++ = '\\', *p++ = 'n';
391 else if (c == '\r')
392 *p++ = '\\', *p++ = 'r';
393 else if (c < ' ' || c >= 0x7f) {
394 sprintf(p, "\\x%02x", c & 0xff);
395 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 }
397 else
398 *p++ = c;
399 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000400 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000402 _PyString_Resize(
403 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000404 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406}
407
Guido van Rossum189f1df2001-05-01 16:51:53 +0000408static PyObject *
409string_str(PyObject *s)
410{
411 Py_INCREF(s);
412 return s;
413}
414
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000415static int
Fred Drakeba096332000-07-09 07:04:36 +0000416string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417{
418 return a->ob_size;
419}
420
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000422string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423{
424 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000425 register PyStringObject *op;
426 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000427 if (PyUnicode_Check(bb))
428 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000429 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000430 "cannot add type \"%.200s\" to string",
431 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432 return NULL;
433 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000434#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435 /* Optimize cases with empty left or right operand */
436 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000438 return bb;
439 }
440 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000441 Py_INCREF(a);
442 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000443 }
444 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000445 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000446 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000447 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000448 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000450 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000451#ifdef CACHE_HASH
452 op->ob_shash = -1;
453#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000454#ifdef INTERN_STRINGS
455 op->ob_sinterned = NULL;
456#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000457 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
458 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
459 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000461#undef b
462}
463
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000464static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000465string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000466{
467 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000468 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000469 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000470 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000471 if (n < 0)
472 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000473 /* watch out for overflows: the size can overflow int,
474 * and the # of bytes needed can overflow size_t
475 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000477 if (n && size / n != a->ob_size) {
478 PyErr_SetString(PyExc_OverflowError,
479 "repeated string is too long");
480 return NULL;
481 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000482 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 Py_INCREF(a);
484 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485 }
Tim Peters8f422462000-09-09 06:13:41 +0000486 nbytes = size * sizeof(char);
487 if (nbytes / sizeof(char) != (size_t)size ||
488 nbytes + sizeof(PyStringObject) <= nbytes) {
489 PyErr_SetString(PyExc_OverflowError,
490 "repeated string is too long");
491 return NULL;
492 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000494 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000495 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000497 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000498#ifdef CACHE_HASH
499 op->ob_shash = -1;
500#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000501#ifdef INTERN_STRINGS
502 op->ob_sinterned = NULL;
503#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000504 for (i = 0; i < size; i += a->ob_size)
505 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
506 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
511
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000513string_slice(register PyStringObject *a, register int i, register int j)
514 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000515{
516 if (i < 0)
517 i = 0;
518 if (j < 0)
519 j = 0; /* Avoid signed/unsigned bug in next line */
520 if (j > a->ob_size)
521 j = a->ob_size;
522 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000523 Py_INCREF(a);
524 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525 }
526 if (j < i)
527 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000528 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529}
530
Guido van Rossum9284a572000-03-07 15:53:43 +0000531static int
Fred Drakeba096332000-07-09 07:04:36 +0000532string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000533{
534 register char *s, *end;
535 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000536 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000537 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000538 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000539 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000540 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000541 return -1;
542 }
543 c = PyString_AsString(el)[0];
544 s = PyString_AsString(a);
545 end = s + PyString_Size(a);
546 while (s < end) {
547 if (c == *s++)
548 return 1;
549 }
550 return 0;
551}
552
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000554string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000555{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000556 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000558 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000560 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561 return NULL;
562 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000563 pchar = a->ob_sval + i;
564 c = *pchar & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 v = (PyObject *) characters[c];
Tim Peters5b4d4772001-05-08 22:33:50 +0000566 if (v == NULL)
567 v = PyString_FromStringAndSize(pchar, 1);
568 Py_XINCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000569 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570}
571
572static int
Fred Drakeba096332000-07-09 07:04:36 +0000573string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000574{
Guido van Rossum253919f1991-02-13 23:18:39 +0000575 int len_a = a->ob_size, len_b = b->ob_size;
576 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000577 int cmp;
578 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000579 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000580 if (cmp == 0)
581 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
582 if (cmp != 0)
583 return cmp;
584 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000585 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000586}
587
Guido van Rossum9bfef441993-03-29 10:43:31 +0000588static long
Fred Drakeba096332000-07-09 07:04:36 +0000589string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000590{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000591 register int len;
592 register unsigned char *p;
593 register long x;
594
595#ifdef CACHE_HASH
596 if (a->ob_shash != -1)
597 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000598#ifdef INTERN_STRINGS
599 if (a->ob_sinterned != NULL)
600 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000601 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000602#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000603#endif
604 len = a->ob_size;
605 p = (unsigned char *) a->ob_sval;
606 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000607 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000608 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000609 x ^= a->ob_size;
610 if (x == -1)
611 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000612#ifdef CACHE_HASH
613 a->ob_shash = x;
614#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000615 return x;
616}
617
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000618static int
Fred Drakeba096332000-07-09 07:04:36 +0000619string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000620{
621 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000622 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000623 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000624 return -1;
625 }
626 *ptr = (void *)self->ob_sval;
627 return self->ob_size;
628}
629
630static int
Fred Drakeba096332000-07-09 07:04:36 +0000631string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000632{
Guido van Rossum045e6881997-09-08 18:30:11 +0000633 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000634 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000635 return -1;
636}
637
638static int
Fred Drakeba096332000-07-09 07:04:36 +0000639string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000640{
641 if ( lenp )
642 *lenp = self->ob_size;
643 return 1;
644}
645
Guido van Rossum1db70701998-10-08 02:18:52 +0000646static int
Fred Drakeba096332000-07-09 07:04:36 +0000647string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000648{
649 if ( index != 0 ) {
650 PyErr_SetString(PyExc_SystemError,
651 "accessing non-existent string segment");
652 return -1;
653 }
654 *ptr = self->ob_sval;
655 return self->ob_size;
656}
657
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000658static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000659 (inquiry)string_length, /*sq_length*/
660 (binaryfunc)string_concat, /*sq_concat*/
661 (intargfunc)string_repeat, /*sq_repeat*/
662 (intargfunc)string_item, /*sq_item*/
663 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000664 0, /*sq_ass_item*/
665 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000666 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667};
668
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000669static PyBufferProcs string_as_buffer = {
670 (getreadbufferproc)string_buffer_getreadbuf,
671 (getwritebufferproc)string_buffer_getwritebuf,
672 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000673 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000674};
675
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000676
677
/* Strip-mode selectors.  NOTE(review): their users lie outside this part
   of the file; presumably they choose which end(s) of a string a strip
   operation trims -- confirm against the callers. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
681
682
683static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000684split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000685{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000686 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687 PyObject* item;
688 PyObject *list = PyList_New(0);
689
690 if (list == NULL)
691 return NULL;
692
Guido van Rossum4c08d552000-03-10 22:55:18 +0000693 for (i = j = 0; i < len; ) {
694 while (i < len && isspace(Py_CHARMASK(s[i])))
695 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000696 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000697 while (i < len && !isspace(Py_CHARMASK(s[i])))
698 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000699 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (maxsplit-- <= 0)
701 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 item = PyString_FromStringAndSize(s+j, (int)(i-j));
703 if (item == NULL)
704 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000705 err = PyList_Append(list, item);
706 Py_DECREF(item);
707 if (err < 0)
708 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000709 while (i < len && isspace(Py_CHARMASK(s[i])))
710 i++;
711 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000712 }
713 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000714 if (j < len) {
715 item = PyString_FromStringAndSize(s+j, (int)(len - j));
716 if (item == NULL)
717 goto finally;
718 err = PyList_Append(list, item);
719 Py_DECREF(item);
720 if (err < 0)
721 goto finally;
722 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000723 return list;
724 finally:
725 Py_DECREF(list);
726 return NULL;
727}
728
729
730static char split__doc__[] =
731"S.split([sep [,maxsplit]]) -> list of strings\n\
732\n\
733Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000734delimiter string. If maxsplit is given, at most maxsplit\n\
735splits are done. If sep is not specified, any whitespace string\n\
736is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000737
738static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000739string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740{
741 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000742 int maxsplit = -1;
743 const char *s = PyString_AS_STRING(self), *sub;
744 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000745
Guido van Rossum4c08d552000-03-10 22:55:18 +0000746 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000747 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000748 if (maxsplit < 0)
749 maxsplit = INT_MAX;
750 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000751 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000752 if (PyString_Check(subobj)) {
753 sub = PyString_AS_STRING(subobj);
754 n = PyString_GET_SIZE(subobj);
755 }
756 else if (PyUnicode_Check(subobj))
757 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
758 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
759 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000760 if (n == 0) {
761 PyErr_SetString(PyExc_ValueError, "empty separator");
762 return NULL;
763 }
764
765 list = PyList_New(0);
766 if (list == NULL)
767 return NULL;
768
769 i = j = 0;
770 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000771 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000772 if (maxsplit-- <= 0)
773 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000774 item = PyString_FromStringAndSize(s+j, (int)(i-j));
775 if (item == NULL)
776 goto fail;
777 err = PyList_Append(list, item);
778 Py_DECREF(item);
779 if (err < 0)
780 goto fail;
781 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000782 }
783 else
784 i++;
785 }
786 item = PyString_FromStringAndSize(s+j, (int)(len-j));
787 if (item == NULL)
788 goto fail;
789 err = PyList_Append(list, item);
790 Py_DECREF(item);
791 if (err < 0)
792 goto fail;
793
794 return list;
795
796 fail:
797 Py_DECREF(list);
798 return NULL;
799}
800
801
802static char join__doc__[] =
803"S.join(sequence) -> string\n\
804\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000805Return a string which is the concatenation of the strings in the\n\
806sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807
808static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000809string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810{
811 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000812 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814 char *p;
815 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000816 size_t sz = 0;
817 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000818 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000820 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821 return NULL;
822
Tim Peters19fe14e2001-01-19 03:03:47 +0000823 seq = PySequence_Fast(orig, "");
824 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000825 if (PyErr_ExceptionMatches(PyExc_TypeError))
826 PyErr_Format(PyExc_TypeError,
827 "sequence expected, %.80s found",
828 orig->ob_type->tp_name);
829 return NULL;
830 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000831
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000832 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000833 if (seqlen == 0) {
834 Py_DECREF(seq);
835 return PyString_FromString("");
836 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000838 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000839 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
840 PyErr_Format(PyExc_TypeError,
841 "sequence item 0: expected string,"
842 " %.80s found",
843 item->ob_type->tp_name);
844 Py_DECREF(seq);
845 return NULL;
846 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000847 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000848 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000849 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000850 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000851
Tim Peters19fe14e2001-01-19 03:03:47 +0000852 /* There are at least two things to join. Do a pre-pass to figure out
853 * the total amount of space we'll need (sz), see whether any argument
854 * is absurd, and defer to the Unicode join if appropriate.
855 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000856 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000857 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000858 item = PySequence_Fast_GET_ITEM(seq, i);
859 if (!PyString_Check(item)){
860 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000861 /* Defer to Unicode join.
862 * CAUTION: There's no gurantee that the
863 * original sequence can be iterated over
864 * again, so we must pass seq here.
865 */
866 PyObject *result;
867 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000868 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000869 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000870 }
871 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000872 "sequence item %i: expected string,"
873 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000874 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000875 Py_DECREF(seq);
876 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000877 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000878 sz += PyString_GET_SIZE(item);
879 if (i != 0)
880 sz += seplen;
881 if (sz < old_sz || sz > INT_MAX) {
882 PyErr_SetString(PyExc_OverflowError,
883 "join() is too long for a Python string");
884 Py_DECREF(seq);
885 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000886 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000887 }
888
889 /* Allocate result space. */
890 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
891 if (res == NULL) {
892 Py_DECREF(seq);
893 return NULL;
894 }
895
896 /* Catenate everything. */
897 p = PyString_AS_STRING(res);
898 for (i = 0; i < seqlen; ++i) {
899 size_t n;
900 item = PySequence_Fast_GET_ITEM(seq, i);
901 n = PyString_GET_SIZE(item);
902 memcpy(p, PyString_AS_STRING(item), n);
903 p += n;
904 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000905 memcpy(p, sep, seplen);
906 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000907 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000908 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000909
Jeremy Hylton49048292000-07-11 03:28:17 +0000910 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000911 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000912}
913
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000914static long
Fred Drakeba096332000-07-09 07:04:36 +0000915string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000916{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000917 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000918 int len = PyString_GET_SIZE(self);
919 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000920 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000921
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000922 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +0000923 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000924 return -2;
925 if (PyString_Check(subobj)) {
926 sub = PyString_AS_STRING(subobj);
927 n = PyString_GET_SIZE(subobj);
928 }
929 else if (PyUnicode_Check(subobj))
930 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
931 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000932 return -2;
933
934 if (last > len)
935 last = len;
936 if (last < 0)
937 last += len;
938 if (last < 0)
939 last = 0;
940 if (i < 0)
941 i += len;
942 if (i < 0)
943 i = 0;
944
Guido van Rossum4c08d552000-03-10 22:55:18 +0000945 if (dir > 0) {
946 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 last -= n;
949 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000950 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000951 return (long)i;
952 }
953 else {
954 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000955
Guido van Rossum4c08d552000-03-10 22:55:18 +0000956 if (n == 0 && i <= last)
957 return (long)last;
958 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000959 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000960 return (long)j;
961 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000963 return -1;
964}
965
966
967static char find__doc__[] =
968"S.find(sub [,start [,end]]) -> int\n\
969\n\
970Return the lowest index in S where substring sub is found,\n\
971such that sub is contained within s[start,end]. Optional\n\
972arguments start and end are interpreted as in slice notation.\n\
973\n\
974Return -1 on failure.";
975
976static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000977string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000978{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000979 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000980 if (result == -2)
981 return NULL;
982 return PyInt_FromLong(result);
983}
984
985
986static char index__doc__[] =
987"S.index(sub [,start [,end]]) -> int\n\
988\n\
989Like S.find() but raise ValueError when the substring is not found.";
990
991static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000992string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000993{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000994 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995 if (result == -2)
996 return NULL;
997 if (result == -1) {
998 PyErr_SetString(PyExc_ValueError,
999 "substring not found in string.index");
1000 return NULL;
1001 }
1002 return PyInt_FromLong(result);
1003}
1004
1005
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006static char rfind__doc__[] =
1007"S.rfind(sub [,start [,end]]) -> int\n\
1008\n\
1009Return the highest index in S where substring sub is found,\n\
1010such that sub is contained within s[start,end]. Optional\n\
1011arguments start and end are interpreted as in slice notation.\n\
1012\n\
1013Return -1 on failure.";
1014
1015static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001016string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001018 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001019 if (result == -2)
1020 return NULL;
1021 return PyInt_FromLong(result);
1022}
1023
1024
1025static char rindex__doc__[] =
1026"S.rindex(sub [,start [,end]]) -> int\n\
1027\n\
1028Like S.rfind() but raise ValueError when the substring is not found.";
1029
1030static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001031string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001033 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001034 if (result == -2)
1035 return NULL;
1036 if (result == -1) {
1037 PyErr_SetString(PyExc_ValueError,
1038 "substring not found in string.rindex");
1039 return NULL;
1040 }
1041 return PyInt_FromLong(result);
1042}
1043
1044
1045static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001046do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047{
1048 char *s = PyString_AS_STRING(self);
1049 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050
Guido van Rossum43713e52000-02-29 13:59:29 +00001051 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return NULL;
1053
1054 i = 0;
1055 if (striptype != RIGHTSTRIP) {
1056 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1057 i++;
1058 }
1059 }
1060
1061 j = len;
1062 if (striptype != LEFTSTRIP) {
1063 do {
1064 j--;
1065 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1066 j++;
1067 }
1068
1069 if (i == 0 && j == len) {
1070 Py_INCREF(self);
1071 return (PyObject*)self;
1072 }
1073 else
1074 return PyString_FromStringAndSize(s+i, j-i);
1075}
1076
1077
1078static char strip__doc__[] =
1079"S.strip() -> string\n\
1080\n\
1081Return a copy of the string S with leading and trailing\n\
1082whitespace removed.";
1083
1084static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001085string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086{
1087 return do_strip(self, args, BOTHSTRIP);
1088}
1089
1090
1091static char lstrip__doc__[] =
1092"S.lstrip() -> string\n\
1093\n\
1094Return a copy of the string S with leading whitespace removed.";
1095
1096static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001097string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001098{
1099 return do_strip(self, args, LEFTSTRIP);
1100}
1101
1102
1103static char rstrip__doc__[] =
1104"S.rstrip() -> string\n\
1105\n\
1106Return a copy of the string S with trailing whitespace removed.";
1107
1108static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001109string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001110{
1111 return do_strip(self, args, RIGHTSTRIP);
1112}
1113
1114
1115static char lower__doc__[] =
1116"S.lower() -> string\n\
1117\n\
1118Return a copy of the string S converted to lowercase.";
1119
1120static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001121string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001122{
1123 char *s = PyString_AS_STRING(self), *s_new;
1124 int i, n = PyString_GET_SIZE(self);
1125 PyObject *new;
1126
Guido van Rossum43713e52000-02-29 13:59:29 +00001127 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001128 return NULL;
1129 new = PyString_FromStringAndSize(NULL, n);
1130 if (new == NULL)
1131 return NULL;
1132 s_new = PyString_AsString(new);
1133 for (i = 0; i < n; i++) {
1134 int c = Py_CHARMASK(*s++);
1135 if (isupper(c)) {
1136 *s_new = tolower(c);
1137 } else
1138 *s_new = c;
1139 s_new++;
1140 }
1141 return new;
1142}
1143
1144
1145static char upper__doc__[] =
1146"S.upper() -> string\n\
1147\n\
1148Return a copy of the string S converted to uppercase.";
1149
1150static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001151string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152{
1153 char *s = PyString_AS_STRING(self), *s_new;
1154 int i, n = PyString_GET_SIZE(self);
1155 PyObject *new;
1156
Guido van Rossum43713e52000-02-29 13:59:29 +00001157 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001158 return NULL;
1159 new = PyString_FromStringAndSize(NULL, n);
1160 if (new == NULL)
1161 return NULL;
1162 s_new = PyString_AsString(new);
1163 for (i = 0; i < n; i++) {
1164 int c = Py_CHARMASK(*s++);
1165 if (islower(c)) {
1166 *s_new = toupper(c);
1167 } else
1168 *s_new = c;
1169 s_new++;
1170 }
1171 return new;
1172}
1173
1174
Guido van Rossum4c08d552000-03-10 22:55:18 +00001175static char title__doc__[] =
1176"S.title() -> string\n\
1177\n\
1178Return a titlecased version of S, i.e. words start with uppercase\n\
1179characters, all remaining cased characters have lowercase.";
1180
1181static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001182string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001183{
1184 char *s = PyString_AS_STRING(self), *s_new;
1185 int i, n = PyString_GET_SIZE(self);
1186 int previous_is_cased = 0;
1187 PyObject *new;
1188
1189 if (!PyArg_ParseTuple(args, ":title"))
1190 return NULL;
1191 new = PyString_FromStringAndSize(NULL, n);
1192 if (new == NULL)
1193 return NULL;
1194 s_new = PyString_AsString(new);
1195 for (i = 0; i < n; i++) {
1196 int c = Py_CHARMASK(*s++);
1197 if (islower(c)) {
1198 if (!previous_is_cased)
1199 c = toupper(c);
1200 previous_is_cased = 1;
1201 } else if (isupper(c)) {
1202 if (previous_is_cased)
1203 c = tolower(c);
1204 previous_is_cased = 1;
1205 } else
1206 previous_is_cased = 0;
1207 *s_new++ = c;
1208 }
1209 return new;
1210}
1211
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001212static char capitalize__doc__[] =
1213"S.capitalize() -> string\n\
1214\n\
1215Return a copy of the string S with only its first character\n\
1216capitalized.";
1217
1218static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001219string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220{
1221 char *s = PyString_AS_STRING(self), *s_new;
1222 int i, n = PyString_GET_SIZE(self);
1223 PyObject *new;
1224
Guido van Rossum43713e52000-02-29 13:59:29 +00001225 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 return NULL;
1227 new = PyString_FromStringAndSize(NULL, n);
1228 if (new == NULL)
1229 return NULL;
1230 s_new = PyString_AsString(new);
1231 if (0 < n) {
1232 int c = Py_CHARMASK(*s++);
1233 if (islower(c))
1234 *s_new = toupper(c);
1235 else
1236 *s_new = c;
1237 s_new++;
1238 }
1239 for (i = 1; i < n; i++) {
1240 int c = Py_CHARMASK(*s++);
1241 if (isupper(c))
1242 *s_new = tolower(c);
1243 else
1244 *s_new = c;
1245 s_new++;
1246 }
1247 return new;
1248}
1249
1250
1251static char count__doc__[] =
1252"S.count(sub[, start[, end]]) -> int\n\
1253\n\
1254Return the number of occurrences of substring sub in string\n\
1255S[start:end]. Optional arguments start and end are\n\
1256interpreted as in slice notation.";
1257
1258static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001259string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262 int len = PyString_GET_SIZE(self), n;
1263 int i = 0, last = INT_MAX;
1264 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266
Guido van Rossumc6821402000-05-08 14:08:05 +00001267 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1268 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001270
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271 if (PyString_Check(subobj)) {
1272 sub = PyString_AS_STRING(subobj);
1273 n = PyString_GET_SIZE(subobj);
1274 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001275 else if (PyUnicode_Check(subobj)) {
1276 int count;
1277 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1278 if (count == -1)
1279 return NULL;
1280 else
1281 return PyInt_FromLong((long) count);
1282 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001283 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1284 return NULL;
1285
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 if (last > len)
1287 last = len;
1288 if (last < 0)
1289 last += len;
1290 if (last < 0)
1291 last = 0;
1292 if (i < 0)
1293 i += len;
1294 if (i < 0)
1295 i = 0;
1296 m = last + 1 - n;
1297 if (n == 0)
1298 return PyInt_FromLong((long) (m-i));
1299
1300 r = 0;
1301 while (i < m) {
1302 if (!memcmp(s+i, sub, n)) {
1303 r++;
1304 i += n;
1305 } else {
1306 i++;
1307 }
1308 }
1309 return PyInt_FromLong((long) r);
1310}
1311
1312
1313static char swapcase__doc__[] =
1314"S.swapcase() -> string\n\
1315\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317converted to lowercase and vice versa.";
1318
1319static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001320string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
1322 char *s = PyString_AS_STRING(self), *s_new;
1323 int i, n = PyString_GET_SIZE(self);
1324 PyObject *new;
1325
Guido van Rossum43713e52000-02-29 13:59:29 +00001326 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327 return NULL;
1328 new = PyString_FromStringAndSize(NULL, n);
1329 if (new == NULL)
1330 return NULL;
1331 s_new = PyString_AsString(new);
1332 for (i = 0; i < n; i++) {
1333 int c = Py_CHARMASK(*s++);
1334 if (islower(c)) {
1335 *s_new = toupper(c);
1336 }
1337 else if (isupper(c)) {
1338 *s_new = tolower(c);
1339 }
1340 else
1341 *s_new = c;
1342 s_new++;
1343 }
1344 return new;
1345}
1346
1347
1348static char translate__doc__[] =
1349"S.translate(table [,deletechars]) -> string\n\
1350\n\
1351Return a copy of the string S, where all characters occurring\n\
1352in the optional argument deletechars are removed, and the\n\
1353remaining characters have been mapped through the given\n\
1354translation table, which must be a string of length 256.";
1355
1356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001357string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 register char *input, *output;
1360 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001361 register int i, c, changed = 0;
1362 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 int inlen, tablen, dellen = 0;
1365 PyObject *result;
1366 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368
Guido van Rossum4c08d552000-03-10 22:55:18 +00001369 if (!PyArg_ParseTuple(args, "O|O:translate",
1370 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372
1373 if (PyString_Check(tableobj)) {
1374 table1 = PyString_AS_STRING(tableobj);
1375 tablen = PyString_GET_SIZE(tableobj);
1376 }
1377 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001378 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001379 parameter; instead a mapping to None will cause characters
1380 to be deleted. */
1381 if (delobj != NULL) {
1382 PyErr_SetString(PyExc_TypeError,
1383 "deletions are implemented differently for unicode");
1384 return NULL;
1385 }
1386 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1387 }
1388 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390
1391 if (delobj != NULL) {
1392 if (PyString_Check(delobj)) {
1393 del_table = PyString_AS_STRING(delobj);
1394 dellen = PyString_GET_SIZE(delobj);
1395 }
1396 else if (PyUnicode_Check(delobj)) {
1397 PyErr_SetString(PyExc_TypeError,
1398 "deletions are implemented differently for unicode");
1399 return NULL;
1400 }
1401 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1402 return NULL;
1403
1404 if (tablen != 256) {
1405 PyErr_SetString(PyExc_ValueError,
1406 "translation table must be 256 characters long");
1407 return NULL;
1408 }
1409 }
1410 else {
1411 del_table = NULL;
1412 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 }
1414
1415 table = table1;
1416 inlen = PyString_Size(input_obj);
1417 result = PyString_FromStringAndSize((char *)NULL, inlen);
1418 if (result == NULL)
1419 return NULL;
1420 output_start = output = PyString_AsString(result);
1421 input = PyString_AsString(input_obj);
1422
1423 if (dellen == 0) {
1424 /* If no deletions are required, use faster code */
1425 for (i = inlen; --i >= 0; ) {
1426 c = Py_CHARMASK(*input++);
1427 if (Py_CHARMASK((*output++ = table[c])) != c)
1428 changed = 1;
1429 }
1430 if (changed)
1431 return result;
1432 Py_DECREF(result);
1433 Py_INCREF(input_obj);
1434 return input_obj;
1435 }
1436
1437 for (i = 0; i < 256; i++)
1438 trans_table[i] = Py_CHARMASK(table[i]);
1439
1440 for (i = 0; i < dellen; i++)
1441 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1442
1443 for (i = inlen; --i >= 0; ) {
1444 c = Py_CHARMASK(*input++);
1445 if (trans_table[c] != -1)
1446 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1447 continue;
1448 changed = 1;
1449 }
1450 if (!changed) {
1451 Py_DECREF(result);
1452 Py_INCREF(input_obj);
1453 return input_obj;
1454 }
1455 /* Fix the size of the resulting string */
1456 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1457 return NULL;
1458 return result;
1459}
1460
1461
1462/* What follows is used for implementing replace(). Perry Stoll. */
1463
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, the function returns -1.

  PAT must hold at least one byte (pat_len >= 1): pat[0] is read
  before the length is consulted.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	register int ii;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
			return ii;
		}
	}
	return -1;
}
1489
/*
   mymemcnt

   Return the number of distinct (non-overlapping) times PAT is found
   in MEM, scanning left to right with mymemfind().
   meaning mem=1111 and pat==11 returns 2.
           mem=11111 and pat==11 also return 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	register int offset = 0;
	int nfound = 0;

	while (len >= 0) {
		offset = mymemfind(mem, len, pat, pat_len);
		if (offset == -1)
			break;
		/* Resume just past this match so matches never overlap. */
		mem += offset + pat_len;
		len -= offset + pat_len;
		nfound++;
	}
	return nfound;
}
1513
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller owns it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growing replacements can push the result past INT_MAX; report
	   that as an allocation failure rather than overflowing. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result may be empty (every byte replaced by nothing).  A
	   zero-byte allocation is allowed to return NULL, which would
	   be mistaken for out-of-memory, so always request >= 1 byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1591
1592
/* Docstring for S.replace().  The optional third argument is a
   replacement count (string_replace parses it into `count`), so it is
   named accordingly rather than "maxsplit". */
static char replace__doc__[] =
"S.replace (old, new[, count]) -> string\n\
\n\
Return a copy of string S with all occurrences of substring\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.";
1599
1600static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001601string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001603 const char *str = PyString_AS_STRING(self), *sub, *repl;
1604 char *new_s;
1605 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1606 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609
Guido van Rossum4c08d552000-03-10 22:55:18 +00001610 if (!PyArg_ParseTuple(args, "OO|i:replace",
1611 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001613
1614 if (PyString_Check(subobj)) {
1615 sub = PyString_AS_STRING(subobj);
1616 sub_len = PyString_GET_SIZE(subobj);
1617 }
1618 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001619 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 subobj, replobj, count);
1621 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1622 return NULL;
1623
1624 if (PyString_Check(replobj)) {
1625 repl = PyString_AS_STRING(replobj);
1626 repl_len = PyString_GET_SIZE(replobj);
1627 }
1628 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001629 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001630 subobj, replobj, count);
1631 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1632 return NULL;
1633
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001634 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001635 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636 return NULL;
1637 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001638 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 if (new_s == NULL) {
1640 PyErr_NoMemory();
1641 return NULL;
1642 }
1643 if (out_len == -1) {
1644 /* we're returning another reference to self */
1645 new = (PyObject*)self;
1646 Py_INCREF(new);
1647 }
1648 else {
1649 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001650 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 }
1652 return new;
1653}
1654
1655
1656static char startswith__doc__[] =
1657"S.startswith(prefix[, start[, end]]) -> int\n\
1658\n\
1659Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1660optional start, test S beginning at that position. With optional end, stop\n\
1661comparing S at that position.";
1662
1663static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001664string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001666 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 int plen;
1670 int start = 0;
1671 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001673
Guido van Rossumc6821402000-05-08 14:08:05 +00001674 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1675 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676 return NULL;
1677 if (PyString_Check(subobj)) {
1678 prefix = PyString_AS_STRING(subobj);
1679 plen = PyString_GET_SIZE(subobj);
1680 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001681 else if (PyUnicode_Check(subobj)) {
1682 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001683 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001684 subobj, start, end, -1);
1685 if (rc == -1)
1686 return NULL;
1687 else
1688 return PyInt_FromLong((long) rc);
1689 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 return NULL;
1692
1693 /* adopt Java semantics for index out of range. it is legal for
1694 * offset to be == plen, but this only returns true if prefix is
1695 * the empty string.
1696 */
1697 if (start < 0 || start+plen > len)
1698 return PyInt_FromLong(0);
1699
1700 if (!memcmp(str+start, prefix, plen)) {
1701 /* did the match end after the specified end? */
1702 if (end < 0)
1703 return PyInt_FromLong(1);
1704 else if (end - start < plen)
1705 return PyInt_FromLong(0);
1706 else
1707 return PyInt_FromLong(1);
1708 }
1709 else return PyInt_FromLong(0);
1710}
1711
1712
1713static char endswith__doc__[] =
1714"S.endswith(suffix[, start[, end]]) -> int\n\
1715\n\
1716Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1717optional start, test S beginning at that position. With optional end, stop\n\
1718comparing S at that position.";
1719
1720static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001721string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001725 const char* suffix;
1726 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 int start = 0;
1728 int end = -1;
1729 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731
Guido van Rossumc6821402000-05-08 14:08:05 +00001732 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1733 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 return NULL;
1735 if (PyString_Check(subobj)) {
1736 suffix = PyString_AS_STRING(subobj);
1737 slen = PyString_GET_SIZE(subobj);
1738 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001739 else if (PyUnicode_Check(subobj)) {
1740 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001741 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001742 subobj, start, end, +1);
1743 if (rc == -1)
1744 return NULL;
1745 else
1746 return PyInt_FromLong((long) rc);
1747 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749 return NULL;
1750
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 return PyInt_FromLong(0);
1753
1754 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 return PyInt_FromLong(1);
1759 else return PyInt_FromLong(0);
1760}
1761
1762
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001763static char encode__doc__[] =
1764"S.encode([encoding[,errors]]) -> string\n\
1765\n\
1766Return an encoded string version of S. Default encoding is the current\n\
1767default string encoding. errors may be given to set a different error\n\
1768handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1769a ValueError. Other possible values are 'ignore' and 'replace'.";
1770
1771static PyObject *
1772string_encode(PyStringObject *self, PyObject *args)
1773{
1774 char *encoding = NULL;
1775 char *errors = NULL;
1776 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1777 return NULL;
1778 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1779}
1780
1781
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782static char expandtabs__doc__[] =
1783"S.expandtabs([tabsize]) -> string\n\
1784\n\
1785Return a copy of S where all tab characters are expanded using spaces.\n\
1786If tabsize is not given, a tab size of 8 characters is assumed.";
1787
1788static PyObject*
1789string_expandtabs(PyStringObject *self, PyObject *args)
1790{
1791 const char *e, *p;
1792 char *q;
1793 int i, j;
1794 PyObject *u;
1795 int tabsize = 8;
1796
1797 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1798 return NULL;
1799
Thomas Wouters7e474022000-07-16 12:04:32 +00001800 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001801 i = j = 0;
1802 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1803 for (p = PyString_AS_STRING(self); p < e; p++)
1804 if (*p == '\t') {
1805 if (tabsize > 0)
1806 j += tabsize - (j % tabsize);
1807 }
1808 else {
1809 j++;
1810 if (*p == '\n' || *p == '\r') {
1811 i += j;
1812 j = 0;
1813 }
1814 }
1815
1816 /* Second pass: create output string and fill it */
1817 u = PyString_FromStringAndSize(NULL, i + j);
1818 if (!u)
1819 return NULL;
1820
1821 j = 0;
1822 q = PyString_AS_STRING(u);
1823
1824 for (p = PyString_AS_STRING(self); p < e; p++)
1825 if (*p == '\t') {
1826 if (tabsize > 0) {
1827 i = tabsize - (j % tabsize);
1828 j += i;
1829 while (i--)
1830 *q++ = ' ';
1831 }
1832 }
1833 else {
1834 j++;
1835 *q++ = *p;
1836 if (*p == '\n' || *p == '\r')
1837 j = 0;
1838 }
1839
1840 return u;
1841}
1842
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001843static
1844PyObject *pad(PyStringObject *self,
1845 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001846 int right,
1847 char fill)
1848{
1849 PyObject *u;
1850
1851 if (left < 0)
1852 left = 0;
1853 if (right < 0)
1854 right = 0;
1855
1856 if (left == 0 && right == 0) {
1857 Py_INCREF(self);
1858 return (PyObject *)self;
1859 }
1860
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001861 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001862 left + PyString_GET_SIZE(self) + right);
1863 if (u) {
1864 if (left)
1865 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001866 memcpy(PyString_AS_STRING(u) + left,
1867 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 PyString_GET_SIZE(self));
1869 if (right)
1870 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1871 fill, right);
1872 }
1873
1874 return u;
1875}
1876
1877static char ljust__doc__[] =
1878"S.ljust(width) -> string\n\
1879\n\
1880Return S left justified in a string of length width. Padding is\n\
1881done using spaces.";
1882
1883static PyObject *
1884string_ljust(PyStringObject *self, PyObject *args)
1885{
1886 int width;
1887 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1888 return NULL;
1889
1890 if (PyString_GET_SIZE(self) >= width) {
1891 Py_INCREF(self);
1892 return (PyObject*) self;
1893 }
1894
1895 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1896}
1897
1898
1899static char rjust__doc__[] =
1900"S.rjust(width) -> string\n\
1901\n\
1902Return S right justified in a string of length width. Padding is\n\
1903done using spaces.";
1904
1905static PyObject *
1906string_rjust(PyStringObject *self, PyObject *args)
1907{
1908 int width;
1909 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1910 return NULL;
1911
1912 if (PyString_GET_SIZE(self) >= width) {
1913 Py_INCREF(self);
1914 return (PyObject*) self;
1915 }
1916
1917 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1918}
1919
1920
1921static char center__doc__[] =
1922"S.center(width) -> string\n\
1923\n\
1924Return S centered in a string of length width. Padding is done\n\
1925using spaces.";
1926
1927static PyObject *
1928string_center(PyStringObject *self, PyObject *args)
1929{
1930 int marg, left;
1931 int width;
1932
1933 if (!PyArg_ParseTuple(args, "i:center", &width))
1934 return NULL;
1935
1936 if (PyString_GET_SIZE(self) >= width) {
1937 Py_INCREF(self);
1938 return (PyObject*) self;
1939 }
1940
1941 marg = width - PyString_GET_SIZE(self);
1942 left = marg / 2 + (marg & width & 1);
1943
1944 return pad(self, left, marg - left, ' ');
1945}
1946
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out): pad on
   the left with '0' and, if the original text started with a sign,
   move that sign in front of the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    char *digits;
    PyObject *padded;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* digits[zeros] is the first character of the original text */
    digits = PyString_AS_STRING(padded);
    switch (digits[zeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        digits[0] = digits[zeros];
        digits[zeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1986
1987static char isspace__doc__[] =
1988"S.isspace() -> int\n\
1989\n\
1990Return 1 if there are only whitespace characters in S,\n\
19910 otherwise.";
1992
1993static PyObject*
1994string_isspace(PyStringObject *self, PyObject *args)
1995{
Fred Drakeba096332000-07-09 07:04:36 +00001996 register const unsigned char *p
1997 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001998 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001999
2000 if (!PyArg_NoArgs(args))
2001 return NULL;
2002
2003 /* Shortcut for single character strings */
2004 if (PyString_GET_SIZE(self) == 1 &&
2005 isspace(*p))
2006 return PyInt_FromLong(1);
2007
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002008 /* Special case for empty strings */
2009 if (PyString_GET_SIZE(self) == 0)
2010 return PyInt_FromLong(0);
2011
Guido van Rossum4c08d552000-03-10 22:55:18 +00002012 e = p + PyString_GET_SIZE(self);
2013 for (; p < e; p++) {
2014 if (!isspace(*p))
2015 return PyInt_FromLong(0);
2016 }
2017 return PyInt_FromLong(1);
2018}
2019
2020
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002021static char isalpha__doc__[] =
2022"S.isalpha() -> int\n\
2023\n\
2024Return 1 if all characters in S are alphabetic\n\
2025and there is at least one character in S, 0 otherwise.";
2026
2027static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002028string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002029{
Fred Drakeba096332000-07-09 07:04:36 +00002030 register const unsigned char *p
2031 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002032 register const unsigned char *e;
2033
2034 if (!PyArg_NoArgs(args))
2035 return NULL;
2036
2037 /* Shortcut for single character strings */
2038 if (PyString_GET_SIZE(self) == 1 &&
2039 isalpha(*p))
2040 return PyInt_FromLong(1);
2041
2042 /* Special case for empty strings */
2043 if (PyString_GET_SIZE(self) == 0)
2044 return PyInt_FromLong(0);
2045
2046 e = p + PyString_GET_SIZE(self);
2047 for (; p < e; p++) {
2048 if (!isalpha(*p))
2049 return PyInt_FromLong(0);
2050 }
2051 return PyInt_FromLong(1);
2052}
2053
2054
2055static char isalnum__doc__[] =
2056"S.isalnum() -> int\n\
2057\n\
2058Return 1 if all characters in S are alphanumeric\n\
2059and there is at least one character in S, 0 otherwise.";
2060
2061static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002062string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002063{
Fred Drakeba096332000-07-09 07:04:36 +00002064 register const unsigned char *p
2065 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002066 register const unsigned char *e;
2067
2068 if (!PyArg_NoArgs(args))
2069 return NULL;
2070
2071 /* Shortcut for single character strings */
2072 if (PyString_GET_SIZE(self) == 1 &&
2073 isalnum(*p))
2074 return PyInt_FromLong(1);
2075
2076 /* Special case for empty strings */
2077 if (PyString_GET_SIZE(self) == 0)
2078 return PyInt_FromLong(0);
2079
2080 e = p + PyString_GET_SIZE(self);
2081 for (; p < e; p++) {
2082 if (!isalnum(*p))
2083 return PyInt_FromLong(0);
2084 }
2085 return PyInt_FromLong(1);
2086}
2087
2088
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089static char isdigit__doc__[] =
2090"S.isdigit() -> int\n\
2091\n\
2092Return 1 if there are only digit characters in S,\n\
20930 otherwise.";
2094
2095static PyObject*
2096string_isdigit(PyStringObject *self, PyObject *args)
2097{
Fred Drakeba096332000-07-09 07:04:36 +00002098 register const unsigned char *p
2099 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002100 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101
2102 if (!PyArg_NoArgs(args))
2103 return NULL;
2104
2105 /* Shortcut for single character strings */
2106 if (PyString_GET_SIZE(self) == 1 &&
2107 isdigit(*p))
2108 return PyInt_FromLong(1);
2109
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002110 /* Special case for empty strings */
2111 if (PyString_GET_SIZE(self) == 0)
2112 return PyInt_FromLong(0);
2113
Guido van Rossum4c08d552000-03-10 22:55:18 +00002114 e = p + PyString_GET_SIZE(self);
2115 for (; p < e; p++) {
2116 if (!isdigit(*p))
2117 return PyInt_FromLong(0);
2118 }
2119 return PyInt_FromLong(1);
2120}
2121
2122
2123static char islower__doc__[] =
2124"S.islower() -> int\n\
2125\n\
2126Return 1 if all cased characters in S are lowercase and there is\n\
2127at least one cased character in S, 0 otherwise.";
2128
2129static PyObject*
2130string_islower(PyStringObject *self, PyObject *args)
2131{
Fred Drakeba096332000-07-09 07:04:36 +00002132 register const unsigned char *p
2133 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002134 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002135 int cased;
2136
2137 if (!PyArg_NoArgs(args))
2138 return NULL;
2139
2140 /* Shortcut for single character strings */
2141 if (PyString_GET_SIZE(self) == 1)
2142 return PyInt_FromLong(islower(*p) != 0);
2143
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002144 /* Special case for empty strings */
2145 if (PyString_GET_SIZE(self) == 0)
2146 return PyInt_FromLong(0);
2147
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 e = p + PyString_GET_SIZE(self);
2149 cased = 0;
2150 for (; p < e; p++) {
2151 if (isupper(*p))
2152 return PyInt_FromLong(0);
2153 else if (!cased && islower(*p))
2154 cased = 1;
2155 }
2156 return PyInt_FromLong(cased);
2157}
2158
2159
2160static char isupper__doc__[] =
2161"S.isupper() -> int\n\
2162\n\
2163Return 1 if all cased characters in S are uppercase and there is\n\
2164at least one cased character in S, 0 otherwise.";
2165
2166static PyObject*
2167string_isupper(PyStringObject *self, PyObject *args)
2168{
Fred Drakeba096332000-07-09 07:04:36 +00002169 register const unsigned char *p
2170 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002171 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 int cased;
2173
2174 if (!PyArg_NoArgs(args))
2175 return NULL;
2176
2177 /* Shortcut for single character strings */
2178 if (PyString_GET_SIZE(self) == 1)
2179 return PyInt_FromLong(isupper(*p) != 0);
2180
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002181 /* Special case for empty strings */
2182 if (PyString_GET_SIZE(self) == 0)
2183 return PyInt_FromLong(0);
2184
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 e = p + PyString_GET_SIZE(self);
2186 cased = 0;
2187 for (; p < e; p++) {
2188 if (islower(*p))
2189 return PyInt_FromLong(0);
2190 else if (!cased && isupper(*p))
2191 cased = 1;
2192 }
2193 return PyInt_FromLong(cased);
2194}
2195
2196
2197static char istitle__doc__[] =
2198"S.istitle() -> int\n\
2199\n\
2200Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2201may only follow uncased characters and lowercase characters only cased\n\
2202ones. Return 0 otherwise.";
2203
2204static PyObject*
2205string_istitle(PyStringObject *self, PyObject *args)
2206{
Fred Drakeba096332000-07-09 07:04:36 +00002207 register const unsigned char *p
2208 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002209 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 int cased, previous_is_cased;
2211
2212 if (!PyArg_NoArgs(args))
2213 return NULL;
2214
2215 /* Shortcut for single character strings */
2216 if (PyString_GET_SIZE(self) == 1)
2217 return PyInt_FromLong(isupper(*p) != 0);
2218
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002219 /* Special case for empty strings */
2220 if (PyString_GET_SIZE(self) == 0)
2221 return PyInt_FromLong(0);
2222
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 e = p + PyString_GET_SIZE(self);
2224 cased = 0;
2225 previous_is_cased = 0;
2226 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002227 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228
2229 if (isupper(ch)) {
2230 if (previous_is_cased)
2231 return PyInt_FromLong(0);
2232 previous_is_cased = 1;
2233 cased = 1;
2234 }
2235 else if (islower(ch)) {
2236 if (!previous_is_cased)
2237 return PyInt_FromLong(0);
2238 previous_is_cased = 1;
2239 cased = 1;
2240 }
2241 else
2242 previous_is_cased = 0;
2243 }
2244 return PyInt_FromLong(cased);
2245}
2246
2247
2248static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002249"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002250\n\
2251Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002252Line breaks are not included in the resulting list unless keepends\n\
2253is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
2255#define SPLIT_APPEND(data, left, right) \
2256 str = PyString_FromStringAndSize(data + left, right - left); \
2257 if (!str) \
2258 goto onError; \
2259 if (PyList_Append(list, str)) { \
2260 Py_DECREF(str); \
2261 goto onError; \
2262 } \
2263 else \
2264 Py_DECREF(str);
2265
2266static PyObject*
2267string_splitlines(PyStringObject *self, PyObject *args)
2268{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 register int i;
2270 register int j;
2271 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002272 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 PyObject *list;
2274 PyObject *str;
2275 char *data;
2276
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002277 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 return NULL;
2279
2280 data = PyString_AS_STRING(self);
2281 len = PyString_GET_SIZE(self);
2282
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 list = PyList_New(0);
2284 if (!list)
2285 goto onError;
2286
2287 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002288 int eol;
2289
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290 /* Find a line and append it */
2291 while (i < len && data[i] != '\n' && data[i] != '\r')
2292 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293
2294 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002295 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 if (i < len) {
2297 if (data[i] == '\r' && i + 1 < len &&
2298 data[i+1] == '\n')
2299 i += 2;
2300 else
2301 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002302 if (keepends)
2303 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002305 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 j = i;
2307 }
2308 if (j < len) {
2309 SPLIT_APPEND(data, j, len);
2310 }
2311
2312 return list;
2313
2314 onError:
2315 Py_DECREF(list);
2316 return NULL;
2317}
2318
2319#undef SPLIT_APPEND
2320
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002322static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 /* Counterparts of the obsolete stropmodule functions; except
2325 string.maketrans(). */
2326 {"join", (PyCFunction)string_join, 1, join__doc__},
2327 {"split", (PyCFunction)string_split, 1, split__doc__},
2328 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2329 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2330 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2331 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2332 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2333 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2334 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002335 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2336 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2338 {"count", (PyCFunction)string_count, 1, count__doc__},
2339 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2340 {"find", (PyCFunction)string_find, 1, find__doc__},
2341 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2344 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2345 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2346 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2348 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2349 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2351 {"title", (PyCFunction)string_title, 1, title__doc__},
2352 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2353 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2354 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002355 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2357 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2358#if 0
2359 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2360#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 {NULL, NULL} /* sentinel */
2362};
2363
2364static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002365string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366{
2367 return Py_FindMethod(string_methods, (PyObject*)s, name);
2368}
2369
2370
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002371PyTypeObject PyString_Type = {
2372 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002373 0,
2374 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002375 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002376 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002377 (destructor)string_dealloc, /*tp_dealloc*/
2378 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002380 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002381 (cmpfunc)string_compare, /*tp_compare*/
2382 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002383 0, /*tp_as_number*/
2384 &string_as_sequence, /*tp_as_sequence*/
2385 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002386 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002387 0, /*tp_call*/
Guido van Rossum189f1df2001-05-01 16:51:53 +00002388 (reprfunc)string_str, /*tp_str*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002389 0, /*tp_getattro*/
2390 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002391 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002392 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002393 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002394};
2395
2396void
Fred Drakeba096332000-07-09 07:04:36 +00002397PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002400 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002401 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 if (w == NULL || !PyString_Check(*pv)) {
2403 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002404 *pv = NULL;
2405 return;
2406 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002407 v = string_concat((PyStringObject *) *pv, w);
2408 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002409 *pv = v;
2410}
2411
Guido van Rossum013142a1994-08-30 08:19:36 +00002412void
Fred Drakeba096332000-07-09 07:04:36 +00002413PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002414{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002415 PyString_Concat(pv, w);
2416 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002417}
2418
2419
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002420/* The following function breaks the notion that strings are immutable:
2421 it changes the size of a string. We get away with this only if there
2422 is only one module referencing the object. You can also think of it
2423 as creating a new string object and destroying the old one, only
2424 more efficiently. In any case, don't use this if the string may
2425 already be known to some other part of the code... */
2426
2427int
Fred Drakeba096332000-07-09 07:04:36 +00002428_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002429{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002430 register PyObject *v;
2431 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002432 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002433 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002434 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002435 Py_DECREF(v);
2436 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002437 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002438 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002439 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002440#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002441 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002442#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002443 _Py_ForgetReference(v);
2444 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002445 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002446 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002447 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002448 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002449 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002450 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002451 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002452 _Py_NewReference(*pv);
2453 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002454 sv->ob_size = newsize;
2455 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002456 return 0;
2457}
Guido van Rossume5372401993-03-16 12:15:04 +00002458
2459/* Helpers for formatstring */
2460
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002461static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002462getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002463{
2464 int argidx = *p_argidx;
2465 if (argidx < arglen) {
2466 (*p_argidx)++;
2467 if (arglen < 0)
2468 return args;
2469 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002471 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002472 PyErr_SetString(PyExc_TypeError,
2473 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002474 return NULL;
2475}
2476
/* Format codes: one bit per flag character that may follow the '%' of
 * a conversion specification.  PyString_Format() ORs them into its
 * `flags` variable while scanning the specification.
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
2489
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002490static int
Fred Drakeba096332000-07-09 07:04:36 +00002491formatfloat(char *buf, size_t buflen, int flags,
2492 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002493{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002494 /* fmt = '%#.' + `prec` + `type`
2495 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002496 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002497 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002498 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002499 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002500 if (prec < 0)
2501 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002502 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2503 type = 'g';
2504 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002505 /* worst case length calc to ensure no buffer overrun:
2506 fmt = %#.<prec>g
2507 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002508 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002509 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2510 If prec=0 the effective precision is 1 (the leading digit is
2511 always given), therefore increase by one to 10+prec. */
2512 if (buflen <= (size_t)10 + (size_t)prec) {
2513 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002514 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002515 return -1;
2516 }
Guido van Rossume5372401993-03-16 12:15:04 +00002517 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002518 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002519}
2520
Tim Peters38fd5b62000-09-21 05:43:11 +00002521/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2522 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2523 * Python's regular ints.
2524 * Return value: a new PyString*, or NULL if error.
2525 * . *pbuf is set to point into it,
2526 * *plen set to the # of chars following that.
2527 * Caller must decref it when done using pbuf.
2528 * The string starting at *pbuf is of the form
2529 * "-"? ("0x" | "0X")? digit+
2530 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002531 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002532 * There will be at least prec digits, zero-filled on the left if
2533 * necessary to get that many.
2534 * val object to be converted
2535 * flags bitmask of format flags; only F_ALT is looked at
2536 * prec minimum number of digits; 0-fill on left if needed
2537 * type a character in [duoxX]; u acts the same as d
2538 *
2539 * CAUTION: o, x and X conversions on regular ints can never
2540 * produce a '-' sign, but can for Python's unbounded ints.
2541 */
2542PyObject*
2543_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2544 char **pbuf, int *plen)
2545{
2546 PyObject *result = NULL;
2547 char *buf;
2548 int i;
2549 int sign; /* 1 if '-', else 0 */
2550 int len; /* number of characters */
2551 int numdigits; /* len == numnondigits + numdigits */
2552 int numnondigits = 0;
2553
2554 switch (type) {
2555 case 'd':
2556 case 'u':
2557 result = val->ob_type->tp_str(val);
2558 break;
2559 case 'o':
2560 result = val->ob_type->tp_as_number->nb_oct(val);
2561 break;
2562 case 'x':
2563 case 'X':
2564 numnondigits = 2;
2565 result = val->ob_type->tp_as_number->nb_hex(val);
2566 break;
2567 default:
2568 assert(!"'type' not in [duoxX]");
2569 }
2570 if (!result)
2571 return NULL;
2572
2573 /* To modify the string in-place, there can only be one reference. */
2574 if (result->ob_refcnt != 1) {
2575 PyErr_BadInternalCall();
2576 return NULL;
2577 }
2578 buf = PyString_AsString(result);
2579 len = PyString_Size(result);
2580 if (buf[len-1] == 'L') {
2581 --len;
2582 buf[len] = '\0';
2583 }
2584 sign = buf[0] == '-';
2585 numnondigits += sign;
2586 numdigits = len - numnondigits;
2587 assert(numdigits > 0);
2588
Tim Petersfff53252001-04-12 18:38:48 +00002589 /* Get rid of base marker unless F_ALT */
2590 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002591 /* Need to skip 0x, 0X or 0. */
2592 int skipped = 0;
2593 switch (type) {
2594 case 'o':
2595 assert(buf[sign] == '0');
2596 /* If 0 is only digit, leave it alone. */
2597 if (numdigits > 1) {
2598 skipped = 1;
2599 --numdigits;
2600 }
2601 break;
2602 case 'x':
2603 case 'X':
2604 assert(buf[sign] == '0');
2605 assert(buf[sign + 1] == 'x');
2606 skipped = 2;
2607 numnondigits -= 2;
2608 break;
2609 }
2610 if (skipped) {
2611 buf += skipped;
2612 len -= skipped;
2613 if (sign)
2614 buf[0] = '-';
2615 }
2616 assert(len == numnondigits + numdigits);
2617 assert(numdigits > 0);
2618 }
2619
2620 /* Fill with leading zeroes to meet minimum width. */
2621 if (prec > numdigits) {
2622 PyObject *r1 = PyString_FromStringAndSize(NULL,
2623 numnondigits + prec);
2624 char *b1;
2625 if (!r1) {
2626 Py_DECREF(result);
2627 return NULL;
2628 }
2629 b1 = PyString_AS_STRING(r1);
2630 for (i = 0; i < numnondigits; ++i)
2631 *b1++ = *buf++;
2632 for (i = 0; i < prec - numdigits; i++)
2633 *b1++ = '0';
2634 for (i = 0; i < numdigits; i++)
2635 *b1++ = *buf++;
2636 *b1 = '\0';
2637 Py_DECREF(result);
2638 result = r1;
2639 buf = PyString_AS_STRING(result);
2640 len = numnondigits + prec;
2641 }
2642
2643 /* Fix up case for hex conversions. */
2644 switch (type) {
2645 case 'x':
2646 /* Need to convert all upper case letters to lower case. */
2647 for (i = 0; i < len; i++)
2648 if (buf[i] >= 'A' && buf[i] <= 'F')
2649 buf[i] += 'a'-'A';
2650 break;
2651 case 'X':
2652 /* Need to convert 0x to 0X (and -0x to -0X). */
2653 if (buf[sign + 1] == 'x')
2654 buf[sign + 1] = 'X';
2655 break;
2656 }
2657 *pbuf = buf;
2658 *plen = len;
2659 return result;
2660}
2661
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002662static int
Fred Drakeba096332000-07-09 07:04:36 +00002663formatint(char *buf, size_t buflen, int flags,
2664 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002665{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002666 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002667 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2668 + 1 + 1 = 24 */
2669 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002670 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002671 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002672 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002673 if (prec < 0)
2674 prec = 1;
2675 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002676 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002677 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002678 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002679 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002680 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002681 return -1;
2682 }
Guido van Rossume5372401993-03-16 12:15:04 +00002683 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002684 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2685 * but we want it (for consistency with other %#x conversions, and
2686 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002687 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2688 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2689 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002690 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002691 if (x == 0 &&
2692 (flags & F_ALT) &&
2693 (type == 'x' || type == 'X') &&
2694 buf[1] != (char)type) /* this last always true under std C */
2695 {
Tim Petersfff53252001-04-12 18:38:48 +00002696 memmove(buf+2, buf, strlen(buf) + 1);
2697 buf[0] = '0';
2698 buf[1] = (char)type;
2699 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002700 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002701}
2702
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002703static int
Fred Drakeba096332000-07-09 07:04:36 +00002704formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002705{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002706 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002707 if (PyString_Check(v)) {
2708 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002709 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002710 }
2711 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002712 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002713 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002714 }
2715 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002716 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002717}
2718
Guido van Rossum013142a1994-08-30 08:19:36 +00002719
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002729
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002731PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002732{
2733 char *fmt, *res;
2734 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002735 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002736 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002737 PyObject *dict = NULL;
2738 if (format == NULL || !PyString_Check(format) || args == NULL) {
2739 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002740 return NULL;
2741 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002742 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 fmt = PyString_AsString(format);
2744 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002745 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002746 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002747 if (result == NULL)
2748 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002749 res = PyString_AsString(result);
2750 if (PyTuple_Check(args)) {
2751 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002752 argidx = 0;
2753 }
2754 else {
2755 arglen = -1;
2756 argidx = -2;
2757 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002758 if (args->ob_type->tp_as_mapping)
2759 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002760 while (--fmtcnt >= 0) {
2761 if (*fmt != '%') {
2762 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002763 rescnt = fmtcnt + 100;
2764 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002765 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002766 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002767 res = PyString_AsString(result)
2768 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002769 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002770 }
2771 *res++ = *fmt++;
2772 }
2773 else {
2774 /* Got a format specifier */
2775 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002776 int width = -1;
2777 int prec = -1;
2778 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002779 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002780 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002781 PyObject *v = NULL;
2782 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002783 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002784 int sign;
2785 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002786 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002787 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002788 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002789
Guido van Rossumda9c2711996-12-05 21:58:58 +00002790 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002791 if (*fmt == '(') {
2792 char *keystart;
2793 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002794 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002795 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002796
2797 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002799 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002800 goto error;
2801 }
2802 ++fmt;
2803 --fmtcnt;
2804 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002805 /* Skip over balanced parentheses */
2806 while (pcount > 0 && --fmtcnt >= 0) {
2807 if (*fmt == ')')
2808 --pcount;
2809 else if (*fmt == '(')
2810 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002811 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002812 }
2813 keylen = fmt - keystart - 1;
2814 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002815 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002816 "incomplete format key");
2817 goto error;
2818 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002819 key = PyString_FromStringAndSize(keystart,
2820 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002821 if (key == NULL)
2822 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002823 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002825 args_owned = 0;
2826 }
2827 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002828 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002829 if (args == NULL) {
2830 goto error;
2831 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002832 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002833 arglen = -1;
2834 argidx = -2;
2835 }
Guido van Rossume5372401993-03-16 12:15:04 +00002836 while (--fmtcnt >= 0) {
2837 switch (c = *fmt++) {
2838 case '-': flags |= F_LJUST; continue;
2839 case '+': flags |= F_SIGN; continue;
2840 case ' ': flags |= F_BLANK; continue;
2841 case '#': flags |= F_ALT; continue;
2842 case '0': flags |= F_ZERO; continue;
2843 }
2844 break;
2845 }
2846 if (c == '*') {
2847 v = getnextarg(args, arglen, &argidx);
2848 if (v == NULL)
2849 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002850 if (!PyInt_Check(v)) {
2851 PyErr_SetString(PyExc_TypeError,
2852 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002853 goto error;
2854 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002855 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002856 if (width < 0) {
2857 flags |= F_LJUST;
2858 width = -width;
2859 }
Guido van Rossume5372401993-03-16 12:15:04 +00002860 if (--fmtcnt >= 0)
2861 c = *fmt++;
2862 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002863 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002864 width = c - '0';
2865 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002866 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002867 if (!isdigit(c))
2868 break;
2869 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002870 PyErr_SetString(
2871 PyExc_ValueError,
2872 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002873 goto error;
2874 }
2875 width = width*10 + (c - '0');
2876 }
2877 }
2878 if (c == '.') {
2879 prec = 0;
2880 if (--fmtcnt >= 0)
2881 c = *fmt++;
2882 if (c == '*') {
2883 v = getnextarg(args, arglen, &argidx);
2884 if (v == NULL)
2885 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002886 if (!PyInt_Check(v)) {
2887 PyErr_SetString(
2888 PyExc_TypeError,
2889 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002890 goto error;
2891 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002892 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002893 if (prec < 0)
2894 prec = 0;
2895 if (--fmtcnt >= 0)
2896 c = *fmt++;
2897 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002898 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002899 prec = c - '0';
2900 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002901 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002902 if (!isdigit(c))
2903 break;
2904 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002905 PyErr_SetString(
2906 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002907 "prec too big");
2908 goto error;
2909 }
2910 prec = prec*10 + (c - '0');
2911 }
2912 }
2913 } /* prec */
2914 if (fmtcnt >= 0) {
2915 if (c == 'h' || c == 'l' || c == 'L') {
2916 size = c;
2917 if (--fmtcnt >= 0)
2918 c = *fmt++;
2919 }
2920 }
2921 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002922 PyErr_SetString(PyExc_ValueError,
2923 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002924 goto error;
2925 }
2926 if (c != '%') {
2927 v = getnextarg(args, arglen, &argidx);
2928 if (v == NULL)
2929 goto error;
2930 }
2931 sign = 0;
2932 fill = ' ';
2933 switch (c) {
2934 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002935 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002936 len = 1;
2937 break;
2938 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002939 case 'r':
2940 if (PyUnicode_Check(v)) {
2941 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002942 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00002943 goto unicode;
2944 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002945 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002946 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002947 else
2948 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002949 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002950 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002951 if (!PyString_Check(temp)) {
2952 PyErr_SetString(PyExc_TypeError,
2953 "%s argument has non-string str()");
2954 goto error;
2955 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002956 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002957 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002958 if (prec >= 0 && len > prec)
2959 len = prec;
2960 break;
2961 case 'i':
2962 case 'd':
2963 case 'u':
2964 case 'o':
2965 case 'x':
2966 case 'X':
2967 if (c == 'i')
2968 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002969 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002970 temp = _PyString_FormatLong(v, flags,
2971 prec, c, &pbuf, &len);
2972 if (!temp)
2973 goto error;
2974 /* unbounded ints can always produce
2975 a sign character! */
2976 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002977 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002978 else {
2979 pbuf = formatbuf;
2980 len = formatint(pbuf, sizeof(formatbuf),
2981 flags, prec, c, v);
2982 if (len < 0)
2983 goto error;
2984 /* only d conversion is signed */
2985 sign = c == 'd';
2986 }
2987 if (flags & F_ZERO)
2988 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002989 break;
2990 case 'e':
2991 case 'E':
2992 case 'f':
2993 case 'g':
2994 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002995 pbuf = formatbuf;
2996 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002997 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002998 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002999 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003000 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003001 fill = '0';
3002 break;
3003 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003004 pbuf = formatbuf;
3005 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003006 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003007 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003008 break;
3009 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003010 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003011 "unsupported format character '%c' (0x%x) "
3012 "at index %i",
3013 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003014 goto error;
3015 }
3016 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003017 if (*pbuf == '-' || *pbuf == '+') {
3018 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003019 len--;
3020 }
3021 else if (flags & F_SIGN)
3022 sign = '+';
3023 else if (flags & F_BLANK)
3024 sign = ' ';
3025 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003026 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003027 }
3028 if (width < len)
3029 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003030 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003031 reslen -= rescnt;
3032 rescnt = width + fmtcnt + 100;
3033 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003034 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003035 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003036 res = PyString_AsString(result)
3037 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003038 }
3039 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003040 if (fill != ' ')
3041 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003042 rescnt--;
3043 if (width > len)
3044 width--;
3045 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003046 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3047 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003048 assert(pbuf[1] == c);
3049 if (fill != ' ') {
3050 *res++ = *pbuf++;
3051 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003052 }
Tim Petersfff53252001-04-12 18:38:48 +00003053 rescnt -= 2;
3054 width -= 2;
3055 if (width < 0)
3056 width = 0;
3057 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003058 }
3059 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003060 do {
3061 --rescnt;
3062 *res++ = fill;
3063 } while (--width > len);
3064 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003065 if (fill == ' ') {
3066 if (sign)
3067 *res++ = sign;
3068 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003069 (c == 'x' || c == 'X')) {
3070 assert(pbuf[0] == '0');
3071 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003072 *res++ = *pbuf++;
3073 *res++ = *pbuf++;
3074 }
3075 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003076 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003077 res += len;
3078 rescnt -= len;
3079 while (--width >= len) {
3080 --rescnt;
3081 *res++ = ' ';
3082 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003083 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003084 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003085 "not all arguments converted");
3086 goto error;
3087 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003088 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003089 } /* '%' */
3090 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003091 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003092 PyErr_SetString(PyExc_TypeError,
3093 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003094 goto error;
3095 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003096 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003097 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003098 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003099 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003100 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003101
3102 unicode:
3103 if (args_owned) {
3104 Py_DECREF(args);
3105 args_owned = 0;
3106 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003107 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003108 if (PyTuple_Check(orig_args) && argidx > 0) {
3109 PyObject *v;
3110 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3111 v = PyTuple_New(n);
3112 if (v == NULL)
3113 goto error;
3114 while (--n >= 0) {
3115 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3116 Py_INCREF(w);
3117 PyTuple_SET_ITEM(v, n, w);
3118 }
3119 args = v;
3120 } else {
3121 Py_INCREF(orig_args);
3122 args = orig_args;
3123 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003124 args_owned = 1;
3125 /* Take what we have of the result and let the Unicode formatting
3126 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003127 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003128 if (_PyString_Resize(&result, rescnt))
3129 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003130 fmtcnt = PyString_GET_SIZE(format) - \
3131 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003132 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3133 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003134 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003135 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003136 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003137 if (v == NULL)
3138 goto error;
3139 /* Paste what we have (result) to what the Unicode formatting
3140 function returned (v) and return the result (or error) */
3141 w = PyUnicode_Concat(result, v);
3142 Py_DECREF(result);
3143 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003144 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003145 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003146
Guido van Rossume5372401993-03-16 12:15:04 +00003147 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003148 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003149 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003150 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003151 }
Guido van Rossume5372401993-03-16 12:15:04 +00003152 return NULL;
3153}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003154
3155
3156#ifdef INTERN_STRINGS
3157
/* Dictionary of interned strings; created on first use by
 * PyString_InternInPlace(), which stores each interned string as both
 * key and value.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3171
3172void
Fred Drakeba096332000-07-09 07:04:36 +00003173PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003174{
3175 register PyStringObject *s = (PyStringObject *)(*p);
3176 PyObject *t;
3177 if (s == NULL || !PyString_Check(s))
3178 Py_FatalError("PyString_InternInPlace: strings only please!");
3179 if ((t = s->ob_sinterned) != NULL) {
3180 if (t == (PyObject *)s)
3181 return;
3182 Py_INCREF(t);
3183 *p = t;
3184 Py_DECREF(s);
3185 return;
3186 }
3187 if (interned == NULL) {
3188 interned = PyDict_New();
3189 if (interned == NULL)
3190 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003191 }
3192 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3193 Py_INCREF(t);
3194 *p = s->ob_sinterned = t;
3195 Py_DECREF(s);
3196 return;
3197 }
3198 t = (PyObject *)s;
3199 if (PyDict_SetItem(interned, t, t) == 0) {
3200 s->ob_sinterned = t;
3201 return;
3202 }
3203 PyErr_Clear();
3204}
3205
3206
3207PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003208PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003209{
3210 PyObject *s = PyString_FromString(cp);
3211 if (s == NULL)
3212 return NULL;
3213 PyString_InternInPlace(&s);
3214 return s;
3215}
3216
3217#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003218
3219void
Fred Drakeba096332000-07-09 07:04:36 +00003220PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003221{
3222 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003223 for (i = 0; i < UCHAR_MAX + 1; i++) {
3224 Py_XDECREF(characters[i]);
3225 characters[i] = NULL;
3226 }
3227#ifndef DONT_SHARE_SHORT_STRINGS
3228 Py_XDECREF(nullstring);
3229 nullstring = NULL;
3230#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003231#ifdef INTERN_STRINGS
3232 if (interned) {
3233 int pos, changed;
3234 PyObject *key, *value;
3235 do {
3236 changed = 0;
3237 pos = 0;
3238 while (PyDict_Next(interned, &pos, &key, &value)) {
3239 if (key->ob_refcnt == 2 && key == value) {
3240 PyDict_DelItem(interned, key);
3241 changed = 1;
3242 }
3243 }
3244 } while (changed);
3245 }
3246#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003247}
Barry Warsawa903ad982001-02-23 16:40:48 +00003248
3249#ifdef INTERN_STRINGS
3250void _Py_ReleaseInternedStrings(void)
3251{
3252 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003253 fprintf(stderr, "releasing interned strings\n");
3254 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003255 Py_DECREF(interned);
3256 interned = NULL;
3257 }
3258}
3259#endif /* INTERN_STRINGS */