blob: 6d25ddb4e792eb6503ace6e01b1879da58081421 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum013142a1994-08-30 08:19:36 +000015#include <ctype.h>
16
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017#ifdef COUNT_ALLOCS
18int null_strings, one_strings;
19#endif
20
Guido van Rossum03093a21994-09-28 15:51:32 +000021#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000022#include <limits.h>
23#else
24#ifndef UCHAR_MAX
25#define UCHAR_MAX 255
26#endif
27#endif
28
Guido van Rossumc0b618a1997-05-02 03:12:38 +000029static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000030#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000031static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000032#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000033
/*
   Newsizedstringobject() and newstringobject() (the routines now named
   PyString_FromStringAndSize() and PyString_FromString() below) try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000050PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000051{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 if (size == 0 && (op = nullstring) != NULL) {
55#ifdef COUNT_ALLOCS
56 null_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 if (size == 1 && str != NULL &&
62 (op = characters[*str & UCHAR_MAX]) != NULL)
63 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064#ifdef COUNT_ALLOCS
65 one_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000070#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
72 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000074 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078#ifdef CACHE_HASH
79 op->ob_shash = -1;
80#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000081#ifdef INTERN_STRINGS
82 op->ob_sinterned = NULL;
83#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000087#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
89 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
92 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000095#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000100PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000101{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000102 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000103 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000104 if (size > INT_MAX) {
105 PyErr_SetString(PyExc_OverflowError,
106 "string is too long for a Python string");
107 return NULL;
108 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000109#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 if (size == 0 && (op = nullstring) != NULL) {
111#ifdef COUNT_ALLOCS
112 null_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
117 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
118#ifdef COUNT_ALLOCS
119 one_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000124#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000125
126 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000127 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132#ifdef CACHE_HASH
133 op->ob_shash = -1;
134#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000135#ifdef INTERN_STRINGS
136 op->ob_sinterned = NULL;
137#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000138 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000139#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 if (size == 0) {
141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
144 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000147#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149}
150
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000151PyObject *PyString_Decode(const char *s,
152 int size,
153 const char *encoding,
154 const char *errors)
155{
156 PyObject *buffer = NULL, *str;
157
158 if (encoding == NULL)
159 encoding = PyUnicode_GetDefaultEncoding();
160
161 /* Decode via the codec registry */
162 buffer = PyBuffer_FromMemory((void *)s, size);
163 if (buffer == NULL)
164 goto onError;
165 str = PyCodec_Decode(buffer, encoding, errors);
166 if (str == NULL)
167 goto onError;
168 /* Convert Unicode to a string using the default encoding */
169 if (PyUnicode_Check(str)) {
170 PyObject *temp = str;
171 str = PyUnicode_AsEncodedString(str, NULL, NULL);
172 Py_DECREF(temp);
173 if (str == NULL)
174 goto onError;
175 }
176 if (!PyString_Check(str)) {
177 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000178 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000179 str->ob_type->tp_name);
180 Py_DECREF(str);
181 goto onError;
182 }
183 Py_DECREF(buffer);
184 return str;
185
186 onError:
187 Py_XDECREF(buffer);
188 return NULL;
189}
190
191PyObject *PyString_Encode(const char *s,
192 int size,
193 const char *encoding,
194 const char *errors)
195{
196 PyObject *v, *str;
197
198 str = PyString_FromStringAndSize(s, size);
199 if (str == NULL)
200 return NULL;
201 v = PyString_AsEncodedString(str, encoding, errors);
202 Py_DECREF(str);
203 return v;
204}
205
206PyObject *PyString_AsEncodedString(PyObject *str,
207 const char *encoding,
208 const char *errors)
209{
210 PyObject *v;
211
212 if (!PyString_Check(str)) {
213 PyErr_BadArgument();
214 goto onError;
215 }
216
217 if (encoding == NULL)
218 encoding = PyUnicode_GetDefaultEncoding();
219
220 /* Encode via the codec registry */
221 v = PyCodec_Encode(str, encoding, errors);
222 if (v == NULL)
223 goto onError;
224 /* Convert Unicode to a string using the default encoding */
225 if (PyUnicode_Check(v)) {
226 PyObject *temp = v;
227 v = PyUnicode_AsEncodedString(v, NULL, NULL);
228 Py_DECREF(temp);
229 if (v == NULL)
230 goto onError;
231 }
232 if (!PyString_Check(v)) {
233 PyErr_Format(PyExc_TypeError,
234 "encoder did not return a string object (type=%.400s)",
235 v->ob_type->tp_name);
236 Py_DECREF(v);
237 goto onError;
238 }
239 return v;
240
241 onError:
242 return NULL;
243}
244
Guido van Rossum234f9421993-06-17 12:35:49 +0000245static void
Fred Drakeba096332000-07-09 07:04:36 +0000246string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000247{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000248 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000249}
250
Guido van Rossumd7047b31995-01-02 19:07:15 +0000251int
Fred Drakeba096332000-07-09 07:04:36 +0000252PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000254 if (!PyString_Check(op)) {
255 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 return -1;
257 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000258 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259}
260
261/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000262PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000264 if (!PyString_Check(op)) {
265 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266 return NULL;
267 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000268 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269}
270
271/* Methods */
272
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000273static int
Fred Drakeba096332000-07-09 07:04:36 +0000274string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000275{
276 int i;
277 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000278 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000279 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000281 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000282 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000283 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000284
Thomas Wouters7e474022000-07-16 12:04:32 +0000285 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000286 quote = '\'';
287 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
288 quote = '"';
289
290 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291 for (i = 0; i < op->ob_size; i++) {
292 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000293 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000294 fprintf(fp, "\\%c", c);
295 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000296 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000298 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000300 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000301 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302}
303
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000305string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000306{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000307 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
308 PyObject *v;
309 if (newsize > INT_MAX) {
310 PyErr_SetString(PyExc_OverflowError,
311 "string is too large to make repr");
312 }
313 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000315 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000316 }
317 else {
318 register int i;
319 register char c;
320 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
322
Thomas Wouters7e474022000-07-16 12:04:32 +0000323 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000324 quote = '\'';
325 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
326 quote = '"';
327
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000328 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 for (i = 0; i < op->ob_size; i++) {
331 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000332 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 *p++ = '\\', *p++ = c;
334 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000335 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 while (*p != '\0')
337 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 }
339 else
340 *p++ = c;
341 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000342 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000344 _PyString_Resize(
345 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000346 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000347 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348}
349
350static int
Fred Drakeba096332000-07-09 07:04:36 +0000351string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352{
353 return a->ob_size;
354}
355
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000357string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358{
359 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000360 register PyStringObject *op;
361 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000362 if (PyUnicode_Check(bb))
363 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000364 PyErr_Format(PyExc_TypeError,
365 "cannot add type \"%.200s\" to string",
366 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367 return NULL;
368 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000369#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 /* Optimize cases with empty left or right operand */
371 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000372 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000373 return bb;
374 }
375 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000376 Py_INCREF(a);
377 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378 }
379 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000380 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000382 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000383 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000384 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000385 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000386#ifdef CACHE_HASH
387 op->ob_shash = -1;
388#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000389#ifdef INTERN_STRINGS
390 op->ob_sinterned = NULL;
391#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000392 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
393 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
394 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000395 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396#undef b
397}
398
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000400string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401{
402 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000403 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000404 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405 if (n < 0)
406 n = 0;
407 size = a->ob_size * n;
408 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 Py_INCREF(a);
410 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000411 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000412 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000413 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000414 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000415 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000416 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000417 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000418#ifdef CACHE_HASH
419 op->ob_shash = -1;
420#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000421#ifdef INTERN_STRINGS
422 op->ob_sinterned = NULL;
423#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000424 for (i = 0; i < size; i += a->ob_size)
425 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
426 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428}
429
430/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
431
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000432static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000433string_slice(register PyStringObject *a, register int i, register int j)
434 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435{
436 if (i < 0)
437 i = 0;
438 if (j < 0)
439 j = 0; /* Avoid signed/unsigned bug in next line */
440 if (j > a->ob_size)
441 j = a->ob_size;
442 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000443 Py_INCREF(a);
444 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000445 }
446 if (j < i)
447 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000448 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449}
450
Guido van Rossum9284a572000-03-07 15:53:43 +0000451static int
Fred Drakeba096332000-07-09 07:04:36 +0000452string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000453{
454 register char *s, *end;
455 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000456 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000457 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000458 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000459 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000460 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000461 return -1;
462 }
463 c = PyString_AsString(el)[0];
464 s = PyString_AsString(a);
465 end = s + PyString_Size(a);
466 while (s < end) {
467 if (c == *s++)
468 return 1;
469 }
470 return 0;
471}
472
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000473static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000474string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000475{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000476 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000477 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000478 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000480 return NULL;
481 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000482 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000484#ifdef COUNT_ALLOCS
485 if (v != NULL)
486 one_strings++;
487#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000488 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000490 if (v == NULL)
491 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492 characters[c] = (PyStringObject *) v;
493 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000494 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000495 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000496 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497}
498
499static int
Fred Drakeba096332000-07-09 07:04:36 +0000500string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501{
Guido van Rossum253919f1991-02-13 23:18:39 +0000502 int len_a = a->ob_size, len_b = b->ob_size;
503 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000504 int cmp;
505 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000506 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000507 if (cmp == 0)
508 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
509 if (cmp != 0)
510 return cmp;
511 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000512 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513}
514
Guido van Rossum9bfef441993-03-29 10:43:31 +0000515static long
Fred Drakeba096332000-07-09 07:04:36 +0000516string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000517{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000518 register int len;
519 register unsigned char *p;
520 register long x;
521
522#ifdef CACHE_HASH
523 if (a->ob_shash != -1)
524 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000525#ifdef INTERN_STRINGS
526 if (a->ob_sinterned != NULL)
527 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000528 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000529#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000530#endif
531 len = a->ob_size;
532 p = (unsigned char *) a->ob_sval;
533 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000534 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000535 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000536 x ^= a->ob_size;
537 if (x == -1)
538 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000539#ifdef CACHE_HASH
540 a->ob_shash = x;
541#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000542 return x;
543}
544
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000545static int
Fred Drakeba096332000-07-09 07:04:36 +0000546string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000547{
548 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000549 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000550 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000551 return -1;
552 }
553 *ptr = (void *)self->ob_sval;
554 return self->ob_size;
555}
556
557static int
Fred Drakeba096332000-07-09 07:04:36 +0000558string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000559{
Guido van Rossum045e6881997-09-08 18:30:11 +0000560 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000561 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000562 return -1;
563}
564
565static int
Fred Drakeba096332000-07-09 07:04:36 +0000566string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000567{
568 if ( lenp )
569 *lenp = self->ob_size;
570 return 1;
571}
572
Guido van Rossum1db70701998-10-08 02:18:52 +0000573static int
Fred Drakeba096332000-07-09 07:04:36 +0000574string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000575{
576 if ( index != 0 ) {
577 PyErr_SetString(PyExc_SystemError,
578 "accessing non-existent string segment");
579 return -1;
580 }
581 *ptr = self->ob_sval;
582 return self->ob_size;
583}
584
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000585static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000586 (inquiry)string_length, /*sq_length*/
587 (binaryfunc)string_concat, /*sq_concat*/
588 (intargfunc)string_repeat, /*sq_repeat*/
589 (intargfunc)string_item, /*sq_item*/
590 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000591 0, /*sq_ass_item*/
592 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000593 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594};
595
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000596static PyBufferProcs string_as_buffer = {
597 (getreadbufferproc)string_buffer_getreadbuf,
598 (getwritebufferproc)string_buffer_getwritebuf,
599 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000600 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000601};
602
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000603
604
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   helpers further down the file -- confirm against their definitions. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
608
609
610static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000611split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000612{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000613 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000614 PyObject* item;
615 PyObject *list = PyList_New(0);
616
617 if (list == NULL)
618 return NULL;
619
Guido van Rossum4c08d552000-03-10 22:55:18 +0000620 for (i = j = 0; i < len; ) {
621 while (i < len && isspace(Py_CHARMASK(s[i])))
622 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000623 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000624 while (i < len && !isspace(Py_CHARMASK(s[i])))
625 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000626 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000627 if (maxsplit-- <= 0)
628 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000629 item = PyString_FromStringAndSize(s+j, (int)(i-j));
630 if (item == NULL)
631 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000632 err = PyList_Append(list, item);
633 Py_DECREF(item);
634 if (err < 0)
635 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000636 while (i < len && isspace(Py_CHARMASK(s[i])))
637 i++;
638 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000639 }
640 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000641 if (j < len) {
642 item = PyString_FromStringAndSize(s+j, (int)(len - j));
643 if (item == NULL)
644 goto finally;
645 err = PyList_Append(list, item);
646 Py_DECREF(item);
647 if (err < 0)
648 goto finally;
649 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000650 return list;
651 finally:
652 Py_DECREF(list);
653 return NULL;
654}
655
656
/* Documentation string for the string split() method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000664
665static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000666string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000667{
668 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000669 int maxsplit = -1;
670 const char *s = PyString_AS_STRING(self), *sub;
671 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000672
Guido van Rossum4c08d552000-03-10 22:55:18 +0000673 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000674 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000675 if (maxsplit < 0)
676 maxsplit = INT_MAX;
677 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000678 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000679 if (PyString_Check(subobj)) {
680 sub = PyString_AS_STRING(subobj);
681 n = PyString_GET_SIZE(subobj);
682 }
683 else if (PyUnicode_Check(subobj))
684 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
685 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
686 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687 if (n == 0) {
688 PyErr_SetString(PyExc_ValueError, "empty separator");
689 return NULL;
690 }
691
692 list = PyList_New(0);
693 if (list == NULL)
694 return NULL;
695
696 i = j = 0;
697 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000698 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000699 if (maxsplit-- <= 0)
700 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000701 item = PyString_FromStringAndSize(s+j, (int)(i-j));
702 if (item == NULL)
703 goto fail;
704 err = PyList_Append(list, item);
705 Py_DECREF(item);
706 if (err < 0)
707 goto fail;
708 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000709 }
710 else
711 i++;
712 }
713 item = PyString_FromStringAndSize(s+j, (int)(len-j));
714 if (item == NULL)
715 goto fail;
716 err = PyList_Append(list, item);
717 Py_DECREF(item);
718 if (err < 0)
719 goto fail;
720
721 return list;
722
723 fail:
724 Py_DECREF(list);
725 return NULL;
726}
727
728
/* Docstring text for S.join(). */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000734
735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000737{
738 char *sep = PyString_AS_STRING(self);
739 int seplen = PyString_GET_SIZE(self);
740 PyObject *res = NULL;
741 int reslen = 0;
742 char *p;
743 int seqlen = 0;
744 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000745 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000746 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000747
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000748 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000749 return NULL;
750
Barry Warsaw771d0672000-07-11 04:58:12 +0000751 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000752 if (PyErr_ExceptionMatches(PyExc_TypeError))
753 PyErr_Format(PyExc_TypeError,
754 "sequence expected, %.80s found",
755 orig->ob_type->tp_name);
756 return NULL;
757 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000758 /* From here on out, errors go through finally: for proper
759 * reference count manipulations.
760 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000761 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000763 item = PySequence_Fast_GET_ITEM(seq, 0);
764 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000765 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000766 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000767 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000768
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000769 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000770 goto finally;
771
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000772 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000773
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000774 for (i = 0; i < seqlen; i++) {
775 item = PySequence_Fast_GET_ITEM(seq, i);
776 if (!PyString_Check(item)){
777 if (PyUnicode_Check(item)) {
778 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000779 Py_DECREF(seq);
780 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000781 }
782 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000783 "sequence item %i: expected string,"
784 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000785 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000786 goto finally;
787 }
788 slen = PyString_GET_SIZE(item);
789 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000790 /* at least double the size of the string */
791 sz_incr = slen + seplen > sz ? slen + seplen : sz;
792 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000793 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000794 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000795 sz += sz_incr;
796 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000797 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000798 if (i > 0) {
799 memcpy(p, sep, seplen);
800 p += seplen;
801 reslen += seplen;
802 }
803 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000804 p += slen;
805 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806 }
807 if (_PyString_Resize(&res, reslen))
808 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000809 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810 return res;
811
812 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000813 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000814 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000815 return NULL;
816}
817
818
819
820static long
Fred Drakeba096332000-07-09 07:04:36 +0000821string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000823 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 int len = PyString_GET_SIZE(self);
825 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000826 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827
Guido van Rossumc6821402000-05-08 14:08:05 +0000828 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
829 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000830 return -2;
831 if (PyString_Check(subobj)) {
832 sub = PyString_AS_STRING(subobj);
833 n = PyString_GET_SIZE(subobj);
834 }
835 else if (PyUnicode_Check(subobj))
836 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
837 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000838 return -2;
839
840 if (last > len)
841 last = len;
842 if (last < 0)
843 last += len;
844 if (last < 0)
845 last = 0;
846 if (i < 0)
847 i += len;
848 if (i < 0)
849 i = 0;
850
Guido van Rossum4c08d552000-03-10 22:55:18 +0000851 if (dir > 0) {
852 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000853 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000854 last -= n;
855 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000856 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000857 return (long)i;
858 }
859 else {
860 int j;
861
862 if (n == 0 && i <= last)
863 return (long)last;
864 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000865 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000866 return (long)j;
867 }
868
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000869 return -1;
870}
871
872
/* Docstring text for S.find(); the slice is written S[start:end] to match
   the slice notation the text refers to. */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
881
882static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000883string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000884{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000885 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000886 if (result == -2)
887 return NULL;
888 return PyInt_FromLong(result);
889}
890
891
/* Docstring text for S.index(). */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
896
897static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000898string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000899{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000900 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000901 if (result == -2)
902 return NULL;
903 if (result == -1) {
904 PyErr_SetString(PyExc_ValueError,
905 "substring not found in string.index");
906 return NULL;
907 }
908 return PyInt_FromLong(result);
909}
910
911
/* Docstring text for S.rfind(); the slice is written S[start:end] to match
   the slice notation the text refers to. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
920
921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000922string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000923{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000924 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925 if (result == -2)
926 return NULL;
927 return PyInt_FromLong(result);
928}
929
930
/* Docstring text for S.rindex(). */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
935
936static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000937string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000938{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000939 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000940 if (result == -2)
941 return NULL;
942 if (result == -1) {
943 PyErr_SetString(PyExc_ValueError,
944 "substring not found in string.rindex");
945 return NULL;
946 }
947 return PyInt_FromLong(result);
948}
949
950
951static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000952do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000953{
954 char *s = PyString_AS_STRING(self);
955 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956
Guido van Rossum43713e52000-02-29 13:59:29 +0000957 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000958 return NULL;
959
960 i = 0;
961 if (striptype != RIGHTSTRIP) {
962 while (i < len && isspace(Py_CHARMASK(s[i]))) {
963 i++;
964 }
965 }
966
967 j = len;
968 if (striptype != LEFTSTRIP) {
969 do {
970 j--;
971 } while (j >= i && isspace(Py_CHARMASK(s[j])));
972 j++;
973 }
974
975 if (i == 0 && j == len) {
976 Py_INCREF(self);
977 return (PyObject*)self;
978 }
979 else
980 return PyString_FromStringAndSize(s+i, j-i);
981}
982
983
/* Docstring text for S.strip(). */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
989
990static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000991string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000992{
993 return do_strip(self, args, BOTHSTRIP);
994}
995
996
/* Docstring text for S.lstrip(). */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1001
1002static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001003string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001004{
1005 return do_strip(self, args, LEFTSTRIP);
1006}
1007
1008
/* Docstring text for S.rstrip(). */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1013
1014static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001015string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016{
1017 return do_strip(self, args, RIGHTSTRIP);
1018}
1019
1020
/* Docstring text for S.lower(). */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1025
1026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001027string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028{
1029 char *s = PyString_AS_STRING(self), *s_new;
1030 int i, n = PyString_GET_SIZE(self);
1031 PyObject *new;
1032
Guido van Rossum43713e52000-02-29 13:59:29 +00001033 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001034 return NULL;
1035 new = PyString_FromStringAndSize(NULL, n);
1036 if (new == NULL)
1037 return NULL;
1038 s_new = PyString_AsString(new);
1039 for (i = 0; i < n; i++) {
1040 int c = Py_CHARMASK(*s++);
1041 if (isupper(c)) {
1042 *s_new = tolower(c);
1043 } else
1044 *s_new = c;
1045 s_new++;
1046 }
1047 return new;
1048}
1049
1050
/* Docstring text for S.upper(). */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1055
1056static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001057string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001058{
1059 char *s = PyString_AS_STRING(self), *s_new;
1060 int i, n = PyString_GET_SIZE(self);
1061 PyObject *new;
1062
Guido van Rossum43713e52000-02-29 13:59:29 +00001063 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001064 return NULL;
1065 new = PyString_FromStringAndSize(NULL, n);
1066 if (new == NULL)
1067 return NULL;
1068 s_new = PyString_AsString(new);
1069 for (i = 0; i < n; i++) {
1070 int c = Py_CHARMASK(*s++);
1071 if (islower(c)) {
1072 *s_new = toupper(c);
1073 } else
1074 *s_new = c;
1075 s_new++;
1076 }
1077 return new;
1078}
1079
1080
/* Docstring text for S.title(). */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1086
1087static PyObject*
1088string_title(PyUnicodeObject *self, PyObject *args)
1089{
1090 char *s = PyString_AS_STRING(self), *s_new;
1091 int i, n = PyString_GET_SIZE(self);
1092 int previous_is_cased = 0;
1093 PyObject *new;
1094
1095 if (!PyArg_ParseTuple(args, ":title"))
1096 return NULL;
1097 new = PyString_FromStringAndSize(NULL, n);
1098 if (new == NULL)
1099 return NULL;
1100 s_new = PyString_AsString(new);
1101 for (i = 0; i < n; i++) {
1102 int c = Py_CHARMASK(*s++);
1103 if (islower(c)) {
1104 if (!previous_is_cased)
1105 c = toupper(c);
1106 previous_is_cased = 1;
1107 } else if (isupper(c)) {
1108 if (previous_is_cased)
1109 c = tolower(c);
1110 previous_is_cased = 1;
1111 } else
1112 previous_is_cased = 0;
1113 *s_new++ = c;
1114 }
1115 return new;
1116}
1117
/* Docstring text for S.capitalize(). */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1123
1124static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001125string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126{
1127 char *s = PyString_AS_STRING(self), *s_new;
1128 int i, n = PyString_GET_SIZE(self);
1129 PyObject *new;
1130
Guido van Rossum43713e52000-02-29 13:59:29 +00001131 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132 return NULL;
1133 new = PyString_FromStringAndSize(NULL, n);
1134 if (new == NULL)
1135 return NULL;
1136 s_new = PyString_AsString(new);
1137 if (0 < n) {
1138 int c = Py_CHARMASK(*s++);
1139 if (islower(c))
1140 *s_new = toupper(c);
1141 else
1142 *s_new = c;
1143 s_new++;
1144 }
1145 for (i = 1; i < n; i++) {
1146 int c = Py_CHARMASK(*s++);
1147 if (isupper(c))
1148 *s_new = tolower(c);
1149 else
1150 *s_new = c;
1151 s_new++;
1152 }
1153 return new;
1154}
1155
1156
/* Docstring text for S.count(). */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1163
1164static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001165string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001167 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001168 int len = PyString_GET_SIZE(self), n;
1169 int i = 0, last = INT_MAX;
1170 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001171 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172
Guido van Rossumc6821402000-05-08 14:08:05 +00001173 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1174 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001175 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001176
Guido van Rossum4c08d552000-03-10 22:55:18 +00001177 if (PyString_Check(subobj)) {
1178 sub = PyString_AS_STRING(subobj);
1179 n = PyString_GET_SIZE(subobj);
1180 }
1181 else if (PyUnicode_Check(subobj))
1182 return PyInt_FromLong(
1183 PyUnicode_Count((PyObject *)self, subobj, i, last));
1184 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1185 return NULL;
1186
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001187 if (last > len)
1188 last = len;
1189 if (last < 0)
1190 last += len;
1191 if (last < 0)
1192 last = 0;
1193 if (i < 0)
1194 i += len;
1195 if (i < 0)
1196 i = 0;
1197 m = last + 1 - n;
1198 if (n == 0)
1199 return PyInt_FromLong((long) (m-i));
1200
1201 r = 0;
1202 while (i < m) {
1203 if (!memcmp(s+i, sub, n)) {
1204 r++;
1205 i += n;
1206 } else {
1207 i++;
1208 }
1209 }
1210 return PyInt_FromLong((long) r);
1211}
1212
1213
/* Docstring text for S.swapcase(). */
static char swapcase__doc__[] =
    "S.swapcase() -> string\n"
    "\n"
    "Return a copy of the string S with uppercase characters\n"
    "converted to lowercase and vice versa.";
1219
1220static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001221string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001222{
1223 char *s = PyString_AS_STRING(self), *s_new;
1224 int i, n = PyString_GET_SIZE(self);
1225 PyObject *new;
1226
Guido van Rossum43713e52000-02-29 13:59:29 +00001227 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228 return NULL;
1229 new = PyString_FromStringAndSize(NULL, n);
1230 if (new == NULL)
1231 return NULL;
1232 s_new = PyString_AsString(new);
1233 for (i = 0; i < n; i++) {
1234 int c = Py_CHARMASK(*s++);
1235 if (islower(c)) {
1236 *s_new = toupper(c);
1237 }
1238 else if (isupper(c)) {
1239 *s_new = tolower(c);
1240 }
1241 else
1242 *s_new = c;
1243 s_new++;
1244 }
1245 return new;
1246}
1247
1248
/* Docstring text for S.translate(). */
static char translate__doc__[] =
    "S.translate(table [,deletechars]) -> string\n"
    "\n"
    "Return a copy of the string S, where all characters occurring\n"
    "in the optional argument deletechars are removed, and the\n"
    "remaining characters have been mapped through the given\n"
    "translation table, which must be a string of length 256.";
1256
1257static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001258string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001259{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 register char *input, *output;
1261 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262 register int i, c, changed = 0;
1263 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 int inlen, tablen, dellen = 0;
1266 PyObject *result;
1267 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 if (!PyArg_ParseTuple(args, "O|O:translate",
1271 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273
1274 if (PyString_Check(tableobj)) {
1275 table1 = PyString_AS_STRING(tableobj);
1276 tablen = PyString_GET_SIZE(tableobj);
1277 }
1278 else if (PyUnicode_Check(tableobj)) {
1279 /* Unicode .translate() does not support the deletechars
1280 parameter; instead a mapping to None will cause characters
1281 to be deleted. */
1282 if (delobj != NULL) {
1283 PyErr_SetString(PyExc_TypeError,
1284 "deletions are implemented differently for unicode");
1285 return NULL;
1286 }
1287 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1288 }
1289 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001291
1292 if (delobj != NULL) {
1293 if (PyString_Check(delobj)) {
1294 del_table = PyString_AS_STRING(delobj);
1295 dellen = PyString_GET_SIZE(delobj);
1296 }
1297 else if (PyUnicode_Check(delobj)) {
1298 PyErr_SetString(PyExc_TypeError,
1299 "deletions are implemented differently for unicode");
1300 return NULL;
1301 }
1302 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1303 return NULL;
1304
1305 if (tablen != 256) {
1306 PyErr_SetString(PyExc_ValueError,
1307 "translation table must be 256 characters long");
1308 return NULL;
1309 }
1310 }
1311 else {
1312 del_table = NULL;
1313 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314 }
1315
1316 table = table1;
1317 inlen = PyString_Size(input_obj);
1318 result = PyString_FromStringAndSize((char *)NULL, inlen);
1319 if (result == NULL)
1320 return NULL;
1321 output_start = output = PyString_AsString(result);
1322 input = PyString_AsString(input_obj);
1323
1324 if (dellen == 0) {
1325 /* If no deletions are required, use faster code */
1326 for (i = inlen; --i >= 0; ) {
1327 c = Py_CHARMASK(*input++);
1328 if (Py_CHARMASK((*output++ = table[c])) != c)
1329 changed = 1;
1330 }
1331 if (changed)
1332 return result;
1333 Py_DECREF(result);
1334 Py_INCREF(input_obj);
1335 return input_obj;
1336 }
1337
1338 for (i = 0; i < 256; i++)
1339 trans_table[i] = Py_CHARMASK(table[i]);
1340
1341 for (i = 0; i < dellen; i++)
1342 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1343
1344 for (i = inlen; --i >= 0; ) {
1345 c = Py_CHARMASK(*input++);
1346 if (trans_table[c] != -1)
1347 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1348 continue;
1349 changed = 1;
1350 }
1351 if (!changed) {
1352 Py_DECREF(result);
1353 Py_INCREF(input_obj);
1354 return input_obj;
1355 }
1356 /* Fix the size of the resulting string */
1357 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1358 return NULL;
1359 return result;
1360}
1361
1362
1363/* What follows is used for implementing replace(). Perry Stoll. */
1364
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory (embedded NUL bytes
  are fine).

  Looks for the first place in MEM (LEN bytes) where the PAT_LEN bytes of
  PAT occur.  Returns that index into MEM, or -1 when there is no such
  place -- in particular when PAT is longer than MEM.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot start in the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1390
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from the
   left.  For example mem=1111 with pat=11 gives 2, and mem=11111 with
   pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at, skip;

    for (; len >= 0; hits++) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* continue right behind the occurrence just found */
        skip = at + pat_len;
        mem += skip;
        len -= skip;
    }
    return hits;
}
1414
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB (at most COUNT of them when COUNT is >= 0).

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there is nothing to replace, then the original string is
   returned.  Otherwise, a new string is allocated here and returned; the
   caller releases it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (e.g. the whole input replaced by ""), and
       a zero-byte allocation may legitimately yield NULL.  Always ask for
       at least one byte so that NULL unambiguously means "no memory" and
       the caller always has something to free. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;	/* have to cast away constness here */
}
1492
1493
/* Docstring text for S.replace().  The optional third argument limits the
   number of replacements, so it is documented as "count". */
static char replace__doc__[] =
"S.replace(old, new[, count]) -> string\n\
\n\
Return a copy of string S with all occurrences of substring\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.";
1500
1501static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001502string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001504 const char *str = PyString_AS_STRING(self), *sub, *repl;
1505 char *new_s;
1506 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1507 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001509 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510
Guido van Rossum4c08d552000-03-10 22:55:18 +00001511 if (!PyArg_ParseTuple(args, "OO|i:replace",
1512 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001514
1515 if (PyString_Check(subobj)) {
1516 sub = PyString_AS_STRING(subobj);
1517 sub_len = PyString_GET_SIZE(subobj);
1518 }
1519 else if (PyUnicode_Check(subobj))
1520 return PyUnicode_Replace((PyObject *)self,
1521 subobj, replobj, count);
1522 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1523 return NULL;
1524
1525 if (PyString_Check(replobj)) {
1526 repl = PyString_AS_STRING(replobj);
1527 repl_len = PyString_GET_SIZE(replobj);
1528 }
1529 else if (PyUnicode_Check(replobj))
1530 return PyUnicode_Replace((PyObject *)self,
1531 subobj, replobj, count);
1532 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1533 return NULL;
1534
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001535 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001536 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537 return NULL;
1538 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001539 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001540 if (new_s == NULL) {
1541 PyErr_NoMemory();
1542 return NULL;
1543 }
1544 if (out_len == -1) {
1545 /* we're returning another reference to self */
1546 new = (PyObject*)self;
1547 Py_INCREF(new);
1548 }
1549 else {
1550 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001551 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 }
1553 return new;
1554}
1555
1556
/* Docstring text for S.startswith(). */
static char startswith__doc__[] =
    "S.startswith(prefix[, start[, end]]) -> int\n"
    "\n"
    "Return 1 if S starts with the specified prefix, otherwise return 0. With\n"
    "optional start, test S beginning at that position. With optional end, stop\n"
    "comparing S at that position.";
1563
1564static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001565string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 int plen;
1571 int start = 0;
1572 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574
Guido van Rossumc6821402000-05-08 14:08:05 +00001575 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1576 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001577 return NULL;
1578 if (PyString_Check(subobj)) {
1579 prefix = PyString_AS_STRING(subobj);
1580 plen = PyString_GET_SIZE(subobj);
1581 }
1582 else if (PyUnicode_Check(subobj))
1583 return PyInt_FromLong(
1584 PyUnicode_Tailmatch((PyObject *)self,
1585 subobj, start, end, -1));
1586 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 return NULL;
1588
1589 /* adopt Java semantics for index out of range. it is legal for
1590 * offset to be == plen, but this only returns true if prefix is
1591 * the empty string.
1592 */
1593 if (start < 0 || start+plen > len)
1594 return PyInt_FromLong(0);
1595
1596 if (!memcmp(str+start, prefix, plen)) {
1597 /* did the match end after the specified end? */
1598 if (end < 0)
1599 return PyInt_FromLong(1);
1600 else if (end - start < plen)
1601 return PyInt_FromLong(0);
1602 else
1603 return PyInt_FromLong(1);
1604 }
1605 else return PyInt_FromLong(0);
1606}
1607
1608
1609static char endswith__doc__[] =
1610"S.endswith(suffix[, start[, end]]) -> int\n\
1611\n\
1612Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1613optional start, test S beginning at that position. With optional end, stop\n\
1614comparing S at that position.";
1615
1616static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001617string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001619 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 const char* suffix;
1622 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 int start = 0;
1624 int end = -1;
1625 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001626 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627
Guido van Rossumc6821402000-05-08 14:08:05 +00001628 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1629 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001630 return NULL;
1631 if (PyString_Check(subobj)) {
1632 suffix = PyString_AS_STRING(subobj);
1633 slen = PyString_GET_SIZE(subobj);
1634 }
1635 else if (PyUnicode_Check(subobj))
1636 return PyInt_FromLong(
1637 PyUnicode_Tailmatch((PyObject *)self,
1638 subobj, start, end, +1));
1639 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 return NULL;
1641
Guido van Rossum4c08d552000-03-10 22:55:18 +00001642 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643 return PyInt_FromLong(0);
1644
1645 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001646 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647
Guido van Rossum4c08d552000-03-10 22:55:18 +00001648 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649 return PyInt_FromLong(1);
1650 else return PyInt_FromLong(0);
1651}
1652
1653
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001654static char encode__doc__[] =
1655"S.encode([encoding[,errors]]) -> string\n\
1656\n\
1657Return an encoded string version of S. Default encoding is the current\n\
1658default string encoding. errors may be given to set a different error\n\
1659handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1660a ValueError. Other possible values are 'ignore' and 'replace'.";
1661
1662static PyObject *
1663string_encode(PyStringObject *self, PyObject *args)
1664{
1665 char *encoding = NULL;
1666 char *errors = NULL;
1667 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1668 return NULL;
1669 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1670}
1671
1672
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673static char expandtabs__doc__[] =
1674"S.expandtabs([tabsize]) -> string\n\
1675\n\
1676Return a copy of S where all tab characters are expanded using spaces.\n\
1677If tabsize is not given, a tab size of 8 characters is assumed.";
1678
1679static PyObject*
1680string_expandtabs(PyStringObject *self, PyObject *args)
1681{
1682 const char *e, *p;
1683 char *q;
1684 int i, j;
1685 PyObject *u;
1686 int tabsize = 8;
1687
1688 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1689 return NULL;
1690
Thomas Wouters7e474022000-07-16 12:04:32 +00001691 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 i = j = 0;
1693 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1694 for (p = PyString_AS_STRING(self); p < e; p++)
1695 if (*p == '\t') {
1696 if (tabsize > 0)
1697 j += tabsize - (j % tabsize);
1698 }
1699 else {
1700 j++;
1701 if (*p == '\n' || *p == '\r') {
1702 i += j;
1703 j = 0;
1704 }
1705 }
1706
1707 /* Second pass: create output string and fill it */
1708 u = PyString_FromStringAndSize(NULL, i + j);
1709 if (!u)
1710 return NULL;
1711
1712 j = 0;
1713 q = PyString_AS_STRING(u);
1714
1715 for (p = PyString_AS_STRING(self); p < e; p++)
1716 if (*p == '\t') {
1717 if (tabsize > 0) {
1718 i = tabsize - (j % tabsize);
1719 j += i;
1720 while (i--)
1721 *q++ = ' ';
1722 }
1723 }
1724 else {
1725 j++;
1726 *q++ = *p;
1727 if (*p == '\n' || *p == '\r')
1728 j = 0;
1729 }
1730
1731 return u;
1732}
1733
1734static
1735PyObject *pad(PyStringObject *self,
1736 int left,
1737 int right,
1738 char fill)
1739{
1740 PyObject *u;
1741
1742 if (left < 0)
1743 left = 0;
1744 if (right < 0)
1745 right = 0;
1746
1747 if (left == 0 && right == 0) {
1748 Py_INCREF(self);
1749 return (PyObject *)self;
1750 }
1751
1752 u = PyString_FromStringAndSize(NULL,
1753 left + PyString_GET_SIZE(self) + right);
1754 if (u) {
1755 if (left)
1756 memset(PyString_AS_STRING(u), fill, left);
1757 memcpy(PyString_AS_STRING(u) + left,
1758 PyString_AS_STRING(self),
1759 PyString_GET_SIZE(self));
1760 if (right)
1761 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1762 fill, right);
1763 }
1764
1765 return u;
1766}
1767
1768static char ljust__doc__[] =
1769"S.ljust(width) -> string\n\
1770\n\
1771Return S left justified in a string of length width. Padding is\n\
1772done using spaces.";
1773
1774static PyObject *
1775string_ljust(PyStringObject *self, PyObject *args)
1776{
1777 int width;
1778 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1779 return NULL;
1780
1781 if (PyString_GET_SIZE(self) >= width) {
1782 Py_INCREF(self);
1783 return (PyObject*) self;
1784 }
1785
1786 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1787}
1788
1789
1790static char rjust__doc__[] =
1791"S.rjust(width) -> string\n\
1792\n\
1793Return S right justified in a string of length width. Padding is\n\
1794done using spaces.";
1795
1796static PyObject *
1797string_rjust(PyStringObject *self, PyObject *args)
1798{
1799 int width;
1800 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1801 return NULL;
1802
1803 if (PyString_GET_SIZE(self) >= width) {
1804 Py_INCREF(self);
1805 return (PyObject*) self;
1806 }
1807
1808 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1809}
1810
1811
1812static char center__doc__[] =
1813"S.center(width) -> string\n\
1814\n\
1815Return S centered in a string of length width. Padding is done\n\
1816using spaces.";
1817
1818static PyObject *
1819string_center(PyStringObject *self, PyObject *args)
1820{
1821 int marg, left;
1822 int width;
1823
1824 if (!PyArg_ParseTuple(args, "i:center", &width))
1825 return NULL;
1826
1827 if (PyString_GET_SIZE(self) >= width) {
1828 Py_INCREF(self);
1829 return (PyObject*) self;
1830 }
1831
1832 marg = width - PyString_GET_SIZE(self);
1833 left = marg / 2 + (marg & width & 1);
1834
1835 return pad(self, left, marg - left, ' ');
1836}
1837
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Compiled out.  Left-pads with '0' up to `width` and, when the original
   text started with '+' or '-', moves that sign to the front. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	PyObject *padded;
	char *text;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* text[zeros] is the first character of the original string */
	text = PyString_AS_STRING(padded);
	if (text[zeros] == '+' || text[zeros] == '-') {
		/* move sign to beginning of string */
		text[0] = text[zeros];
		text[zeros] = '0';
	}

	return padded;
}
#endif
1877
1878static char isspace__doc__[] =
1879"S.isspace() -> int\n\
1880\n\
1881Return 1 if there are only whitespace characters in S,\n\
18820 otherwise.";
1883
1884static PyObject*
1885string_isspace(PyStringObject *self, PyObject *args)
1886{
Fred Drakeba096332000-07-09 07:04:36 +00001887 register const unsigned char *p
1888 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001889 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001890
1891 if (!PyArg_NoArgs(args))
1892 return NULL;
1893
1894 /* Shortcut for single character strings */
1895 if (PyString_GET_SIZE(self) == 1 &&
1896 isspace(*p))
1897 return PyInt_FromLong(1);
1898
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001899 /* Special case for empty strings */
1900 if (PyString_GET_SIZE(self) == 0)
1901 return PyInt_FromLong(0);
1902
Guido van Rossum4c08d552000-03-10 22:55:18 +00001903 e = p + PyString_GET_SIZE(self);
1904 for (; p < e; p++) {
1905 if (!isspace(*p))
1906 return PyInt_FromLong(0);
1907 }
1908 return PyInt_FromLong(1);
1909}
1910
1911
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001912static char isalpha__doc__[] =
1913"S.isalpha() -> int\n\
1914\n\
1915Return 1 if all characters in S are alphabetic\n\
1916and there is at least one character in S, 0 otherwise.";
1917
1918static PyObject*
1919string_isalpha(PyUnicodeObject *self, PyObject *args)
1920{
Fred Drakeba096332000-07-09 07:04:36 +00001921 register const unsigned char *p
1922 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001923 register const unsigned char *e;
1924
1925 if (!PyArg_NoArgs(args))
1926 return NULL;
1927
1928 /* Shortcut for single character strings */
1929 if (PyString_GET_SIZE(self) == 1 &&
1930 isalpha(*p))
1931 return PyInt_FromLong(1);
1932
1933 /* Special case for empty strings */
1934 if (PyString_GET_SIZE(self) == 0)
1935 return PyInt_FromLong(0);
1936
1937 e = p + PyString_GET_SIZE(self);
1938 for (; p < e; p++) {
1939 if (!isalpha(*p))
1940 return PyInt_FromLong(0);
1941 }
1942 return PyInt_FromLong(1);
1943}
1944
1945
1946static char isalnum__doc__[] =
1947"S.isalnum() -> int\n\
1948\n\
1949Return 1 if all characters in S are alphanumeric\n\
1950and there is at least one character in S, 0 otherwise.";
1951
1952static PyObject*
1953string_isalnum(PyUnicodeObject *self, PyObject *args)
1954{
Fred Drakeba096332000-07-09 07:04:36 +00001955 register const unsigned char *p
1956 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001957 register const unsigned char *e;
1958
1959 if (!PyArg_NoArgs(args))
1960 return NULL;
1961
1962 /* Shortcut for single character strings */
1963 if (PyString_GET_SIZE(self) == 1 &&
1964 isalnum(*p))
1965 return PyInt_FromLong(1);
1966
1967 /* Special case for empty strings */
1968 if (PyString_GET_SIZE(self) == 0)
1969 return PyInt_FromLong(0);
1970
1971 e = p + PyString_GET_SIZE(self);
1972 for (; p < e; p++) {
1973 if (!isalnum(*p))
1974 return PyInt_FromLong(0);
1975 }
1976 return PyInt_FromLong(1);
1977}
1978
1979
Guido van Rossum4c08d552000-03-10 22:55:18 +00001980static char isdigit__doc__[] =
1981"S.isdigit() -> int\n\
1982\n\
1983Return 1 if there are only digit characters in S,\n\
19840 otherwise.";
1985
1986static PyObject*
1987string_isdigit(PyStringObject *self, PyObject *args)
1988{
Fred Drakeba096332000-07-09 07:04:36 +00001989 register const unsigned char *p
1990 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001991 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001992
1993 if (!PyArg_NoArgs(args))
1994 return NULL;
1995
1996 /* Shortcut for single character strings */
1997 if (PyString_GET_SIZE(self) == 1 &&
1998 isdigit(*p))
1999 return PyInt_FromLong(1);
2000
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002001 /* Special case for empty strings */
2002 if (PyString_GET_SIZE(self) == 0)
2003 return PyInt_FromLong(0);
2004
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 e = p + PyString_GET_SIZE(self);
2006 for (; p < e; p++) {
2007 if (!isdigit(*p))
2008 return PyInt_FromLong(0);
2009 }
2010 return PyInt_FromLong(1);
2011}
2012
2013
2014static char islower__doc__[] =
2015"S.islower() -> int\n\
2016\n\
2017Return 1 if all cased characters in S are lowercase and there is\n\
2018at least one cased character in S, 0 otherwise.";
2019
2020static PyObject*
2021string_islower(PyStringObject *self, PyObject *args)
2022{
Fred Drakeba096332000-07-09 07:04:36 +00002023 register const unsigned char *p
2024 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002025 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 int cased;
2027
2028 if (!PyArg_NoArgs(args))
2029 return NULL;
2030
2031 /* Shortcut for single character strings */
2032 if (PyString_GET_SIZE(self) == 1)
2033 return PyInt_FromLong(islower(*p) != 0);
2034
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002035 /* Special case for empty strings */
2036 if (PyString_GET_SIZE(self) == 0)
2037 return PyInt_FromLong(0);
2038
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 e = p + PyString_GET_SIZE(self);
2040 cased = 0;
2041 for (; p < e; p++) {
2042 if (isupper(*p))
2043 return PyInt_FromLong(0);
2044 else if (!cased && islower(*p))
2045 cased = 1;
2046 }
2047 return PyInt_FromLong(cased);
2048}
2049
2050
2051static char isupper__doc__[] =
2052"S.isupper() -> int\n\
2053\n\
2054Return 1 if all cased characters in S are uppercase and there is\n\
2055at least one cased character in S, 0 otherwise.";
2056
2057static PyObject*
2058string_isupper(PyStringObject *self, PyObject *args)
2059{
Fred Drakeba096332000-07-09 07:04:36 +00002060 register const unsigned char *p
2061 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002062 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 int cased;
2064
2065 if (!PyArg_NoArgs(args))
2066 return NULL;
2067
2068 /* Shortcut for single character strings */
2069 if (PyString_GET_SIZE(self) == 1)
2070 return PyInt_FromLong(isupper(*p) != 0);
2071
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002072 /* Special case for empty strings */
2073 if (PyString_GET_SIZE(self) == 0)
2074 return PyInt_FromLong(0);
2075
Guido van Rossum4c08d552000-03-10 22:55:18 +00002076 e = p + PyString_GET_SIZE(self);
2077 cased = 0;
2078 for (; p < e; p++) {
2079 if (islower(*p))
2080 return PyInt_FromLong(0);
2081 else if (!cased && isupper(*p))
2082 cased = 1;
2083 }
2084 return PyInt_FromLong(cased);
2085}
2086
2087
2088static char istitle__doc__[] =
2089"S.istitle() -> int\n\
2090\n\
2091Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2092may only follow uncased characters and lowercase characters only cased\n\
2093ones. Return 0 otherwise.";
2094
2095static PyObject*
2096string_istitle(PyStringObject *self, PyObject *args)
2097{
Fred Drakeba096332000-07-09 07:04:36 +00002098 register const unsigned char *p
2099 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002100 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 int cased, previous_is_cased;
2102
2103 if (!PyArg_NoArgs(args))
2104 return NULL;
2105
2106 /* Shortcut for single character strings */
2107 if (PyString_GET_SIZE(self) == 1)
2108 return PyInt_FromLong(isupper(*p) != 0);
2109
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002110 /* Special case for empty strings */
2111 if (PyString_GET_SIZE(self) == 0)
2112 return PyInt_FromLong(0);
2113
Guido van Rossum4c08d552000-03-10 22:55:18 +00002114 e = p + PyString_GET_SIZE(self);
2115 cased = 0;
2116 previous_is_cased = 0;
2117 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002118 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119
2120 if (isupper(ch)) {
2121 if (previous_is_cased)
2122 return PyInt_FromLong(0);
2123 previous_is_cased = 1;
2124 cased = 1;
2125 }
2126 else if (islower(ch)) {
2127 if (!previous_is_cased)
2128 return PyInt_FromLong(0);
2129 previous_is_cased = 1;
2130 cased = 1;
2131 }
2132 else
2133 previous_is_cased = 0;
2134 }
2135 return PyInt_FromLong(cased);
2136}
2137
2138
2139static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002140"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002141\n\
2142Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002143Line breaks are not included in the resulting list unless keepends\n\
2144is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002145
2146#define SPLIT_APPEND(data, left, right) \
2147 str = PyString_FromStringAndSize(data + left, right - left); \
2148 if (!str) \
2149 goto onError; \
2150 if (PyList_Append(list, str)) { \
2151 Py_DECREF(str); \
2152 goto onError; \
2153 } \
2154 else \
2155 Py_DECREF(str);
2156
2157static PyObject*
2158string_splitlines(PyStringObject *self, PyObject *args)
2159{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160 register int i;
2161 register int j;
2162 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002163 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002164 PyObject *list;
2165 PyObject *str;
2166 char *data;
2167
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002168 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 return NULL;
2170
2171 data = PyString_AS_STRING(self);
2172 len = PyString_GET_SIZE(self);
2173
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 list = PyList_New(0);
2175 if (!list)
2176 goto onError;
2177
2178 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002179 int eol;
2180
Guido van Rossum4c08d552000-03-10 22:55:18 +00002181 /* Find a line and append it */
2182 while (i < len && data[i] != '\n' && data[i] != '\r')
2183 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184
2185 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002186 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187 if (i < len) {
2188 if (data[i] == '\r' && i + 1 < len &&
2189 data[i+1] == '\n')
2190 i += 2;
2191 else
2192 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002193 if (keepends)
2194 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002196 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197 j = i;
2198 }
2199 if (j < len) {
2200 SPLIT_APPEND(data, j, len);
2201 }
2202
2203 return list;
2204
2205 onError:
2206 Py_DECREF(list);
2207 return NULL;
2208}
2209
2210#undef SPLIT_APPEND
2211
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212
2213static PyMethodDef
2214string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002215 /* Counterparts of the obsolete stropmodule functions; except
2216 string.maketrans(). */
2217 {"join", (PyCFunction)string_join, 1, join__doc__},
2218 {"split", (PyCFunction)string_split, 1, split__doc__},
2219 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2220 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2221 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2222 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2223 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2224 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2225 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002226 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2227 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2229 {"count", (PyCFunction)string_count, 1, count__doc__},
2230 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2231 {"find", (PyCFunction)string_find, 1, find__doc__},
2232 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2235 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2236 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2237 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002238 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2239 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2240 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2242 {"title", (PyCFunction)string_title, 1, title__doc__},
2243 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2244 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2245 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002246 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2248 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2249#if 0
2250 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2251#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252 {NULL, NULL} /* sentinel */
2253};
2254
2255static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002256string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257{
2258 return Py_FindMethod(string_methods, (PyObject*)s, name);
2259}
2260
2261
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002262PyTypeObject PyString_Type = {
2263 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002264 0,
2265 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002266 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002267 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002268 (destructor)string_dealloc, /*tp_dealloc*/
2269 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002271 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002272 (cmpfunc)string_compare, /*tp_compare*/
2273 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002274 0, /*tp_as_number*/
2275 &string_as_sequence, /*tp_as_sequence*/
2276 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002277 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002278 0, /*tp_call*/
2279 0, /*tp_str*/
2280 0, /*tp_getattro*/
2281 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002282 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002283 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002284 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002285};
2286
2287void
Fred Drakeba096332000-07-09 07:04:36 +00002288PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002289{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002290 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002291 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002292 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002293 if (w == NULL || !PyString_Check(*pv)) {
2294 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002295 *pv = NULL;
2296 return;
2297 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002298 v = string_concat((PyStringObject *) *pv, w);
2299 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002300 *pv = v;
2301}
2302
Guido van Rossum013142a1994-08-30 08:19:36 +00002303void
Fred Drakeba096332000-07-09 07:04:36 +00002304PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002305{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306 PyString_Concat(pv, w);
2307 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002308}
2309
2310
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002311/* The following function breaks the notion that strings are immutable:
2312 it changes the size of a string. We get away with this only if there
2313 is only one module referencing the object. You can also think of it
2314 as creating a new string object and destroying the old one, only
2315 more efficiently. In any case, don't use this if the string may
2316 already be known to some other part of the code... */
2317
2318int
Fred Drakeba096332000-07-09 07:04:36 +00002319_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002320{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002321 register PyObject *v;
2322 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002323 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002325 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002326 Py_DECREF(v);
2327 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002328 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002329 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002330 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002331#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002332 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002333#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002334 _Py_ForgetReference(v);
2335 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002336 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002337 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002338 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002339 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002340 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002341 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002342 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002343 _Py_NewReference(*pv);
2344 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002345 sv->ob_size = newsize;
2346 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002347 return 0;
2348}
Guido van Rossume5372401993-03-16 12:15:04 +00002349
2350/* Helpers for formatstring */
2351
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002352static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002353getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002354{
2355 int argidx = *p_argidx;
2356 if (argidx < arglen) {
2357 (*p_argidx)++;
2358 if (arglen < 0)
2359 return args;
2360 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002361 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002362 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002363 PyErr_SetString(PyExc_TypeError,
2364 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002365 return NULL;
2366}
2367
/* Bit flags passed as `flags` to the format helpers */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2373
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002374static int
Fred Drakeba096332000-07-09 07:04:36 +00002375formatfloat(char *buf, size_t buflen, int flags,
2376 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002377{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002378 /* fmt = '%#.' + `prec` + `type`
2379 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002380 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002381 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002382 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002383 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002384 if (prec < 0)
2385 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002386 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2387 type = 'g';
2388 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002389 /* worst case length calc to ensure no buffer overrun:
2390 fmt = %#.<prec>g
2391 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2392 for any double rep.)
2393 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2394 If prec=0 the effective precision is 1 (the leading digit is
2395 always given), therefore increase by one to 10+prec. */
2396 if (buflen <= (size_t)10 + (size_t)prec) {
2397 PyErr_SetString(PyExc_OverflowError,
2398 "formatted float is too long (precision too long?)");
2399 return -1;
2400 }
Guido van Rossume5372401993-03-16 12:15:04 +00002401 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002402 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002403}
2404
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002405static int
Fred Drakeba096332000-07-09 07:04:36 +00002406formatint(char *buf, size_t buflen, int flags,
2407 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002408{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002409 /* fmt = '%#.' + `prec` + 'l' + `type`
2410 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002411 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002412 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002413 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002414 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002415 if (prec < 0)
2416 prec = 1;
2417 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002418 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2419 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2420 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2421 PyErr_SetString(PyExc_OverflowError,
2422 "formatted integer is too long (precision too long?)");
2423 return -1;
2424 }
Guido van Rossume5372401993-03-16 12:15:04 +00002425 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002426 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002427}
2428
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002429static int
Fred Drakeba096332000-07-09 07:04:36 +00002430formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002431{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002432 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002433 if (PyString_Check(v)) {
2434 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002435 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002436 }
2437 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002438 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002439 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002440 }
2441 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002442 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002443}
2444
Guido van Rossum013142a1994-08-30 08:19:36 +00002445
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  The formatting
   routines are handed the buffer length so they can do bounds checking
   (formatchar() merely presumes room for two bytes), but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. "sizeof FORMATBUFLEN").
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002455
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002456PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002457PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002458{
2459 char *fmt, *res;
2460 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002461 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002462 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002463 PyObject *dict = NULL;
2464 if (format == NULL || !PyString_Check(format) || args == NULL) {
2465 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002466 return NULL;
2467 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002468 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002469 fmt = PyString_AsString(format);
2470 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002471 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002472 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002473 if (result == NULL)
2474 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002475 res = PyString_AsString(result);
2476 if (PyTuple_Check(args)) {
2477 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002478 argidx = 0;
2479 }
2480 else {
2481 arglen = -1;
2482 argidx = -2;
2483 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002484 if (args->ob_type->tp_as_mapping)
2485 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002486 while (--fmtcnt >= 0) {
2487 if (*fmt != '%') {
2488 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002489 rescnt = fmtcnt + 100;
2490 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002491 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002493 res = PyString_AsString(result)
2494 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002495 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002496 }
2497 *res++ = *fmt++;
2498 }
2499 else {
2500 /* Got a format specifier */
2501 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002502 int width = -1;
2503 int prec = -1;
2504 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002505 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002506 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002507 PyObject *v = NULL;
2508 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002509 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002510 int sign;
2511 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002512 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002513 char *fmt_start = fmt;
2514
Guido van Rossumda9c2711996-12-05 21:58:58 +00002515 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002516 if (*fmt == '(') {
2517 char *keystart;
2518 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002519 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002520 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002521
2522 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002523 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002524 "format requires a mapping");
2525 goto error;
2526 }
2527 ++fmt;
2528 --fmtcnt;
2529 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002530 /* Skip over balanced parentheses */
2531 while (pcount > 0 && --fmtcnt >= 0) {
2532 if (*fmt == ')')
2533 --pcount;
2534 else if (*fmt == '(')
2535 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002536 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002537 }
2538 keylen = fmt - keystart - 1;
2539 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002541 "incomplete format key");
2542 goto error;
2543 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544 key = PyString_FromStringAndSize(keystart,
2545 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002546 if (key == NULL)
2547 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002548 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002549 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002550 args_owned = 0;
2551 }
2552 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002553 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002554 if (args == NULL) {
2555 goto error;
2556 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002557 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002558 arglen = -1;
2559 argidx = -2;
2560 }
Guido van Rossume5372401993-03-16 12:15:04 +00002561 while (--fmtcnt >= 0) {
2562 switch (c = *fmt++) {
2563 case '-': flags |= F_LJUST; continue;
2564 case '+': flags |= F_SIGN; continue;
2565 case ' ': flags |= F_BLANK; continue;
2566 case '#': flags |= F_ALT; continue;
2567 case '0': flags |= F_ZERO; continue;
2568 }
2569 break;
2570 }
2571 if (c == '*') {
2572 v = getnextarg(args, arglen, &argidx);
2573 if (v == NULL)
2574 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002575 if (!PyInt_Check(v)) {
2576 PyErr_SetString(PyExc_TypeError,
2577 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002578 goto error;
2579 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002580 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002581 if (width < 0) {
2582 flags |= F_LJUST;
2583 width = -width;
2584 }
Guido van Rossume5372401993-03-16 12:15:04 +00002585 if (--fmtcnt >= 0)
2586 c = *fmt++;
2587 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002588 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002589 width = c - '0';
2590 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002591 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002592 if (!isdigit(c))
2593 break;
2594 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002595 PyErr_SetString(
2596 PyExc_ValueError,
2597 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002598 goto error;
2599 }
2600 width = width*10 + (c - '0');
2601 }
2602 }
2603 if (c == '.') {
2604 prec = 0;
2605 if (--fmtcnt >= 0)
2606 c = *fmt++;
2607 if (c == '*') {
2608 v = getnextarg(args, arglen, &argidx);
2609 if (v == NULL)
2610 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002611 if (!PyInt_Check(v)) {
2612 PyErr_SetString(
2613 PyExc_TypeError,
2614 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002615 goto error;
2616 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002617 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002618 if (prec < 0)
2619 prec = 0;
2620 if (--fmtcnt >= 0)
2621 c = *fmt++;
2622 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002623 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002624 prec = c - '0';
2625 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002626 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002627 if (!isdigit(c))
2628 break;
2629 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002630 PyErr_SetString(
2631 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002632 "prec too big");
2633 goto error;
2634 }
2635 prec = prec*10 + (c - '0');
2636 }
2637 }
2638 } /* prec */
2639 if (fmtcnt >= 0) {
2640 if (c == 'h' || c == 'l' || c == 'L') {
2641 size = c;
2642 if (--fmtcnt >= 0)
2643 c = *fmt++;
2644 }
2645 }
2646 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002647 PyErr_SetString(PyExc_ValueError,
2648 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002649 goto error;
2650 }
2651 if (c != '%') {
2652 v = getnextarg(args, arglen, &argidx);
2653 if (v == NULL)
2654 goto error;
2655 }
2656 sign = 0;
2657 fill = ' ';
2658 switch (c) {
2659 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002660 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002661 len = 1;
2662 break;
2663 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002664 case 'r':
2665 if (PyUnicode_Check(v)) {
2666 fmt = fmt_start;
2667 goto unicode;
2668 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002669 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002670 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002671 else
2672 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002673 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002674 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002675 if (!PyString_Check(temp)) {
2676 PyErr_SetString(PyExc_TypeError,
2677 "%s argument has non-string str()");
2678 goto error;
2679 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002680 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002681 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002682 if (prec >= 0 && len > prec)
2683 len = prec;
2684 break;
2685 case 'i':
2686 case 'd':
2687 case 'u':
2688 case 'o':
2689 case 'x':
2690 case 'X':
2691 if (c == 'i')
2692 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002693 pbuf = formatbuf;
2694 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002695 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002696 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002697 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002698 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002699 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002700 if ((flags&F_ALT) &&
2701 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002702 pbuf[0] == '0' && pbuf[1] == c) {
2703 *res++ = *pbuf++;
2704 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002705 rescnt -= 2;
2706 len -= 2;
2707 width -= 2;
2708 if (width < 0)
2709 width = 0;
2710 }
2711 }
Guido van Rossume5372401993-03-16 12:15:04 +00002712 break;
2713 case 'e':
2714 case 'E':
2715 case 'f':
2716 case 'g':
2717 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002718 pbuf = formatbuf;
2719 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002720 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002721 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002722 sign = 1;
2723 if (flags&F_ZERO)
2724 fill = '0';
2725 break;
2726 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002727 pbuf = formatbuf;
2728 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002729 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002730 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002731 break;
2732 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002733 PyErr_Format(PyExc_ValueError,
2734 "unsupported format character '%c' (0x%x)",
2735 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002736 goto error;
2737 }
2738 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002739 if (*pbuf == '-' || *pbuf == '+') {
2740 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002741 len--;
2742 }
2743 else if (flags & F_SIGN)
2744 sign = '+';
2745 else if (flags & F_BLANK)
2746 sign = ' ';
2747 else
2748 sign = '\0';
2749 }
2750 if (width < len)
2751 width = len;
2752 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002753 reslen -= rescnt;
2754 rescnt = width + fmtcnt + 100;
2755 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002757 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002758 res = PyString_AsString(result)
2759 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002760 }
2761 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002762 if (fill != ' ')
2763 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002764 rescnt--;
2765 if (width > len)
2766 width--;
2767 }
2768 if (width > len && !(flags&F_LJUST)) {
2769 do {
2770 --rescnt;
2771 *res++ = fill;
2772 } while (--width > len);
2773 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002774 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002775 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002776 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002777 res += len;
2778 rescnt -= len;
2779 while (--width >= len) {
2780 --rescnt;
2781 *res++ = ' ';
2782 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002783 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002785 "not all arguments converted");
2786 goto error;
2787 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002788 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002789 } /* '%' */
2790 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002791 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002792 PyErr_SetString(PyExc_TypeError,
2793 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002794 goto error;
2795 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002796 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002797 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002798 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002799 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002800 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002801
2802 unicode:
2803 if (args_owned) {
2804 Py_DECREF(args);
2805 args_owned = 0;
2806 }
2807 /* Fiddle args right (remove the first argidx-1 arguments) */
2808 --argidx;
2809 if (PyTuple_Check(orig_args) && argidx > 0) {
2810 PyObject *v;
2811 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2812 v = PyTuple_New(n);
2813 if (v == NULL)
2814 goto error;
2815 while (--n >= 0) {
2816 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2817 Py_INCREF(w);
2818 PyTuple_SET_ITEM(v, n, w);
2819 }
2820 args = v;
2821 } else {
2822 Py_INCREF(orig_args);
2823 args = orig_args;
2824 }
2825 /* Paste rest of format string to what we have of the result
2826 string; we reuse result for this */
2827 rescnt = res - PyString_AS_STRING(result);
2828 fmtcnt = PyString_GET_SIZE(format) - \
2829 (fmt - PyString_AS_STRING(format));
2830 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2831 Py_DECREF(args);
2832 goto error;
2833 }
2834 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2835 format = result;
2836 /* Let Unicode do its magic */
2837 result = PyUnicode_Format(format, args);
2838 Py_DECREF(format);
2839 Py_DECREF(args);
2840 return result;
2841
Guido van Rossume5372401993-03-16 12:15:04 +00002842 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002843 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002844 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002846 }
Guido van Rossume5372401993-03-16 12:15:04 +00002847 return NULL;
2848}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002849
2850
2851#ifdef INTERN_STRINGS
2852
2853static PyObject *interned;
2854
2855void
Fred Drakeba096332000-07-09 07:04:36 +00002856PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002857{
2858 register PyStringObject *s = (PyStringObject *)(*p);
2859 PyObject *t;
2860 if (s == NULL || !PyString_Check(s))
2861 Py_FatalError("PyString_InternInPlace: strings only please!");
2862 if ((t = s->ob_sinterned) != NULL) {
2863 if (t == (PyObject *)s)
2864 return;
2865 Py_INCREF(t);
2866 *p = t;
2867 Py_DECREF(s);
2868 return;
2869 }
2870 if (interned == NULL) {
2871 interned = PyDict_New();
2872 if (interned == NULL)
2873 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002874 }
2875 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2876 Py_INCREF(t);
2877 *p = s->ob_sinterned = t;
2878 Py_DECREF(s);
2879 return;
2880 }
2881 t = (PyObject *)s;
2882 if (PyDict_SetItem(interned, t, t) == 0) {
2883 s->ob_sinterned = t;
2884 return;
2885 }
2886 PyErr_Clear();
2887}
2888
2889
2890PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002891PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002892{
2893 PyObject *s = PyString_FromString(cp);
2894 if (s == NULL)
2895 return NULL;
2896 PyString_InternInPlace(&s);
2897 return s;
2898}
2899
2900#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002901
2902void
Fred Drakeba096332000-07-09 07:04:36 +00002903PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002904{
2905 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002906 for (i = 0; i < UCHAR_MAX + 1; i++) {
2907 Py_XDECREF(characters[i]);
2908 characters[i] = NULL;
2909 }
2910#ifndef DONT_SHARE_SHORT_STRINGS
2911 Py_XDECREF(nullstring);
2912 nullstring = NULL;
2913#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002914#ifdef INTERN_STRINGS
2915 if (interned) {
2916 int pos, changed;
2917 PyObject *key, *value;
2918 do {
2919 changed = 0;
2920 pos = 0;
2921 while (PyDict_Next(interned, &pos, &key, &value)) {
2922 if (key->ob_refcnt == 2 && key == value) {
2923 PyDict_DelItem(interned, key);
2924 changed = 1;
2925 }
2926 }
2927 } while (changed);
2928 }
2929#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002930}