blob: f7c3f4b157d8adc49d3d4cfd31aec26bd4ba2a59 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the shared empty string
   (null_strings) or a cached one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings;
int one_strings;
#endif
11
Guido van Rossum03093a21994-09-28 15:51:32 +000012#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#include <limits.h>
14#else
15#ifndef UCHAR_MAX
16#define UCHAR_MAX 255
17#endif
18#endif
19
Guido van Rossumc0b618a1997-05-02 03:12:38 +000020static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000021#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000022static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000023#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000024
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of
   objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000040PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000041PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000042{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000043 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045 if (size == 0 && (op = nullstring) != NULL) {
46#ifdef COUNT_ALLOCS
47 null_strings++;
48#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049 Py_INCREF(op);
50 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 if (size == 1 && str != NULL &&
53 (op = characters[*str & UCHAR_MAX]) != NULL)
54 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055#ifdef COUNT_ALLOCS
56 one_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000061#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000062
63 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000066 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000068 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef CACHE_HASH
70 op->ob_shash = -1;
71#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000072#ifdef INTERN_STRINGS
73 op->ob_sinterned = NULL;
74#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (str != NULL)
76 memcpy(op->ob_sval, str, size);
77 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000078#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 if (size == 0) {
80 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 } else if (size == 1 && str != NULL) {
83 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000084 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000086#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000087 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088}
89
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000091PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000093 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 if (size > INT_MAX) {
96 PyErr_SetString(PyExc_OverflowError,
97 "string is too long for a Python string");
98 return NULL;
99 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000100#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 if (size == 0 && (op = nullstring) != NULL) {
102#ifdef COUNT_ALLOCS
103 null_strings++;
104#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 Py_INCREF(op);
106 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 }
108 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
109#ifdef COUNT_ALLOCS
110 one_strings++;
111#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 Py_INCREF(op);
113 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000115#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000116
117 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000119 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000120 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000122 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123#ifdef CACHE_HASH
124 op->ob_shash = -1;
125#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000126#ifdef INTERN_STRINGS
127 op->ob_sinterned = NULL;
128#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000130#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
132 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 } else if (size == 1) {
135 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140}
141
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000142PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
146{
147 PyObject *buffer = NULL, *str;
148
149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
151
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
166 }
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000169 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
173 }
174 Py_DECREF(buffer);
175 return str;
176
177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
180}
181
182PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
186{
187 PyObject *v, *str;
188
189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
195}
196
197PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
200{
201 PyObject *v;
202
203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
206 }
207
208 if (encoding == NULL)
209 encoding = PyUnicode_GetDefaultEncoding();
210
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
222 }
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
229 }
230 return v;
231
232 onError:
233 return NULL;
234}
235
Guido van Rossum234f9421993-06-17 12:35:49 +0000236static void
Fred Drakeba096332000-07-09 07:04:36 +0000237string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000238{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000239 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000240}
241
Guido van Rossumd7047b31995-01-02 19:07:15 +0000242int
Fred Drakeba096332000-07-09 07:04:36 +0000243PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000244{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000245 if (!PyString_Check(op)) {
246 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 return -1;
248 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000249 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250}
251
252/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return NULL;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/* Methods */
263
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000264static int
Fred Drakeba096332000-07-09 07:04:36 +0000265string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266{
267 int i;
268 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000269 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000270 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000273 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000275
Thomas Wouters7e474022000-07-16 12:04:32 +0000276 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000277 quote = '\'';
278 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
279 quote = '"';
280
281 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 for (i = 0; i < op->ob_size; i++) {
283 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000284 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000285 fprintf(fp, "\\%c", c);
286 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000287 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000288 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000289 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000291 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000292 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293}
294
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000295static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000296string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000298 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
299 PyObject *v;
300 if (newsize > INT_MAX) {
301 PyErr_SetString(PyExc_OverflowError,
302 "string is too large to make repr");
303 }
304 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000305 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000306 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 }
308 else {
309 register int i;
310 register char c;
311 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000312 int quote;
313
Thomas Wouters7e474022000-07-16 12:04:32 +0000314 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000315 quote = '\'';
316 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
317 quote = '"';
318
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000320 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 for (i = 0; i < op->ob_size; i++) {
322 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000323 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 *p++ = '\\', *p++ = c;
325 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000326 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327 while (*p != '\0')
328 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329 }
330 else
331 *p++ = c;
332 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 _PyString_Resize(
336 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000337 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339}
340
341static int
Fred Drakeba096332000-07-09 07:04:36 +0000342string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343{
344 return a->ob_size;
345}
346
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000348string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349{
350 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000351 register PyStringObject *op;
352 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000353 if (PyUnicode_Check(bb))
354 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000355 PyErr_Format(PyExc_TypeError,
356 "cannot add type \"%.200s\" to string",
357 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 return NULL;
359 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000360#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 /* Optimize cases with empty left or right operand */
362 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000363 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000364 return bb;
365 }
366 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000367 Py_INCREF(a);
368 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 }
370 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000371 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000372 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000373 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000374 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000375 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000376 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000377#ifdef CACHE_HASH
378 op->ob_shash = -1;
379#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000380#ifdef INTERN_STRINGS
381 op->ob_sinterned = NULL;
382#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000383 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
384 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
385 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000386 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387#undef b
388}
389
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000390static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000391string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392{
393 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000394 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000395 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 if (n < 0)
397 n = 0;
398 size = a->ob_size * n;
399 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400 Py_INCREF(a);
401 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000403 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000404 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000405 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000406 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000407 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000408 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000409#ifdef CACHE_HASH
410 op->ob_shash = -1;
411#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000412#ifdef INTERN_STRINGS
413 op->ob_sinterned = NULL;
414#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000415 for (i = 0; i < size; i += a->ob_size)
416 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
417 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000418 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000419}
420
421/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
422
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000424string_slice(register PyStringObject *a, register int i, register int j)
425 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000426{
427 if (i < 0)
428 i = 0;
429 if (j < 0)
430 j = 0; /* Avoid signed/unsigned bug in next line */
431 if (j > a->ob_size)
432 j = a->ob_size;
433 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000434 Py_INCREF(a);
435 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436 }
437 if (j < i)
438 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440}
441
Guido van Rossum9284a572000-03-07 15:53:43 +0000442static int
Fred Drakeba096332000-07-09 07:04:36 +0000443string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000444{
445 register char *s, *end;
446 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000447 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000448 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000449 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000450 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000451 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000452 return -1;
453 }
454 c = PyString_AsString(el)[0];
455 s = PyString_AsString(a);
456 end = s + PyString_Size(a);
457 while (s < end) {
458 if (c == *s++)
459 return 1;
460 }
461 return 0;
462}
463
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000464static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000465string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000466{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000467 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000470 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000471 return NULL;
472 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000473 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000475#ifdef COUNT_ALLOCS
476 if (v != NULL)
477 one_strings++;
478#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000479 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000480 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000481 if (v == NULL)
482 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 characters[c] = (PyStringObject *) v;
484 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000485 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000486 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000487 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488}
489
490static int
Fred Drakeba096332000-07-09 07:04:36 +0000491string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000492{
Guido van Rossum253919f1991-02-13 23:18:39 +0000493 int len_a = a->ob_size, len_b = b->ob_size;
494 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000495 int cmp;
496 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000497 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000498 if (cmp == 0)
499 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
500 if (cmp != 0)
501 return cmp;
502 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000503 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504}
505
Guido van Rossum9bfef441993-03-29 10:43:31 +0000506static long
Fred Drakeba096332000-07-09 07:04:36 +0000507string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000508{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000509 register int len;
510 register unsigned char *p;
511 register long x;
512
513#ifdef CACHE_HASH
514 if (a->ob_shash != -1)
515 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000516#ifdef INTERN_STRINGS
517 if (a->ob_sinterned != NULL)
518 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000519 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000520#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000521#endif
522 len = a->ob_size;
523 p = (unsigned char *) a->ob_sval;
524 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000525 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000526 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000527 x ^= a->ob_size;
528 if (x == -1)
529 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000530#ifdef CACHE_HASH
531 a->ob_shash = x;
532#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000533 return x;
534}
535
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000536static int
Fred Drakeba096332000-07-09 07:04:36 +0000537string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000538{
539 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000540 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000541 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000542 return -1;
543 }
544 *ptr = (void *)self->ob_sval;
545 return self->ob_size;
546}
547
548static int
Fred Drakeba096332000-07-09 07:04:36 +0000549string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000550{
Guido van Rossum045e6881997-09-08 18:30:11 +0000551 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000552 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000553 return -1;
554}
555
556static int
Fred Drakeba096332000-07-09 07:04:36 +0000557string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000558{
559 if ( lenp )
560 *lenp = self->ob_size;
561 return 1;
562}
563
Guido van Rossum1db70701998-10-08 02:18:52 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000566{
567 if ( index != 0 ) {
568 PyErr_SetString(PyExc_SystemError,
569 "accessing non-existent string segment");
570 return -1;
571 }
572 *ptr = self->ob_sval;
573 return self->ob_size;
574}
575
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000576static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000577 (inquiry)string_length, /*sq_length*/
578 (binaryfunc)string_concat, /*sq_concat*/
579 (intargfunc)string_repeat, /*sq_repeat*/
580 (intargfunc)string_item, /*sq_item*/
581 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000582 0, /*sq_ass_item*/
583 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000584 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585};
586
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000587static PyBufferProcs string_as_buffer = {
588 (getreadbufferproc)string_buffer_getreadbuf,
589 (getwritebufferproc)string_buffer_getwritebuf,
590 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000591 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000592};
593
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000594
595
/* Mode selectors -- presumably for the strip routines later in this
   file; they are not used in the code preceding this point. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
599
600
601static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000602split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000603{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000604 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000605 PyObject* item;
606 PyObject *list = PyList_New(0);
607
608 if (list == NULL)
609 return NULL;
610
Guido van Rossum4c08d552000-03-10 22:55:18 +0000611 for (i = j = 0; i < len; ) {
612 while (i < len && isspace(Py_CHARMASK(s[i])))
613 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000614 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000615 while (i < len && !isspace(Py_CHARMASK(s[i])))
616 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000617 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000618 if (maxsplit-- <= 0)
619 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000620 item = PyString_FromStringAndSize(s+j, (int)(i-j));
621 if (item == NULL)
622 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000623 err = PyList_Append(list, item);
624 Py_DECREF(item);
625 if (err < 0)
626 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000627 while (i < len && isspace(Py_CHARMASK(s[i])))
628 i++;
629 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 }
631 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000632 if (j < len) {
633 item = PyString_FromStringAndSize(s+j, (int)(len - j));
634 if (item == NULL)
635 goto finally;
636 err = PyList_Append(list, item);
637 Py_DECREF(item);
638 if (err < 0)
639 goto finally;
640 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000641 return list;
642 finally:
643 Py_DECREF(list);
644 return NULL;
645}
646
647
/* Documentation string for the split() method. */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n"
"\n"
"Return a list of the words in the string S, using sep as the\n"
"delimiter string. If maxsplit is given, at most maxsplit\n"
"splits are done. If sep is not specified, any whitespace string\n"
"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000655
656static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000657string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000658{
659 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000660 int maxsplit = -1;
661 const char *s = PyString_AS_STRING(self), *sub;
662 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000663
Guido van Rossum4c08d552000-03-10 22:55:18 +0000664 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (maxsplit < 0)
667 maxsplit = INT_MAX;
668 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000669 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000670 if (PyString_Check(subobj)) {
671 sub = PyString_AS_STRING(subobj);
672 n = PyString_GET_SIZE(subobj);
673 }
674 else if (PyUnicode_Check(subobj))
675 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
676 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
677 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000678 if (n == 0) {
679 PyErr_SetString(PyExc_ValueError, "empty separator");
680 return NULL;
681 }
682
683 list = PyList_New(0);
684 if (list == NULL)
685 return NULL;
686
687 i = j = 0;
688 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000689 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000690 if (maxsplit-- <= 0)
691 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000692 item = PyString_FromStringAndSize(s+j, (int)(i-j));
693 if (item == NULL)
694 goto fail;
695 err = PyList_Append(list, item);
696 Py_DECREF(item);
697 if (err < 0)
698 goto fail;
699 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000700 }
701 else
702 i++;
703 }
704 item = PyString_FromStringAndSize(s+j, (int)(len-j));
705 if (item == NULL)
706 goto fail;
707 err = PyList_Append(list, item);
708 Py_DECREF(item);
709 if (err < 0)
710 goto fail;
711
712 return list;
713
714 fail:
715 Py_DECREF(list);
716 return NULL;
717}
718
719
/* Documentation string for the join() method. */
static char join__doc__[] =
"S.join(sequence) -> string\n"
"\n"
"Return a string which is the concatenation of the strings in the\n"
"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000725
726static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000727string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000728{
729 char *sep = PyString_AS_STRING(self);
730 int seplen = PyString_GET_SIZE(self);
731 PyObject *res = NULL;
732 int reslen = 0;
733 char *p;
734 int seqlen = 0;
735 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000736 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000737 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000738
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000739 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740 return NULL;
741
Barry Warsaw771d0672000-07-11 04:58:12 +0000742 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000743 if (PyErr_ExceptionMatches(PyExc_TypeError))
744 PyErr_Format(PyExc_TypeError,
745 "sequence expected, %.80s found",
746 orig->ob_type->tp_name);
747 return NULL;
748 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000749 /* From here on out, errors go through finally: for proper
750 * reference count manipulations.
751 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000752 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000754 item = PySequence_Fast_GET_ITEM(seq, 0);
755 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000756 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000757 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000758 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000759
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000760 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000761 goto finally;
762
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000763 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000764
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000765 for (i = 0; i < seqlen; i++) {
766 item = PySequence_Fast_GET_ITEM(seq, i);
767 if (!PyString_Check(item)){
768 if (PyUnicode_Check(item)) {
769 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000770 Py_DECREF(seq);
771 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000772 }
773 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000774 "sequence item %i: expected string,"
775 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000776 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000777 goto finally;
778 }
779 slen = PyString_GET_SIZE(item);
780 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000781 /* at least double the size of the string */
782 sz_incr = slen + seplen > sz ? slen + seplen : sz;
783 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000784 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000785 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000786 sz += sz_incr;
787 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000788 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000789 if (i > 0) {
790 memcpy(p, sep, seplen);
791 p += seplen;
792 reslen += seplen;
793 }
794 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000795 p += slen;
796 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000797 }
798 if (_PyString_Resize(&res, reslen))
799 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000800 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 return res;
802
803 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000804 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000805 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806 return NULL;
807}
808
809
810
811static long
Fred Drakeba096332000-07-09 07:04:36 +0000812string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000814 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000815 int len = PyString_GET_SIZE(self);
816 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818
Guido van Rossumc6821402000-05-08 14:08:05 +0000819 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
820 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000821 return -2;
822 if (PyString_Check(subobj)) {
823 sub = PyString_AS_STRING(subobj);
824 n = PyString_GET_SIZE(subobj);
825 }
826 else if (PyUnicode_Check(subobj))
827 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
828 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000829 return -2;
830
831 if (last > len)
832 last = len;
833 if (last < 0)
834 last += len;
835 if (last < 0)
836 last = 0;
837 if (i < 0)
838 i += len;
839 if (i < 0)
840 i = 0;
841
Guido van Rossum4c08d552000-03-10 22:55:18 +0000842 if (dir > 0) {
843 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000844 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000845 last -= n;
846 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000847 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000848 return (long)i;
849 }
850 else {
851 int j;
852
853 if (n == 0 && i <= last)
854 return (long)last;
855 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000856 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000857 return (long)j;
858 }
859
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000860 return -1;
861}
862
863
864static char find__doc__[] =
865"S.find(sub [,start [,end]]) -> int\n\
866\n\
867Return the lowest index in S where substring sub is found,\n\
868such that sub is contained within s[start,end]. Optional\n\
869arguments start and end are interpreted as in slice notation.\n\
870\n\
871Return -1 on failure.";
872
873static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000874string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000876 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000877 if (result == -2)
878 return NULL;
879 return PyInt_FromLong(result);
880}
881
882
883static char index__doc__[] =
884"S.index(sub [,start [,end]]) -> int\n\
885\n\
886Like S.find() but raise ValueError when the substring is not found.";
887
888static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000889string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000890{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892 if (result == -2)
893 return NULL;
894 if (result == -1) {
895 PyErr_SetString(PyExc_ValueError,
896 "substring not found in string.index");
897 return NULL;
898 }
899 return PyInt_FromLong(result);
900}
901
902
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000903static char rfind__doc__[] =
904"S.rfind(sub [,start [,end]]) -> int\n\
905\n\
906Return the highest index in S where substring sub is found,\n\
907such that sub is contained within s[start,end]. Optional\n\
908arguments start and end are interpreted as in slice notation.\n\
909\n\
910Return -1 on failure.";
911
912static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000913string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000914{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000915 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000916 if (result == -2)
917 return NULL;
918 return PyInt_FromLong(result);
919}
920
921
922static char rindex__doc__[] =
923"S.rindex(sub [,start [,end]]) -> int\n\
924\n\
925Like S.rfind() but raise ValueError when the substring is not found.";
926
927static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000928string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000929{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000930 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000931 if (result == -2)
932 return NULL;
933 if (result == -1) {
934 PyErr_SetString(PyExc_ValueError,
935 "substring not found in string.rindex");
936 return NULL;
937 }
938 return PyInt_FromLong(result);
939}
940
941
942static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000943do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000944{
945 char *s = PyString_AS_STRING(self);
946 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947
Guido van Rossum43713e52000-02-29 13:59:29 +0000948 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000949 return NULL;
950
951 i = 0;
952 if (striptype != RIGHTSTRIP) {
953 while (i < len && isspace(Py_CHARMASK(s[i]))) {
954 i++;
955 }
956 }
957
958 j = len;
959 if (striptype != LEFTSTRIP) {
960 do {
961 j--;
962 } while (j >= i && isspace(Py_CHARMASK(s[j])));
963 j++;
964 }
965
966 if (i == 0 && j == len) {
967 Py_INCREF(self);
968 return (PyObject*)self;
969 }
970 else
971 return PyString_FromStringAndSize(s+i, j-i);
972}
973
974
975static char strip__doc__[] =
976"S.strip() -> string\n\
977\n\
978Return a copy of the string S with leading and trailing\n\
979whitespace removed.";
980
981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000983{
984 return do_strip(self, args, BOTHSTRIP);
985}
986
987
988static char lstrip__doc__[] =
989"S.lstrip() -> string\n\
990\n\
991Return a copy of the string S with leading whitespace removed.";
992
993static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000994string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995{
996 return do_strip(self, args, LEFTSTRIP);
997}
998
999
1000static char rstrip__doc__[] =
1001"S.rstrip() -> string\n\
1002\n\
1003Return a copy of the string S with trailing whitespace removed.";
1004
1005static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001006string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001007{
1008 return do_strip(self, args, RIGHTSTRIP);
1009}
1010
1011
1012static char lower__doc__[] =
1013"S.lower() -> string\n\
1014\n\
1015Return a copy of the string S converted to lowercase.";
1016
1017static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001018string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001019{
1020 char *s = PyString_AS_STRING(self), *s_new;
1021 int i, n = PyString_GET_SIZE(self);
1022 PyObject *new;
1023
Guido van Rossum43713e52000-02-29 13:59:29 +00001024 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025 return NULL;
1026 new = PyString_FromStringAndSize(NULL, n);
1027 if (new == NULL)
1028 return NULL;
1029 s_new = PyString_AsString(new);
1030 for (i = 0; i < n; i++) {
1031 int c = Py_CHARMASK(*s++);
1032 if (isupper(c)) {
1033 *s_new = tolower(c);
1034 } else
1035 *s_new = c;
1036 s_new++;
1037 }
1038 return new;
1039}
1040
1041
1042static char upper__doc__[] =
1043"S.upper() -> string\n\
1044\n\
1045Return a copy of the string S converted to uppercase.";
1046
1047static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001048string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049{
1050 char *s = PyString_AS_STRING(self), *s_new;
1051 int i, n = PyString_GET_SIZE(self);
1052 PyObject *new;
1053
Guido van Rossum43713e52000-02-29 13:59:29 +00001054 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 return NULL;
1056 new = PyString_FromStringAndSize(NULL, n);
1057 if (new == NULL)
1058 return NULL;
1059 s_new = PyString_AsString(new);
1060 for (i = 0; i < n; i++) {
1061 int c = Py_CHARMASK(*s++);
1062 if (islower(c)) {
1063 *s_new = toupper(c);
1064 } else
1065 *s_new = c;
1066 s_new++;
1067 }
1068 return new;
1069}
1070
1071
Guido van Rossum4c08d552000-03-10 22:55:18 +00001072static char title__doc__[] =
1073"S.title() -> string\n\
1074\n\
1075Return a titlecased version of S, i.e. words start with uppercase\n\
1076characters, all remaining cased characters have lowercase.";
1077
1078static PyObject*
1079string_title(PyUnicodeObject *self, PyObject *args)
1080{
1081 char *s = PyString_AS_STRING(self), *s_new;
1082 int i, n = PyString_GET_SIZE(self);
1083 int previous_is_cased = 0;
1084 PyObject *new;
1085
1086 if (!PyArg_ParseTuple(args, ":title"))
1087 return NULL;
1088 new = PyString_FromStringAndSize(NULL, n);
1089 if (new == NULL)
1090 return NULL;
1091 s_new = PyString_AsString(new);
1092 for (i = 0; i < n; i++) {
1093 int c = Py_CHARMASK(*s++);
1094 if (islower(c)) {
1095 if (!previous_is_cased)
1096 c = toupper(c);
1097 previous_is_cased = 1;
1098 } else if (isupper(c)) {
1099 if (previous_is_cased)
1100 c = tolower(c);
1101 previous_is_cased = 1;
1102 } else
1103 previous_is_cased = 0;
1104 *s_new++ = c;
1105 }
1106 return new;
1107}
1108
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001109static char capitalize__doc__[] =
1110"S.capitalize() -> string\n\
1111\n\
1112Return a copy of the string S with only its first character\n\
1113capitalized.";
1114
1115static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001116string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117{
1118 char *s = PyString_AS_STRING(self), *s_new;
1119 int i, n = PyString_GET_SIZE(self);
1120 PyObject *new;
1121
Guido van Rossum43713e52000-02-29 13:59:29 +00001122 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001123 return NULL;
1124 new = PyString_FromStringAndSize(NULL, n);
1125 if (new == NULL)
1126 return NULL;
1127 s_new = PyString_AsString(new);
1128 if (0 < n) {
1129 int c = Py_CHARMASK(*s++);
1130 if (islower(c))
1131 *s_new = toupper(c);
1132 else
1133 *s_new = c;
1134 s_new++;
1135 }
1136 for (i = 1; i < n; i++) {
1137 int c = Py_CHARMASK(*s++);
1138 if (isupper(c))
1139 *s_new = tolower(c);
1140 else
1141 *s_new = c;
1142 s_new++;
1143 }
1144 return new;
1145}
1146
1147
1148static char count__doc__[] =
1149"S.count(sub[, start[, end]]) -> int\n\
1150\n\
1151Return the number of occurrences of substring sub in string\n\
1152S[start:end]. Optional arguments start and end are\n\
1153interpreted as in slice notation.";
1154
1155static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001156string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001159 int len = PyString_GET_SIZE(self), n;
1160 int i = 0, last = INT_MAX;
1161 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001162 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163
Guido van Rossumc6821402000-05-08 14:08:05 +00001164 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1165 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001167
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 if (PyString_Check(subobj)) {
1169 sub = PyString_AS_STRING(subobj);
1170 n = PyString_GET_SIZE(subobj);
1171 }
1172 else if (PyUnicode_Check(subobj))
1173 return PyInt_FromLong(
1174 PyUnicode_Count((PyObject *)self, subobj, i, last));
1175 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1176 return NULL;
1177
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178 if (last > len)
1179 last = len;
1180 if (last < 0)
1181 last += len;
1182 if (last < 0)
1183 last = 0;
1184 if (i < 0)
1185 i += len;
1186 if (i < 0)
1187 i = 0;
1188 m = last + 1 - n;
1189 if (n == 0)
1190 return PyInt_FromLong((long) (m-i));
1191
1192 r = 0;
1193 while (i < m) {
1194 if (!memcmp(s+i, sub, n)) {
1195 r++;
1196 i += n;
1197 } else {
1198 i++;
1199 }
1200 }
1201 return PyInt_FromLong((long) r);
1202}
1203
1204
1205static char swapcase__doc__[] =
1206"S.swapcase() -> string\n\
1207\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001208Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209converted to lowercase and vice versa.";
1210
1211static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001212string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213{
1214 char *s = PyString_AS_STRING(self), *s_new;
1215 int i, n = PyString_GET_SIZE(self);
1216 PyObject *new;
1217
Guido van Rossum43713e52000-02-29 13:59:29 +00001218 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219 return NULL;
1220 new = PyString_FromStringAndSize(NULL, n);
1221 if (new == NULL)
1222 return NULL;
1223 s_new = PyString_AsString(new);
1224 for (i = 0; i < n; i++) {
1225 int c = Py_CHARMASK(*s++);
1226 if (islower(c)) {
1227 *s_new = toupper(c);
1228 }
1229 else if (isupper(c)) {
1230 *s_new = tolower(c);
1231 }
1232 else
1233 *s_new = c;
1234 s_new++;
1235 }
1236 return new;
1237}
1238
1239
1240static char translate__doc__[] =
1241"S.translate(table [,deletechars]) -> string\n\
1242\n\
1243Return a copy of the string S, where all characters occurring\n\
1244in the optional argument deletechars are removed, and the\n\
1245remaining characters have been mapped through the given\n\
1246translation table, which must be a string of length 256.";
1247
1248static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001249string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001251 register char *input, *output;
1252 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253 register int i, c, changed = 0;
1254 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256 int inlen, tablen, dellen = 0;
1257 PyObject *result;
1258 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 if (!PyArg_ParseTuple(args, "O|O:translate",
1262 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264
1265 if (PyString_Check(tableobj)) {
1266 table1 = PyString_AS_STRING(tableobj);
1267 tablen = PyString_GET_SIZE(tableobj);
1268 }
1269 else if (PyUnicode_Check(tableobj)) {
1270 /* Unicode .translate() does not support the deletechars
1271 parameter; instead a mapping to None will cause characters
1272 to be deleted. */
1273 if (delobj != NULL) {
1274 PyErr_SetString(PyExc_TypeError,
1275 "deletions are implemented differently for unicode");
1276 return NULL;
1277 }
1278 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1279 }
1280 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282
1283 if (delobj != NULL) {
1284 if (PyString_Check(delobj)) {
1285 del_table = PyString_AS_STRING(delobj);
1286 dellen = PyString_GET_SIZE(delobj);
1287 }
1288 else if (PyUnicode_Check(delobj)) {
1289 PyErr_SetString(PyExc_TypeError,
1290 "deletions are implemented differently for unicode");
1291 return NULL;
1292 }
1293 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1294 return NULL;
1295
1296 if (tablen != 256) {
1297 PyErr_SetString(PyExc_ValueError,
1298 "translation table must be 256 characters long");
1299 return NULL;
1300 }
1301 }
1302 else {
1303 del_table = NULL;
1304 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 }
1306
1307 table = table1;
1308 inlen = PyString_Size(input_obj);
1309 result = PyString_FromStringAndSize((char *)NULL, inlen);
1310 if (result == NULL)
1311 return NULL;
1312 output_start = output = PyString_AsString(result);
1313 input = PyString_AsString(input_obj);
1314
1315 if (dellen == 0) {
1316 /* If no deletions are required, use faster code */
1317 for (i = inlen; --i >= 0; ) {
1318 c = Py_CHARMASK(*input++);
1319 if (Py_CHARMASK((*output++ = table[c])) != c)
1320 changed = 1;
1321 }
1322 if (changed)
1323 return result;
1324 Py_DECREF(result);
1325 Py_INCREF(input_obj);
1326 return input_obj;
1327 }
1328
1329 for (i = 0; i < 256; i++)
1330 trans_table[i] = Py_CHARMASK(table[i]);
1331
1332 for (i = 0; i < dellen; i++)
1333 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1334
1335 for (i = inlen; --i >= 0; ) {
1336 c = Py_CHARMASK(*input++);
1337 if (trans_table[c] != -1)
1338 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1339 continue;
1340 changed = 1;
1341 }
1342 if (!changed) {
1343 Py_DECREF(result);
1344 Py_INCREF(input_obj);
1345 return input_obj;
1346 }
1347 /* Fix the size of the resulting string */
1348 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1349 return NULL;
1350 return result;
1351}
1352
1353
1354/* What follows is used for implementing replace(). Perry Stoll. */
1355
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, the function returns -1.

  An empty (or negative-length) PAT is never reported as found: there is
  no first byte to compare, and callers that advance by pat_len after a
  match could otherwise make no progress.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    register int ii;

    /* nothing to look for; also avoids reading pat[0] below */
    if (pat_len <= 0)
        return -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        /* cheap first-byte test before the full comparison */
        if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
            return ii;
        }
    }
    return -1;
}
1381
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming immediately after each match.
  So mem=1111 and pat==11 gives 2,
  and mem=11111 and pat==11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    register int pos;

    for (;;) {
        if (len < 0)
            break;
        pos = mymemfind(mem, len, pat, pat_len);
        if (pos == -1)
            break;
        /* skip the text before the match and the match itself */
        pos += pat_len;
        mem += pos;
        len -= pos;
        hits++;
    }
    return hits;
}
1405
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR, then the original string
   is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (every byte replaced by nothing).
     * Request at least one byte so that a NULL from the allocator
     * always means out-of-memory and the caller has something to free.
     */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset);     /* copy part of str before pat */
        str += offset + pat_len;        /* move str past pattern */
        len -= offset + pat_len;        /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;                /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len);    /* copy substring into new_s */
        new_s += sub_len;               /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1483
1484
1485static char replace__doc__[] =
1486"S.replace (old, new[, maxsplit]) -> string\n\
1487\n\
1488Return a copy of string S with all occurrences of substring\n\
1489old replaced by new. If the optional argument maxsplit is\n\
1490given, only the first maxsplit occurrences are replaced.";
1491
1492static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001493string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001495 const char *str = PyString_AS_STRING(self), *sub, *repl;
1496 char *new_s;
1497 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1498 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 if (!PyArg_ParseTuple(args, "OO|i:replace",
1503 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505
1506 if (PyString_Check(subobj)) {
1507 sub = PyString_AS_STRING(subobj);
1508 sub_len = PyString_GET_SIZE(subobj);
1509 }
1510 else if (PyUnicode_Check(subobj))
1511 return PyUnicode_Replace((PyObject *)self,
1512 subobj, replobj, count);
1513 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1514 return NULL;
1515
1516 if (PyString_Check(replobj)) {
1517 repl = PyString_AS_STRING(replobj);
1518 repl_len = PyString_GET_SIZE(replobj);
1519 }
1520 else if (PyUnicode_Check(replobj))
1521 return PyUnicode_Replace((PyObject *)self,
1522 subobj, replobj, count);
1523 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1524 return NULL;
1525
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001526 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001527 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528 return NULL;
1529 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001530 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 if (new_s == NULL) {
1532 PyErr_NoMemory();
1533 return NULL;
1534 }
1535 if (out_len == -1) {
1536 /* we're returning another reference to self */
1537 new = (PyObject*)self;
1538 Py_INCREF(new);
1539 }
1540 else {
1541 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001542 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543 }
1544 return new;
1545}
1546
1547
1548static char startswith__doc__[] =
1549"S.startswith(prefix[, start[, end]]) -> int\n\
1550\n\
1551Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1552optional start, test S beginning at that position. With optional end, stop\n\
1553comparing S at that position.";
1554
1555static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001556string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561 int plen;
1562 int start = 0;
1563 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565
Guido van Rossumc6821402000-05-08 14:08:05 +00001566 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1567 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 return NULL;
1569 if (PyString_Check(subobj)) {
1570 prefix = PyString_AS_STRING(subobj);
1571 plen = PyString_GET_SIZE(subobj);
1572 }
1573 else if (PyUnicode_Check(subobj))
1574 return PyInt_FromLong(
1575 PyUnicode_Tailmatch((PyObject *)self,
1576 subobj, start, end, -1));
1577 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001578 return NULL;
1579
1580 /* adopt Java semantics for index out of range. it is legal for
1581 * offset to be == plen, but this only returns true if prefix is
1582 * the empty string.
1583 */
1584 if (start < 0 || start+plen > len)
1585 return PyInt_FromLong(0);
1586
1587 if (!memcmp(str+start, prefix, plen)) {
1588 /* did the match end after the specified end? */
1589 if (end < 0)
1590 return PyInt_FromLong(1);
1591 else if (end - start < plen)
1592 return PyInt_FromLong(0);
1593 else
1594 return PyInt_FromLong(1);
1595 }
1596 else return PyInt_FromLong(0);
1597}
1598
1599
1600static char endswith__doc__[] =
1601"S.endswith(suffix[, start[, end]]) -> int\n\
1602\n\
1603Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1604optional start, test S beginning at that position. With optional end, stop\n\
1605comparing S at that position.";
1606
1607static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001608string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001610 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 const char* suffix;
1613 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 int start = 0;
1615 int end = -1;
1616 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001617 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
Guido van Rossumc6821402000-05-08 14:08:05 +00001619 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1620 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 return NULL;
1622 if (PyString_Check(subobj)) {
1623 suffix = PyString_AS_STRING(subobj);
1624 slen = PyString_GET_SIZE(subobj);
1625 }
1626 else if (PyUnicode_Check(subobj))
1627 return PyInt_FromLong(
1628 PyUnicode_Tailmatch((PyObject *)self,
1629 subobj, start, end, +1));
1630 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631 return NULL;
1632
Guido van Rossum4c08d552000-03-10 22:55:18 +00001633 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634 return PyInt_FromLong(0);
1635
1636 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001637 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638
Guido van Rossum4c08d552000-03-10 22:55:18 +00001639 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 return PyInt_FromLong(1);
1641 else return PyInt_FromLong(0);
1642}
1643
1644
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001645static char encode__doc__[] =
1646"S.encode([encoding[,errors]]) -> string\n\
1647\n\
1648Return an encoded string version of S. Default encoding is the current\n\
1649default string encoding. errors may be given to set a different error\n\
1650handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1651a ValueError. Other possible values are 'ignore' and 'replace'.";
1652
1653static PyObject *
1654string_encode(PyStringObject *self, PyObject *args)
1655{
1656 char *encoding = NULL;
1657 char *errors = NULL;
1658 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1659 return NULL;
1660 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1661}
1662
1663
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664static char expandtabs__doc__[] =
1665"S.expandtabs([tabsize]) -> string\n\
1666\n\
1667Return a copy of S where all tab characters are expanded using spaces.\n\
1668If tabsize is not given, a tab size of 8 characters is assumed.";
1669
1670static PyObject*
1671string_expandtabs(PyStringObject *self, PyObject *args)
1672{
1673 const char *e, *p;
1674 char *q;
1675 int i, j;
1676 PyObject *u;
1677 int tabsize = 8;
1678
1679 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1680 return NULL;
1681
Thomas Wouters7e474022000-07-16 12:04:32 +00001682 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 i = j = 0;
1684 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1685 for (p = PyString_AS_STRING(self); p < e; p++)
1686 if (*p == '\t') {
1687 if (tabsize > 0)
1688 j += tabsize - (j % tabsize);
1689 }
1690 else {
1691 j++;
1692 if (*p == '\n' || *p == '\r') {
1693 i += j;
1694 j = 0;
1695 }
1696 }
1697
1698 /* Second pass: create output string and fill it */
1699 u = PyString_FromStringAndSize(NULL, i + j);
1700 if (!u)
1701 return NULL;
1702
1703 j = 0;
1704 q = PyString_AS_STRING(u);
1705
1706 for (p = PyString_AS_STRING(self); p < e; p++)
1707 if (*p == '\t') {
1708 if (tabsize > 0) {
1709 i = tabsize - (j % tabsize);
1710 j += i;
1711 while (i--)
1712 *q++ = ' ';
1713 }
1714 }
1715 else {
1716 j++;
1717 *q++ = *p;
1718 if (*p == '\n' || *p == '\r')
1719 j = 0;
1720 }
1721
1722 return u;
1723}
1724
1725static
1726PyObject *pad(PyStringObject *self,
1727 int left,
1728 int right,
1729 char fill)
1730{
1731 PyObject *u;
1732
1733 if (left < 0)
1734 left = 0;
1735 if (right < 0)
1736 right = 0;
1737
1738 if (left == 0 && right == 0) {
1739 Py_INCREF(self);
1740 return (PyObject *)self;
1741 }
1742
1743 u = PyString_FromStringAndSize(NULL,
1744 left + PyString_GET_SIZE(self) + right);
1745 if (u) {
1746 if (left)
1747 memset(PyString_AS_STRING(u), fill, left);
1748 memcpy(PyString_AS_STRING(u) + left,
1749 PyString_AS_STRING(self),
1750 PyString_GET_SIZE(self));
1751 if (right)
1752 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1753 fill, right);
1754 }
1755
1756 return u;
1757}
1758
1759static char ljust__doc__[] =
1760"S.ljust(width) -> string\n\
1761\n\
1762Return S left justified in a string of length width. Padding is\n\
1763done using spaces.";
1764
1765static PyObject *
1766string_ljust(PyStringObject *self, PyObject *args)
1767{
1768 int width;
1769 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1770 return NULL;
1771
1772 if (PyString_GET_SIZE(self) >= width) {
1773 Py_INCREF(self);
1774 return (PyObject*) self;
1775 }
1776
1777 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1778}
1779
1780
1781static char rjust__doc__[] =
1782"S.rjust(width) -> string\n\
1783\n\
1784Return S right justified in a string of length width. Padding is\n\
1785done using spaces.";
1786
1787static PyObject *
1788string_rjust(PyStringObject *self, PyObject *args)
1789{
1790 int width;
1791 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1792 return NULL;
1793
1794 if (PyString_GET_SIZE(self) >= width) {
1795 Py_INCREF(self);
1796 return (PyObject*) self;
1797 }
1798
1799 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1800}
1801
1802
1803static char center__doc__[] =
1804"S.center(width) -> string\n\
1805\n\
1806Return S centered in a string of length width. Padding is done\n\
1807using spaces.";
1808
1809static PyObject *
1810string_center(PyStringObject *self, PyObject *args)
1811{
1812 int marg, left;
1813 int width;
1814
1815 if (!PyArg_ParseTuple(args, "i:center", &width))
1816 return NULL;
1817
1818 if (PyString_GET_SIZE(self) >= width) {
1819 Py_INCREF(self);
1820 return (PyObject*) self;
1821 }
1822
1823 marg = width - PyString_GET_SIZE(self);
1824 left = marg / 2 + (marg & width & 1);
1825
1826 return pad(self, left, marg - left, ' ');
1827}
1828
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0'; a leading '+' or '-' is moved in front of the
 * zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int npad;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	npad = width - PyString_GET_SIZE(self);

	padded = pad(self, npad, 0, '0');
	if (padded == NULL)
		return NULL;

	buf = PyString_AS_STRING(padded);
	switch (buf[npad]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[npad];
		buf[npad] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
1868
1869static char isspace__doc__[] =
1870"S.isspace() -> int\n\
1871\n\
1872Return 1 if there are only whitespace characters in S,\n\
18730 otherwise.";
1874
1875static PyObject*
1876string_isspace(PyStringObject *self, PyObject *args)
1877{
Fred Drakeba096332000-07-09 07:04:36 +00001878 register const unsigned char *p
1879 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001880 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881
1882 if (!PyArg_NoArgs(args))
1883 return NULL;
1884
1885 /* Shortcut for single character strings */
1886 if (PyString_GET_SIZE(self) == 1 &&
1887 isspace(*p))
1888 return PyInt_FromLong(1);
1889
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001890 /* Special case for empty strings */
1891 if (PyString_GET_SIZE(self) == 0)
1892 return PyInt_FromLong(0);
1893
Guido van Rossum4c08d552000-03-10 22:55:18 +00001894 e = p + PyString_GET_SIZE(self);
1895 for (; p < e; p++) {
1896 if (!isspace(*p))
1897 return PyInt_FromLong(0);
1898 }
1899 return PyInt_FromLong(1);
1900}
1901
1902
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001903static char isalpha__doc__[] =
1904"S.isalpha() -> int\n\
1905\n\
1906Return 1 if all characters in S are alphabetic\n\
1907and there is at least one character in S, 0 otherwise.";
1908
1909static PyObject*
1910string_isalpha(PyUnicodeObject *self, PyObject *args)
1911{
Fred Drakeba096332000-07-09 07:04:36 +00001912 register const unsigned char *p
1913 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001914 register const unsigned char *e;
1915
1916 if (!PyArg_NoArgs(args))
1917 return NULL;
1918
1919 /* Shortcut for single character strings */
1920 if (PyString_GET_SIZE(self) == 1 &&
1921 isalpha(*p))
1922 return PyInt_FromLong(1);
1923
1924 /* Special case for empty strings */
1925 if (PyString_GET_SIZE(self) == 0)
1926 return PyInt_FromLong(0);
1927
1928 e = p + PyString_GET_SIZE(self);
1929 for (; p < e; p++) {
1930 if (!isalpha(*p))
1931 return PyInt_FromLong(0);
1932 }
1933 return PyInt_FromLong(1);
1934}
1935
1936
1937static char isalnum__doc__[] =
1938"S.isalnum() -> int\n\
1939\n\
1940Return 1 if all characters in S are alphanumeric\n\
1941and there is at least one character in S, 0 otherwise.";
1942
1943static PyObject*
1944string_isalnum(PyUnicodeObject *self, PyObject *args)
1945{
Fred Drakeba096332000-07-09 07:04:36 +00001946 register const unsigned char *p
1947 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001948 register const unsigned char *e;
1949
1950 if (!PyArg_NoArgs(args))
1951 return NULL;
1952
1953 /* Shortcut for single character strings */
1954 if (PyString_GET_SIZE(self) == 1 &&
1955 isalnum(*p))
1956 return PyInt_FromLong(1);
1957
1958 /* Special case for empty strings */
1959 if (PyString_GET_SIZE(self) == 0)
1960 return PyInt_FromLong(0);
1961
1962 e = p + PyString_GET_SIZE(self);
1963 for (; p < e; p++) {
1964 if (!isalnum(*p))
1965 return PyInt_FromLong(0);
1966 }
1967 return PyInt_FromLong(1);
1968}
1969
1970
Guido van Rossum4c08d552000-03-10 22:55:18 +00001971static char isdigit__doc__[] =
1972"S.isdigit() -> int\n\
1973\n\
1974Return 1 if there are only digit characters in S,\n\
19750 otherwise.";
1976
1977static PyObject*
1978string_isdigit(PyStringObject *self, PyObject *args)
1979{
Fred Drakeba096332000-07-09 07:04:36 +00001980 register const unsigned char *p
1981 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001982 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983
1984 if (!PyArg_NoArgs(args))
1985 return NULL;
1986
1987 /* Shortcut for single character strings */
1988 if (PyString_GET_SIZE(self) == 1 &&
1989 isdigit(*p))
1990 return PyInt_FromLong(1);
1991
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001992 /* Special case for empty strings */
1993 if (PyString_GET_SIZE(self) == 0)
1994 return PyInt_FromLong(0);
1995
Guido van Rossum4c08d552000-03-10 22:55:18 +00001996 e = p + PyString_GET_SIZE(self);
1997 for (; p < e; p++) {
1998 if (!isdigit(*p))
1999 return PyInt_FromLong(0);
2000 }
2001 return PyInt_FromLong(1);
2002}
2003
2004
2005static char islower__doc__[] =
2006"S.islower() -> int\n\
2007\n\
2008Return 1 if all cased characters in S are lowercase and there is\n\
2009at least one cased character in S, 0 otherwise.";
2010
2011static PyObject*
2012string_islower(PyStringObject *self, PyObject *args)
2013{
Fred Drakeba096332000-07-09 07:04:36 +00002014 register const unsigned char *p
2015 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002016 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002017 int cased;
2018
2019 if (!PyArg_NoArgs(args))
2020 return NULL;
2021
2022 /* Shortcut for single character strings */
2023 if (PyString_GET_SIZE(self) == 1)
2024 return PyInt_FromLong(islower(*p) != 0);
2025
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002026 /* Special case for empty strings */
2027 if (PyString_GET_SIZE(self) == 0)
2028 return PyInt_FromLong(0);
2029
Guido van Rossum4c08d552000-03-10 22:55:18 +00002030 e = p + PyString_GET_SIZE(self);
2031 cased = 0;
2032 for (; p < e; p++) {
2033 if (isupper(*p))
2034 return PyInt_FromLong(0);
2035 else if (!cased && islower(*p))
2036 cased = 1;
2037 }
2038 return PyInt_FromLong(cased);
2039}
2040
2041
2042static char isupper__doc__[] =
2043"S.isupper() -> int\n\
2044\n\
2045Return 1 if all cased characters in S are uppercase and there is\n\
2046at least one cased character in S, 0 otherwise.";
2047
2048static PyObject*
2049string_isupper(PyStringObject *self, PyObject *args)
2050{
Fred Drakeba096332000-07-09 07:04:36 +00002051 register const unsigned char *p
2052 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002053 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054 int cased;
2055
2056 if (!PyArg_NoArgs(args))
2057 return NULL;
2058
2059 /* Shortcut for single character strings */
2060 if (PyString_GET_SIZE(self) == 1)
2061 return PyInt_FromLong(isupper(*p) != 0);
2062
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002063 /* Special case for empty strings */
2064 if (PyString_GET_SIZE(self) == 0)
2065 return PyInt_FromLong(0);
2066
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 e = p + PyString_GET_SIZE(self);
2068 cased = 0;
2069 for (; p < e; p++) {
2070 if (islower(*p))
2071 return PyInt_FromLong(0);
2072 else if (!cased && isupper(*p))
2073 cased = 1;
2074 }
2075 return PyInt_FromLong(cased);
2076}
2077
2078
2079static char istitle__doc__[] =
2080"S.istitle() -> int\n\
2081\n\
2082Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2083may only follow uncased characters and lowercase characters only cased\n\
2084ones. Return 0 otherwise.";
2085
2086static PyObject*
2087string_istitle(PyStringObject *self, PyObject *args)
2088{
Fred Drakeba096332000-07-09 07:04:36 +00002089 register const unsigned char *p
2090 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002091 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 int cased, previous_is_cased;
2093
2094 if (!PyArg_NoArgs(args))
2095 return NULL;
2096
2097 /* Shortcut for single character strings */
2098 if (PyString_GET_SIZE(self) == 1)
2099 return PyInt_FromLong(isupper(*p) != 0);
2100
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002101 /* Special case for empty strings */
2102 if (PyString_GET_SIZE(self) == 0)
2103 return PyInt_FromLong(0);
2104
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105 e = p + PyString_GET_SIZE(self);
2106 cased = 0;
2107 previous_is_cased = 0;
2108 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002109 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002110
2111 if (isupper(ch)) {
2112 if (previous_is_cased)
2113 return PyInt_FromLong(0);
2114 previous_is_cased = 1;
2115 cased = 1;
2116 }
2117 else if (islower(ch)) {
2118 if (!previous_is_cased)
2119 return PyInt_FromLong(0);
2120 previous_is_cased = 1;
2121 cased = 1;
2122 }
2123 else
2124 previous_is_cased = 0;
2125 }
2126 return PyInt_FromLong(cased);
2127}
2128
2129
2130static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002131"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132\n\
2133Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002134Line breaks are not included in the resulting list unless keepends\n\
2135is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136
2137#define SPLIT_APPEND(data, left, right) \
2138 str = PyString_FromStringAndSize(data + left, right - left); \
2139 if (!str) \
2140 goto onError; \
2141 if (PyList_Append(list, str)) { \
2142 Py_DECREF(str); \
2143 goto onError; \
2144 } \
2145 else \
2146 Py_DECREF(str);
2147
2148static PyObject*
2149string_splitlines(PyStringObject *self, PyObject *args)
2150{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151 register int i;
2152 register int j;
2153 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002154 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002155 PyObject *list;
2156 PyObject *str;
2157 char *data;
2158
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002159 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160 return NULL;
2161
2162 data = PyString_AS_STRING(self);
2163 len = PyString_GET_SIZE(self);
2164
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165 list = PyList_New(0);
2166 if (!list)
2167 goto onError;
2168
2169 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002170 int eol;
2171
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 /* Find a line and append it */
2173 while (i < len && data[i] != '\n' && data[i] != '\r')
2174 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175
2176 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002177 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 if (i < len) {
2179 if (data[i] == '\r' && i + 1 < len &&
2180 data[i+1] == '\n')
2181 i += 2;
2182 else
2183 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002184 if (keepends)
2185 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002187 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 j = i;
2189 }
2190 if (j < len) {
2191 SPLIT_APPEND(data, j, len);
2192 }
2193
2194 return list;
2195
2196 onError:
2197 Py_DECREF(list);
2198 return NULL;
2199}
2200
2201#undef SPLIT_APPEND
2202
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203
2204static PyMethodDef
2205string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 /* Counterparts of the obsolete stropmodule functions; except
2207 string.maketrans(). */
2208 {"join", (PyCFunction)string_join, 1, join__doc__},
2209 {"split", (PyCFunction)string_split, 1, split__doc__},
2210 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2211 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2212 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2213 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2214 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2215 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2216 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002217 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2218 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2220 {"count", (PyCFunction)string_count, 1, count__doc__},
2221 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2222 {"find", (PyCFunction)string_find, 1, find__doc__},
2223 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2226 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2227 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2228 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2230 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2231 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002232 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2233 {"title", (PyCFunction)string_title, 1, title__doc__},
2234 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2235 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2236 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002237 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002238 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2239 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2240#if 0
2241 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2242#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243 {NULL, NULL} /* sentinel */
2244};
2245
2246static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002247string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248{
2249 return Py_FindMethod(string_methods, (PyObject*)s, name);
2250}
2251
2252
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002253PyTypeObject PyString_Type = {
2254 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002255 0,
2256 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002257 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002258 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002259 (destructor)string_dealloc, /*tp_dealloc*/
2260 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002262 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002263 (cmpfunc)string_compare, /*tp_compare*/
2264 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 0, /*tp_as_number*/
2266 &string_as_sequence, /*tp_as_sequence*/
2267 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002268 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002269 0, /*tp_call*/
2270 0, /*tp_str*/
2271 0, /*tp_getattro*/
2272 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002273 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002274 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002275 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002276};
2277
2278void
Fred Drakeba096332000-07-09 07:04:36 +00002279PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002280{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002281 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002282 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002283 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002284 if (w == NULL || !PyString_Check(*pv)) {
2285 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002286 *pv = NULL;
2287 return;
2288 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002289 v = string_concat((PyStringObject *) *pv, w);
2290 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002291 *pv = v;
2292}
2293
Guido van Rossum013142a1994-08-30 08:19:36 +00002294void
Fred Drakeba096332000-07-09 07:04:36 +00002295PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002296{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002297 PyString_Concat(pv, w);
2298 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002299}
2300
2301
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002302/* The following function breaks the notion that strings are immutable:
2303 it changes the size of a string. We get away with this only if there
2304 is only one module referencing the object. You can also think of it
2305 as creating a new string object and destroying the old one, only
2306 more efficiently. In any case, don't use this if the string may
2307 already be known to some other part of the code... */
2308
2309int
Fred Drakeba096332000-07-09 07:04:36 +00002310_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002311{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002312 register PyObject *v;
2313 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002314 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002315 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002316 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002317 Py_DECREF(v);
2318 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002319 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002320 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002321 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002322#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002323 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002324#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002325 _Py_ForgetReference(v);
2326 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002327 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002328 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002329 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002330 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002331 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002332 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002333 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002334 _Py_NewReference(*pv);
2335 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002336 sv->ob_size = newsize;
2337 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002338 return 0;
2339}
Guido van Rossume5372401993-03-16 12:15:04 +00002340
2341/* Helpers for formatstring */
2342
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002343static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002344getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002345{
2346 int argidx = *p_argidx;
2347 if (argidx < arglen) {
2348 (*p_argidx)++;
2349 if (arglen < 0)
2350 return args;
2351 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002352 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002353 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002354 PyErr_SetString(PyExc_TypeError,
2355 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002356 return NULL;
2357}
2358
/* Bit flags passed to the format helpers.  F_ALT selects the '#'
   alternate form in formatfloat()/formatint(); the other names presumably
   mirror the '-', '+', ' ' and '0' conversion flags -- confirm against
   PyString_Format(). */
#define F_LJUST	(1 << 0)
#define F_SIGN	(1 << 1)
#define F_BLANK	(1 << 2)
#define F_ALT	(1 << 3)
#define F_ZERO	(1 << 4)
2364
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002365static int
Fred Drakeba096332000-07-09 07:04:36 +00002366formatfloat(char *buf, size_t buflen, int flags,
2367 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002368{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002369 /* fmt = '%#.' + `prec` + `type`
2370 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002371 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002372 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002373 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002374 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002375 if (prec < 0)
2376 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002377 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2378 type = 'g';
2379 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002380 /* worst case length calc to ensure no buffer overrun:
2381 fmt = %#.<prec>g
2382 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2383 for any double rep.)
2384 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2385 If prec=0 the effective precision is 1 (the leading digit is
2386 always given), therefore increase by one to 10+prec. */
2387 if (buflen <= (size_t)10 + (size_t)prec) {
2388 PyErr_SetString(PyExc_OverflowError,
2389 "formatted float is too long (precision too long?)");
2390 return -1;
2391 }
Guido van Rossume5372401993-03-16 12:15:04 +00002392 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002393 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002394}
2395
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002396static int
Fred Drakeba096332000-07-09 07:04:36 +00002397formatint(char *buf, size_t buflen, int flags,
2398 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002399{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002400 /* fmt = '%#.' + `prec` + 'l' + `type`
2401 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002402 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002403 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002404 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002405 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002406 if (prec < 0)
2407 prec = 1;
2408 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002409 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2410 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2411 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2412 PyErr_SetString(PyExc_OverflowError,
2413 "formatted integer is too long (precision too long?)");
2414 return -1;
2415 }
Guido van Rossume5372401993-03-16 12:15:04 +00002416 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002417 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002418}
2419
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002420static int
Fred Drakeba096332000-07-09 07:04:36 +00002421formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002422{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002423 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002424 if (PyString_Check(v)) {
2425 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002426 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002427 }
2428 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002429 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002430 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002431 }
2432 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002433 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002434}
2435
Guido van Rossum013142a1994-08-30 08:19:36 +00002436
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast binds as one
   unit wherever the macro is used (e.g. after a bare `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002446
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002447PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002448PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002449{
2450 char *fmt, *res;
2451 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002452 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002453 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002454 PyObject *dict = NULL;
2455 if (format == NULL || !PyString_Check(format) || args == NULL) {
2456 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002457 return NULL;
2458 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002459 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002460 fmt = PyString_AsString(format);
2461 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002462 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002463 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002464 if (result == NULL)
2465 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002466 res = PyString_AsString(result);
2467 if (PyTuple_Check(args)) {
2468 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002469 argidx = 0;
2470 }
2471 else {
2472 arglen = -1;
2473 argidx = -2;
2474 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002475 if (args->ob_type->tp_as_mapping)
2476 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002477 while (--fmtcnt >= 0) {
2478 if (*fmt != '%') {
2479 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002480 rescnt = fmtcnt + 100;
2481 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002482 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002483 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002484 res = PyString_AsString(result)
2485 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002486 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002487 }
2488 *res++ = *fmt++;
2489 }
2490 else {
2491 /* Got a format specifier */
2492 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002493 int width = -1;
2494 int prec = -1;
2495 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002496 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002497 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002498 PyObject *v = NULL;
2499 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002500 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002501 int sign;
2502 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002503 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002504 char *fmt_start = fmt;
2505
Guido van Rossumda9c2711996-12-05 21:58:58 +00002506 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002507 if (*fmt == '(') {
2508 char *keystart;
2509 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002510 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002511 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002512
2513 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002514 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002515 "format requires a mapping");
2516 goto error;
2517 }
2518 ++fmt;
2519 --fmtcnt;
2520 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002521 /* Skip over balanced parentheses */
2522 while (pcount > 0 && --fmtcnt >= 0) {
2523 if (*fmt == ')')
2524 --pcount;
2525 else if (*fmt == '(')
2526 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002527 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002528 }
2529 keylen = fmt - keystart - 1;
2530 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002531 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002532 "incomplete format key");
2533 goto error;
2534 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002535 key = PyString_FromStringAndSize(keystart,
2536 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002537 if (key == NULL)
2538 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002539 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002541 args_owned = 0;
2542 }
2543 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002545 if (args == NULL) {
2546 goto error;
2547 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002548 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002549 arglen = -1;
2550 argidx = -2;
2551 }
Guido van Rossume5372401993-03-16 12:15:04 +00002552 while (--fmtcnt >= 0) {
2553 switch (c = *fmt++) {
2554 case '-': flags |= F_LJUST; continue;
2555 case '+': flags |= F_SIGN; continue;
2556 case ' ': flags |= F_BLANK; continue;
2557 case '#': flags |= F_ALT; continue;
2558 case '0': flags |= F_ZERO; continue;
2559 }
2560 break;
2561 }
2562 if (c == '*') {
2563 v = getnextarg(args, arglen, &argidx);
2564 if (v == NULL)
2565 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002566 if (!PyInt_Check(v)) {
2567 PyErr_SetString(PyExc_TypeError,
2568 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002569 goto error;
2570 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002571 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002572 if (width < 0) {
2573 flags |= F_LJUST;
2574 width = -width;
2575 }
Guido van Rossume5372401993-03-16 12:15:04 +00002576 if (--fmtcnt >= 0)
2577 c = *fmt++;
2578 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002579 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002580 width = c - '0';
2581 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002582 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002583 if (!isdigit(c))
2584 break;
2585 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 PyErr_SetString(
2587 PyExc_ValueError,
2588 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002589 goto error;
2590 }
2591 width = width*10 + (c - '0');
2592 }
2593 }
2594 if (c == '.') {
2595 prec = 0;
2596 if (--fmtcnt >= 0)
2597 c = *fmt++;
2598 if (c == '*') {
2599 v = getnextarg(args, arglen, &argidx);
2600 if (v == NULL)
2601 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002602 if (!PyInt_Check(v)) {
2603 PyErr_SetString(
2604 PyExc_TypeError,
2605 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002606 goto error;
2607 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002608 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002609 if (prec < 0)
2610 prec = 0;
2611 if (--fmtcnt >= 0)
2612 c = *fmt++;
2613 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002614 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002615 prec = c - '0';
2616 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002617 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002618 if (!isdigit(c))
2619 break;
2620 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002621 PyErr_SetString(
2622 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002623 "prec too big");
2624 goto error;
2625 }
2626 prec = prec*10 + (c - '0');
2627 }
2628 }
2629 } /* prec */
2630 if (fmtcnt >= 0) {
2631 if (c == 'h' || c == 'l' || c == 'L') {
2632 size = c;
2633 if (--fmtcnt >= 0)
2634 c = *fmt++;
2635 }
2636 }
2637 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002638 PyErr_SetString(PyExc_ValueError,
2639 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002640 goto error;
2641 }
2642 if (c != '%') {
2643 v = getnextarg(args, arglen, &argidx);
2644 if (v == NULL)
2645 goto error;
2646 }
2647 sign = 0;
2648 fill = ' ';
2649 switch (c) {
2650 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002651 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002652 len = 1;
2653 break;
2654 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002655 case 'r':
2656 if (PyUnicode_Check(v)) {
2657 fmt = fmt_start;
2658 goto unicode;
2659 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002660 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002661 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002662 else
2663 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002664 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002665 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002666 if (!PyString_Check(temp)) {
2667 PyErr_SetString(PyExc_TypeError,
2668 "%s argument has non-string str()");
2669 goto error;
2670 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002671 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002672 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002673 if (prec >= 0 && len > prec)
2674 len = prec;
2675 break;
2676 case 'i':
2677 case 'd':
2678 case 'u':
2679 case 'o':
2680 case 'x':
2681 case 'X':
2682 if (c == 'i')
2683 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002684 pbuf = formatbuf;
2685 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002686 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002687 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002688 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002689 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002690 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002691 if ((flags&F_ALT) &&
2692 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002693 pbuf[0] == '0' && pbuf[1] == c) {
2694 *res++ = *pbuf++;
2695 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002696 rescnt -= 2;
2697 len -= 2;
2698 width -= 2;
2699 if (width < 0)
2700 width = 0;
2701 }
2702 }
Guido van Rossume5372401993-03-16 12:15:04 +00002703 break;
2704 case 'e':
2705 case 'E':
2706 case 'f':
2707 case 'g':
2708 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002709 pbuf = formatbuf;
2710 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002711 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002712 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002713 sign = 1;
2714 if (flags&F_ZERO)
2715 fill = '0';
2716 break;
2717 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002718 pbuf = formatbuf;
2719 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002720 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002721 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002722 break;
2723 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002724 PyErr_Format(PyExc_ValueError,
2725 "unsupported format character '%c' (0x%x)",
2726 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002727 goto error;
2728 }
2729 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002730 if (*pbuf == '-' || *pbuf == '+') {
2731 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 len--;
2733 }
2734 else if (flags & F_SIGN)
2735 sign = '+';
2736 else if (flags & F_BLANK)
2737 sign = ' ';
2738 else
2739 sign = '\0';
2740 }
2741 if (width < len)
2742 width = len;
2743 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002744 reslen -= rescnt;
2745 rescnt = width + fmtcnt + 100;
2746 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002747 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002748 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002749 res = PyString_AsString(result)
2750 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002751 }
2752 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002753 if (fill != ' ')
2754 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002755 rescnt--;
2756 if (width > len)
2757 width--;
2758 }
2759 if (width > len && !(flags&F_LJUST)) {
2760 do {
2761 --rescnt;
2762 *res++ = fill;
2763 } while (--width > len);
2764 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002765 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002766 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002767 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002768 res += len;
2769 rescnt -= len;
2770 while (--width >= len) {
2771 --rescnt;
2772 *res++ = ' ';
2773 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002774 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002775 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002776 "not all arguments converted");
2777 goto error;
2778 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002779 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002780 } /* '%' */
2781 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002782 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002783 PyErr_SetString(PyExc_TypeError,
2784 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002785 goto error;
2786 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002787 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002788 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002789 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002791 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002792
2793 unicode:
2794 if (args_owned) {
2795 Py_DECREF(args);
2796 args_owned = 0;
2797 }
2798 /* Fiddle args right (remove the first argidx-1 arguments) */
2799 --argidx;
2800 if (PyTuple_Check(orig_args) && argidx > 0) {
2801 PyObject *v;
2802 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2803 v = PyTuple_New(n);
2804 if (v == NULL)
2805 goto error;
2806 while (--n >= 0) {
2807 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2808 Py_INCREF(w);
2809 PyTuple_SET_ITEM(v, n, w);
2810 }
2811 args = v;
2812 } else {
2813 Py_INCREF(orig_args);
2814 args = orig_args;
2815 }
2816 /* Paste rest of format string to what we have of the result
2817 string; we reuse result for this */
2818 rescnt = res - PyString_AS_STRING(result);
2819 fmtcnt = PyString_GET_SIZE(format) - \
2820 (fmt - PyString_AS_STRING(format));
2821 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2822 Py_DECREF(args);
2823 goto error;
2824 }
2825 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2826 format = result;
2827 /* Let Unicode do its magic */
2828 result = PyUnicode_Format(format, args);
2829 Py_DECREF(format);
2830 Py_DECREF(args);
2831 return result;
2832
Guido van Rossume5372401993-03-16 12:15:04 +00002833 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002834 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002835 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002836 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002837 }
Guido van Rossume5372401993-03-16 12:15:04 +00002838 return NULL;
2839}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002840
2841
2842#ifdef INTERN_STRINGS
2843
Barry Warsaw4df762f2000-08-16 23:41:01 +00002844/* This dictionary will leak at PyString_Fini() time. That's acceptable
2845 * because PyString_Fini() specifically frees interned strings that are
2846 * only referenced by this dictionary. The CVS log entry for revision 2.45
2847 * says:
2848 *
2849 * Change the Fini function to only remove otherwise unreferenced
2850 * strings from the interned table. There are references in
2851 * hard-to-find static variables all over the interpreter, and it's not
2852 * worth trying to get rid of all those; but "uninterning" isn't fair
2853 * either and may cause subtle failures later -- so we have to keep them
2854 * in the interned table.
2855 */
/* Table of interned strings; each entry is stored with the string as both
   key and value.  Created on first use by PyString_InternInPlace(). */
static PyObject *interned;
2857
2858void
Fred Drakeba096332000-07-09 07:04:36 +00002859PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002860{
2861 register PyStringObject *s = (PyStringObject *)(*p);
2862 PyObject *t;
2863 if (s == NULL || !PyString_Check(s))
2864 Py_FatalError("PyString_InternInPlace: strings only please!");
2865 if ((t = s->ob_sinterned) != NULL) {
2866 if (t == (PyObject *)s)
2867 return;
2868 Py_INCREF(t);
2869 *p = t;
2870 Py_DECREF(s);
2871 return;
2872 }
2873 if (interned == NULL) {
2874 interned = PyDict_New();
2875 if (interned == NULL)
2876 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002877 }
2878 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2879 Py_INCREF(t);
2880 *p = s->ob_sinterned = t;
2881 Py_DECREF(s);
2882 return;
2883 }
2884 t = (PyObject *)s;
2885 if (PyDict_SetItem(interned, t, t) == 0) {
2886 s->ob_sinterned = t;
2887 return;
2888 }
2889 PyErr_Clear();
2890}
2891
2892
2893PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002894PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002895{
2896 PyObject *s = PyString_FromString(cp);
2897 if (s == NULL)
2898 return NULL;
2899 PyString_InternInPlace(&s);
2900 return s;
2901}
2902
2903#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002904
2905void
Fred Drakeba096332000-07-09 07:04:36 +00002906PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002907{
2908 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002909 for (i = 0; i < UCHAR_MAX + 1; i++) {
2910 Py_XDECREF(characters[i]);
2911 characters[i] = NULL;
2912 }
2913#ifndef DONT_SHARE_SHORT_STRINGS
2914 Py_XDECREF(nullstring);
2915 nullstring = NULL;
2916#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002917#ifdef INTERN_STRINGS
2918 if (interned) {
2919 int pos, changed;
2920 PyObject *key, *value;
2921 do {
2922 changed = 0;
2923 pos = 0;
2924 while (PyDict_Next(interned, &pos, &key, &value)) {
2925 if (key->ob_refcnt == 2 && key == value) {
2926 PyDict_DelItem(interned, key);
2927 changed = 1;
2928 }
2929 }
2930 } while (changed);
2931 }
2932#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002933}