blob: 80b9defaae6ab06fe874c9d41a0a2046bf565638 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
#if defined(COUNT_ALLOCS)
/* Allocation statistics: bumped whenever a request is satisfied by the
   shared empty string or by a shared one-character string instead of a
   fresh allocation. */
int null_strings;
int one_strings;
#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000051PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000052{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000071#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
73 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000075 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef CACHE_HASH
80 op->ob_shash = -1;
81#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000082#ifdef INTERN_STRINGS
83 op->ob_sinterned = NULL;
84#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (str != NULL)
86 memcpy(op->ob_sval, str, size);
87 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 if (size == 0) {
90 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
93 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000148#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000152PyObject *PyString_Decode(const char *s,
153 int size,
154 const char *encoding,
155 const char *errors)
156{
157 PyObject *buffer = NULL, *str;
158
159 if (encoding == NULL)
160 encoding = PyUnicode_GetDefaultEncoding();
161
162 /* Decode via the codec registry */
163 buffer = PyBuffer_FromMemory((void *)s, size);
164 if (buffer == NULL)
165 goto onError;
166 str = PyCodec_Decode(buffer, encoding, errors);
167 if (str == NULL)
168 goto onError;
169 /* Convert Unicode to a string using the default encoding */
170 if (PyUnicode_Check(str)) {
171 PyObject *temp = str;
172 str = PyUnicode_AsEncodedString(str, NULL, NULL);
173 Py_DECREF(temp);
174 if (str == NULL)
175 goto onError;
176 }
177 if (!PyString_Check(str)) {
178 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000179 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000180 str->ob_type->tp_name);
181 Py_DECREF(str);
182 goto onError;
183 }
184 Py_DECREF(buffer);
185 return str;
186
187 onError:
188 Py_XDECREF(buffer);
189 return NULL;
190}
191
192PyObject *PyString_Encode(const char *s,
193 int size,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v, *str;
198
199 str = PyString_FromStringAndSize(s, size);
200 if (str == NULL)
201 return NULL;
202 v = PyString_AsEncodedString(str, encoding, errors);
203 Py_DECREF(str);
204 return v;
205}
206
207PyObject *PyString_AsEncodedString(PyObject *str,
208 const char *encoding,
209 const char *errors)
210{
211 PyObject *v;
212
213 if (!PyString_Check(str)) {
214 PyErr_BadArgument();
215 goto onError;
216 }
217
218 if (encoding == NULL)
219 encoding = PyUnicode_GetDefaultEncoding();
220
221 /* Encode via the codec registry */
222 v = PyCodec_Encode(str, encoding, errors);
223 if (v == NULL)
224 goto onError;
225 /* Convert Unicode to a string using the default encoding */
226 if (PyUnicode_Check(v)) {
227 PyObject *temp = v;
228 v = PyUnicode_AsEncodedString(v, NULL, NULL);
229 Py_DECREF(temp);
230 if (v == NULL)
231 goto onError;
232 }
233 if (!PyString_Check(v)) {
234 PyErr_Format(PyExc_TypeError,
235 "encoder did not return a string object (type=%.400s)",
236 v->ob_type->tp_name);
237 Py_DECREF(v);
238 goto onError;
239 }
240 return v;
241
242 onError:
243 return NULL;
244}
245
Guido van Rossum234f9421993-06-17 12:35:49 +0000246static void
Fred Drakeba096332000-07-09 07:04:36 +0000247string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000248{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000249 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000250}
251
Guido van Rossumd7047b31995-01-02 19:07:15 +0000252int
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return -1;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000265 if (!PyString_Check(op)) {
266 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 return NULL;
268 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270}
271
272/* Methods */
273
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000274static int
Fred Drakeba096332000-07-09 07:04:36 +0000275string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276{
277 int i;
278 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000279 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000280 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000283 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000285
Thomas Wouters7e474022000-07-16 12:04:32 +0000286 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000287 quote = '\'';
288 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
289 quote = '"';
290
291 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 for (i = 0; i < op->ob_size; i++) {
293 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000294 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295 fprintf(fp, "\\%c", c);
296 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000297 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000299 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000301 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000302 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303}
304
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000306string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000308 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
309 PyObject *v;
310 if (newsize > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "string is too large to make repr");
313 }
314 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 }
318 else {
319 register int i;
320 register char c;
321 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000322 int quote;
323
Thomas Wouters7e474022000-07-16 12:04:32 +0000324 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000325 quote = '\'';
326 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
327 quote = '"';
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000330 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 for (i = 0; i < op->ob_size; i++) {
332 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p++ = '\\', *p++ = c;
335 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 while (*p != '\0')
338 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 }
340 else
341 *p++ = c;
342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 _PyString_Resize(
346 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
351static int
Fred Drakeba096332000-07-09 07:04:36 +0000352string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
354 return a->ob_size;
355}
356
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000358string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359{
360 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 register PyStringObject *op;
362 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000363 if (PyUnicode_Check(bb))
364 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000365 PyErr_Format(PyExc_TypeError,
366 "cannot add type \"%.200s\" to string",
367 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 return NULL;
369 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 /* Optimize cases with empty left or right operand */
372 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000373 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 return bb;
375 }
376 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 Py_INCREF(a);
378 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 }
380 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000381 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000382 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000383 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000384 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000386 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000387#ifdef CACHE_HASH
388 op->ob_shash = -1;
389#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000390#ifdef INTERN_STRINGS
391 op->ob_sinterned = NULL;
392#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
394 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
395 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000396 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397#undef b
398}
399
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000401string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402{
403 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000404 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 if (n < 0)
407 n = 0;
408 size = a->ob_size * n;
409 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410 Py_INCREF(a);
411 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000413 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000415 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000416 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000418 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000419#ifdef CACHE_HASH
420 op->ob_shash = -1;
421#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000422#ifdef INTERN_STRINGS
423 op->ob_sinterned = NULL;
424#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000425 for (i = 0; i < size; i += a->ob_size)
426 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
427 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429}
430
431/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
432
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000434string_slice(register PyStringObject *a, register int i, register int j)
435 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436{
437 if (i < 0)
438 i = 0;
439 if (j < 0)
440 j = 0; /* Avoid signed/unsigned bug in next line */
441 if (j > a->ob_size)
442 j = a->ob_size;
443 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 Py_INCREF(a);
445 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 }
447 if (j < i)
448 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9284a572000-03-07 15:53:43 +0000452static int
Fred Drakeba096332000-07-09 07:04:36 +0000453string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000454{
455 register char *s, *end;
456 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000457 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000458 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000459 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000460 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000461 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000462 return -1;
463 }
464 c = PyString_AsString(el)[0];
465 s = PyString_AsString(a);
466 end = s + PyString_Size(a);
467 while (s < end) {
468 if (c == *s++)
469 return 1;
470 }
471 return 0;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000477 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000480 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 return NULL;
482 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000483 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000484 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000485#ifdef COUNT_ALLOCS
486 if (v != NULL)
487 one_strings++;
488#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000489 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000491 if (v == NULL)
492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 characters[c] = (PyStringObject *) v;
494 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000497 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498}
499
500static int
Fred Drakeba096332000-07-09 07:04:36 +0000501string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000502{
Guido van Rossum253919f1991-02-13 23:18:39 +0000503 int len_a = a->ob_size, len_b = b->ob_size;
504 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000505 int cmp;
506 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000507 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000508 if (cmp == 0)
509 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
510 if (cmp != 0)
511 return cmp;
512 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000513 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
Guido van Rossum9bfef441993-03-29 10:43:31 +0000516static long
Fred Drakeba096332000-07-09 07:04:36 +0000517string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000518{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000519 register int len;
520 register unsigned char *p;
521 register long x;
522
523#ifdef CACHE_HASH
524 if (a->ob_shash != -1)
525 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000526#ifdef INTERN_STRINGS
527 if (a->ob_sinterned != NULL)
528 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000530#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000531#endif
532 len = a->ob_size;
533 p = (unsigned char *) a->ob_sval;
534 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000535 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000536 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000537 x ^= a->ob_size;
538 if (x == -1)
539 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000540#ifdef CACHE_HASH
541 a->ob_shash = x;
542#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000543 return x;
544}
545
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000546static int
Fred Drakeba096332000-07-09 07:04:36 +0000547string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000548{
549 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000550 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552 return -1;
553 }
554 *ptr = (void *)self->ob_sval;
555 return self->ob_size;
556}
557
558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000560{
Guido van Rossum045e6881997-09-08 18:30:11 +0000561 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000562 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000563 return -1;
564}
565
566static int
Fred Drakeba096332000-07-09 07:04:36 +0000567string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000568{
569 if ( lenp )
570 *lenp = self->ob_size;
571 return 1;
572}
573
Guido van Rossum1db70701998-10-08 02:18:52 +0000574static int
Fred Drakeba096332000-07-09 07:04:36 +0000575string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000576{
577 if ( index != 0 ) {
578 PyErr_SetString(PyExc_SystemError,
579 "accessing non-existent string segment");
580 return -1;
581 }
582 *ptr = self->ob_sval;
583 return self->ob_size;
584}
585
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000587 (inquiry)string_length, /*sq_length*/
588 (binaryfunc)string_concat, /*sq_concat*/
589 (intargfunc)string_repeat, /*sq_repeat*/
590 (intargfunc)string_item, /*sq_item*/
591 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000592 0, /*sq_ass_item*/
593 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000594 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595};
596
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000597static PyBufferProcs string_as_buffer = {
598 (getreadbufferproc)string_buffer_getreadbuf,
599 (getwritebufferproc)string_buffer_getwritebuf,
600 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000601 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000602};
603
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604
605
/* Strip-mode selectors.  NOTE(review): presumably these pick which end(s)
   of a string a strip operation trims; their users are not visible in
   this part of the file -- confirm. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
609
610
611static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000612split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000613{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000614 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000615 PyObject* item;
616 PyObject *list = PyList_New(0);
617
618 if (list == NULL)
619 return NULL;
620
Guido van Rossum4c08d552000-03-10 22:55:18 +0000621 for (i = j = 0; i < len; ) {
622 while (i < len && isspace(Py_CHARMASK(s[i])))
623 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000624 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 while (i < len && !isspace(Py_CHARMASK(s[i])))
626 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000627 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000628 if (maxsplit-- <= 0)
629 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 item = PyString_FromStringAndSize(s+j, (int)(i-j));
631 if (item == NULL)
632 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000633 err = PyList_Append(list, item);
634 Py_DECREF(item);
635 if (err < 0)
636 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000637 while (i < len && isspace(Py_CHARMASK(s[i])))
638 i++;
639 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000640 }
641 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000642 if (j < len) {
643 item = PyString_FromStringAndSize(s+j, (int)(len - j));
644 if (item == NULL)
645 goto finally;
646 err = PyList_Append(list, item);
647 Py_DECREF(item);
648 if (err < 0)
649 goto finally;
650 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000651 return list;
652 finally:
653 Py_DECREF(list);
654 return NULL;
655}
656
657
/* Documentation string for S.split(). */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000667string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000668{
669 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000670 int maxsplit = -1;
671 const char *s = PyString_AS_STRING(self), *sub;
672 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000673
Guido van Rossum4c08d552000-03-10 22:55:18 +0000674 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000676 if (maxsplit < 0)
677 maxsplit = INT_MAX;
678 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000680 if (PyString_Check(subobj)) {
681 sub = PyString_AS_STRING(subobj);
682 n = PyString_GET_SIZE(subobj);
683 }
684 else if (PyUnicode_Check(subobj))
685 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
686 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
687 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (n == 0) {
689 PyErr_SetString(PyExc_ValueError, "empty separator");
690 return NULL;
691 }
692
693 list = PyList_New(0);
694 if (list == NULL)
695 return NULL;
696
697 i = j = 0;
698 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000699 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (maxsplit-- <= 0)
701 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 item = PyString_FromStringAndSize(s+j, (int)(i-j));
703 if (item == NULL)
704 goto fail;
705 err = PyList_Append(list, item);
706 Py_DECREF(item);
707 if (err < 0)
708 goto fail;
709 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710 }
711 else
712 i++;
713 }
714 item = PyString_FromStringAndSize(s+j, (int)(len-j));
715 if (item == NULL)
716 goto fail;
717 err = PyList_Append(list, item);
718 Py_DECREF(item);
719 if (err < 0)
720 goto fail;
721
722 return list;
723
724 fail:
725 Py_DECREF(list);
726 return NULL;
727}
728
729
730static char join__doc__[] =
731"S.join(sequence) -> string\n\
732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733Return a string which is the concatenation of the strings in the\n\
734sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000735
736static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000737string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000738{
739 char *sep = PyString_AS_STRING(self);
740 int seplen = PyString_GET_SIZE(self);
741 PyObject *res = NULL;
742 int reslen = 0;
743 char *p;
744 int seqlen = 0;
745 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000746 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000747 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000749 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
751
Barry Warsaw771d0672000-07-11 04:58:12 +0000752 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000753 if (PyErr_ExceptionMatches(PyExc_TypeError))
754 PyErr_Format(PyExc_TypeError,
755 "sequence expected, %.80s found",
756 orig->ob_type->tp_name);
757 return NULL;
758 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000759 /* From here on out, errors go through finally: for proper
760 * reference count manipulations.
761 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000762 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000764 item = PySequence_Fast_GET_ITEM(seq, 0);
765 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000766 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000767 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000769
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000771 goto finally;
772
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000773 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000774
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000775 for (i = 0; i < seqlen; i++) {
776 item = PySequence_Fast_GET_ITEM(seq, i);
777 if (!PyString_Check(item)){
778 if (PyUnicode_Check(item)) {
779 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000780 Py_DECREF(seq);
781 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000782 }
783 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000784 "sequence item %i: expected string,"
785 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000786 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000787 goto finally;
788 }
789 slen = PyString_GET_SIZE(item);
790 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000791 /* at least double the size of the string */
792 sz_incr = slen + seplen > sz ? slen + seplen : sz;
793 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000794 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000795 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000796 sz += sz_incr;
797 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000798 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000799 if (i > 0) {
800 memcpy(p, sep, seplen);
801 p += seplen;
802 reslen += seplen;
803 }
804 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000805 p += slen;
806 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807 }
808 if (_PyString_Resize(&res, reslen))
809 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000810 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 return res;
812
813 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000814 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000815 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 return NULL;
817}
818
819
820
821static long
Fred Drakeba096332000-07-09 07:04:36 +0000822string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 int len = PyString_GET_SIZE(self);
826 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000827 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000828
Guido van Rossumc6821402000-05-08 14:08:05 +0000829 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
830 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000831 return -2;
832 if (PyString_Check(subobj)) {
833 sub = PyString_AS_STRING(subobj);
834 n = PyString_GET_SIZE(subobj);
835 }
836 else if (PyUnicode_Check(subobj))
837 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
838 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000839 return -2;
840
841 if (last > len)
842 last = len;
843 if (last < 0)
844 last += len;
845 if (last < 0)
846 last = 0;
847 if (i < 0)
848 i += len;
849 if (i < 0)
850 i = 0;
851
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 if (dir > 0) {
853 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000854 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000855 last -= n;
856 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000857 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000858 return (long)i;
859 }
860 else {
861 int j;
862
863 if (n == 0 && i <= last)
864 return (long)last;
865 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000866 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000867 return (long)j;
868 }
869
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000870 return -1;
871}
872
873
874static char find__doc__[] =
875"S.find(sub [,start [,end]]) -> int\n\
876\n\
877Return the lowest index in S where substring sub is found,\n\
878such that sub is contained within s[start,end]. Optional\n\
879arguments start and end are interpreted as in slice notation.\n\
880\n\
881Return -1 on failure.";
882
883static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000884string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000885{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000886 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000887 if (result == -2)
888 return NULL;
889 return PyInt_FromLong(result);
890}
891
892
893static char index__doc__[] =
894"S.index(sub [,start [,end]]) -> int\n\
895\n\
896Like S.find() but raise ValueError when the substring is not found.";
897
898static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000899string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000901 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000902 if (result == -2)
903 return NULL;
904 if (result == -1) {
905 PyErr_SetString(PyExc_ValueError,
906 "substring not found in string.index");
907 return NULL;
908 }
909 return PyInt_FromLong(result);
910}
911
912
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000913static char rfind__doc__[] =
914"S.rfind(sub [,start [,end]]) -> int\n\
915\n\
916Return the highest index in S where substring sub is found,\n\
917such that sub is contained within s[start,end]. Optional\n\
918arguments start and end are interpreted as in slice notation.\n\
919\n\
920Return -1 on failure.";
921
922static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000923string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000924{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000925 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000926 if (result == -2)
927 return NULL;
928 return PyInt_FromLong(result);
929}
930
931
932static char rindex__doc__[] =
933"S.rindex(sub [,start [,end]]) -> int\n\
934\n\
935Like S.rfind() but raise ValueError when the substring is not found.";
936
937static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000938string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000939{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000940 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000941 if (result == -2)
942 return NULL;
943 if (result == -1) {
944 PyErr_SetString(PyExc_ValueError,
945 "substring not found in string.rindex");
946 return NULL;
947 }
948 return PyInt_FromLong(result);
949}
950
951
952static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000953do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000954{
955 char *s = PyString_AS_STRING(self);
956 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000957
Guido van Rossum43713e52000-02-29 13:59:29 +0000958 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959 return NULL;
960
961 i = 0;
962 if (striptype != RIGHTSTRIP) {
963 while (i < len && isspace(Py_CHARMASK(s[i]))) {
964 i++;
965 }
966 }
967
968 j = len;
969 if (striptype != LEFTSTRIP) {
970 do {
971 j--;
972 } while (j >= i && isspace(Py_CHARMASK(s[j])));
973 j++;
974 }
975
976 if (i == 0 && j == len) {
977 Py_INCREF(self);
978 return (PyObject*)self;
979 }
980 else
981 return PyString_FromStringAndSize(s+i, j-i);
982}
983
984
985static char strip__doc__[] =
986"S.strip() -> string\n\
987\n\
988Return a copy of the string S with leading and trailing\n\
989whitespace removed.";
990
991static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000992string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000993{
994 return do_strip(self, args, BOTHSTRIP);
995}
996
997
998static char lstrip__doc__[] =
999"S.lstrip() -> string\n\
1000\n\
1001Return a copy of the string S with leading whitespace removed.";
1002
1003static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001004string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001005{
1006 return do_strip(self, args, LEFTSTRIP);
1007}
1008
1009
1010static char rstrip__doc__[] =
1011"S.rstrip() -> string\n\
1012\n\
1013Return a copy of the string S with trailing whitespace removed.";
1014
1015static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001016string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017{
1018 return do_strip(self, args, RIGHTSTRIP);
1019}
1020
1021
1022static char lower__doc__[] =
1023"S.lower() -> string\n\
1024\n\
1025Return a copy of the string S converted to lowercase.";
1026
1027static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001028string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029{
1030 char *s = PyString_AS_STRING(self), *s_new;
1031 int i, n = PyString_GET_SIZE(self);
1032 PyObject *new;
1033
Guido van Rossum43713e52000-02-29 13:59:29 +00001034 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035 return NULL;
1036 new = PyString_FromStringAndSize(NULL, n);
1037 if (new == NULL)
1038 return NULL;
1039 s_new = PyString_AsString(new);
1040 for (i = 0; i < n; i++) {
1041 int c = Py_CHARMASK(*s++);
1042 if (isupper(c)) {
1043 *s_new = tolower(c);
1044 } else
1045 *s_new = c;
1046 s_new++;
1047 }
1048 return new;
1049}
1050
1051
1052static char upper__doc__[] =
1053"S.upper() -> string\n\
1054\n\
1055Return a copy of the string S converted to uppercase.";
1056
1057static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001058string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001059{
1060 char *s = PyString_AS_STRING(self), *s_new;
1061 int i, n = PyString_GET_SIZE(self);
1062 PyObject *new;
1063
Guido van Rossum43713e52000-02-29 13:59:29 +00001064 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065 return NULL;
1066 new = PyString_FromStringAndSize(NULL, n);
1067 if (new == NULL)
1068 return NULL;
1069 s_new = PyString_AsString(new);
1070 for (i = 0; i < n; i++) {
1071 int c = Py_CHARMASK(*s++);
1072 if (islower(c)) {
1073 *s_new = toupper(c);
1074 } else
1075 *s_new = c;
1076 s_new++;
1077 }
1078 return new;
1079}
1080
1081
Guido van Rossum4c08d552000-03-10 22:55:18 +00001082static char title__doc__[] =
1083"S.title() -> string\n\
1084\n\
1085Return a titlecased version of S, i.e. words start with uppercase\n\
1086characters, all remaining cased characters have lowercase.";
1087
1088static PyObject*
1089string_title(PyUnicodeObject *self, PyObject *args)
1090{
1091 char *s = PyString_AS_STRING(self), *s_new;
1092 int i, n = PyString_GET_SIZE(self);
1093 int previous_is_cased = 0;
1094 PyObject *new;
1095
1096 if (!PyArg_ParseTuple(args, ":title"))
1097 return NULL;
1098 new = PyString_FromStringAndSize(NULL, n);
1099 if (new == NULL)
1100 return NULL;
1101 s_new = PyString_AsString(new);
1102 for (i = 0; i < n; i++) {
1103 int c = Py_CHARMASK(*s++);
1104 if (islower(c)) {
1105 if (!previous_is_cased)
1106 c = toupper(c);
1107 previous_is_cased = 1;
1108 } else if (isupper(c)) {
1109 if (previous_is_cased)
1110 c = tolower(c);
1111 previous_is_cased = 1;
1112 } else
1113 previous_is_cased = 0;
1114 *s_new++ = c;
1115 }
1116 return new;
1117}
1118
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119static char capitalize__doc__[] =
1120"S.capitalize() -> string\n\
1121\n\
1122Return a copy of the string S with only its first character\n\
1123capitalized.";
1124
1125static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001126string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001127{
1128 char *s = PyString_AS_STRING(self), *s_new;
1129 int i, n = PyString_GET_SIZE(self);
1130 PyObject *new;
1131
Guido van Rossum43713e52000-02-29 13:59:29 +00001132 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001133 return NULL;
1134 new = PyString_FromStringAndSize(NULL, n);
1135 if (new == NULL)
1136 return NULL;
1137 s_new = PyString_AsString(new);
1138 if (0 < n) {
1139 int c = Py_CHARMASK(*s++);
1140 if (islower(c))
1141 *s_new = toupper(c);
1142 else
1143 *s_new = c;
1144 s_new++;
1145 }
1146 for (i = 1; i < n; i++) {
1147 int c = Py_CHARMASK(*s++);
1148 if (isupper(c))
1149 *s_new = tolower(c);
1150 else
1151 *s_new = c;
1152 s_new++;
1153 }
1154 return new;
1155}
1156
1157
1158static char count__doc__[] =
1159"S.count(sub[, start[, end]]) -> int\n\
1160\n\
1161Return the number of occurrences of substring sub in string\n\
1162S[start:end]. Optional arguments start and end are\n\
1163interpreted as in slice notation.";
1164
1165static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001166string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169 int len = PyString_GET_SIZE(self), n;
1170 int i = 0, last = INT_MAX;
1171 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001172 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001173
Guido van Rossumc6821402000-05-08 14:08:05 +00001174 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1175 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001176 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001177
Guido van Rossum4c08d552000-03-10 22:55:18 +00001178 if (PyString_Check(subobj)) {
1179 sub = PyString_AS_STRING(subobj);
1180 n = PyString_GET_SIZE(subobj);
1181 }
1182 else if (PyUnicode_Check(subobj))
1183 return PyInt_FromLong(
1184 PyUnicode_Count((PyObject *)self, subobj, i, last));
1185 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1186 return NULL;
1187
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188 if (last > len)
1189 last = len;
1190 if (last < 0)
1191 last += len;
1192 if (last < 0)
1193 last = 0;
1194 if (i < 0)
1195 i += len;
1196 if (i < 0)
1197 i = 0;
1198 m = last + 1 - n;
1199 if (n == 0)
1200 return PyInt_FromLong((long) (m-i));
1201
1202 r = 0;
1203 while (i < m) {
1204 if (!memcmp(s+i, sub, n)) {
1205 r++;
1206 i += n;
1207 } else {
1208 i++;
1209 }
1210 }
1211 return PyInt_FromLong((long) r);
1212}
1213
1214
1215static char swapcase__doc__[] =
1216"S.swapcase() -> string\n\
1217\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001218Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219converted to lowercase and vice versa.";
1220
1221static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001222string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223{
1224 char *s = PyString_AS_STRING(self), *s_new;
1225 int i, n = PyString_GET_SIZE(self);
1226 PyObject *new;
1227
Guido van Rossum43713e52000-02-29 13:59:29 +00001228 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229 return NULL;
1230 new = PyString_FromStringAndSize(NULL, n);
1231 if (new == NULL)
1232 return NULL;
1233 s_new = PyString_AsString(new);
1234 for (i = 0; i < n; i++) {
1235 int c = Py_CHARMASK(*s++);
1236 if (islower(c)) {
1237 *s_new = toupper(c);
1238 }
1239 else if (isupper(c)) {
1240 *s_new = tolower(c);
1241 }
1242 else
1243 *s_new = c;
1244 s_new++;
1245 }
1246 return new;
1247}
1248
1249
1250static char translate__doc__[] =
1251"S.translate(table [,deletechars]) -> string\n\
1252\n\
1253Return a copy of the string S, where all characters occurring\n\
1254in the optional argument deletechars are removed, and the\n\
1255remaining characters have been mapped through the given\n\
1256translation table, which must be a string of length 256.";
1257
1258static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001259string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 register char *input, *output;
1262 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 register int i, c, changed = 0;
1264 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266 int inlen, tablen, dellen = 0;
1267 PyObject *result;
1268 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001269 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271 if (!PyArg_ParseTuple(args, "O|O:translate",
1272 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274
1275 if (PyString_Check(tableobj)) {
1276 table1 = PyString_AS_STRING(tableobj);
1277 tablen = PyString_GET_SIZE(tableobj);
1278 }
1279 else if (PyUnicode_Check(tableobj)) {
1280 /* Unicode .translate() does not support the deletechars
1281 parameter; instead a mapping to None will cause characters
1282 to be deleted. */
1283 if (delobj != NULL) {
1284 PyErr_SetString(PyExc_TypeError,
1285 "deletions are implemented differently for unicode");
1286 return NULL;
1287 }
1288 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1289 }
1290 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292
1293 if (delobj != NULL) {
1294 if (PyString_Check(delobj)) {
1295 del_table = PyString_AS_STRING(delobj);
1296 dellen = PyString_GET_SIZE(delobj);
1297 }
1298 else if (PyUnicode_Check(delobj)) {
1299 PyErr_SetString(PyExc_TypeError,
1300 "deletions are implemented differently for unicode");
1301 return NULL;
1302 }
1303 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1304 return NULL;
1305
1306 if (tablen != 256) {
1307 PyErr_SetString(PyExc_ValueError,
1308 "translation table must be 256 characters long");
1309 return NULL;
1310 }
1311 }
1312 else {
1313 del_table = NULL;
1314 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315 }
1316
1317 table = table1;
1318 inlen = PyString_Size(input_obj);
1319 result = PyString_FromStringAndSize((char *)NULL, inlen);
1320 if (result == NULL)
1321 return NULL;
1322 output_start = output = PyString_AsString(result);
1323 input = PyString_AsString(input_obj);
1324
1325 if (dellen == 0) {
1326 /* If no deletions are required, use faster code */
1327 for (i = inlen; --i >= 0; ) {
1328 c = Py_CHARMASK(*input++);
1329 if (Py_CHARMASK((*output++ = table[c])) != c)
1330 changed = 1;
1331 }
1332 if (changed)
1333 return result;
1334 Py_DECREF(result);
1335 Py_INCREF(input_obj);
1336 return input_obj;
1337 }
1338
1339 for (i = 0; i < 256; i++)
1340 trans_table[i] = Py_CHARMASK(table[i]);
1341
1342 for (i = 0; i < dellen; i++)
1343 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1344
1345 for (i = inlen; --i >= 0; ) {
1346 c = Py_CHARMASK(*input++);
1347 if (trans_table[c] != -1)
1348 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1349 continue;
1350 changed = 1;
1351 }
1352 if (!changed) {
1353 Py_DECREF(result);
1354 Py_INCREF(input_obj);
1355 return input_obj;
1356 }
1357 /* Fix the size of the resulting string */
1358 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1359 return NULL;
1360 return result;
1361}
1362
1363
1364/* What follows is used for implementing replace(). Perry Stoll. */
1365
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none.  A pattern longer than MEM is never found, so the
  result is -1 in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which the whole pattern still fits in MEM. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1391
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the left: mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
  gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int nfound;

    for (nfound = 0; len >= 0; nfound++) {
        int step;
        int offset = mymemfind(mem, len, pat, pat_len);

        if (offset == -1)
            break;
        /* Resume the search right after the match just found. */
        step = offset + pat_len;
        mem += step;
        len -= step;
    }
    return nfound;
}
1415
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,         /* input string */
             const char *pat, int pat_len,     /* pattern string to find */
             const char *sub, int sub_len,     /* substitution string */
             int count,                        /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Refuse results whose length does not fit in an int; the caller
       reports a NULL return as a memory error. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result may legitimately be empty (everything replaced by
       nothing).  Always request at least one byte: a zero-sized
       allocation is allowed to return NULL, which would be mistaken
       for an out-of-memory condition. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1493
1494
1495static char replace__doc__[] =
1496"S.replace (old, new[, maxsplit]) -> string\n\
1497\n\
1498Return a copy of string S with all occurrences of substring\n\
1499old replaced by new. If the optional argument maxsplit is\n\
1500given, only the first maxsplit occurrences are replaced.";
1501
1502static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001503string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505 const char *str = PyString_AS_STRING(self), *sub, *repl;
1506 char *new_s;
1507 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1508 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 if (!PyArg_ParseTuple(args, "OO|i:replace",
1513 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001515
1516 if (PyString_Check(subobj)) {
1517 sub = PyString_AS_STRING(subobj);
1518 sub_len = PyString_GET_SIZE(subobj);
1519 }
1520 else if (PyUnicode_Check(subobj))
1521 return PyUnicode_Replace((PyObject *)self,
1522 subobj, replobj, count);
1523 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1524 return NULL;
1525
1526 if (PyString_Check(replobj)) {
1527 repl = PyString_AS_STRING(replobj);
1528 repl_len = PyString_GET_SIZE(replobj);
1529 }
1530 else if (PyUnicode_Check(replobj))
1531 return PyUnicode_Replace((PyObject *)self,
1532 subobj, replobj, count);
1533 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1534 return NULL;
1535
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001536 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001537 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538 return NULL;
1539 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541 if (new_s == NULL) {
1542 PyErr_NoMemory();
1543 return NULL;
1544 }
1545 if (out_len == -1) {
1546 /* we're returning another reference to self */
1547 new = (PyObject*)self;
1548 Py_INCREF(new);
1549 }
1550 else {
1551 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001552 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 }
1554 return new;
1555}
1556
1557
1558static char startswith__doc__[] =
1559"S.startswith(prefix[, start[, end]]) -> int\n\
1560\n\
1561Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1562optional start, test S beginning at that position. With optional end, stop\n\
1563comparing S at that position.";
1564
1565static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001566string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001570 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 int plen;
1572 int start = 0;
1573 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001574 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575
Guido van Rossumc6821402000-05-08 14:08:05 +00001576 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1577 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001578 return NULL;
1579 if (PyString_Check(subobj)) {
1580 prefix = PyString_AS_STRING(subobj);
1581 plen = PyString_GET_SIZE(subobj);
1582 }
1583 else if (PyUnicode_Check(subobj))
1584 return PyInt_FromLong(
1585 PyUnicode_Tailmatch((PyObject *)self,
1586 subobj, start, end, -1));
1587 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588 return NULL;
1589
1590 /* adopt Java semantics for index out of range. it is legal for
1591 * offset to be == plen, but this only returns true if prefix is
1592 * the empty string.
1593 */
1594 if (start < 0 || start+plen > len)
1595 return PyInt_FromLong(0);
1596
1597 if (!memcmp(str+start, prefix, plen)) {
1598 /* did the match end after the specified end? */
1599 if (end < 0)
1600 return PyInt_FromLong(1);
1601 else if (end - start < plen)
1602 return PyInt_FromLong(0);
1603 else
1604 return PyInt_FromLong(1);
1605 }
1606 else return PyInt_FromLong(0);
1607}
1608
1609
1610static char endswith__doc__[] =
1611"S.endswith(suffix[, start[, end]]) -> int\n\
1612\n\
1613Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1614optional start, test S beginning at that position. With optional end, stop\n\
1615comparing S at that position.";
1616
1617static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001618string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 const char* suffix;
1623 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 int start = 0;
1625 int end = -1;
1626 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628
Guido van Rossumc6821402000-05-08 14:08:05 +00001629 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1630 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 return NULL;
1632 if (PyString_Check(subobj)) {
1633 suffix = PyString_AS_STRING(subobj);
1634 slen = PyString_GET_SIZE(subobj);
1635 }
1636 else if (PyUnicode_Check(subobj))
1637 return PyInt_FromLong(
1638 PyUnicode_Tailmatch((PyObject *)self,
1639 subobj, start, end, +1));
1640 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 return NULL;
1642
Guido van Rossum4c08d552000-03-10 22:55:18 +00001643 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644 return PyInt_FromLong(0);
1645
1646 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001647 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648
Guido van Rossum4c08d552000-03-10 22:55:18 +00001649 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650 return PyInt_FromLong(1);
1651 else return PyInt_FromLong(0);
1652}
1653
1654
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001655static char encode__doc__[] =
1656"S.encode([encoding[,errors]]) -> string\n\
1657\n\
1658Return an encoded string version of S. Default encoding is the current\n\
1659default string encoding. errors may be given to set a different error\n\
1660handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1661a ValueError. Other possible values are 'ignore' and 'replace'.";
1662
1663static PyObject *
1664string_encode(PyStringObject *self, PyObject *args)
1665{
1666 char *encoding = NULL;
1667 char *errors = NULL;
1668 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1669 return NULL;
1670 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1671}
1672
1673
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674static char expandtabs__doc__[] =
1675"S.expandtabs([tabsize]) -> string\n\
1676\n\
1677Return a copy of S where all tab characters are expanded using spaces.\n\
1678If tabsize is not given, a tab size of 8 characters is assumed.";
1679
1680static PyObject*
1681string_expandtabs(PyStringObject *self, PyObject *args)
1682{
1683 const char *e, *p;
1684 char *q;
1685 int i, j;
1686 PyObject *u;
1687 int tabsize = 8;
1688
1689 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1690 return NULL;
1691
Thomas Wouters7e474022000-07-16 12:04:32 +00001692 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 i = j = 0;
1694 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1695 for (p = PyString_AS_STRING(self); p < e; p++)
1696 if (*p == '\t') {
1697 if (tabsize > 0)
1698 j += tabsize - (j % tabsize);
1699 }
1700 else {
1701 j++;
1702 if (*p == '\n' || *p == '\r') {
1703 i += j;
1704 j = 0;
1705 }
1706 }
1707
1708 /* Second pass: create output string and fill it */
1709 u = PyString_FromStringAndSize(NULL, i + j);
1710 if (!u)
1711 return NULL;
1712
1713 j = 0;
1714 q = PyString_AS_STRING(u);
1715
1716 for (p = PyString_AS_STRING(self); p < e; p++)
1717 if (*p == '\t') {
1718 if (tabsize > 0) {
1719 i = tabsize - (j % tabsize);
1720 j += i;
1721 while (i--)
1722 *q++ = ' ';
1723 }
1724 }
1725 else {
1726 j++;
1727 *q++ = *p;
1728 if (*p == '\n' || *p == '\r')
1729 j = 0;
1730 }
1731
1732 return u;
1733}
1734
1735static
1736PyObject *pad(PyStringObject *self,
1737 int left,
1738 int right,
1739 char fill)
1740{
1741 PyObject *u;
1742
1743 if (left < 0)
1744 left = 0;
1745 if (right < 0)
1746 right = 0;
1747
1748 if (left == 0 && right == 0) {
1749 Py_INCREF(self);
1750 return (PyObject *)self;
1751 }
1752
1753 u = PyString_FromStringAndSize(NULL,
1754 left + PyString_GET_SIZE(self) + right);
1755 if (u) {
1756 if (left)
1757 memset(PyString_AS_STRING(u), fill, left);
1758 memcpy(PyString_AS_STRING(u) + left,
1759 PyString_AS_STRING(self),
1760 PyString_GET_SIZE(self));
1761 if (right)
1762 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1763 fill, right);
1764 }
1765
1766 return u;
1767}
1768
1769static char ljust__doc__[] =
1770"S.ljust(width) -> string\n\
1771\n\
1772Return S left justified in a string of length width. Padding is\n\
1773done using spaces.";
1774
1775static PyObject *
1776string_ljust(PyStringObject *self, PyObject *args)
1777{
1778 int width;
1779 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1780 return NULL;
1781
1782 if (PyString_GET_SIZE(self) >= width) {
1783 Py_INCREF(self);
1784 return (PyObject*) self;
1785 }
1786
1787 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1788}
1789
1790
1791static char rjust__doc__[] =
1792"S.rjust(width) -> string\n\
1793\n\
1794Return S right justified in a string of length width. Padding is\n\
1795done using spaces.";
1796
1797static PyObject *
1798string_rjust(PyStringObject *self, PyObject *args)
1799{
1800 int width;
1801 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1802 return NULL;
1803
1804 if (PyString_GET_SIZE(self) >= width) {
1805 Py_INCREF(self);
1806 return (PyObject*) self;
1807 }
1808
1809 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1810}
1811
1812
1813static char center__doc__[] =
1814"S.center(width) -> string\n\
1815\n\
1816Return S centered in a string of length width. Padding is done\n\
1817using spaces.";
1818
1819static PyObject *
1820string_center(PyStringObject *self, PyObject *args)
1821{
1822 int marg, left;
1823 int width;
1824
1825 if (!PyArg_ParseTuple(args, "i:center", &width))
1826 return NULL;
1827
1828 if (PyString_GET_SIZE(self) >= width) {
1829 Py_INCREF(self);
1830 return (PyObject*) self;
1831 }
1832
1833 marg = width - PyString_GET_SIZE(self);
1834 left = marg / 2 + (marg & width & 1);
1835
1836 return pad(self, left, marg - left, ' ');
1837}
1838
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): currently compiled out.  Left-pads with '0' and, if
   the original text began with a sign, moves that sign to the front. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	int length = PyString_GET_SIZE(self);
	PyObject *filled;
	char *text;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= length) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - length;
	filled = pad(self, nzeros, 0, '0');
	if (filled == NULL)
		return NULL;

	text = PyString_AS_STRING(filled);
	switch (text[nzeros]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		text[0] = text[nzeros];
		text[nzeros] = '0';
		break;
	default:
		break;
	}

	return filled;
}
#endif
1878
1879static char isspace__doc__[] =
1880"S.isspace() -> int\n\
1881\n\
1882Return 1 if there are only whitespace characters in S,\n\
18830 otherwise.";
1884
1885static PyObject*
1886string_isspace(PyStringObject *self, PyObject *args)
1887{
Fred Drakeba096332000-07-09 07:04:36 +00001888 register const unsigned char *p
1889 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001890 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891
1892 if (!PyArg_NoArgs(args))
1893 return NULL;
1894
1895 /* Shortcut for single character strings */
1896 if (PyString_GET_SIZE(self) == 1 &&
1897 isspace(*p))
1898 return PyInt_FromLong(1);
1899
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001900 /* Special case for empty strings */
1901 if (PyString_GET_SIZE(self) == 0)
1902 return PyInt_FromLong(0);
1903
Guido van Rossum4c08d552000-03-10 22:55:18 +00001904 e = p + PyString_GET_SIZE(self);
1905 for (; p < e; p++) {
1906 if (!isspace(*p))
1907 return PyInt_FromLong(0);
1908 }
1909 return PyInt_FromLong(1);
1910}
1911
1912
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001913static char isalpha__doc__[] =
1914"S.isalpha() -> int\n\
1915\n\
1916Return 1 if all characters in S are alphabetic\n\
1917and there is at least one character in S, 0 otherwise.";
1918
1919static PyObject*
1920string_isalpha(PyUnicodeObject *self, PyObject *args)
1921{
Fred Drakeba096332000-07-09 07:04:36 +00001922 register const unsigned char *p
1923 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001924 register const unsigned char *e;
1925
1926 if (!PyArg_NoArgs(args))
1927 return NULL;
1928
1929 /* Shortcut for single character strings */
1930 if (PyString_GET_SIZE(self) == 1 &&
1931 isalpha(*p))
1932 return PyInt_FromLong(1);
1933
1934 /* Special case for empty strings */
1935 if (PyString_GET_SIZE(self) == 0)
1936 return PyInt_FromLong(0);
1937
1938 e = p + PyString_GET_SIZE(self);
1939 for (; p < e; p++) {
1940 if (!isalpha(*p))
1941 return PyInt_FromLong(0);
1942 }
1943 return PyInt_FromLong(1);
1944}
1945
1946
1947static char isalnum__doc__[] =
1948"S.isalnum() -> int\n\
1949\n\
1950Return 1 if all characters in S are alphanumeric\n\
1951and there is at least one character in S, 0 otherwise.";
1952
1953static PyObject*
1954string_isalnum(PyUnicodeObject *self, PyObject *args)
1955{
Fred Drakeba096332000-07-09 07:04:36 +00001956 register const unsigned char *p
1957 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001958 register const unsigned char *e;
1959
1960 if (!PyArg_NoArgs(args))
1961 return NULL;
1962
1963 /* Shortcut for single character strings */
1964 if (PyString_GET_SIZE(self) == 1 &&
1965 isalnum(*p))
1966 return PyInt_FromLong(1);
1967
1968 /* Special case for empty strings */
1969 if (PyString_GET_SIZE(self) == 0)
1970 return PyInt_FromLong(0);
1971
1972 e = p + PyString_GET_SIZE(self);
1973 for (; p < e; p++) {
1974 if (!isalnum(*p))
1975 return PyInt_FromLong(0);
1976 }
1977 return PyInt_FromLong(1);
1978}
1979
1980
Guido van Rossum4c08d552000-03-10 22:55:18 +00001981static char isdigit__doc__[] =
1982"S.isdigit() -> int\n\
1983\n\
1984Return 1 if there are only digit characters in S,\n\
19850 otherwise.";
1986
1987static PyObject*
1988string_isdigit(PyStringObject *self, PyObject *args)
1989{
Fred Drakeba096332000-07-09 07:04:36 +00001990 register const unsigned char *p
1991 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001992 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001993
1994 if (!PyArg_NoArgs(args))
1995 return NULL;
1996
1997 /* Shortcut for single character strings */
1998 if (PyString_GET_SIZE(self) == 1 &&
1999 isdigit(*p))
2000 return PyInt_FromLong(1);
2001
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002002 /* Special case for empty strings */
2003 if (PyString_GET_SIZE(self) == 0)
2004 return PyInt_FromLong(0);
2005
Guido van Rossum4c08d552000-03-10 22:55:18 +00002006 e = p + PyString_GET_SIZE(self);
2007 for (; p < e; p++) {
2008 if (!isdigit(*p))
2009 return PyInt_FromLong(0);
2010 }
2011 return PyInt_FromLong(1);
2012}
2013
2014
2015static char islower__doc__[] =
2016"S.islower() -> int\n\
2017\n\
2018Return 1 if all cased characters in S are lowercase and there is\n\
2019at least one cased character in S, 0 otherwise.";
2020
2021static PyObject*
2022string_islower(PyStringObject *self, PyObject *args)
2023{
Fred Drakeba096332000-07-09 07:04:36 +00002024 register const unsigned char *p
2025 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002026 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002027 int cased;
2028
2029 if (!PyArg_NoArgs(args))
2030 return NULL;
2031
2032 /* Shortcut for single character strings */
2033 if (PyString_GET_SIZE(self) == 1)
2034 return PyInt_FromLong(islower(*p) != 0);
2035
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002036 /* Special case for empty strings */
2037 if (PyString_GET_SIZE(self) == 0)
2038 return PyInt_FromLong(0);
2039
Guido van Rossum4c08d552000-03-10 22:55:18 +00002040 e = p + PyString_GET_SIZE(self);
2041 cased = 0;
2042 for (; p < e; p++) {
2043 if (isupper(*p))
2044 return PyInt_FromLong(0);
2045 else if (!cased && islower(*p))
2046 cased = 1;
2047 }
2048 return PyInt_FromLong(cased);
2049}
2050
2051
2052static char isupper__doc__[] =
2053"S.isupper() -> int\n\
2054\n\
2055Return 1 if all cased characters in S are uppercase and there is\n\
2056at least one cased character in S, 0 otherwise.";
2057
2058static PyObject*
2059string_isupper(PyStringObject *self, PyObject *args)
2060{
Fred Drakeba096332000-07-09 07:04:36 +00002061 register const unsigned char *p
2062 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002063 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002064 int cased;
2065
2066 if (!PyArg_NoArgs(args))
2067 return NULL;
2068
2069 /* Shortcut for single character strings */
2070 if (PyString_GET_SIZE(self) == 1)
2071 return PyInt_FromLong(isupper(*p) != 0);
2072
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002073 /* Special case for empty strings */
2074 if (PyString_GET_SIZE(self) == 0)
2075 return PyInt_FromLong(0);
2076
Guido van Rossum4c08d552000-03-10 22:55:18 +00002077 e = p + PyString_GET_SIZE(self);
2078 cased = 0;
2079 for (; p < e; p++) {
2080 if (islower(*p))
2081 return PyInt_FromLong(0);
2082 else if (!cased && isupper(*p))
2083 cased = 1;
2084 }
2085 return PyInt_FromLong(cased);
2086}
2087
2088
2089static char istitle__doc__[] =
2090"S.istitle() -> int\n\
2091\n\
2092Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2093may only follow uncased characters and lowercase characters only cased\n\
2094ones. Return 0 otherwise.";
2095
2096static PyObject*
2097string_istitle(PyStringObject *self, PyObject *args)
2098{
Fred Drakeba096332000-07-09 07:04:36 +00002099 register const unsigned char *p
2100 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002101 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 int cased, previous_is_cased;
2103
2104 if (!PyArg_NoArgs(args))
2105 return NULL;
2106
2107 /* Shortcut for single character strings */
2108 if (PyString_GET_SIZE(self) == 1)
2109 return PyInt_FromLong(isupper(*p) != 0);
2110
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002111 /* Special case for empty strings */
2112 if (PyString_GET_SIZE(self) == 0)
2113 return PyInt_FromLong(0);
2114
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 e = p + PyString_GET_SIZE(self);
2116 cased = 0;
2117 previous_is_cased = 0;
2118 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002119 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120
2121 if (isupper(ch)) {
2122 if (previous_is_cased)
2123 return PyInt_FromLong(0);
2124 previous_is_cased = 1;
2125 cased = 1;
2126 }
2127 else if (islower(ch)) {
2128 if (!previous_is_cased)
2129 return PyInt_FromLong(0);
2130 previous_is_cased = 1;
2131 cased = 1;
2132 }
2133 else
2134 previous_is_cased = 0;
2135 }
2136 return PyInt_FromLong(cased);
2137}
2138
2139
2140static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002141"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142\n\
2143Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002144Line breaks are not included in the resulting list unless keepends\n\
2145is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002146
2147#define SPLIT_APPEND(data, left, right) \
2148 str = PyString_FromStringAndSize(data + left, right - left); \
2149 if (!str) \
2150 goto onError; \
2151 if (PyList_Append(list, str)) { \
2152 Py_DECREF(str); \
2153 goto onError; \
2154 } \
2155 else \
2156 Py_DECREF(str);
2157
2158static PyObject*
2159string_splitlines(PyStringObject *self, PyObject *args)
2160{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002161 register int i;
2162 register int j;
2163 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002164 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165 PyObject *list;
2166 PyObject *str;
2167 char *data;
2168
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002169 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 return NULL;
2171
2172 data = PyString_AS_STRING(self);
2173 len = PyString_GET_SIZE(self);
2174
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 list = PyList_New(0);
2176 if (!list)
2177 goto onError;
2178
2179 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002180 int eol;
2181
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182 /* Find a line and append it */
2183 while (i < len && data[i] != '\n' && data[i] != '\r')
2184 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185
2186 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002187 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 if (i < len) {
2189 if (data[i] == '\r' && i + 1 < len &&
2190 data[i+1] == '\n')
2191 i += 2;
2192 else
2193 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002194 if (keepends)
2195 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002197 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002198 j = i;
2199 }
2200 if (j < len) {
2201 SPLIT_APPEND(data, j, len);
2202 }
2203
2204 return list;
2205
2206 onError:
2207 Py_DECREF(list);
2208 return NULL;
2209}
2210
2211#undef SPLIT_APPEND
2212
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213
2214static PyMethodDef
2215string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216 /* Counterparts of the obsolete stropmodule functions; except
2217 string.maketrans(). */
2218 {"join", (PyCFunction)string_join, 1, join__doc__},
2219 {"split", (PyCFunction)string_split, 1, split__doc__},
2220 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2221 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2222 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2223 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2224 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2225 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2226 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002227 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2228 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2230 {"count", (PyCFunction)string_count, 1, count__doc__},
2231 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2232 {"find", (PyCFunction)string_find, 1, find__doc__},
2233 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2236 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2237 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2238 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2240 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2241 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2243 {"title", (PyCFunction)string_title, 1, title__doc__},
2244 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2245 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2246 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002247 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2249 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2250#if 0
2251 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2252#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253 {NULL, NULL} /* sentinel */
2254};
2255
2256static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002257string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258{
2259 return Py_FindMethod(string_methods, (PyObject*)s, name);
2260}
2261
2262
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002263PyTypeObject PyString_Type = {
2264 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 0,
2266 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002267 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002268 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002269 (destructor)string_dealloc, /*tp_dealloc*/
2270 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002272 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002273 (cmpfunc)string_compare, /*tp_compare*/
2274 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002275 0, /*tp_as_number*/
2276 &string_as_sequence, /*tp_as_sequence*/
2277 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002278 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002279 0, /*tp_call*/
2280 0, /*tp_str*/
2281 0, /*tp_getattro*/
2282 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002283 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002284 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002285 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002286};
2287
2288void
Fred Drakeba096332000-07-09 07:04:36 +00002289PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002290{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002291 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002292 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002293 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294 if (w == NULL || !PyString_Check(*pv)) {
2295 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002296 *pv = NULL;
2297 return;
2298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002299 v = string_concat((PyStringObject *) *pv, w);
2300 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002301 *pv = v;
2302}
2303
Guido van Rossum013142a1994-08-30 08:19:36 +00002304void
Fred Drakeba096332000-07-09 07:04:36 +00002305PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002306{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002307 PyString_Concat(pv, w);
2308 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002309}
2310
2311
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002312/* The following function breaks the notion that strings are immutable:
2313 it changes the size of a string. We get away with this only if there
2314 is only one module referencing the object. You can also think of it
2315 as creating a new string object and destroying the old one, only
2316 more efficiently. In any case, don't use this if the string may
2317 already be known to some other part of the code... */
2318
2319int
Fred Drakeba096332000-07-09 07:04:36 +00002320_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002321{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002322 register PyObject *v;
2323 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002324 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002325 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002326 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002327 Py_DECREF(v);
2328 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002329 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002330 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002331 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002332#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002333 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002334#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002335 _Py_ForgetReference(v);
2336 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002337 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002338 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002339 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002340 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002341 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002342 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002343 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002344 _Py_NewReference(*pv);
2345 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002346 sv->ob_size = newsize;
2347 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002348 return 0;
2349}
Guido van Rossume5372401993-03-16 12:15:04 +00002350
2351/* Helpers for formatstring */
2352
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002354getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002355{
2356 int argidx = *p_argidx;
2357 if (argidx < arglen) {
2358 (*p_argidx)++;
2359 if (arglen < 0)
2360 return args;
2361 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002362 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002363 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 PyErr_SetString(PyExc_TypeError,
2365 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002366 return NULL;
2367}
2368
/* Bit flags collected while parsing a format specifier */
#define F_LJUST	(1 << 0)
#define F_SIGN	(1 << 1)
#define F_BLANK	(1 << 2)
#define F_ALT	(1 << 3)
#define F_ZERO	(1 << 4)
2374
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002375static int
Fred Drakeba096332000-07-09 07:04:36 +00002376formatfloat(char *buf, size_t buflen, int flags,
2377 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002378{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002379 /* fmt = '%#.' + `prec` + `type`
2380 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002381 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002382 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002383 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002384 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002385 if (prec < 0)
2386 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002387 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2388 type = 'g';
2389 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002390 /* worst case length calc to ensure no buffer overrun:
2391 fmt = %#.<prec>g
2392 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2393 for any double rep.)
2394 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2395 If prec=0 the effective precision is 1 (the leading digit is
2396 always given), therefore increase by one to 10+prec. */
2397 if (buflen <= (size_t)10 + (size_t)prec) {
2398 PyErr_SetString(PyExc_OverflowError,
2399 "formatted float is too long (precision too long?)");
2400 return -1;
2401 }
Guido van Rossume5372401993-03-16 12:15:04 +00002402 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002403 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002404}
2405
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002406static int
Fred Drakeba096332000-07-09 07:04:36 +00002407formatint(char *buf, size_t buflen, int flags,
2408 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002409{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002410 /* fmt = '%#.' + `prec` + 'l' + `type`
2411 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002412 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002413 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002414 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002415 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002416 if (prec < 0)
2417 prec = 1;
2418 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002419 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2420 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2421 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2422 PyErr_SetString(PyExc_OverflowError,
2423 "formatted integer is too long (precision too long?)");
2424 return -1;
2425 }
Guido van Rossume5372401993-03-16 12:15:04 +00002426 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002427 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002428}
2429
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002430static int
Fred Drakeba096332000-07-09 07:04:36 +00002431formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002432{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002433 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002434 if (PyString_Check(v)) {
2435 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002436 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002437 }
2438 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002439 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002440 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002441 }
2442 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002443 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002444}
2445
Guido van Rossum013142a1994-08-30 08:19:36 +00002446
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine is expected to do bounds checking to ensure no overflow, but
   a better solution may be to malloc a buffer of appropriate size for
   each format.  For now, the current solution is sufficient.

   The expansion is parenthesized so that the macro behaves as a single
   operand wherever it is used (e.g. "sizeof FORMATBUFLEN").
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002456
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002457PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002458PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002459{
2460 char *fmt, *res;
2461 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002462 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002463 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464 PyObject *dict = NULL;
2465 if (format == NULL || !PyString_Check(format) || args == NULL) {
2466 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002467 return NULL;
2468 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002469 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 fmt = PyString_AsString(format);
2471 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002472 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002474 if (result == NULL)
2475 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002476 res = PyString_AsString(result);
2477 if (PyTuple_Check(args)) {
2478 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002479 argidx = 0;
2480 }
2481 else {
2482 arglen = -1;
2483 argidx = -2;
2484 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002485 if (args->ob_type->tp_as_mapping)
2486 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002487 while (--fmtcnt >= 0) {
2488 if (*fmt != '%') {
2489 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002490 rescnt = fmtcnt + 100;
2491 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002492 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002493 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002494 res = PyString_AsString(result)
2495 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002496 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002497 }
2498 *res++ = *fmt++;
2499 }
2500 else {
2501 /* Got a format specifier */
2502 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002503 int width = -1;
2504 int prec = -1;
2505 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002506 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002507 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002508 PyObject *v = NULL;
2509 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002510 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002511 int sign;
2512 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002513 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002514 char *fmt_start = fmt;
2515
Guido van Rossumda9c2711996-12-05 21:58:58 +00002516 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002517 if (*fmt == '(') {
2518 char *keystart;
2519 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002520 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002521 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002522
2523 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002524 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002525 "format requires a mapping");
2526 goto error;
2527 }
2528 ++fmt;
2529 --fmtcnt;
2530 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002531 /* Skip over balanced parentheses */
2532 while (pcount > 0 && --fmtcnt >= 0) {
2533 if (*fmt == ')')
2534 --pcount;
2535 else if (*fmt == '(')
2536 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002537 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002538 }
2539 keylen = fmt - keystart - 1;
2540 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002541 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002542 "incomplete format key");
2543 goto error;
2544 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002545 key = PyString_FromStringAndSize(keystart,
2546 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002547 if (key == NULL)
2548 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002549 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002550 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002551 args_owned = 0;
2552 }
2553 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002554 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002555 if (args == NULL) {
2556 goto error;
2557 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002558 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002559 arglen = -1;
2560 argidx = -2;
2561 }
Guido van Rossume5372401993-03-16 12:15:04 +00002562 while (--fmtcnt >= 0) {
2563 switch (c = *fmt++) {
2564 case '-': flags |= F_LJUST; continue;
2565 case '+': flags |= F_SIGN; continue;
2566 case ' ': flags |= F_BLANK; continue;
2567 case '#': flags |= F_ALT; continue;
2568 case '0': flags |= F_ZERO; continue;
2569 }
2570 break;
2571 }
2572 if (c == '*') {
2573 v = getnextarg(args, arglen, &argidx);
2574 if (v == NULL)
2575 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002576 if (!PyInt_Check(v)) {
2577 PyErr_SetString(PyExc_TypeError,
2578 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002579 goto error;
2580 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002581 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002582 if (width < 0) {
2583 flags |= F_LJUST;
2584 width = -width;
2585 }
Guido van Rossume5372401993-03-16 12:15:04 +00002586 if (--fmtcnt >= 0)
2587 c = *fmt++;
2588 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002589 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002590 width = c - '0';
2591 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002592 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002593 if (!isdigit(c))
2594 break;
2595 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002596 PyErr_SetString(
2597 PyExc_ValueError,
2598 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002599 goto error;
2600 }
2601 width = width*10 + (c - '0');
2602 }
2603 }
2604 if (c == '.') {
2605 prec = 0;
2606 if (--fmtcnt >= 0)
2607 c = *fmt++;
2608 if (c == '*') {
2609 v = getnextarg(args, arglen, &argidx);
2610 if (v == NULL)
2611 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002612 if (!PyInt_Check(v)) {
2613 PyErr_SetString(
2614 PyExc_TypeError,
2615 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002616 goto error;
2617 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002618 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002619 if (prec < 0)
2620 prec = 0;
2621 if (--fmtcnt >= 0)
2622 c = *fmt++;
2623 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002624 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002625 prec = c - '0';
2626 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002627 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002628 if (!isdigit(c))
2629 break;
2630 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002631 PyErr_SetString(
2632 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002633 "prec too big");
2634 goto error;
2635 }
2636 prec = prec*10 + (c - '0');
2637 }
2638 }
2639 } /* prec */
2640 if (fmtcnt >= 0) {
2641 if (c == 'h' || c == 'l' || c == 'L') {
2642 size = c;
2643 if (--fmtcnt >= 0)
2644 c = *fmt++;
2645 }
2646 }
2647 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002648 PyErr_SetString(PyExc_ValueError,
2649 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002650 goto error;
2651 }
2652 if (c != '%') {
2653 v = getnextarg(args, arglen, &argidx);
2654 if (v == NULL)
2655 goto error;
2656 }
2657 sign = 0;
2658 fill = ' ';
2659 switch (c) {
2660 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002661 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002662 len = 1;
2663 break;
2664 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002665 case 'r':
2666 if (PyUnicode_Check(v)) {
2667 fmt = fmt_start;
2668 goto unicode;
2669 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002670 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002671 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002672 else
2673 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002674 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002675 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002676 if (!PyString_Check(temp)) {
2677 PyErr_SetString(PyExc_TypeError,
2678 "%s argument has non-string str()");
2679 goto error;
2680 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002681 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002682 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002683 if (prec >= 0 && len > prec)
2684 len = prec;
2685 break;
2686 case 'i':
2687 case 'd':
2688 case 'u':
2689 case 'o':
2690 case 'x':
2691 case 'X':
2692 if (c == 'i')
2693 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002694 pbuf = formatbuf;
2695 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002696 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002697 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002698 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002699 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002700 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002701 if ((flags&F_ALT) &&
2702 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002703 pbuf[0] == '0' && pbuf[1] == c) {
2704 *res++ = *pbuf++;
2705 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002706 rescnt -= 2;
2707 len -= 2;
2708 width -= 2;
2709 if (width < 0)
2710 width = 0;
2711 }
2712 }
Guido van Rossume5372401993-03-16 12:15:04 +00002713 break;
2714 case 'e':
2715 case 'E':
2716 case 'f':
2717 case 'g':
2718 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002719 pbuf = formatbuf;
2720 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002721 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002722 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002723 sign = 1;
2724 if (flags&F_ZERO)
2725 fill = '0';
2726 break;
2727 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002728 pbuf = formatbuf;
2729 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002730 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002731 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 break;
2733 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002734 PyErr_Format(PyExc_ValueError,
2735 "unsupported format character '%c' (0x%x)",
2736 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002737 goto error;
2738 }
2739 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002740 if (*pbuf == '-' || *pbuf == '+') {
2741 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002742 len--;
2743 }
2744 else if (flags & F_SIGN)
2745 sign = '+';
2746 else if (flags & F_BLANK)
2747 sign = ' ';
2748 else
2749 sign = '\0';
2750 }
2751 if (width < len)
2752 width = len;
2753 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002754 reslen -= rescnt;
2755 rescnt = width + fmtcnt + 100;
2756 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002757 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002758 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 res = PyString_AsString(result)
2760 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002761 }
2762 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002763 if (fill != ' ')
2764 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002765 rescnt--;
2766 if (width > len)
2767 width--;
2768 }
2769 if (width > len && !(flags&F_LJUST)) {
2770 do {
2771 --rescnt;
2772 *res++ = fill;
2773 } while (--width > len);
2774 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002775 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002776 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002777 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002778 res += len;
2779 rescnt -= len;
2780 while (--width >= len) {
2781 --rescnt;
2782 *res++ = ' ';
2783 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002784 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002786 "not all arguments converted");
2787 goto error;
2788 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002789 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002790 } /* '%' */
2791 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002792 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 PyErr_SetString(PyExc_TypeError,
2794 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002795 goto error;
2796 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002797 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002799 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002800 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002801 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002802
2803 unicode:
2804 if (args_owned) {
2805 Py_DECREF(args);
2806 args_owned = 0;
2807 }
2808 /* Fiddle args right (remove the first argidx-1 arguments) */
2809 --argidx;
2810 if (PyTuple_Check(orig_args) && argidx > 0) {
2811 PyObject *v;
2812 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2813 v = PyTuple_New(n);
2814 if (v == NULL)
2815 goto error;
2816 while (--n >= 0) {
2817 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2818 Py_INCREF(w);
2819 PyTuple_SET_ITEM(v, n, w);
2820 }
2821 args = v;
2822 } else {
2823 Py_INCREF(orig_args);
2824 args = orig_args;
2825 }
2826 /* Paste rest of format string to what we have of the result
2827 string; we reuse result for this */
2828 rescnt = res - PyString_AS_STRING(result);
2829 fmtcnt = PyString_GET_SIZE(format) - \
2830 (fmt - PyString_AS_STRING(format));
2831 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2832 Py_DECREF(args);
2833 goto error;
2834 }
2835 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2836 format = result;
2837 /* Let Unicode do its magic */
2838 result = PyUnicode_Format(format, args);
2839 Py_DECREF(format);
2840 Py_DECREF(args);
2841 return result;
2842
Guido van Rossume5372401993-03-16 12:15:04 +00002843 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002844 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002845 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002846 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002847 }
Guido van Rossume5372401993-03-16 12:15:04 +00002848 return NULL;
2849}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002850
2851
2852#ifdef INTERN_STRINGS
2853
2854static PyObject *interned;
2855
2856void
Fred Drakeba096332000-07-09 07:04:36 +00002857PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002858{
2859 register PyStringObject *s = (PyStringObject *)(*p);
2860 PyObject *t;
2861 if (s == NULL || !PyString_Check(s))
2862 Py_FatalError("PyString_InternInPlace: strings only please!");
2863 if ((t = s->ob_sinterned) != NULL) {
2864 if (t == (PyObject *)s)
2865 return;
2866 Py_INCREF(t);
2867 *p = t;
2868 Py_DECREF(s);
2869 return;
2870 }
2871 if (interned == NULL) {
2872 interned = PyDict_New();
2873 if (interned == NULL)
2874 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002875 }
2876 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2877 Py_INCREF(t);
2878 *p = s->ob_sinterned = t;
2879 Py_DECREF(s);
2880 return;
2881 }
2882 t = (PyObject *)s;
2883 if (PyDict_SetItem(interned, t, t) == 0) {
2884 s->ob_sinterned = t;
2885 return;
2886 }
2887 PyErr_Clear();
2888}
2889
2890
2891PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002892PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002893{
2894 PyObject *s = PyString_FromString(cp);
2895 if (s == NULL)
2896 return NULL;
2897 PyString_InternInPlace(&s);
2898 return s;
2899}
2900
2901#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002902
2903void
Fred Drakeba096332000-07-09 07:04:36 +00002904PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002905{
2906 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002907 for (i = 0; i < UCHAR_MAX + 1; i++) {
2908 Py_XDECREF(characters[i]);
2909 characters[i] = NULL;
2910 }
2911#ifndef DONT_SHARE_SHORT_STRINGS
2912 Py_XDECREF(nullstring);
2913 nullstring = NULL;
2914#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002915#ifdef INTERN_STRINGS
2916 if (interned) {
2917 int pos, changed;
2918 PyObject *key, *value;
2919 do {
2920 changed = 0;
2921 pos = 0;
2922 while (PyDict_Next(interned, &pos, &key, &value)) {
2923 if (key->ob_refcnt == 2 && key == value) {
2924 PyDict_DelItem(interned, key);
2925 changed = 1;
2926 }
2927 }
2928 } while (changed);
2929 }
2930#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002931}